# Sports Data Analysis

In [1]:
# Import necessary libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load dataset from csv file

df = pd.read_csv("../DATA/sports_data.csv")
df.head()

Unnamed: 0,Name,Sport,Goals,Assists,Fouls,Minutes Played,Yellow,Red Cards,Team
0,Alex,Basketball,4,5,8,36,1,0,A
1,Bob,Soccer,0,2,1,31,0,0,B
2,Charlie,Basketball,2,2,6,34,1,0,A
3,David,Soccer,1,7,6,9,0,0,B
4,Eve,Basketball,8,2,6,29,0,0,A


In [3]:
# Names of columns with object dtype
column_names = []
for (columns, values) in df.items():
    if values.dtypes == 'object':
        column_names.append(columns)
print(column_names)

['Name', 'Sport', 'Team']


In [4]:
# Memory usage of each column

df.memory_usage(deep=True)

Index              132
Name              1075
Sport             1140
Goals              160
Assists            160
Fouls              160
Minutes Played     160
Yellow             160
Red Cards          160
Team              1000
dtype: int64

In [5]:
# Sum of memory usage in kilobytes

df.memory_usage(deep=True).sum()/1024

4.2060546875

In [6]:
# Convert column with object dtype to category
df2 = df.astype({'Name':'category','Sport':'category','Team':'category'})
df.dtypes

Name              object
Sport             object
Goals              int64
Assists            int64
Fouls              int64
Minutes Played     int64
Yellow             int64
Red Cards          int64
Team              object
dtype: object

In [7]:
# Sum of memory usage in kilobytes of df2
df2.memory_usage(deep=True).sum()/1024

3.1376953125

In [8]:
# Rename Yellow column to to Yellow Card

df2 = df2.rename(columns={'Yellow':'Yellow Card'})
df2.head()

Unnamed: 0,Name,Sport,Goals,Assists,Fouls,Minutes Played,Yellow Card,Red Cards,Team
0,Alex,Basketball,4,5,8,36,1,0,A
1,Bob,Soccer,0,2,1,31,0,0,B
2,Charlie,Basketball,2,2,6,34,1,0,A
3,David,Soccer,1,7,6,9,0,0,B
4,Eve,Basketball,8,2,6,29,0,0,A


In [9]:
# Dataframe of player with over 6 fouls
players_fouls = df2.query("Fouls > 6").reset_index(drop=True)
players_fouls

Unnamed: 0,Name,Sport,Goals,Assists,Fouls,Minutes Played,Yellow Card,Red Cards,Team
0,Alex,Basketball,4,5,8,36,1,0,A
1,George,Basketball,5,8,8,3,1,0,A
2,Harry,Soccer,9,6,8,13,1,0,B
3,Ivan,Basketball,7,9,8,30,1,0,A
4,Katie,Basketball,7,8,9,6,0,0,A
5,Nate,Soccer,0,0,7,44,1,0,B
6,Olivia,Basketball,3,0,8,5,1,0,A


In [10]:
# Save the dataframe to csv file
players_fouls.to_csv("../DATA/data/players_with_over6_fouls.csv")