In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import r2_score
from pingouin import corr
import dataframe_image as dfi
import seaborn as sns

### Chelsea 2021 Roster Analysis

In [2]:
ch_df = pd.read_csv('chelsea_champions_league_roster_2021.csv')

In [3]:
ch_df.drop(columns=['Unnamed: 0'], inplace=True)

In [4]:
ch_df.head()

Unnamed: 0,short_name,dob,club_name,overall,Month,MonthNumber,DayOfYear
0,N. Kanté,1991-03-29,Chelsea,88,March,3,88
1,K. Havertz,1999-06-11,Chelsea,85,June,6,162
2,T. Werner,1996-03-06,Chelsea,85,March,3,66
3,H. Ziyech,1993-03-19,Chelsea,85,March,3,78
4,Thiago Silva,1984-09-22,Chelsea,85,September,9,266


###### Describe

In [5]:
ch_df.describe()

Unnamed: 0,overall,MonthNumber,DayOfYear
count,33.0,33.0,33.0
mean,79.212121,7.575758,213.757576
std,4.248217,3.766881,115.373044
min,71.0,1.0,10.0
25%,76.0,4.0,101.0
50%,79.0,8.0,240.0
75%,82.0,11.0,312.0
max,88.0,12.0,362.0


In [6]:
ch_df['dob'] = pd.to_datetime(ch_df['dob'])

In [7]:
ch_df['Month'] = ch_df['dob'].dt.month_name()

In [8]:
chelseaMonthNumber = ch_df.groupby('MonthNumber')['MonthNumber'].count()

In [9]:
chelseaMonthNumber

MonthNumber
1     3
3     5
4     1
5     1
6     2
7     2
8     3
9     4
10    3
11    1
12    8
Name: MonthNumber, dtype: int64

In [10]:
chelseaMonthdf = pd.DataFrame([[i, chelseaMonthNumber[i]] for i in chelseaMonthNumber.index])

In [11]:
chelseaMonthdf.rename(columns={0:'Month', 1: 'Total Players'}, inplace=True)

In [19]:
chelseaMonthdf.to_csv('ch_monthNumerical.csv')

In [20]:
chelseaMonthdf

Unnamed: 0,Month,Total Players
0,1,3
1,3,5
2,4,1
3,5,1
4,6,2
5,7,2
6,8,3
7,9,4
8,10,3
9,11,1


In [13]:
ch_df.groupby('Month')['Month'].count()

Month
April        1
August       3
December     8
January      3
July         2
June         2
March        5
May          1
November     1
October      3
September    4
Name: Month, dtype: int64

In [14]:
monthDistChelsea = pd.DataFrame(([['January', 3],
            ['February', 0],
            ['March', 5],
            ['April', 1],
            ['May', 1],
            ['June', 2],
            ['July', 2],
             ['August', 3],
             ['September', 4],
             ['October', 3],
             ['November', 1],
             ['December', 8]]
            ))

In [15]:
monthDistChelsea.rename(columns={'Month':'Months', 1:'Total Players'}, inplace=True)

###### Bar Plot

In [17]:
sns.barplot(x='Month', y='Total Players', data=monthDistChelsea)
plt.xticks(rotation=90)
plt.title('Chelsea 2021 Champions League Team')

ValueError: Could not interpret input 'Month'

In [None]:
monthDistChelsea.to_csv('Chelsea')

###### Percentage Calculation

In [None]:
(monthDistChelsea['Total Players'].iloc[6:].sum()/monthDistChelsea['Total Players'].sum())*100

###### Regression Line

In [None]:
X = chelseaMonthdf['Month']
Y = chelseaMonthdf['Total Players']

In [None]:
sns.set_style('whitegrid')
plt.title('Chelsea 2021 Champions League Team month of birth')
plt.ylabel('Total Players')
plt.xlabel('Month')
plt.plot(X,Y,"b.", ms=10, mec="k")
z = np.polyfit(X, Y, 1)
y_hat = np.poly1d(z)(X)

plt.plot(X, y_hat, "r--", lw=1)
text = f"$y={z[0]:0.3f}\;x{z[1]:+0.3f}$\n$R^2 = {r2_score(Y,y_hat):0.3f}$"
plt.gca().text(1.05, 0.95, text,transform=plt.gca().transAxes,
     fontsize=14, verticalalignment='top')

###### Pearsons

In [None]:
dfi.export(chelseaMonthdf[['Month', 'Total Players']].corr(), 'ChelseaNumericMonthCorr.png' )

In [18]:
monthDistChelsea

Unnamed: 0,0,Total Players
0,January,3
1,February,0
2,March,5
3,April,1
4,May,1
5,June,2
6,July,2
7,August,3
8,September,4
9,October,3
