In [None]:
import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

In [None]:
import numpy as np
import pandas as pd 
from matplotlib import pyplot as plt
import seaborn as sns
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases dropped the old names, so fall back to the new name whenever
# the legacy one is not available.
plt.style.use('seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8')

%matplotlib inline

In [None]:
# Load the EPL_20_21 CSV into a DataFrame and show its (rows, columns) shape.
DATA_PATH = '../input/english-premier-league202021/EPL_20_21.csv'
df = pd.read_csv(DATA_PATH)
df.shape

In [None]:
# Column names, dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Preview the first rows of the table.
df.head()

# **PL Clubs' Stats**

In [None]:
# Distinct club names, in the order they first appear in the data.
clubs = df['Club'].unique()

In [None]:
# Group the rows by club so each club's subset can be fetched with get_group().
teams = df.groupby('Club')

In [None]:
def get_total_category_data(col):
    """Sum one statistic over every club's rows.

    Reads the notebook-level ``clubs`` (club names) and ``teams`` (the data
    grouped by club) and adds up column ``col`` within each club's group.

    Parameters
    ----------
    col : str
        Name of the column to total.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``clubs``, in that order, with the columns
        ``'Club'`` and ``'Total <col>'``.
    """
    per_club_sums = [teams.get_group(name)[col].sum() for name in clubs]
    return pd.DataFrame({'Club': clubs, f'Total {col}': per_club_sums})

# **Total goals by each club**

In [None]:
# Goals summed per club.
total_goals = get_total_category_data('Goals')
total_goals

## **Club with highest Goals**

In [None]:
# Row(s) whose goal total equals the maximum; tied clubs are all shown.
total_goals.loc[lambda t: t['Total Goals'] == t['Total Goals'].max()]

## **Club with fewest Goals**

In [None]:
# Row(s) whose goal total equals the minimum; tied clubs are all shown.
total_goals.loc[lambda t: t['Total Goals'] == t['Total Goals'].min()]

In [None]:
# Horizontal bar chart of total goals, one bar per club.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(data=total_goals, x='Total Goals', y='Club', ax=ax)
ax.set_title("Club and Total Goals")
plt.show()

# **Total Assists of each club**

In [None]:
# Assists summed per club.
total_assists = get_total_category_data('Assists')
total_assists

## **Club with highest Assists**

In [None]:
# Row(s) whose assist total equals the maximum; tied clubs are all shown.
total_assists.loc[lambda t: t['Total Assists'] == t['Total Assists'].max()]

## **Club with fewest Assists**

In [None]:
# Row(s) whose assist total equals the minimum; tied clubs are all shown.
total_assists.loc[lambda t: t['Total Assists'] == t['Total Assists'].min()]

In [None]:
# Horizontal bar chart of total assists, one bar per club.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(data=total_assists, x='Total Assists', y='Club', ax=ax)
ax.set_title("Club and Total Assists")
plt.show()

# **Penalties Attempted vs Penalties Scored**

In [None]:
# Per-club totals of penalties scored and penalties attempted.
total_Pgoals = get_total_category_data('Penalty_Goals') 
total_Pattempted = get_total_category_data('Penalty_Attempted')

In [None]:
# Join the two totals on 'Club' so each row holds attempted and scored penalties.
penalties = total_Pattempted.merge(total_Pgoals,on='Club')

In [None]:
# Display the combined penalties table.
penalties

In [None]:
# Grouped horizontal bars: for every club, penalties attempted (green) next to
# penalties scored (blue).
# Bar positions come from the table length rather than a hard-coded 20, so the
# chart still works when the number of clubs differs.
y = np.arange(len(penalties))
plt.figure(figsize=(10,15))
plt.barh(y, penalties['Total Penalty_Attempted'], label='Penalties Attempted', color='green', height=0.4)
# Shift the second series by one bar height so the two bars sit side by side.
plt.barh(y + 0.4, penalties['Total Penalty_Goals'], label='Penalties Scored', color='blue', height=0.4)
plt.title("Penalties Attempted vs Penalties Scored")
plt.yticks(y, penalties['Club'])
plt.ylabel('Club')
plt.xlabel("Number")
# Flip the axis so the first club in the table is drawn at the top.
plt.gca().invert_yaxis()
plt.legend()
plt.show()

# **Yellow cards vs Red cards**

In [None]:
# Per-club totals of yellow cards and red cards.
Ycards = get_total_category_data('Yellow_Cards')
Rcards = get_total_category_data('Red_Cards')

In [None]:
# Join the two totals on 'Club' so each row holds yellow and red card counts.
cards = Ycards.merge(Rcards,on='Club')
cards

In [None]:
# Grouped horizontal bars: for every club, yellow cards next to red cards.
# Bar positions come from the table length rather than a hard-coded 20, so the
# chart still works when the number of clubs differs.
y = np.arange(len(cards))
plt.figure(figsize=(10,15))
plt.barh(y, cards['Total Yellow_Cards'], label='Yellow Cards', color='yellow', height=0.4)
# Shift the second series by one bar height so the two bars sit side by side.
plt.barh(y + 0.4, cards['Total Red_Cards'], label='Red Cards', color='red', height=0.4)
plt.title("Yellow cards vs Red cards")
plt.yticks(y, cards['Club'])
plt.ylabel('Club')
plt.xlabel("Number")
# Flip the axis so the first club in the table is drawn at the top.
plt.gca().invert_yaxis()
plt.legend()
plt.show()

# **Club with highest Yellow Cards**

In [None]:
# Club(s) with the most yellow cards; tied clubs are all shown.
highest_yellow = cards['Total Yellow_Cards'].eq(cards['Total Yellow_Cards'].max())
cards.loc[highest_yellow, ['Club', 'Total Yellow_Cards']]

# **Club with highest Red Cards**

In [None]:
# Club(s) with the most red cards; tied clubs are all shown.
highest_red = cards['Total Red_Cards'].eq(cards['Total Red_Cards'].max())
cards.loc[highest_red, ['Club', 'Total Red_Cards']]

# **Passes Attempted**

In [None]:
# Attempted passes summed per club.
total_passes = get_total_category_data('Passes_Attempted')
total_passes

In [None]:
# Club(s) with the most attempted passes; tied clubs are all shown.
highest_passes = total_passes['Total Passes_Attempted'].eq(total_passes['Total Passes_Attempted'].max())
total_passes.loc[highest_passes]

In [None]:
# Horizontal bar chart of total attempted passes, one bar per club.
fig, ax = plt.subplots(figsize=(10, 10))
sns.barplot(data=total_passes, x='Total Passes_Attempted', y='Club', ax=ax)
ax.set_title("Club and Total Passes")
plt.show()

# **Club and Nationality**

In [None]:
# One count plot per club: number of rows for each nationality in that club.
for club_name in clubs:
    nationalities = teams.get_group(club_name)[['Nationality']]
    sns.countplot(data=nationalities, y='Nationality')
    plt.title(club_name)
    plt.show()

# **Players' stats**

# **Top 5s**

In [None]:
def top5(df, by):
    """Return the five rows of ``df`` with the largest values in ``by``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to rank.
    by : str or list of str
        Column name(s) to sort on; passed to ``DataFrame.sort_values``.

    Returns
    -------
    pandas.DataFrame
        At most five rows, sorted by ``by`` in descending order.
    """
    # A named function (instead of a lambda bound to a name) gets a proper
    # __name__ in tracebacks and a place for this docstring.
    return df.sort_values(by=by, ascending=False).head(5)

# **Top 5 Goal Scorers**

In [None]:
# Rank by goals; penalty goals are shown alongside for context.
scorer_columns = ['Name', 'Club', 'Goals', 'Penalty_Goals']
goal_scorers = df[scorer_columns]
top5(goal_scorers, 'Goals')

# **Top 5 Assist Providers**

In [None]:
# Rank by assists.
assist_columns = ['Name', 'Club', 'Assists']
assist_providers = df[assist_columns]
top5(assist_providers, 'Assists')

# **Top 5 passes attempted**

In [None]:
# Rank by attempted passes; position is kept for context.
passing_columns = ['Name', 'Club', 'Position', 'Passes_Attempted']
passes_attempted = df[passing_columns]
top5(passes_attempted, 'Passes_Attempted')

# **Top 5 yellow cards**

In [None]:
# Rank by yellow cards.
yellow_columns = ['Name', 'Club', 'Yellow_Cards']
yellow_cards = df[yellow_columns]
top5(yellow_cards, 'Yellow_Cards')

# **Top 5 red cards**

In [None]:
# Rank by red cards.
red_columns = ['Name', 'Club', 'Red_Cards']
red_cards = df[red_columns]
top5(red_cards, 'Red_Cards')

# **Top 5 Most Minutes played**

In [None]:
# Rank by the 'Mins' column.
minutes_columns = ['Name', 'Club', 'Mins']
most_time_played = df[minutes_columns]
top5(most_time_played, 'Mins')

# **Top Player of each club by category**

In [None]:
def club_top_player(category):
    """Find each club's leading row for one statistic.

    Reads the notebook-level ``clubs`` (club names) and ``teams`` (the data
    grouped by club). For every club, the first row (in row order) whose
    ``category`` value equals the club maximum is reported, so ties resolve
    to the earliest row.

    Parameters
    ----------
    category : str
        Name of the column to rank by.

    Returns
    -------
    pandas.DataFrame
        One row per entry of ``clubs``, in that order, with the columns
        ``'Club'``, ``'Name'`` and ``category``. A club with no non-missing
        value in ``category`` gets ``None`` / NaN instead of raising.
    """
    names = []
    top = []
    for club in clubs:
        squad = teams.get_group(club)[['Name', 'Club', category]]
        values = squad[category]
        leaders = squad[values == values.max()]
        if leaders.empty:
            # Every value is missing: max() is NaN and NaN never compares
            # equal, so nothing matches. Record the gap instead of failing
            # with IndexError on an empty selection.
            names.append(None)
            top.append(float('nan'))
        else:
            names.append(leaders['Name'].iloc[0])
            top.append(leaders[category].iloc[0])
    return pd.DataFrame({'Club': clubs, 'Name': names, f'{category}': top})

In [None]:
# Top scorer of each club
club_top_player(category='Goals')

In [None]:
# Top assist provider of each club
club_top_player(category='Assists')

In [None]:
# Highest 'Matches' value at each club
club_top_player(category='Matches')

In [None]:
# Highest 'Mins' value at each club
club_top_player(category='Mins')

In [None]:
# Highest 'Passes_Attempted' value at each club
club_top_player(category='Passes_Attempted')