# Exploration in the English Premier League dataset for the 2020-2021 Season

# Exploratory Data Analysis

In [None]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline

In [None]:
# Load the EPL 2020-21 player statistics CSV into a DataFrame.
# NOTE(review): the relative '../input/...' path looks like a Kaggle input layout — adjust when running elsewhere.
epl = pd.read_csv('../input/english-premier-league202021/EPL_20_21.csv')

In [None]:
# Column names, dtypes and non-null counts.
epl.info()

In [None]:
# Summary statistics for the columns.
epl.describe()

In [None]:
# Preview the first four rows.
epl.head(4)

### Check for missing values

In [None]:
# Visual missing-value check: plot the boolean "is missing" mask of the whole
# table; any null would show up as a contrasting cell.
#no nulls
sns.heatmap(
    data=epl.isna(),
    cmap='viridis',
    cbar=False,
    yticklabels=False,
)

# Which club was the most aggressive on the pitch?

In [None]:
# List the distinct values of the Position column.
epl['Position'].unique() 

In [None]:
# Some players are listed at two positions.
# Every player whose Position label contains 'DF' is relabelled as plain 'DF',
# i.e. any other position listed alongside it is discarded.
# na=False: a missing Position counts as "not a defender" rather than putting
#           a NaN into the boolean mask, which .loc would reject.
# regex=False: 'DF' is a literal substring, not a pattern.
epl.loc[epl['Position'].str.contains('DF', na=False, regex=False), 'Position'] = 'DF'

In [None]:
# Build the defender-only table.
# .copy() makes it an independent DataFrame: a new column is assigned to it
# later, and writing into a filtered slice of `epl` would raise pandas'
# SettingWithCopyWarning.
defender = epl[epl['Position'] == 'DF'].copy()

In [None]:
# Summary statistics for the defender subset.
defender.describe()

In [None]:
# Preview the first defender row.
defender.head(1)

In [None]:
#to figure out the most aggresive players, I will create a new column named 'Total_Cards'
defender['Total_Cards'] = (defender['Yellow_Cards'] + defender['Red_Cards'])

In [None]:
defender.head(1)

In [None]:
# Box plot of defenders' card totals, one box per club.
plt.figure(figsize=(12, 4))
sns.boxplot(data=defender, y='Total_Cards', x='Club')
# Rotate the club labels to vertical.
plt.xticks(rotation=90)

 ### Aston Villa had the highest percentile of total cards

In [None]:
#creating dataframe for Aston Villa
av = defender[defender['Club']== 'Aston Villa']

In [None]:
av.describe()

In [None]:
av['Total_Cards'].mean().round(2)

In [None]:
defender['Total_Cards'].mean().round(2)

### The EPL average of total cards for all defenders was 2.57. The average for Aston Villa defenders was 4.43.

In [None]:
# Aston Villa defender(s) whose card total equals the squad maximum
# (every tied player is shown).
av.loc[av['Total_Cards'].eq(av['Total_Cards'].max())]

### Based on cards shown to defenders, Aston Villa appears to be the most aggressive club on the pitch, and Matty Cash received the most total cards for Aston Villa

## Who were the top 10 most aggressive defenders in the EPL?

In [None]:
top_10_agg = defender.nlargest(10,'Total_Cards')

In [None]:
plt.figure(figsize=(12,4))
sns.barplot(x='Name',y='Total_Cards',data=top_10_agg)
plt.xticks(rotation=45)

### Harry Maguire can be considered the most aggressive defender in the EPL, with a total of 11 cards

# Now which club had the most goals scored by a defender?

In [None]:
# Goals summed per club over defenders only, plotted from highest to lowest.
defender.groupby('Club')['Goals'].sum().sort_values(ascending=False).plot.bar()

### Chelsea are the club with the most goals scored by defenders (17)

# Which Defender Scored the most goals in the EPL?

In [None]:
# Highest-scoring defender(s).
# keep='all' lists every defender tied on the top goal tally, instead of only
# whichever tied row happens to come first in the data.
defender[['Name', 'Club', 'Goals']].nlargest(1, 'Goals', keep='all')

### Stuart Dallas, from Leeds United, was the highest scoring defender in the EPL

# Who were the 10 top playmakers in the EPL?

In [None]:
team_assists = epl['Assists'].groupby(epl['Club']).sum().sort_values(ascending=False) #total team assists 
team_assists

In [None]:
team_assists.plot(kind = 'bar')

### Manchester City leads all teams in total assists 

In [None]:
# The ten players with the most assists, showing name, club and assist count.
top_10_assists = epl.loc[:, ['Name', 'Club', 'Assists']].nlargest(n=10, columns='Assists')
top_10_assists

### Harry Kane led the EPL in assists (14). Both Tottenham and Manchester United have 2 players within the top 10 assists 

# Who were the top 10 players with most attempted passes?

In [None]:
# The ten players with the most attempted passes, with club and position.
most_pass_att = epl.loc[:, ['Name', 'Club', 'Passes_Attempted', 'Position']].nlargest(
    n=10, columns='Passes_Attempted'
)
most_pass_att

### Top 2 players with most pass attempts are from Liverpool and both are defenders

# Which players had the most accurate passes excluding the goal keeper and the defenders?

In [None]:
pass_acc = epl[(epl['Position'] != 'GK') & (epl['Position'] != 'DF')]

In [None]:
pass_acc[['Name','Club','Perc_Passes_Completed','Passes_Attempted']].nlargest(10,'Perc_Passes_Completed')

### There are players with very few passes attempted which heavily skews their percentage of passes completed. I will keep players who have a minimum of 1000 passes attempted

In [None]:
# Drop low-volume passers: keep only players with at least 1000 attempted passes,
# then rank the remainder by completion percentage.
pass_acc = pass_acc.loc[pass_acc['Passes_Attempted'].ge(1000)]
pass_acc.loc[:, ['Name', 'Club', 'Perc_Passes_Completed', 'Passes_Attempted']].nlargest(
    n=10, columns='Perc_Passes_Completed'
)

### Above are the 10 most accurate passers in the EPL (excluding goalkeepers and defenders, with a minimum of 1000 passes attempted)

# Which nation had the most aggressive players?
### For this question, I will compute the same Total_Cards column (yellow cards + red cards) used earlier, this time for all players

In [None]:
epl['Total_Cards'] = (epl['Yellow_Cards'] + epl['Red_Cards'])

In [None]:
epl['Total_Cards'].groupby(epl['Nationality']).sum().sort_values(ascending=False).nlargest(10)

### English players are by far the most aggressive, but this makes sense given the number of English players in the EPL

In [None]:
#the counts of player nationality in EPL
epl['Nationality'].value_counts().nlargest(5)

In [None]:
epl['Nationality'].value_counts().nlargest(5).plot(kind='bar')

### There are 192 English players in the EPL; French players are the second most numerous (31)