# <center>Premier League Player Analysis</center>

# Importing the Libraries

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import plotly.figure_factory as ff
import plotly.graph_objects as go
import numpy as np
import plotly.express as px
import os
# Lazily build the full path of every file found under the Kaggle input
# directory, then print them one per line.
input_files = (
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in input_files:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Dataset

**The dataset includes the statistics of the Premier League players. As seen below, it contains the following features.**

In [None]:
# Load the player statistics CSV into `data`, the frame every later cell reads
# from, and preview its first five rows.
data = pd.read_csv(
    filepath_or_buffer='../input/english-premier-league202021/EPL_20_21.csv',
)
data.head(n=5)

# Top 10 Goal Kings

In [None]:
# Rank players by goals and keep the ten highest scorers.
top_scorers = data.nlargest(10, 'Goals')[['Name', 'Goals']]

# One bar per player: bar height and colour both encode the goal tally, which
# is also printed above each bar.
fig_bar = px.bar(data_frame=top_scorers, x='Name', y='Goals', color='Goals', text='Goals')
fig_bar.update_traces(
    marker_line_color='#000000',  # black outline around every bar
    marker_line_width=2,
    texttemplate='%{text:.2s}',
    textposition='outside',
)
# Centred chart title in a large font.
fig_bar.update_layout(
    title=dict(text='Top 10 Goal Kings of the League', x=0.5, font=dict(size=30)),
)
fig_bar.show()

**According to the graph above, the Goal King of the season is Harry Kane with 23 goals, followed by Mohamed Salah with 22 goals, and so on.**

# Top 10 Assist Kings

In [None]:
# Rank players by assists and keep the ten best providers.
top_providers = data.nlargest(10, 'Assists')[['Name', 'Assists']]

# One bar per player: bar height and colour both encode the assist tally,
# which is also printed above each bar.
fig_bar = px.bar(data_frame=top_providers, x='Name', y='Assists', color='Assists', text='Assists')
fig_bar.update_traces(
    marker_line_color='#000000',  # black outline around every bar
    marker_line_width=2,
    texttemplate='%{text:.2s}',
    textposition='outside',
)
# Centred chart title in a large font.
fig_bar.update_layout(
    title=dict(text='Top 10 Assist Kings of the League', x=0.5, font=dict(size=30)),
)
fig_bar.show()

**According to the graph above, the Assist King of the season is Harry Kane with 14 assists. Kevin De Bruyne and Bruno Fernandes follow him with 12 assists each.**

**Harry Kane is both the Goal King and the Assist King of this season. Congratulations on his success!**

# Top 10 DF Players that have the most Red Card

In [None]:
# Defenders are the players whose position string contains "DF".
# `DF_players` is reused by the yellow-card cell further down.
is_defender = data['Position'].str.contains("DF")
DF_players = data[is_defender]

# Ten defenders with the most red cards, keeping their yellow cards alongside.
card_columns = ["Red_Cards", "Yellow_Cards"]
DF_players_red = DF_players.nlargest(10, 'Red_Cards')[['Name'] + card_columns]

# Both card counts are plotted per player, each in its matching colour.
card_colors = {"Red_Cards": "red", "Yellow_Cards": "yellow"}
fig = px.bar(DF_players_red, x="Name", y=card_columns, color_discrete_map=card_colors)
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Top 10 DF Players who have the most Red Card', x=0.5, font=dict(size=30)),
)
fig.show()

**Lewis Dunk is the DF player with the most red cards this season.**

# Top 10 DF Players that have the most Yellow Card

In [None]:
# Ten defenders with the most yellow cards, keeping their red cards alongside.
# `DF_players` comes from the red-card cell above.
booking_columns = ["Red_Cards", "Yellow_Cards"]
DF_players_yellow = DF_players.nlargest(10, 'Yellow_Cards')[['Name'] + booking_columns]

# Both card counts are plotted per player, each in its matching colour.
booking_colors = {"Red_Cards": "red", "Yellow_Cards": "yellow"}
fig = px.bar(DF_players_yellow, x="Name", y=booking_columns, color_discrete_map=booking_colors)
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Top 10 DF Players who have the most Yellow Card', x=0.5, font=dict(size=30)),
)
fig.show()

**Harry Maguire is the DF player with the most yellow cards this season.**

# Top 10 Players due to Ages

In [None]:
# Ten players with the highest age.
oldest_players = data.nlargest(10, 'Age')[['Name', 'Age']]

# One bar per player: bar height and colour both encode the age, which is also
# printed above each bar.
fig_bar = px.bar(data_frame=oldest_players, x='Name', y='Age', color='Age', text='Age')
fig_bar.update_traces(
    marker_line_color='#000000',  # black outline around every bar
    marker_line_width=2,
    texttemplate='%{text:.2s}',
    textposition='outside',
)
# Centred chart title in a large font.
fig_bar.update_layout(
    title=dict(text='Top 10 Players due to Ages', x=0.5, font=dict(size=30)),
)
fig_bar.show()

**Willy Caballero was the oldest player of this season.**

# Density Plot of the Ages

In [None]:
# Histogram of player ages, normalised to a density, with a KDE curve on top.
#
# `sns.distplot` has been deprecated since seaborn 0.11 and is scheduled for
# removal, so the plot uses `sns.histplot` instead. The keyword arguments below
# follow seaborn's migration recipe for reproducing distplot's look
# (density-scaled bars, KDE extended 3 bandwidths past the data, translucent
# bars). Bin edges may differ slightly from distplot's.
plt.figure(figsize=(15, 8))
sns.histplot(
    data['Age'],
    kde=True,
    stat='density',
    kde_kws=dict(cut=3),
    color='red',
    alpha=0.4,
    edgecolor=(1, 1, 1, 0.4),
)
plt.xlabel("Ages", fontsize=12)
plt.ylabel('Density', fontsize=12)
plt.title("Density Plot of the Ages", fontsize=16)
# Render explicitly so the cell output is the figure rather than a Text repr.
plt.show()

# Top 10 Players due to Passes Attempted

In [None]:
# Ten players with the most attempted passes.
top_passers = data.nlargest(10, 'Passes_Attempted')[['Name', 'Passes_Attempted']]

# One bar per player: bar height and colour both encode the pass count, which
# is also printed above each bar.
fig_bar = px.bar(data_frame=top_passers,
                 x='Name', y='Passes_Attempted', color='Passes_Attempted', text='Passes_Attempted')
fig_bar.update_traces(marker=dict(line=dict(color='#000000', width=2)))  # black bar outlines
fig_bar.update_layout(title_text='Top 10 Players due to Passes Attempted',
                      title_x=0.5, title_font=dict(size=30))  # centred, large title
# Print the full integer with a thousands separator. The previous '.2s' format
# keeps only two significant digits (e.g. 3214 -> "3.2k"), which makes
# neighbouring bars in a top-10 ranking show the same label.
fig_bar.update_traces(texttemplate='%{text:,d}', textposition='outside')
fig_bar.show()

# Top 10 Average Red and Yellow Cards due to Nationality

In [None]:
# Per-nationality mean of every numeric column.
# numeric_only=True is needed on pandas >= 2.0, where averaging a frame that
# still contains non-numeric columns (e.g. the text column `Position`) raises
# instead of silently skipping them. On older pandas it gives the same result
# as before without the deprecation warning.
Nat_Card_avr = data.groupby(by=['Nationality']).mean(numeric_only=True)

# Ten nationalities with the highest average red-card count; the average
# yellow-card count is kept for the second series.
Nat_Card_avr_top = Nat_Card_avr.nlargest(10, 'Red_Cards')[['Red_Cards', 'Yellow_Cards']]

# Nationality lives in the index after the groupby, so the index is passed as x.
fig = px.bar(Nat_Card_avr_top, x=Nat_Card_avr_top.index, y=["Red_Cards", "Yellow_Cards"],
             color_discrete_map={
                 "Red_Cards": "red",
                 "Yellow_Cards": "yellow"}
             )
fig.update_layout(title_text='Top 10 Average Red and Yellow Cards due to Nationality',
                  title_x=0.5, title_font=dict(size=30))
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))  # black bar outlines
fig.show()

# Nationalities of the League

In [None]:
# Count the rows (players) per nationality and expose the count as a column.
nationality_sizes = data.groupby(pd.Grouper(key='Nationality')).size()
Nationality = nationality_sizes.reset_index(name='count')

# Treemap with one tile per nationality, sized by its player count and
# annotated with both the label and the count.
fig = px.treemap(data_frame=Nationality, path=['Nationality'], values='count')
fig.update_traces(textinfo="label+value")
fig.update_layout(
    title=dict(text='Nationalities of the League', x=0.5, font=dict(size=30)),
)
fig.show()

# Number of Players for each Club

In [None]:
# Count the rows (players) per club and expose the count as a column.
club_sizes = data.groupby(pd.Grouper(key='Club')).size()
NumberofPlayers = club_sizes.reset_index(name='count')

# Treemap with one tile per club, sized by its player count and annotated with
# both the label and the count.
fig = px.treemap(data_frame=NumberofPlayers, path=['Club'], values='count')
fig.update_traces(textinfo="label+value")
fig.update_layout(
    title=dict(text='Number of Players for each Club', x=0.5, font=dict(size=30)),
)
fig.show()

# Goals by the Teams

In [None]:
# Sum the goals of every club's players and order clubs from most to fewest.
club_goal_totals = data.groupby('Club')['Goals'].sum()
goalsbyteam = club_goal_totals.sort_values(ascending=False).to_frame()

# Club names sit in the index, so the index is used for the x axis.
fig = px.bar(goalsbyteam, x=goalsbyteam.index, y='Goals', color='Goals')
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Number of Goals by the Teams', x=0.5, font=dict(size=30)),
    xaxis_categoryorder='total descending',  # tallest bar first
)
fig.show()

# Number of Assists by the Teams

In [None]:
# Sum the assists of every club's players and order clubs from most to fewest.
club_assist_totals = data.groupby('Club')['Assists'].sum()
assistsbyteam = club_assist_totals.sort_values(ascending=False).to_frame()

# Club names sit in the index, so the index is used for the x axis.
fig = px.bar(assistsbyteam, x=assistsbyteam.index, y='Assists', color='Assists')
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Number of Assists by the Teams', x=0.5, font=dict(size=30)),
    xaxis_categoryorder='total descending',  # tallest bar first
)
fig.show()

# Number of Red Cards by the Teams

In [None]:
# Sum the red cards of every club's players and order clubs from most to fewest.
club_red_totals = data.groupby('Club')['Red_Cards'].sum()
redcardbyteam = club_red_totals.sort_values(ascending=False).to_frame()

# Club names sit in the index, so the index is used for the x axis.
fig = px.bar(redcardbyteam, x=redcardbyteam.index, y='Red_Cards', color='Red_Cards')
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Number of Red Cards by the Teams', x=0.5, font=dict(size=30)),
    xaxis_categoryorder='total descending',  # tallest bar first
)
fig.show()

# Number of Yellow Cards by the Teams

In [None]:
# Sum the yellow cards of every club's players and order clubs from most to fewest.
club_yellow_totals = data.groupby('Club')['Yellow_Cards'].sum()
yellowcardbyteam = club_yellow_totals.sort_values(ascending=False).to_frame()

# Club names sit in the index, so the index is used for the x axis.
fig = px.bar(yellowcardbyteam, x=yellowcardbyteam.index, y='Yellow_Cards', color='Yellow_Cards')
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Number of Yellow Cards by the Teams', x=0.5, font=dict(size=30)),
    xaxis_categoryorder='total descending',  # tallest bar first
)
fig.show()

# Density Plot of the Matches and Starts

In [None]:
# Overlaid density histograms (with KDE curves) of matches played and matches
# started.
#
# `sns.distplot` has been deprecated since seaborn 0.11 and is scheduled for
# removal, so both series are drawn with `sns.histplot`. The shared keyword
# arguments follow seaborn's migration recipe for reproducing distplot's look;
# bin edges may differ slightly from distplot's.
plt.figure(figsize=(15, 8))
density_style = dict(
    kde=True,
    stat='density',
    kde_kws=dict(cut=3),
    alpha=0.4,
    edgecolor=(1, 1, 1, 0.4),
)
# Each series carries its own label, so the legend is tied to the right artist.
# Passing a bare list of names to plt.legend() binds them to artists in draw
# order instead, which can attach both names to the first series.
sns.histplot(data['Matches'], color='red', label='Matches', **density_style)
sns.histplot(data['Starts'], color='blue', label='Starts', **density_style)
plt.xlabel("Matches and Starts", fontsize=12)
plt.ylabel('Density', fontsize=12)
plt.legend(loc='upper right')
plt.title("Density Plot of the Matches and Starts", fontsize=16)
# Render explicitly so the cell output is the figure rather than a Text repr.
plt.show()

# Distribution of the Goals

In [None]:
# Number of players for every distinct goal tally.
players_per_tally = data.groupby(pd.Grouper(key='Goals')).size()
Grouped_NumofGoals = players_per_tally.reset_index(name='count')
labels = Grouped_NumofGoals['Goals'].values
values = Grouped_NumofGoals['count'].values

# One slice per goal tally, labelled with the tally and its share of players;
# slices are slightly translucent with a black outline.
goal_slices = go.Pie(
    labels=labels,
    values=values,
    opacity=0.8,
    textinfo='percent+label',
    marker=dict(line=dict(color='#000000', width=2)),
)
fig = go.Figure(data=[goal_slices])
fig.update_layout(
    title=dict(text='Distribution of the Goals', x=0.5, font=dict(size=32)),
)
fig.show()

# All Scored DF Players

In [None]:
# Defenders (position string contains "DF"), minus those whose goal count is 0.
defenders = data[data['Position'].str.contains("DF")]
DF_players_scored = defenders[defenders['Goals'] != 0]

# One bar per remaining defender, coloured by goal count and ordered from the
# highest total to the lowest.
fig = px.bar(DF_players_scored, x='Name', y='Goals', color='Goals')
fig.update_traces(marker_line=dict(color='#000000', width=2))
fig.update_layout(
    title=dict(text='Defenders with most goals!! (Wow!)', x=0.5, font=dict(size=30)),
    xaxis_categoryorder='total descending',
)
fig.show()

# Scored and Assisted GK Players

In [None]:
# Goalkeepers are the players whose position string contains "GK".
GK_players = data[data['Position'].str.contains("GK")]

# Keep only keepers who actually recorded a goal or an assist. The previous
# nlargest(10, 'Goals') padded the chart with arbitrary zero-goal keepers
# (ties at 0 are resolved by row order) and could leave out a keeper whose
# only contribution was an assist.
has_contribution = (GK_players['Goals'] > 0) | (GK_players['Assists'] > 0)
GK_players_top = GK_players.loc[has_contribution, ['Name', 'Goals', 'Assists']]

# No color_discrete_map here: the old one was keyed on Red_Cards/Yellow_Cards,
# which are not plotted in this chart, so it never affected the colours.
fig = px.bar(GK_players_top, x="Name", y=["Goals", "Assists"])
fig.update_layout(title_text='Scored and Assisted GK Players',
                  title_x=0.5, title_font=dict(size=30))
fig.update_layout(xaxis={'categoryorder': 'total descending'})  # largest total first
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))  # black bar outlines
fig.show()

# Graph of Players who have the highest Penalty_Goals/Penalty_Attempted Ratio

In [None]:
# Penalty conversion rate = penalties scored / penalties attempted, restricted
# to players who attempted at least one penalty (which also avoids 0/0 for
# everybody else). This selects the same players as the earlier
# 1/(attempted/goals) + inf/NaN clean-up, but the ratio is computed directly
# and stored under a real column name. Previously the column was the integer
# 0, so the y axis and colour bar were labelled "0".
took_penalty = data['Penalty_Attempted'] > 0
PenaltyPerAttempted = pd.DataFrame({
    'Name': data.loc[took_penalty, 'Name'],
    'Penalty_Conversion': (data.loc[took_penalty, 'Penalty_Goals']
                           / data.loc[took_penalty, 'Penalty_Attempted']),
}).dropna(axis=0)  # drop rows with a missing name or missing goal count

fig = px.bar(data_frame=PenaltyPerAttempted, x='Name',
             y='Penalty_Conversion', color='Penalty_Conversion')
fig.update_layout(title_text='Graph of Players who have the highest Penalty_Goals/Penalty_Attempted Ratio',
                  title_x=0.5, title_font=dict(size=20))
fig.update_layout(xaxis={'categoryorder': 'total descending'})  # best ratio first
fig.update_traces(marker=dict(line=dict(color='#000000', width=2)))  # black bar outlines
fig.show()

# Top 25 Players due to Percentage of Passes Completed

In [None]:
# Twenty-five players with the highest pass-completion percentage.
most_accurate = data.nlargest(25, 'Perc_Passes_Completed')[['Name', 'Perc_Passes_Completed']]

# One bar per player: bar height and colour both encode the percentage, which
# is also printed above each bar.
fig_bar = px.bar(data_frame=most_accurate,
                 x='Name', y='Perc_Passes_Completed', color='Perc_Passes_Completed', text='Perc_Passes_Completed')
fig_bar.update_traces(marker=dict(line=dict(color='#000000', width=2)))  # black bar outlines
fig_bar.update_layout(title_text='Top 25 Players due to Percentage of Passes Completed',
                      title_x=0.5, title_font=dict(size=30))
# Show one decimal place. The previous '.2s' format rounds to two significant
# digits, so percentages that differ only after the decimal point were printed
# as the same label.
fig_bar.update_traces(texttemplate='%{text:.1f}', textposition='outside')
fig_bar.show()

# Correlation Graph

In [None]:
# Pairwise correlation of the numeric columns, drawn as an annotated heatmap on
# a fixed [-1, 1] colour scale.
plt.figure(figsize=(15, 8))
# Restrict to numeric columns first: on pandas >= 2.0 DataFrame.corr() raises
# when the frame still contains text columns (e.g. `Position`) instead of
# skipping them. select_dtypes is used rather than corr(numeric_only=True)
# because that keyword only exists from pandas 1.5 onward.
numeric_data = data.select_dtypes(include='number')
correlation = sns.heatmap(numeric_data.corr(), vmin=-1, vmax=1, annot=True, linewidths=1, linecolor='black')
correlation.set_title('Correlation Graph of the Dataset', fontdict={'fontsize': 24})
# Render explicitly so the cell output is the figure rather than a Text repr.
plt.show()