In [None]:
# Notebook setup: numeric/tabular libraries, plotting libraries and display options.
import numpy as np 
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib
import matplotlib.pyplot as plt # Visualization
# NOTE(review): gcf() returns the current figure and creates one if none exists,
# so these two lines resize a single figure object; they do not set a default
# size for later figures (sns.set(...) below does that through rcParams).
fig = matplotlib.pyplot.gcf()
fig.set_size_inches(5, 5)
import seaborn as sns # Visualization
# Default figure size (in inches) for figures created after this point.
sns.set(rc={'figure.figsize':(17.7,8.27)})

from IPython.display import display, HTML # IPython notebook display

import os

# NOTE(review): pandas is already imported above; this second import is redundant.
import pandas as pd
# Remove pandas' display limits: no cap on rows or columns, and no fixed width.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [None]:
# Load the player table from the CSV file and preview its first rows.
data = pd.read_csv('../input/fifa-21-complete-player-dataset/players_21.csv')
data.head()

In [None]:
# Collect every column name of the player table into a plain Python list.
skills = list(data.columns)

In [None]:
# Display the collected column names.
skills

In [None]:
# Number of missing values in each column.
data.isnull().sum()

In [None]:
# Fill missing values in the six headline rating columns with each column's mean.
#
# The filled values are assigned back with `data[cols] = ...` rather than by
# calling `fillna(..., inplace=True)` on a `data[col]` selection: the in-place
# call acts on an intermediate object, which pandas warns about and which does
# not update `data` when Copy-on-Write is enabled.
mean_imputed_cols = ['dribbling', 'defending', 'passing', 'pace', 'shooting', 'physic']
to_impute_by_mean = data.loc[:, mean_imputed_cols]
# DataFrame.fillna with a Series fills each column with the value stored under its name.
data[mean_imputed_cols] = to_impute_by_mean.fillna(to_impute_by_mean.mean())

In [None]:
# Re-count missing values per column to check the result of the imputation.
data.isnull().sum()

In [None]:
# Number of players at each height value. The plotted column is `height_cm`,
# so the x axis is labelled in centimetres (the previous label claimed feet/inches).
plt.figure(figsize=(22, 8))
ax = sns.countplot(x='height_cm', data=data)
ax.set_title(label='Count of players on Basis of Height', fontsize=20)
ax.set_xlabel(xlabel='Height in cm', fontsize=16)
ax.set_ylabel(ylabel='Count', fontsize=16)
plt.show()

In [None]:
# Mean dribbling rating for each height value.
plt.figure(figsize=(22, 7))
# x/y are passed by keyword: recent seaborn releases reject them as positional arguments.
sns.barplot(
    x='height_cm',
    y='dribbling',
    data=data.sort_values('height_cm', ascending=False),
    alpha=0.6,
)
# Labels are applied after plotting so seaborn's automatic axis labels cannot replace them.
plt.xlabel('Height', fontsize=20)
plt.ylabel('Dribbling', fontsize=20)
plt.title('Height vs Dribbling', fontsize=25)
plt.show()

In [None]:
# Mean dribbling rating for each weight value.
plt.figure(figsize=(22, 7))
# x/y are passed by keyword: recent seaborn releases reject them as positional arguments.
sns.barplot(
    x='weight_kg',
    y='dribbling',
    data=data.sort_values('weight_kg'),
    alpha=0.6,
)
# Labels are applied after plotting so seaborn's automatic axis labels cannot replace them.
plt.xlabel('Weight_kg', fontsize=20)
plt.ylabel('Dribbling', fontsize=20)
plt.title('Weight vs Dribbling', fontsize=25)
plt.show()

# Work rates of the players in FIFA 21

In [None]:
# Number of players for each work-rate category.
plt.figure(figsize=(15, 7))

# `x` is passed by keyword: recent seaborn releases reject it as a positional argument.
sns.countplot(x='work_rate', data=data, palette='Set1')
plt.title('Different work rates of the Players Participating in the FIFA 2021', fontsize=20)
plt.xlabel('Work rates associated with the players', fontsize=16)
plt.ylabel('count of Players', fontsize=16)
plt.show()

# Messi vs Ronaldo

In [None]:
# List the columns available in the player table.
data.columns

In [None]:
# Columns used for the player-vs-player comparison, grouped by family:
# headline ratings, traits, attacking, skill, movement and power attributes.
# NOTE(review): `player_traits` looks like a descriptive (non-numeric) column,
# unlike the other entries — confirm it is meant to be in this list.
skills = [
    # headline ratings
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    # traits
    'player_traits',
    # attacking
    'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
    'attacking_short_passing', 'attacking_volleys',
    # skill
    'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
    'skill_long_passing', 'skill_ball_control',
    # movement
    'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
    'movement_reactions', 'movement_balance',
    # power
    'power_shot_power', 'power_jumping', 'power_stamina',
    'power_strength', 'power_long_shots',
]

In [None]:
# Compare two players across the columns listed in `skills`.
# Each player is selected by `short_name` and reduced to the skill columns.
messi = data.loc[data.short_name == 'L. Messi']
messi = pd.DataFrame(messi, columns=skills)
ronaldo = data.loc[data.short_name == 'Cristiano Ronaldo']
ronaldo = pd.DataFrame(ronaldo, columns=skills)

plt.figure(figsize=(15,8))
# Both point plots are drawn on the same axes so the two players overlay each other.
ax = sns.pointplot(data=messi, color='blue', alpha=0.6)
sns.pointplot(data=ronaldo, color='red', alpha=0.6, ax=ax)
# Without a legend the blue and red series cannot be told apart; proxy handles
# are used so the legend does not depend on how seaborn labels its artists.
ax.legend(handles=[
    matplotlib.lines.Line2D([], [], color='blue', marker='o', label='L. Messi'),
    matplotlib.lines.Line2D([], [], color='red', marker='o', label='Cristiano Ronaldo'),
])
plt.xticks(rotation=90)
plt.xlabel('Skills', fontsize=20)
plt.ylabel('Skill value', fontsize=20)
plt.title('Messi vs Ronaldo', fontsize = 25)
plt.grid()
plt.show()

# Top 10 Players based on Overall skills

In [None]:
# Rank all players by overall rating (highest first), keep the name and rating
# of the first ten, and render them as an HTML table without the index column.
top_ten_overall = data.sort_values('overall', ascending=False)[['short_name', 'overall']][:10]
display(HTML(top_ten_overall.to_html(index=False)))

# Top 5 Nations with overall best player

In [None]:
# Highest overall rating found in each nationality, then the five largest of those.
best_overall_by_nation = data.groupby(['nationality'])['overall'].max()
top_Nations = best_overall_by_nation.sort_values(ascending=False).head()
top_Nations

# Top 5 clubs with overall best player

In [None]:
# Highest overall rating found in each club, ordered from best to worst;
# the cell output shows the first five clubs.
best_overall_by_club = data.groupby(['club_name'])['overall'].max()
top_clubs = best_overall_by_club.sort_values(ascending=False)
top_clubs.head(5)

# Age distribution of players in the clubs

In [None]:
# Age distribution of the players in five selected clubs.
top_club_names = ('FC Barcelona', 'Juventus', 'Paris Saint-Germain', 'Chelsea', 'Manchester City')
# Filter on club membership only. The earlier `& data['age']` term was a bitwise
# AND between a boolean mask and the integer ages, which kept only players whose
# age is an odd number.
clubs = data.loc[data['club_name'].isin(top_club_names)]
fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
# x/y are passed by keyword (recent seaborn rejects positional x/y) and the plot
# is drawn on the axes created above.
ax = sns.boxenplot(x='club_name', y='age', data=clubs, ax=ax)
ax.set_title(label='Age distribution in the top 5 clubs', fontsize=25)
plt.xlabel('Clubs', fontsize=20)
plt.ylabel('Age', fontsize=20)
plt.grid()

# Age distribution of players in countries

In [None]:
# Age distribution of the players from ten selected countries.
countries_names = ('France', 'Brazil', 'Germany', 'Belgium', 'Spain', 'Netherlands', 'Argentina', 'Portugal', 'Chile', 'Colombia')
# Filter on nationality only. The earlier `& data['age']` term was a bitwise AND
# between a boolean mask and the integer ages, which kept only players whose age
# is an odd number.
countries = data.loc[data['nationality'].isin(countries_names)]
fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
# Draw on the axes created above.
ax = sns.boxenplot(x="nationality", y="age", data=countries, ax=ax)
ax.set_title(label='Age distribution in countries', fontsize=25)
plt.xlabel('countries', fontsize=20)
plt.ylabel('age', fontsize=20)
plt.grid()

# Finding the best player for each performance criterion

In [None]:
# Performance columns scanned when looking for the best player per criterion,
# grouped by family: headline ratings, attacking, skill, movement and power.
pr_cols = [
    # headline ratings
    'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
    # attacking
    'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
    'attacking_short_passing', 'attacking_volleys',
    # skill
    'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
    'skill_long_passing', 'skill_ball_control',
    # movement
    'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
    'movement_reactions', 'movement_balance',
    # power
    'power_shot_power', 'power_jumping', 'power_stamina',
    'power_strength', 'power_long_shots',
]

In [None]:
# For every performance column, print the name of the player holding the
# highest value in that column.
for criterion in pr_cols:
    # idxmax() gives the index label of the first row with the column maximum.
    best_row = data[criterion].idxmax()
    # The name is looked up by column label. The previous `[2]` indexed the row by
    # position, which is deprecated for label-indexed Series and breaks if the
    # column order changes.
    # NOTE(review): assumes position 2 was the `short_name` column — confirm against the CSV header.
    print('Best {0} : {1}'.format(criterion, data.loc[best_row, 'short_name']))

# Overall scores of players from selected nations

In [None]:
# Countries whose players are compared by overall score.
# 'Colombia' replaces the earlier 'Columbia', matching the spelling this notebook
# uses in its other country list.
some_countries = ('England', 'Germany', 'Spain', 'Argentina', 'France', 'Brazil', 'Italy', 'Colombia')
# Keep every player from those countries. The earlier `& data['overall']` term was
# a bitwise AND between a boolean mask and the integer ratings, which kept only
# players whose overall rating is an odd number.
data_countries = data.loc[data['nationality'].isin(some_countries)]
data_countries.head()

In [None]:
# Bar chart of the overall score per selected country (seaborn's barplot shows
# an aggregate of `overall` for each nationality).
plt.rcParams['figure.figsize'] = (15, 7)
ax = sns.barplot(x='nationality', y='overall', data=data_countries, palette='spring')
ax.set_xlabel('Countries', fontsize=9)
ax.set_ylabel('Overall Scores', fontsize=9)
ax.set_title('Distribution of overall scores of players from different countries', fontsize=20)
plt.show()

# Overall scores of players from selected clubs

In [None]:
# Clubs whose players are compared by overall score.
# 'Manchester City' replaces the earlier 'Manchestar City', matching the spelling
# this notebook uses in its other club list.
some_clubs = ('CD Leganés', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna Düsseldorf', 'Manchester City',
             'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF', 'Chelsea', 'Real Madrid')

# Keep every player from those clubs. The earlier `& data['overall']` term was a
# bitwise AND between a boolean mask and the integer ratings, which kept only
# players whose overall rating is an odd number.
data_clubs = data.loc[data['club_name'].isin(some_clubs)]

data_clubs.head()

In [None]:
# Bar chart of the overall score per selected club (one bar per club).
plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.barplot(x='club_name', y='overall', data=data_clubs, palette='inferno')
ax.set_xlabel('Some Popular Clubs', fontsize=9)
ax.set_ylabel('Overall Score', fontsize=9)
ax.set_title('Distribution of Overall Score in Different popular Clubs', fontsize=20)
# Club names are long, so the tick labels are rotated to vertical.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Box plot of the overall score distribution within each selected club.
plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.boxplot(x='club_name', y='overall', data=data_clubs, palette='inferno')
ax.set_xlabel('Some Popular Clubs', fontsize=9)
ax.set_ylabel('Overall Score', fontsize=9)
ax.set_title('Distribution of Overall Score in Different popular Clubs', fontsize=20)
# Club names are long, so the tick labels are rotated to vertical.
plt.xticks(rotation=90)
plt.show()

In [None]:
import plotly.express as px

# Nation-wise player counts and average overall rating

In [None]:
# Per nationality: mean overall rating and number of players, restricted to
# nationalities with at least 200 players, shown as a scatter plot.
overall_by_nation = data.groupby('nationality')['overall']
# Direct aggregations replace the groupby.apply lambdas.
cnt_best_avg = overall_by_nation.mean().reset_index(name='Overall Ratings')
cnt_best_cnt = overall_by_nation.count().reset_index(name='Player Counts')
snt_best_avg_cnt = pd.merge(cnt_best_avg, cnt_best_cnt, how='inner', on='nationality')
sel_best_avg_cnt = snt_best_avg_cnt[snt_best_avg_cnt['Player Counts'] >= 200]
# sort_values returns a new frame; the result is now kept instead of discarded.
sel_best_avg_cnt = sel_best_avg_cnt.sort_values(by=['Overall Ratings', 'Player Counts'], ascending=[False, False])
# The title names the plotted quantity: the mean of `overall`, not of a potential rating.
px.scatter(sel_best_avg_cnt, x='Overall Ratings', y='Player Counts', color='Player Counts', size='Overall Ratings', hover_data=['nationality'], title='Nationwise Player counts and Average Overall Rating')

In [None]:
# Age vs overall rating for the highest-rated players.
# sort_values returns a new frame here; sorting in place on a column selection
# of `data` triggers pandas' SettingWithCopy warning.
top_play = data[['short_name', 'overall', 'age', 'club_name']].sort_values(by='overall', ascending=False)
# NOTE(review): the name says 30 but the slice keeps the first 100 rows — confirm which is intended.
top_30_play = top_play[:100]
fig = px.scatter(top_30_play, x='age', y='overall', color='age', size='overall', hover_data=['short_name', 'club_name'], title='Top Football Players in the FIFA 21 game')
fig.show()

In [None]:
# Per club: mean overall rating and number of players, restricted to clubs with
# at least 25 players, shown as a scatter plot.
overall_by_club = data.groupby('club_name')['overall']
# Direct aggregations replace the groupby.apply lambdas.
cnt_best_avg = overall_by_club.mean().reset_index(name='Overall Ratings')
cnt_best_cnt = overall_by_club.count().reset_index(name='Player Counts')
snt_best_avg_cnt = pd.merge(cnt_best_avg, cnt_best_cnt, how='inner', on='club_name')
sel_best_avg_cnt = snt_best_avg_cnt[snt_best_avg_cnt['Player Counts'] >= 25]
# sort_values returns a new frame; the result is now kept instead of discarded.
sel_best_avg_cnt = sel_best_avg_cnt.sort_values(by=['Overall Ratings', 'Player Counts'], ascending=[False, False])
# The title names the plotted quantity: the mean of `overall`, not of a potential rating.
px.scatter(sel_best_avg_cnt, x='Overall Ratings', y='Player Counts', color='Player Counts', size='Overall Ratings', hover_data=['club_name'], title='Clubwise player counts and Average Overall Rating')