# **FIFA 21 DATA EXPLORATION, ANALYSIS AND VISUALISATION**

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(folder, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1. Importing Libraries & Data Exploration

In [None]:
from IPython.display import display, HTML
import matplotlib
import matplotlib.pyplot as plt # Visualization
import seaborn as sns # Visualization
import plotly.express as px

# Default figure size for plots that do not request their own size.
# (Calling gcf() here would only create and resize a stray empty figure;
# it would not influence any figure created later.)
plt.rcParams['figure.figsize'] = (5, 5)
sns.set_style(style='darkgrid')

# Show complete frames: no truncation of rows, columns or output width.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [None]:
# Load the player table from the Kaggle dataset and preview its first rows.
players_csv = '../input/fifa-21-complete-player-dataset/players_21.csv'
data = pd.read_csv(players_csv)
data.head()

In [None]:
# Remember every column name, then report the number of missing values per column.
skills = list(data.columns)
data.isna().sum()

In [None]:
# Columns whose missing values are replaced by the column mean.
to_impute_by_mean = data.loc[:, ['dribbling', 'defending', 'passing', 'pace', 'shooting', 'physic']]

# Fill on the selected copy and assign the result back in one step.
# `data[col].fillna(..., inplace=True)` is chained assignment: it warns on
# recent pandas and leaves `data` unchanged when copy-on-write is enabled.
data[to_impute_by_mean.columns] = to_impute_by_mean.fillna(to_impute_by_mean.mean())

# 2. Analysis and Visualisation


In [None]:
# Number of players at each height value.
plt.figure(figsize = (22, 8))
ax = sns.countplot(x = 'height_cm', data = data)
ax.set_title(label = 'Count of players on Basis of Height', fontsize = 20)
# The plotted column is `height_cm`, so the axis is labelled in centimetres.
ax.set_xlabel(xlabel = 'Height (cm)', fontsize = 16)
ax.set_ylabel(ylabel = 'Count', fontsize = 16)
plt.show()

In [None]:
# Bar chart of the dribbling rating aggregated per height value.
plt.figure(figsize = (22, 7))
ax = sns.barplot(x='height_cm', y='dribbling', data=data.sort_values('height_cm'), alpha=0.6)
# Labels and title are applied after plotting: older seaborn releases replace
# axis labels that were set beforehand with the plotted column names.
ax.set_xlabel('Height', fontsize=20)
ax.set_ylabel('Dribbling', fontsize=20)
ax.set_title('Height vs Dribbling', fontsize = 25)
plt.show()


In [None]:
# Bar chart of the dribbling rating aggregated per weight value.
plt.figure(figsize = (22, 7))
ax = sns.barplot(x='weight_kg', y='dribbling', data=data.sort_values('weight_kg'), alpha=0.6)
# Labels and title are applied after plotting: older seaborn releases replace
# axis labels that were set beforehand with the plotted column names.
ax.set_xlabel('Weight_kg', fontsize=20)
ax.set_ylabel('Dribbling', fontsize=20)
ax.set_title('Weight vs Dribbling', fontsize = 25)
plt.show()

In [None]:
# Number of players in each work-rate category.
plt.figure(figsize=(15, 7))
work_rate_ax = sns.countplot(data=data, x='work_rate', palette='hls')
work_rate_ax.set_title('Different work rates of the Players Participating in the FIFA 2021', fontsize=20)
work_rate_ax.set_xlabel('Work rates associated with the players', fontsize=16)
work_rate_ax.set_ylabel('count of Players', fontsize=16)
plt.show()

# Comparing Stats of Messi, Lewandowski & CR7

In [None]:
# Numeric rating columns compared between players.
# `player_traits` is deliberately not listed: it is a text column, not a
# numeric rating, so it cannot be drawn on the skill-value axis.
skills = [
    'pace',
    'shooting',
    'passing',
    'dribbling',
    'defending',
    'physic',
    'attacking_crossing',
    'attacking_finishing',
    'attacking_heading_accuracy',
    'attacking_short_passing',
    'attacking_volleys',
    'skill_dribbling',
    'skill_curve',
    'skill_fk_accuracy',
    'skill_long_passing',
    'skill_ball_control',
    'movement_acceleration',
    'movement_sprint_speed',
    'movement_agility',
    'movement_reactions',
    'movement_balance',
    'power_shot_power',
    'power_jumping',
    'power_stamina',
    'power_strength',
    'power_long_shots',
]

In [None]:
# Pull the skill columns for each of the three players, keyed by short name.
player_rows = {
    short_name: pd.DataFrame(data.loc[data['short_name'] == short_name], columns=skills)
    for short_name in ('L. Messi', 'Cristiano Ronaldo', 'R. Lewandowski')
}
messi = player_rows['L. Messi']
ronaldo = player_rows['Cristiano Ronaldo']
lewandowski = player_rows['R. Lewandowski']

# Overlay one point plot per player on a shared figure, one colour each.
plt.figure(figsize=(15, 8))
for frame, colour in ((messi, 'blue'), (ronaldo, 'red'), (lewandowski, 'green')):
    sns.pointplot(data=frame, color=colour, alpha=0.6)

plt.title('Messi vs Ronaldo Vs Lewandowski', fontsize=25)
plt.xlabel('Skills', fontsize=20)
plt.ylabel('Skill value', fontsize=20)
plt.xticks(rotation=90)
# NOTE(review): plt.grid() without arguments toggles grid visibility rather
# than forcing it on - confirm this is the intended look.
plt.grid()

# Top 10 Players based on Overall skills


In [None]:
# Render the ten highest-rated players (name and overall rating) as an index-free HTML table.
ranked_by_overall = data.sort_values('overall', ascending=False)
top_ten_table = ranked_by_overall[['short_name', 'overall']].head(10)
display(HTML(top_ten_table.to_html(index=False)))

# Age distribution of players in the clubs


In [None]:
# Age distribution of the squads of a hand-picked set of clubs.
top_club_names = ('FC Barcelona','Real Madrid', 'Juventus', 'Paris Saint-Germain', 'Chelsea', 'Manchester City', 'Manchester United')
# Select by club membership and require a known age. The mask must stay
# boolean: AND-ing it with the raw integer `age` column is a bitwise operation
# that keeps only odd ages and silently drops every even-aged player.
clubs = data.loc[data['club_name'].isin(top_club_names) & data['age'].notna()]
fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
ax = sns.boxenplot(x="club_name", y="age", data=clubs)
ax.set_title(label='Age distribution in the top clubs', fontsize=25)
plt.xlabel('Clubs', fontsize=20)
plt.ylabel('Age', fontsize=20)
# NOTE(review): plt.grid() without arguments toggles grid visibility rather
# than forcing it on - confirm this is the intended look.
plt.grid()

# Age distribution of players in countries


In [None]:
# Age distribution of players from a hand-picked set of countries.
countries_names = ('France', 'Brazil', 'Germany', 'Belgium', 'Spain', 'Netherlands', 'Argentina', 'Portugal', 'Chile', 'Colombia')
# Select by nationality and require a known age. The mask must stay boolean:
# AND-ing it with the raw integer `age` column is a bitwise operation that
# keeps only odd ages and silently drops every even-aged player.
countries = data.loc[data['nationality'].isin(countries_names) & data['age'].notna()]
fig, ax = plt.subplots()
fig.set_size_inches(20, 10)
ax = sns.boxenplot(x="nationality", y="age", data=countries)
ax.set_title(label='Age distribution in countries', fontsize=25)
plt.xlabel('countries', fontsize=20)
plt.ylabel('age', fontsize=20)
# NOTE(review): plt.grid() without arguments toggles grid visibility rather
# than forcing it on - confirm this is the intended look.
plt.grid()

# Finding the best players for each performance criteria


In [None]:
# Print, for each performance attribute, the player with the highest rating.
pr_cols = [
    'pace',
    'shooting',
    'passing',
    'dribbling',
    'defending',
    'physic',
    'attacking_crossing',
    'attacking_finishing',
    'attacking_heading_accuracy',
    'attacking_short_passing',
    'attacking_volleys',
    'skill_dribbling',
    'skill_curve',
    'skill_fk_accuracy',
    'skill_long_passing',
    'skill_ball_control',
    'movement_acceleration',
    'movement_sprint_speed',
    'movement_agility',
    'movement_reactions',
    'movement_balance',
    'power_shot_power',
    'power_jumping',
    'power_stamina',
    'power_strength',
    'power_long_shots',
]

for col in pr_cols:
    # idxmax returns the row label of the (first) maximum of the column.
    best_row = data[col].idxmax()
    # Read the name by column label. Integer lookup on a label-indexed row
    # (`row[2]`) relies on a deprecated positional fallback.
    # NOTE(review): position 2 is assumed to be `short_name` in players_21.csv - confirm.
    print('Best {0} : {1}'.format(col, data.loc[best_row, 'short_name']))

# Overall scores By Top National Teams


In [None]:
# Countries whose players' overall ratings are compared below.
some_countries = ('England', 'Germany', 'Spain', 'Argentina', 'Belgium', 'France', 'Brazil', 'Italy', 'Colombia')
# Keep every player of the selected countries with a known overall rating.
# The mask must stay boolean: AND-ing it with the raw integer `overall` column
# is a bitwise operation that silently drops every player with an even rating.
data_countries = data.loc[data['nationality'].isin(some_countries) & data['overall'].notna()]
data_countries.head()


In [None]:
# Bar chart of the overall rating per selected country.
plt.rcParams['figure.figsize'] = (15, 7)
ax = sns.barplot(data=data_countries, x='nationality', y='overall', palette='spring')
ax.set_title('Distribution of overall scores of players from different countries', fontsize=20)
ax.set_xlabel('Countries', fontsize=9)
ax.set_ylabel('Overall Scores', fontsize=9)
plt.show()

# Overall Scores Comparison by Clubs


In [None]:
# Clubs whose players' overall ratings are compared below.
some_clubs = ('Manchester United', 'Liverpool', 'FC Bayern München', 'Atlético Madrid', 'Juventus', 'Manchester City',
              'Tottenham Hotspur', 'FC Barcelona', 'Paris Saint-Germain', 'Chelsea', 'Real Madrid')

# Keep every player of the selected clubs with a known overall rating.
# The mask must stay boolean: AND-ing it with the raw integer `overall` column
# is a bitwise operation that silently drops every player with an even rating.
data_clubs = data.loc[data['club_name'].isin(some_clubs) & data['overall'].notna()]

data_clubs.head()

In [None]:
# Bar chart of the overall rating per selected club.
plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.barplot(data=data_clubs, x='club_name', y='overall', palette='inferno')
ax.set_title('Distribution of Overall Score in Different popular Clubs', fontsize=20)
ax.set_xlabel('Some Popular Clubs', fontsize=9)
ax.set_ylabel('Overall Score', fontsize=9)
# Club names are long, so the tick labels are drawn vertically.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Box plot of the overall rating per selected club.
plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.boxplot(data=data_clubs, x='club_name', y='overall', palette='inferno')
ax.set_title('Distribution of Overall Score in Different popular Clubs', fontsize=20)
ax.set_xlabel('Some Popular Clubs', fontsize=9)
ax.set_ylabel('Overall Score', fontsize=9)
# Club names are long, so the tick labels are drawn vertically.
plt.xticks(rotation=90)
plt.show()

# Average Overall Rating and Player Counts per Nation

In [None]:
# Mean overall rating and number of players for every nationality.
overall_by_nation = data.groupby('nationality')['overall']
cnt_best_avg = overall_by_nation.mean().reset_index(name='Overall Ratings')
cnt_best_cnt = overall_by_nation.count().reset_index(name='Player Counts')
snt_best_avg_cnt = pd.merge(cnt_best_avg, cnt_best_cnt, how='inner', on='nationality')

# Keep nations with at least 200 players, best average first.
# sort_values returns a new frame, so its result has to be assigned.
sel_best_avg_cnt = (
    snt_best_avg_cnt[snt_best_avg_cnt['Player Counts'] >= 200]
    .sort_values(by=['Overall Ratings', 'Player Counts'], ascending=[False, False])
)

# The plotted average is computed from `overall`, so the title names it as such.
px.scatter(sel_best_avg_cnt, x='Overall Ratings', y='Player Counts', color='Player Counts',
           size='Overall Ratings', hover_data=['nationality'],
           title='Nationwise Player counts and Average Overall Rating')

# Top Football Players in the FIFA 21 game

In [None]:
# Name, rating, age and club of every player, highest overall rating first.
# Sorting returns a new frame instead of sorting a column slice of `data`
# in place, which triggers pandas' SettingWithCopy warning.
top_play = data[['short_name', 'overall', 'age', 'club_name']].sort_values(by='overall', ascending=False)

# NOTE(review): despite its name this holds the top 100 rows, not 30 -
# confirm which number is intended.
top_30_play = top_play.head(100)

# Age against overall rating for those players; name and club appear on hover.
fig = px.scatter(top_30_play, x='age', y='overall', color='age', size='overall',
                 hover_data=['short_name', 'club_name'],
                 title='Top Football Players in the FIFA 21 game')
fig.show()

# Which Team Has the Most OP (Overpowered) Players?

In [None]:
# Mean overall rating and number of players for every club.
overall_by_club = data.groupby('club_name')['overall']
cnt_best_avg = overall_by_club.mean().reset_index(name='Overall Ratings')
cnt_best_cnt = overall_by_club.count().reset_index(name='Player Counts')
snt_best_avg_cnt = pd.merge(cnt_best_avg, cnt_best_cnt, how='inner', on='club_name')

# Keep clubs with at least 25 players, best average first.
# sort_values returns a new frame, so its result has to be assigned.
sel_best_avg_cnt = (
    snt_best_avg_cnt[snt_best_avg_cnt['Player Counts'] >= 25]
    .sort_values(by=['Overall Ratings', 'Player Counts'], ascending=[False, False])
)

px.scatter(sel_best_avg_cnt, x='Player Counts', y='Overall Ratings', color='Player Counts',
           size='Overall Ratings', hover_data=['club_name'],
           title='Clubwise player counts and Average Rating')