In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and echo the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(root, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

**Exploratory data analysis - FIFA 20**

Hello Kagglers,

This is my attempt at an exploratory data analysis on Kaggle, using the **FIFA 20 complete player dataset**.

This dataset contains the data from FIFA-15 to FIFA-20. I will focus on the FIFA-20 data.

**Table of contents**
1) Import libraries 

2) Data Exploration 

3) Cleaning data 

4) Data Visualization 

5) Data analysis 

In [None]:
# Read the FIFA 20 player table from the Kaggle input mount and preview it.
PLAYERS_CSV = '/kaggle/input/fifa-20-complete-player-dataset/players_20.csv'
df = pd.read_csv(PLAYERS_CSV)
df.head()

**Data Exploration**

Now, let's begin data exploration to gain insights about our data.

In [None]:
# Display the frame's dimensions as (rows, columns).
row_count, column_count = df.shape
(row_count, column_count)

This dataset contains *18278* rows and *104* columns

In [None]:
# List every column name. A plain list is displayed in full, whereas the Index
# repr is abbreviated with "..." once it holds more entries than pandas'
# display.max_seq_items option (100 by default), hiding part of a wide schema.
df.columns.tolist()

In [None]:
# Print pandas' built-in summary of the frame (DataFrame.info).
df.info()

**Cleaning data**

In [None]:
# Columns that play no part in this analysis are dropped up front.
uselesscolumn = [
    'dob',
    'sofifa_id',
    'player_url',
    'long_name',
    'body_type',
    'real_face',
    'nation_position',
    'loaned_from',
    'nation_jersey_number',
]
df = df.drop(columns=uselesscolumn)
df.head()

**Handling Missing Data**

In [None]:
# Plotting libraries used from here on, plus missingno for inspecting missing values.
import matplotlib.pyplot as plt
import missingno as msno
import seaborn as sns

# Draw missingno's matrix view of the frame.
msno.matrix(df)


In [None]:
# Names of the per-position rating columns handled in the following cells.
columns = [
    'ls', 'st', 'rs',
    'lw', 'lf', 'cf', 'rf', 'rw',
    'lam', 'cam', 'ram',
    'lm', 'lcm', 'cm', 'rcm', 'rm',
    'lwb', 'ldm', 'cdm', 'rdm', 'rwb',
    'lb', 'lcb', 'cb', 'rcb', 'rb',
]
df.loc[:, columns].head()

In [None]:
# For each position column keep only the text before the first '+',
# then fill the entries that are missing with 0.
for position in columns:
    base_rating = df[position].str.split('+').str[0]
    df[position] = base_rating.fillna(0)

In [None]:
# Changing the dtype of the position columns to 'int', then re-checking the frame summary.
as_integers = df[columns].astype('int')
df[columns] = as_integers
df.info()

In [None]:
# Display the position-rating columns after the conversion.
df.loc[:, columns]

In [None]:
# Replace the 0 placeholders in each position column with the column mean.
# The mean is taken over the non-zero entries only: averaging over the
# placeholder zeros as well would drag the replacement value below the level
# of the ratings that are actually present.
for col in columns:
    rated = df[col][df[col] != 0]
    df[col] = df[col].replace(0, rated.mean())

In [None]:
# The six attribute columns examined next.
attributes = [
    'dribbling',
    'defending',
    'physic',
    'passing',
    'shooting',
    'pace',
]
df.loc[:, attributes]

In [None]:
# Count the missing entries in each attribute column.
df[attributes].isnull().sum()

In [None]:
# Fill the gaps in every attribute column with that column's own mean.
attribute_means = df[attributes].mean()
df[attributes] = df[attributes].fillna(attribute_means)

**Data Visualization**

How many players prefer their left foot?

In [None]:
# Tally the players by preferred foot.
foot_counts = df['preferred_foot'].value_counts()
foot_counts

In [None]:
# Count plot of the preferred foot across all players.
sns.countplot(data=df, x='preferred_foot', linewidth=3, palette='Set3')
plt.title('Preferred foot of the Players', fontsize=20)
plt.show()

In [None]:
# Pie chart of the share of players per preferred foot.
# The counts are computed once and reused for both the wedge sizes and the labels.
foot_counts = df['preferred_foot'].value_counts()

# Pull only the last wedge out of the pie. value_counts() sorts descending, so
# that is the least common category; the list length follows the number of
# categories instead of being fixed at two.
explode = [0] * (len(foot_counts) - 1) + [0.3]

plt.pie(
    foot_counts,
    labels=foot_counts.index,
    shadow=True,
    autopct='%1.1f%%',
    explode=explode,
    radius=2,
)
plt.title('Perecentage of the preferred foot')
plt.legend()
plt.show()

Which country has the maximum number of players?

In [None]:
# The ten nationalities with the most players.
nationality_counts = df['nationality'].value_counts()
top = nationality_counts.iloc[:10]
top

In [None]:
# Bar chart of player counts for the ten nationalities held in `top`.
fig, ax = plt.subplots(figsize=(10, 8))
ax.bar(top.index, top.values, color='green')
ax.set_title('Top 10 Countries with most number of players')
plt.show()

In [None]:
# Compare `potential` across the ten most represented nationalities.
# df.head(10) would only pick the first ten player rows, not ten countries,
# so the frame is restricted to the ten nationalities with the most players.
# NOTE(review): "top 10 countries" is taken to mean "most players" — confirm.
top_countries = df['nationality'].value_counts().head(10).index
top_country_players = df[df['nationality'].isin(top_countries)]

plt.figure(1, figsize=(12, 6))
sns.barplot(
    x='nationality',
    y='potential',
    data=top_country_players,
    order=top_countries,  # bars ordered from most to fewest players
    palette='PuBuGn_d',
)
plt.title('Comparison of Potential of Top 10 FIFA countries')
plt.show()

Potential of Top 10 FIFA players

In [None]:
# Bar chart of `potential` for the first ten rows of the frame.
first_ten = df.head(10)
plt.figure(1, figsize=(12, 6))
sns.barplot(data=first_ten, x='short_name', y='potential', palette='PuBuGn_d')
plt.title('Comparison of Potential of Top 10 FIFA players')
plt.xticks(rotation=50)
plt.show()

**Data Analysis**

Average player height in FIFA


In [None]:
# Mean of the height_cm column, reported in a sentence.
tall = df['height_cm'].mean()
print(f"{tall!s} is the average height in fifa")

Average player weight in FIFA

In [None]:
# Mean of the weight_kg column, reported in a sentence.
heavy = df['weight_kg'].mean()
print(f"{heavy!s} is the average weight in fifa")

**Oldest players**

In [None]:
# Five rows with the highest age.
age_view = df[['short_name', 'club', 'nationality', 'overall', 'age']]
age_view.sort_values('age', ascending=False).head(5)

**Youngest players**

In [None]:
# Five rows with the lowest age.
age_view = df[['short_name', 'club', 'nationality', 'overall', 'age']]
age_view.sort_values('age', ascending=True).head(5)

**Best ball controllers**

In [None]:
# Five rows with the highest skill_ball_control.
ball_control_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'skill_ball_control']]
ball_control_view.sort_values('skill_ball_control', ascending=False).head(5)

**Best defenders**


In [None]:
# Five rows with the highest defending value.
defending_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'defending']]
defending_view.sort_values('defending', ascending=False).head(5)

**Best shooters**

In [None]:
# Five rows with the highest shooting value.
shooting_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'shooting']]
shooting_view.sort_values('shooting', ascending=False).head(5)

**Best dribblers**

In [None]:
# Five rows with the highest dribbling value.
dribbling_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'dribbling']]
dribbling_view.sort_values('dribbling', ascending=False).head(5)


**Quickest players**

In [None]:
# Five rows with the highest movement_acceleration.
acceleration_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'movement_acceleration']]
acceleration_view.sort_values('movement_acceleration', ascending=False).head(5)


**Highest Earners**

In [None]:
# Five rows with the highest wage_eur.
wage_view = df[['short_name', 'club', 'nationality', 'overall', 'age', 'wage_eur']]
wage_view.sort_values('wage_eur', ascending=False).head(5)