### Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Reading Dataset

In [None]:
# Load the dataset CSV from the notebook's input directory into a DataFrame.
df = pd.read_csv('../input/fifa19eda/fifa_eda.csv')
# Preview the first rows to sanity-check the load.
df.head()

### Basic statistics check

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Summary statistics for every column; include='all' covers non-numeric
# columns as well as numeric ones.
df.describe(include = 'all')

### Percentage of null values and data type of each column

In [None]:
# Build a per-column overview: data type next to the percentage of missing
# values.
df_dtype = df.dtypes

# Scale to percent first and round afterwards. Rounding the fraction and then
# multiplying by 100 re-introduces binary float noise
# (e.g. 0.07 * 100 == 7.000000000000001).
df_missing = (df.isna().mean() * 100).round(2)

dtls_1 = pd.DataFrame(df_missing, columns=['missing'])
dtls_2 = pd.DataFrame(df_dtype, columns=['dtype'])

# Both frames are indexed by column name, so a column-wise concat lines them up.
dtls = pd.concat([dtls_1, dtls_2], axis=1)
# Columns with the most missing data come first.
dtls.sort_values(by='missing', ascending=False)

In [None]:
# Fill missing values column by column.
#
# The result of fillna() is assigned back to the column instead of calling
# fillna(..., inplace=True) on the df[...] selection: the in-place form is a
# chained assignment, which emits a FutureWarning on pandas 2.x and does not
# modify `df` at all once Copy-on-Write is enabled.

# Nominal data, so we impute with the mode.
df['Club'] = df['Club'].fillna(df['Club'].mode()[0])
df['Contract Valid Until'] = df['Contract Valid Until'].fillna(
    df['Contract Valid Until'].mode()[0]
)

# Discrete values, so we also impute with the mode.
df['International Reputation'] = df['International Reputation'].fillna(
    df['International Reputation'].mode()[0]
)
df['Skill Moves'] = df['Skill Moves'].fillna(df['Skill Moves'].mode()[0])

# Continuous values, so we impute with the mean.
df['Value'] = df['Value'].fillna(df['Value'].mean())

### Let's check players' preferred foot

In [None]:
# Pie chart of how many players prefer each foot.
plt.rcParams['figure.figsize'] = (8, 8)

foot_counts = df['Preferred Foot'].value_counts()
# Take the labels from the counts' own index so every wedge is labelled with
# the value it actually represents; a hard-coded ['Right', 'Left'] silently
# mislabels the chart whenever the frequency order differs.
plt.pie(foot_counts, labels=foot_counts.index, autopct='%1.1f%%')
plt.title('Preferred Foot of player')
plt.show()

### Top 5 players with good skill moves

In [None]:
# Players whose 'Skill Moves' value is 5, reduced to the columns we display.
five_star_skill = df[df['Skill Moves'] == 5][['Name', 'Nationality', 'Overall']]
# Highest 'Overall' first, keeping the top five rows.
best_five = five_star_skill.sort_values(by='Overall', ascending=False).head(5)
# Render as a styled table with a viridis background gradient.
best_five.style.background_gradient(cmap='viridis')

In [None]:
# Pie chart of the five nationalities with the most players, labelled with
# absolute player counts rather than percentages.
plt.rcParams['figure.figsize'] = (10, 10)
size = df['Nationality'].value_counts().sort_values(ascending=False).head(5)


def absolute_value(val, total=size.sum()):
    """Turn a wedge's percentage back into a whole-number count label.

    Parameters
    ----------
    val : float
        Percentage of the pie covered by the wedge, as passed by ``plt.pie``
        to its ``autopct`` callback.
    total : number, optional
        Sum of all wedge values. Bound when the function is defined, so the
        label does not depend on whatever the global ``size`` refers to later.

    Returns
    -------
    str
        The count rounded to the nearest integer, e.g. ``'1662'``. Returning
        text (not a float) avoids labels such as ``'1662.0'``.
    """
    return '{:.0f}'.format(val / 100. * total)


# The last (smallest) wedge is pulled out slightly.
plt.pie(size, labels=size.index, autopct=absolute_value, explode = [0,0,0,0,0.05])

plt.title('Countries with more number of players')
plt.show()

### Let's check which players' contracts are going to expire soon in each country

In [None]:
# Inspect the distinct raw values of the column before parsing them.
df['Contract Valid Until'].unique()

In [None]:
# Split each value on '-' and keep the first piece, converted to an integer.
contract_parts = df['Contract Valid Until'].str.split('-')
df['Contract Valid Until'] = contract_parts.apply(lambda parts: int(parts[0]))
# Show the distinct values that remain after the conversion.
df['Contract Valid Until'].unique()

In [None]:
from ipywidgets import interact

# One dropdown entry per distinct nationality, in alphabetical order.
# Passing df['Nationality'].values directly would create one (duplicated)
# option per player row.
nationality_options = sorted(df['Nationality'].dropna().unique())


@interact
def country(year=nationality_options):
    """Show the ten players of the selected nationality whose contracts end soonest.

    Parameters
    ----------
    year : str
        The nationality picked in the dropdown. The parameter keeps its
        original name, which is also the widget's label, even though it holds
        a country rather than a year.

    Returns
    -------
    pandas.DataFrame
        Up to ten rows with the columns 'Name' and 'Contract Valid Until',
        ordered by 'Contract Valid Until' ascending.
    """
    selected = df[df['Nationality'] == year][['Name', 'Contract Valid Until']]
    # Ascending order puts the earliest expiry first, i.e. the contracts that
    # "expire soon"; a descending sort would list the longest-running ones.
    return selected.sort_values(by='Contract Valid Until', ascending=True).head(10)

### Let's check Indian players' contract validity

In [None]:
# Side-by-side distributions of 'Overall' and 'Potential' for players whose
# nationality is India.
Ind_players = df[df['Nationality'] == 'India']
plt.rcParams['figure.figsize'] = 20, 5

# (subplot position, column to plot, colour) for the left and right panels.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider sns.histplot(..., kde=True) once the environment's seaborn version
# is confirmed.
for panel, column, colour in ((121, 'Overall', 'purple'), (122, 'Potential', 'Red')):
    plt.subplot(panel)
    sns.distplot(Ind_players[column], color=colour)

plt.suptitle('Indian players Overall and Potential score comparition')
plt.show()

In [None]:
# Pie chart of the five clubs with the most players, labelled with absolute
# player counts rather than percentages.
# NOTE(review): missing 'Club' values are imputed with the most frequent club
# earlier in this notebook, so that club's count here includes the imputed
# rows.
plt.rcParams['figure.figsize'] = (10, 10)
size = df['Club'].value_counts().sort_values(ascending=False).head(5)


def absolute_value(val, total=size.sum()):
    """Turn a wedge's percentage back into a whole-number count label.

    Parameters
    ----------
    val : float
        Percentage of the pie covered by the wedge, as passed by ``plt.pie``
        to its ``autopct`` callback.
    total : number, optional
        Sum of all wedge values. Bound when the function is defined, so the
        label does not depend on later rebinding of the global ``size``.

    Returns
    -------
    str
        The count rounded to the nearest integer. Returning text (not a
        float) avoids labels with a trailing ``.0``.
    """
    return '{:.0f}'.format(val / 100. * total)


# The two smallest wedges are pulled out slightly.
plt.pie(size, labels=size.index, autopct=absolute_value, explode = [0,0,0,0.05,0.1])

plt.title('Clubs with more number of players')
plt.show()