# Data Loading

In [None]:
# Kaggle's Python 3 image (https://github.com/kaggle/docker-python) ships with the
# usual analytics stack pre-installed; the two core libraries are loaded here.

import numpy as np  # linear algebra
import pandas as pd  # data processing, CSV file I/O (e.g. pd.read_csv)

# Datasets are mounted read-only under "../input/". Walk that tree and print the
# full path of every file so the exact CSV locations show up in the cell output.

import os
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# Up to 20GB written to the current directory (/kaggle/working/) is preserved as
# output when a version is created with "Save & Run All".
# /kaggle/temp/ can hold temporary files; they are not kept after the session ends.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8-*' and
# newer releases no longer ship the old names. Prefer the new name and fall back
# to the legacy one so the notebook runs on both old and new matplotlib.
plot_style = 'seaborn-v0_8-deep'
if plot_style not in plt.style.available:
    plot_style = 'seaborn-deep'
plt.style.use(plot_style)

import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including deprecation notices from pandas/seaborn — consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
# Read the player table from the attached Kaggle dataset.
PLAYERS_CSV = '../input/fifa-20-complete-player-dataset/players_20.csv'
data = pd.read_csv(PLAYERS_CSV)

# Preview the first five rows as a quick sanity check of the load.
data.head(n=5)

# Data Cleaning

In [None]:
# Names of the columns that contain at least one missing value.
has_missing = data.isna().any(axis=0)
data.columns[has_missing]

In [None]:
# Drop the two columns this analysis does not use. errors='ignore' keeps the cell
# re-runnable: a second execution no longer raises KeyError for columns that are
# already gone.
data = data.drop(columns=['release_clause_eur', 'loaned_from'], errors='ignore')

# Column-specific placeholders for missing values. Filling through the DataFrame
# with a mapping (instead of `data[col].fillna(..., inplace=True)`) avoids chained
# in-place mutation of a column selection, which does not update the parent frame
# once pandas copy-on-write is active.
data = data.fillna({
    'player_tags': 'no tag',
    'team_position': 'none',
    'team_jersey_number': 1,
    'nation_position': 'not join',
    'nation_jersey_number': 0,
})

In [None]:
# Every value still missing after the column-specific fills becomes 0.
data = data.fillna(value=0)

# Data Visualization

In [None]:
# Count of players per preferred foot.
plt.rcParams['figure.figsize'] = (10, 5)
# The column is passed by keyword: from seaborn 0.12 on, the only positional
# argument countplot accepts is `data`, so a positional Series is no longer
# interpreted as the x variable.
sns.countplot(x='preferred_foot', data=data, palette='pink')
plt.title('Preferred Foot of the Players', fontsize=20)
plt.show()

In [None]:
# Count of players per age.
plt.rcParams['figure.figsize'] = (10, 5)
# The column is passed by keyword: from seaborn 0.12 on, the only positional
# argument countplot accepts is `data`, so a positional Series is no longer
# interpreted as the x variable.
sns.countplot(x='age', data=data, palette='pink')
plt.title('Ages of the Players', fontsize=20)
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = (20, 7)

# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*'; use the
# new name when present and fall back to the legacy one on older releases.
palette_style = 'seaborn-v0_8-dark-palette'
if palette_style not in plt.style.available:
    palette_style = 'seaborn-dark-palette'
plt.style.use(palette_style)

# Age distribution for each overall score, split by preferred foot.
# x and y are given by keyword because seaborn >= 0.12 rejects positional x/y.
sns.boxenplot(data=data, x='overall', y='age', hue='preferred_foot', palette='Greys')
plt.title('Relationships among Overall Scores and age wrt Preferred foot', fontsize=20)
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = (30, 20)

core_columns = ['age', 'nationality', 'overall', 'potential', 'club', 'value_eur',
                'wage_eur', 'preferred_foot', 'international_reputation', 'weak_foot',
                'skill_moves', 'work_rate', 'body_type', 'team_position', 'height_cm', 'weight_kg',
                'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic']

# Several of the selected columns hold text. Older pandas silently skipped them in
# .corr(), but pandas >= 2.0 raises instead, so keep only the numeric columns
# explicitly before computing the pairwise correlations.
core_correlations = data[core_columns].select_dtypes(include='number').corr()
sns.heatmap(core_correlations, annot=True)

# The figure is a correlation heatmap, not a histogram; title it accordingly.
plt.title('Correlation Heatmap of the Dataset', fontsize=30)
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = (30, 20)

all_columns = ['age', 'nationality', 'overall', 'potential', 'club', 'value_eur',
               'wage_eur', 'preferred_foot', 'international_reputation', 'weak_foot',
               'skill_moves', 'work_rate', 'body_type', 'team_position', 'height_cm', 'weight_kg',
               'pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
               'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed',
               'gk_positioning', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
               'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
               'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed',
               'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping',
               'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions',
               'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking',
               'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
               'goalkeeping_positioning', 'goalkeeping_reflexes']

# Several of the selected columns hold text. Older pandas silently skipped them in
# .corr(), but pandas >= 2.0 raises instead, so keep only the numeric columns
# explicitly before computing the pairwise correlations.
all_correlations = data[all_columns].select_dtypes(include='number').corr()
sns.heatmap(all_correlations, annot=True)

# The figure is a correlation heatmap, not a histogram; title it accordingly.
plt.title('Correlation Heatmap of the Dataset', fontsize=30)
plt.show()

In [None]:
# Highest-rated player (by 'overall') for every team position.
# idxmax() returns index *labels*, so the rows must be fetched with .loc; .iloc is
# positional and only gave the same rows while the index was the default 0..n-1.
best_overall_idx = data.groupby('team_position')['overall'].idxmax()
data.loc[best_overall_idx, ['team_position', 'short_name', 'age',
                            'club', 'nationality', 'wage_eur', 'overall']].style.background_gradient('Reds')

In [None]:
# Player with the highest 'potential' for every team position.
# idxmax() returns index *labels*, so the rows must be fetched with .loc; .iloc is
# positional and only gave the same rows while the index was the default 0..n-1.
best_potential_idx = data.groupby('team_position')['potential'].idxmax()
data.loc[best_potential_idx, ['team_position', 'short_name', 'age',
                              'club', 'nationality', 'wage_eur', 'potential']].style.background_gradient('Blues')

In [None]:
# First ten left-footed players in table order, with a colour gradient applied
# by the styler.
is_left_footed = data['preferred_foot'] == 'Left'
left_footed = data.loc[is_left_footed, ['short_name', 'age', 'club', 'nationality', 'overall']]
left_footed.head(10).style.background_gradient(cmap='magma')

In [None]:
# First ten right-footed players in table order, with a colour gradient applied
# by the styler.
is_right_footed = data['preferred_foot'] == 'Right'
right_footed = data.loc[is_right_footed, ['short_name', 'age', 'club', 'nationality', 'overall']]
right_footed.head(10).style.background_gradient(cmap='copper')

In [None]:
# Regression plot of dribbling against shooting, one panel per preferred foot.
sns.lmplot(
    data=data,
    x='shooting',
    y='dribbling',
    col='preferred_foot',
)
plt.show()

In [None]:
# The eight most common nationalities and their player counts.
nationality_counts = data['nationality'].value_counts()
nationality_counts.iloc[:8]

In [None]:
# Countries to compare. 'Colombia' replaces the misspelled 'Columbia', which
# matches no row if the dataset uses the usual spelling
# (NOTE(review): confirm against the value_counts output).
some_countries = ('England', 'Germany', 'Spain', 'France', 'Argentina', 'Brazil', 'Italy', 'Colombia')

# Select rows by nationality only. The previous mask was additionally combined with
# `& data['overall']`, which ANDs the boolean mask bitwise with an integer column,
# so rows were kept or dropped depending on the bits of their rating.
data_countries = data.loc[data['nationality'].isin(some_countries)]

plt.rcParams['figure.figsize'] = (15, 7)
ax = sns.violinplot(x=data_countries['nationality'], y=data_countries['overall'], palette='Reds')
ax.set_xlabel(xlabel='Countries', fontsize=9)
ax.set_ylabel(ylabel='Overall Rating of Players', fontsize=9)
ax.set_title(label='Distribution of Overall Rating of players from different countries', fontsize=20)
plt.show()

In [None]:
# Clubs to compare. 'Manchester City' replaces the misspelled 'Manchestar City',
# which could never match a club name.
some_clubs = ('CD Leganés', 'Southampton', 'RC Celta', 'Empoli', 'Fortuna Düsseldorf', 'Manchester City',
              'Tottenham Hotspur', 'FC Barcelona', 'Valencia CF', 'Chelsea', 'Real Madrid')

# Select rows by club only. The previous mask was additionally combined with
# `& data['overall']`, which ANDs the boolean mask bitwise with an integer column,
# so rows were kept or dropped depending on the bits of their rating.
data_clubs = data.loc[data['club'].isin(some_clubs)]

plt.rcParams['figure.figsize'] = (15, 8)
ax = sns.boxplot(x=data_clubs['club'], y=data_clubs['overall'], palette='spring')
ax.set_xlabel(xlabel='Some Popular Clubs', fontsize=9)
ax.set_ylabel(ylabel='Overall Score', fontsize=9)
ax.set_title(label='Distribution of Overall Score in Different popular Clubs', fontsize=20)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Attribute columns averaged per position. This is a list rather than a tuple
# because pandas >= 2.0 treats a tuple passed to groupby[...] as ONE column key
# (raising KeyError), whereas a list always selects several columns.
player_features = ['pace', 'shooting', 'passing', 'dribbling', 'defending', 'physic',
                   'gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed',
                   'gk_positioning', 'attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
                   'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve', 'skill_fk_accuracy',
                   'skill_long_passing', 'skill_ball_control', 'movement_acceleration', 'movement_sprint_speed',
                   'movement_agility', 'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping',
                   'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 'mentality_interceptions',
                   'mentality_positioning', 'mentality_vision', 'mentality_penalties', 'mentality_composure', 'defending_marking',
                   'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
                   'goalkeeping_positioning', 'goalkeeping_reflexes']

# Mean of every attribute per team position, then the three strongest attributes
# of each position. The format string has exactly three feature slots, so only
# three are requested (a fourth was previously computed and silently discarded).
position_means = data.groupby('team_position')[player_features].mean()
for position, means in position_means.iterrows():
    print('Position {}: {}, {}, {}'.format(position, *means.nlargest(3).index))

In [None]:
from math import ceil, pi

# Mean attribute values per team position; each row becomes one radar chart of
# that position's five highest-scoring attributes.
# list(...) makes the column selection work whether player_features is a tuple or
# a list (pandas >= 2.0 treats a tuple as a single column key).
position_means = data.groupby('team_position')[list(player_features)].mean()

# Size the subplot grid from the number of positions instead of assuming that at
# most 10 x 3 = 30 of them exist; 4.5 inches per row reproduces the original
# 15 x 45 figure when there are 10 rows.
N_COLS = 3
n_rows = max(1, ceil(len(position_means) / N_COLS))
plt.figure(figsize=(15, 4.5 * n_rows))

for idx, (position_name, features) in enumerate(position_means.iterrows(), start=1):
    top_features = features.nlargest(5)
    categories = list(top_features.index)
    N = len(categories)

    # Repeat the first point at the end so the plotted polygon closes.
    values = top_features.tolist()
    values += values[:1]

    # One evenly spaced angle per attribute, closed the same way as the values.
    angles = [n / float(N) * 2 * pi for n in range(N)]
    angles += angles[:1]

    ax = plt.subplot(n_rows, N_COLS, idx, polar=True)

    # Attribute names around the circle (the duplicated closing angle is skipped).
    plt.xticks(angles[:-1], categories, color='grey', size=8)

    # Radial axis: labels drawn at angle 0, fixed 0-100 range for comparability.
    ax.set_rlabel_position(0)
    plt.yticks([25, 50, 75], ["25", "50", "75"], color="grey", size=7)
    plt.ylim(0, 100)

    ax.plot(angles, values, linewidth=1, linestyle='solid')
    ax.fill(angles, values, 'b', alpha=0.1)

    plt.title(position_name, size=11, y=1.1)

# Vertical spacing does not depend on the loop variable, so set it once.
plt.subplots_adjust(hspace=0.5)
plt.show()