In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from statistics import mean
from sklearn.metrics import classification_report, accuracy_score
from pandas.plotting import scatter_matrix
import plotly.express as px
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found below the Kaggle input directory.
for folder, _, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# Base path of the FIFA 20 dataset files.
root = '/kaggle/input/fifa-20-complete-player-dataset/'
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the player table from the dataset directory.
df = pd.read_csv(os.path.join(root, 'players_20.csv'))

# Report how many values are missing in each column that has gaps. This is
# printed explicitly because a notebook cell only displays its last expression.
null_counts = df.isnull().sum()
print(null_counts[null_counts > 0])

# Replace missing numeric values with the mean of their column.
# `df.mean` has to be *called*: passing the bound method itself does not fill
# with the column means. numeric_only=True skips text columns, which have no
# mean (and make mean() raise on pandas >= 2.0).
df = df.fillna(df.mean(numeric_only=True))
df.describe()

In [None]:
# List every column name available in the player table.
list(df.columns)

***Analysis by Nation***

In [None]:
# Mean wage per nationality, keeping only nationalities with more than
# 100 players; the ten highest means are displayed.
wage_stats = df.groupby('nationality')['wage_eur'].agg(['mean', 'count'])
nation = (
    wage_stats
    .loc[wage_stats['count'] > 100]
    .sort_values(by='mean', ascending=False)
)
nation.head(10)

In [None]:
# Top 20 countries by mean player potential.
# Only countries with more than 100 players are considered.
nation = df.groupby('nationality')['potential'].agg(['mean', 'count'])
nation = nation[nation['count'] > 100]
nation = nation.sort_values(by='mean', ascending=False)

top_nations = nation.head(20)
fig = px.bar(top_nations, x=top_nations.index, y='mean')
# The plotted metric is `potential`; the previous title ('Mean wages') was a
# leftover from the wage analysis.
fig.update_layout(title_text='Mean potential')
fig.update_xaxes(title_text="<b> Countries </b>")
fig.update_yaxes(title_text="<b> Mean potential </b>")
fig.show()

In [None]:
# Mean overall rating per nationality, keeping only nationalities with more
# than 100 players; the ten highest means are displayed.
overall_stats = df.groupby('nationality')['overall'].agg(['mean', 'count'])
nation = (
    overall_stats
    .loc[overall_stats['count'] > 100]
    .sort_values(by='mean', ascending=False)
)
nation.head(10)

***Position Analysis***

In [None]:
# Mean wage grouped by the raw `player_positions` value, keeping only values
# that occur more than 100 times; up to the 20 best-paid ones are charted.
position = (
    df.groupby('player_positions')['wage_eur']
    .agg(['mean', 'count'])
    .loc[lambda stats: stats['count'] > 100]
    .sort_values(by='mean', ascending=False)
)

best_paid = position.head(20)
fig = px.bar(best_paid, x=best_paid.index, y='mean')
fig.update_layout(title_text='Best position to earn money')
fig.update_xaxes(title_text="<b> Positions </b>")
fig.show()

In [None]:
# Popular positions: how many players list each position code.
# NOTE(review): 'GK' is not in this list — confirm whether goalkeepers are
# intentionally left out of the distribution.
positions = ['RW', 'CF', 'ST', 'LW', 'RB', 'CB', 'CDM', 'CAM', 'LM', 'CM', 'LB',
             'RM', 'RWB', 'LWB']

# `player_positions` holds comma-separated codes. Split it into individual
# codes and count exact matches; a substring test such as str.contains('RW')
# would also count every 'RWB' (and 'LW' every 'LWB').
position_counts = (
    df['player_positions']
    .str.split(',')
    .explode()
    .str.strip()
    .value_counts()
    .reindex(positions, fill_value=0)  # keep the listed order, 0 for unseen codes
)
pos_dict = position_counts.to_dict()

fig = px.pie(values=list(pos_dict.values()), names=list(pos_dict.keys()),
             title='Position distribution')
fig.show()

***Players Ability Analysis***

In [None]:
# Pairwise correlation between physique, age, rating and wage columns,
# drawn as an annotated heatmap.
physique_cols = ['weight_kg', 'potential', 'overall', 'height_cm', 'age', 'wage_eur']
newdf = df[physique_cols]
correlation = newdf.corr()
sns.heatmap(correlation, annot=True, cmap="Reds")

In [None]:
# Who can be a good defender?
# Correlate defending skills with mentality, physique, age and wage.
# 'body_type' is intentionally not selected: it was listed twice and holds
# text labels, so it cannot take part in a numeric correlation (older pandas
# silently dropped it, pandas >= 2.0 raises on it).
defender_cols = [
    'defending_marking', 'defending_standing_tackle', 'defending_sliding_tackle',
    'mentality_aggression', 'mentality_interceptions',
    'age', 'height_cm', 'weight_kg', 'wage_eur',
]
defender = df[defender_cols]
corrdf = defender.corr()
sns.heatmap(corrdf, cmap="Reds", annot=True)

In [None]:
# Who can attack well?
# Correlate attacking skills with mentality, physique, age and wage.
# 'body_type' is intentionally not selected: it was listed twice and holds
# text labels, so it cannot take part in a numeric correlation (older pandas
# silently dropped it, pandas >= 2.0 raises on it).
attacker_cols = [
    'attacking_volleys', 'attacking_crossing', 'attacking_finishing',
    'attacking_heading_accuracy', 'attacking_short_passing',
    'mentality_aggression', 'mentality_interceptions',
    'age', 'height_cm', 'weight_kg', 'wage_eur',
]
# Named `attacker` rather than reusing `defender`, which described the
# previous (defending) analysis.
attacker = df[attacker_cols]
corrdf = attacker.corr()
sns.heatmap(corrdf, cmap="Reds", annot=True)

In [None]:
# Scatter plot of the crossing attribute against wage, to see whether
# attacking ability goes along with higher pay.
fig = px.scatter(df, x="attacking_crossing", y="wage_eur")
(
    fig.update_layout(title_text='Attacks vs Wage')
    .update_xaxes(title_text="<b> Attacking ability </b>")
    .update_yaxes(title_text="<b> wage </b>")
    .show()
)

In [None]:
# Can height earn players more money?
# Scatter plot of player height (x) against wage (y).
fig = px.scatter(df, x="height_cm", y="wage_eur")

fig.update_layout(title_text='height_cm vs Wage')
# Axis titles now match the plotted columns: previously the x axis was
# labelled "Attacking ability" and the y axis "height".
fig.update_xaxes(title_text="<b> Height (cm) </b>")
fig.update_yaxes(title_text="<b> Wage </b>")
fig.show()

In [None]:
# Highest earning players: the five players with the largest wage.
# `df` is re-sorted in place (as before) so later cells see the same row order.
df = df.sort_values(by=['wage_eur'], ascending=False)
# Show the wage itself — the ranking criterion — next to the name; previously
# only `overall` was displayed, which hid the value being ranked.
df[['short_name', 'wage_eur', 'overall']].head(5)

In [None]:
# Five players with the highest overall rating.
df = df.sort_values(by=['overall'], ascending=False)
df.loc[:, ['short_name', 'overall']].iloc[:5]