In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.graph_objects as go

%matplotlib inline

# Show all rows and columns without hiding them - jupyter creates a scrollbar
# NOTE: with both limits set to None, displaying a whole DataFrame renders
# every row and column; use .head()/.sample() when previewing large frames.
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Collect the full path of every file below the Kaggle input directory,
# then print them one per line
input_paths = [
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
]
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Data Fetching and Understanding the data

In [None]:
# Load the FIFA 20 player table from the Kaggle input directory
df = pd.read_csv(
    "../input/fifa-20-complete-player-dataset/players_20.csv"
)

# Preview the first five rows
df.head()

In [None]:
# To check all the features of the dataset
# (the column index of the raw frame; the bare name on the last line displays it)
cols = df.columns
cols

In [None]:
# Taking cols required for data analysis
required_cols = [
    'short_name', 'age', 'dob', 'height_cm', 'weight_kg',
    'nationality', 'club', 'overall', 'potential',
    'value_eur', 'wage_eur', 'player_positions', 'preferred_foot',
    'shooting', 'passing', 'dribbling', 'defending', 'physic',
    'mentality_composure',
]

# Take an explicit copy: later cells add columns to fifa20_df, and writing
# into a bare slice of `df` is what triggers pandas' SettingWithCopyWarning.
fifa20_df = df[required_cols].copy()

In [None]:
# (number of rows, number of columns) of the trimmed frame
fifa20_df.shape

In [None]:
# checking for min and max in features - useful for cleaning purposes
# (summary statistics per column: count, mean, std, min, quartiles, max)

fifa20_df.describe()

In [None]:
# datatypes to get an understanding of what values might be and if we need to change the format
# (prints one line per column: column name and its pandas dtype)

print(fifa20_df.dtypes)

## Cleaning the data

In [None]:
# checking for null or na values
# (prints, for each column, how many entries are missing)

print(fifa20_df.isnull().sum())

In [None]:
# Removing null values
# NOTE(review): dropna() with default arguments discards a row when ANY of the
# selected columns is missing — presumably this removes whole groups of
# players that lack some skill ratings; confirm against the per-column null
# counts before relying on the remaining sample.
fifa20_df = fifa20_df.dropna()
# Re-check: every column should now report zero missing values
print(fifa20_df.isnull().sum())

## Calculating BMI and Mental Composure of Players

In [None]:
# Function for calculating BMI

def BMI_calc(weight_kg,height_cm):
    """Classify a body mass index into a weight category.

    BMI is the weight in kilograms divided by the squared height in metres;
    the height is supplied in centimetres and converted here.

    Returns:
        "Underweight" for BMI < 18.5, "Healthy" for 18.5 <= BMI < 25,
        "Overweight" for 25 <= BMI < 30, and "Obese" otherwise.
    """
    bmi = weight_kg/((height_cm/100)**2)

    # Exclusive upper bounds, checked in ascending order
    categories = (
        (18.5, "Underweight"),
        (25, "Healthy"),
        (30, "Overweight"),
    )
    for upper_bound, label in categories:
        if bmi < upper_bound:
            return label
    return "Obese"

In [None]:
# Calculating BMI for the given dataset

# Build the whole column in one pass. The previous iterrows() loop wrote one
# cell at a time through .loc, which is far slower on a frame of this size
# and yields the same labels.
fifa20_df["BMI"] = [
    BMI_calc(weight, height)
    for weight, height in zip(fifa20_df["weight_kg"], fifa20_df["height_cm"])
]

fifa20_df.head(5)

In [None]:
# lets create some categorical data on mental composure of players

# The mentality_composure score is mapped to a category as follows:

# < 51 => below average ("Unstable and Turbulent")
# >= 51 and < 67 => average ("Stable")
# >= 67 => above average ("Calm and Composed")

def mental_calc(m):
    """Translate a composure score into a descriptive category.

    Returns:
        "Calm and Composed" for scores of 67 and above, "Stable" for scores
        from 51 up to (but excluding) 67, "Unstable and Turbulent" for scores
        below 51, and None when the score satisfies none of the comparisons
        (e.g. NaN).
    """
    if m >= 67:
        return "Calm and Composed"
    if m >= 51:
        return "Stable"
    if m < 51:
        return "Unstable and Turbulent"
    return None

In [None]:
# Derive the categorical label for every player in one vectorised pass.
# The previous iterrows() loop wrote one cell at a time through .loc, which
# is far slower on a frame of this size and yields the same labels.
fifa20_df["mental_composure"] = fifa20_df["mentality_composure"].map(mental_calc)

fifa20_df.head(5)

## Correlations and Heatmap

In [None]:
# Pearson correlation between the numeric features only. The frame also
# holds text columns (names, clubs, BMI category, ...) and DataFrame.corr()
# raises on those in pandas >= 2.0, so select the numeric columns explicitly.
fifa20_corr = fifa20_df.select_dtypes(include="number").corr()

# Readable axis labels keyed by column name, so each label stays attached to
# the right column whatever the column order; a column without an entry
# falls back to its raw name instead of shifting every label by one.
pretty_names = {
    "age": "Age",
    "height_cm": "Height(cm)",
    "weight_kg": "Weight(kg)",
    "overall": "Overall",
    "potential": "Potential",
    "value_eur": "Value(eur)",
    "wage_eur": "Wage(eur)",
    "shooting": "Shooting",
    "passing": "Passing",
    "dribbling": "Dribbling",
    "defending": "Defending",
    "physic": "Physic",
    "mentality_composure": "Mentality",
}
tick_labels = [pretty_names.get(col, col) for col in fifa20_corr.columns]

plt.figure(figsize=(15,10))
sns.heatmap(fifa20_corr,
            cmap="BrBG",
            linewidths=1,  # documented seaborn keyword for the cell borders
            linecolor="black",
            xticklabels=tick_labels,
            yticklabels=tick_labels,
            square=True
           )

### Observations

- Height is negatively correlated with dribbling skills. So players who are good at dribbling are usually shorter

> Shorter Strides: Shorter people are more comfortable making shorter strides, which allows them to change direction quickly and gives them superior ball control. This explains why they are better dribblers.

- Age is negatively correlated with potential
    - This is obvious as talented young players are said to have more potential

- Players who are good shooters are usually bad defenders as they are negatively correlated

- Shooting, Passing and Dribbling are all negatively correlated with height and weight or BMI (Body mass index)

> They found that 92 percent of the players fell in the body mass index range rated normal, between 20 and 24.9 -- in practical terms, normal BMI coincides with ideal weight. The normal weight range for a player 5 feet 9 inches tall is 136 to 169 pounds.

- wage_eur and value_eur are positively correlated, which makes sense, as a player's wage increases as his reputation grows

### Having a Good Mental Composure and why is it important?

Composed athletes have the ability to remain calm, persistent, and in control under pressure situations or after mistakes. These athletes are “gamers” who thrive under competitive pressure and are able to raise their level of performance during competition.

>Mental skills are a necessity for peak performance in athletic events and are crucial for non-sports situations as well. Mental acuity is not found but learned and developed by any athlete who gives an effort.

In [None]:
# Bivariate density of overall vs. potential rating, drawn as five contour
# levels per mental-composure category
fig, ax = plt.subplots(figsize=(10,8))
sns.kdeplot(
    data=fifa20_df,
    x="overall",
    y="potential",
    hue="mental_composure",
    palette="viridis",
    levels=5,
    ax=ax,
)

In [None]:
# Distribution of the overall rating, with one side-by-side ("dodge") bar per
# mental-composure category in every bin
fig, ax = plt.subplots(figsize=(12,10))
sns.histplot(
    data=fifa20_df,
    x="overall",
    hue="mental_composure",
    palette="plasma",
    multiple="dodge",
    ax=ax,
)

### Observations

- Players with a good mental composure are likely to have a greater overall score and potential to succeed
- Most of the players have a stable mind. Players with a greater overall score tend to be calm and composed; however, there are only a few of them

In [None]:
# One scatter panel per skill rating, each plotted against age and coloured
# by mental-composure category
skill_columns = ["dribbling","shooting","passing","defending"]
g = sns.pairplot(
    data=fifa20_df,
    x_vars=["age"],
    y_vars=skill_columns,
    hue="mental_composure",
    kind="scatter",
    palette="afmhot",
    height=6,
    aspect=1.3,
)

- These plots show that players who are older and more experienced, and who have good mental composure, have higher scores in these skills

In [None]:
# Empirical cumulative distribution of the raw composure score, one curve
# per mental-composure category
fig, ax = plt.subplots(figsize=(12,10))
sns.ecdfplot(
    data=fifa20_df,
    x="mentality_composure",
    hue="mental_composure",
    ax=ax,
)

## Value vs Wage

In [None]:
# Axis limits for the plot: value up to 125 million EUR, wage up to
# 600 thousand EUR
value_eur_limit = 1.25e8
wage_eur_limit = 600_000

# Scatter of wage against value with a fitted regression line and the
# marginal distribution of each variable
sns.jointplot(
    data=fifa20_df,
    x="value_eur",
    y="wage_eur",
    kind="reg",
    xlim=(0, value_eur_limit),
    ylim=(0, wage_eur_limit),
)

### Observations

- Linear relation between value and wage in euros
- Most of the players lie in the lower ranges, hence the denser scatter near the origin
- Very few players have a high value and a corresponding high wage

## Bringing BMI into the picture

In [None]:
# Number of players in each BMI category. groupby().size() counts the rows of
# every group directly, so no helper "count" column has to be written into
# fifa20_df (the earlier `bmi_df = fifa20_df` was an alias, not a copy, so
# adding the column there silently modified the main frame).
bmi_pie_data = fifa20_df.groupby("BMI").size()
bmi_pie_labels = bmi_pie_data.index

fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(20,10))

# Left panel: share of players per BMI category
pie_ax.pie(x=bmi_pie_data, labels=bmi_pie_labels, radius=0.7)

# Right panel: absolute counts, with the categories in the same order as the
# pie so the two panels can be compared directly
ax = sns.countplot(x="BMI", data=fifa20_df, order=list(bmi_pie_labels), ax=bar_ax)
for p in ax.patches:
    # Write the count just above the top-left corner of each bar
    ax.annotate('{:.0f}'.format(p.get_height()), (p.get_x(), p.get_height()+100))

plt.show()

In [None]:
# Overall rating per BMI category, split by mental-composure category:
# box plot on the left, violin plot on the right
fig, (box_ax, violin_ax) = plt.subplots(1, 2, figsize=(20,10))

sns.boxplot(
    data=fifa20_df,
    x="BMI",
    y="overall",
    hue="mental_composure",
    ax=box_ax,
)

sns.violinplot(
    data=fifa20_df,
    x="BMI",
    y="overall",
    hue="mental_composure",
    ax=violin_ax,
)

plt.show()

- Healthy, calm and composed players have greater overall score

In [None]:
# Every player's overall rating as a jittered point per BMI category, with
# left- and right-footed players drawn side by side
fig, ax = plt.subplots(figsize=(15,10))
sns.stripplot(
    data=fifa20_df,
    x="BMI",
    y="overall",
    hue="preferred_foot",
    dodge=True,
    jitter=True,
    ax=ax,
)

## Countries

In [None]:
# Larger fonts for the very tall figure below. set_context changes seaborn's
# global plotting context, so it also applies to plots drawn after this cell.
sns.set_context("paper",font_scale=1.4)
plt.figure(figsize=(15,80))

# One horizontal bar per nationality and preferred foot; the bar length is
# the aggregated age of the players in that group
sns.barplot(
    data=fifa20_df,
    x="age",
    y="nationality",
    hue="preferred_foot",
    palette="viridis",
)

### Observations

- A few countries, such as Indonesia, have only left-footed players
- Some others, such as Hong Kong, have only right-footed players
- The average ages shown for all of these groups lie in the range of 15 to 35

## Clubs

In [None]:
# Number of players for every (nationality, club) pair. size() counts the
# rows of each group directly, so fifa20_df needs no helper "count" column.
# The Series is named "count" because that is the column name it gets after
# reset_index(), which downstream code sorts on.
hierarchical = fifa20_df.groupby(["nationality","club"]).size().rename("count")

# Top 5 clubs by number of French players (the grouping is by player
# nationality, not by the country the club plays in)
print(hierarchical["France"].sort_values(ascending=False).head(5))

### Number of Italian players at each club

In [None]:
# Number of players of one nationality (here Italy) at each club,
# largest squads first
country_name = "Italy"
per_club = hierarchical[country_name].reset_index()
clubs = per_club.sort_values(by="count",ascending=False)

plt.figure(figsize=(15,20))
sns.barplot(data=clubs, x="count", y="club")
plt.show()