# Description
- The dataset provides player data for Career Mode from FIFA 15 to FIFA 21; this notebook uses the FIFA 21 file ("players_21.csv"). The data allows the same players to be compared across the last 7 versions of the video game.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the FIFA 21 players CSV from the Kaggle input directory into `data`;
# every later cell reads from (and several add columns to) this frame.
data = pd.read_csv('/kaggle/input/fifa-21-complete-player-dataset/players_21.csv')

# Detach ability Column
- Separate the ability columns from the non-ability columns so that players can be evaluated by their abilities alone.
 

In [None]:
# Start from every column name; the non-ability ones are removed in a later cell.
ability_cols = data.columns.tolist()

In [None]:
# Display all column names so the non-ability ones can be picked out by hand.
data.columns.values

In [None]:
# Columns describing identity, contract, physique, etc. rather than ability.
non_ability_cols = ['sofifa_id','player_url','short_name','long_name','age','dob','height_cm','weight_kg','nationality','club_name',
                   'league_name','league_rank','value_eur','wage_eur','player_positions','preferred_foot','body_type','real_face',
                   'release_clause_eur','player_tags','team_position','team_jersey_number','loaned_from','joined','contract_valid_until',
                   'nation_position','nation_jersey_number','player_traits']
# Filter instead of calling list.remove() once per column: remove() raises
# ValueError when a name is already gone (e.g. when this cell is re-run) or is
# missing from the file.  The filter keeps the original column order and is
# safe to execute repeatedly.
excluded_cols = set(non_ability_cols)
ability_cols = [col for col in ability_cols if col not in excluded_cols]

In [None]:
# Sanity check: total column count, ability column count, non-ability column count.
len(data.columns), len(ability_cols) , len(non_ability_cols)

In [None]:
# Display the remaining (ability) column names as an array.
np.array(ability_cols)

# Best by Position


## Distribute Critical ability Column by Position


In [None]:
# Columns averaged into each position-group score.  Every list is assembled
# from attribute families so the composition is easy to audit; the resulting
# order is the same as a flat literal.  The trailing short codes ('ls', 'st',
# ...) are the per-position rating columns.

# Forwards.
FW_ability = (
    ['weak_foot', 'skill_moves', 'FW_work_rate', 'shooting', 'dribbling', 'physic']
    + ['attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
       'attacking_short_passing', 'attacking_volleys']
    + ['skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing',
       'skill_ball_control']
    + ['movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance']
    + ['power_shot_power', 'power_jumping', 'power_stamina', 'power_strength',
       'power_long_shots']
    + ['ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw']
)

# Midfielders.
MF_ability = (
    ['skill_moves', 'MF_work_rate', 'passing', 'dribbling', 'physic']
    + ['skill_dribbling', 'skill_curve', 'skill_fk_accuracy', 'skill_long_passing',
       'skill_ball_control']
    + ['movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance']
    + ['mentality_aggression', 'mentality_interceptions', 'mentality_positioning',
       'mentality_vision', 'mentality_penalties', 'mentality_composure']
    + ['lam', 'cam', 'ram', 'lm', 'lcm', 'cm', 'rcm', 'rm']
)

# Defenders.
DF_ability = (
    ['DF_work_rate', 'defending', 'physic']
    + ['mentality_aggression', 'mentality_interceptions', 'mentality_positioning',
       'mentality_vision', 'mentality_penalties', 'mentality_composure']
    + ['defending_standing_tackle', 'defending_sliding_tackle']
    + ['lwb', 'ldm', 'cdm', 'rdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb']
)

# Goalkeepers.
GK_ability = (
    ['gk_diving', 'gk_handling', 'gk_kicking', 'gk_reflexes', 'gk_speed', 'gk_positioning']
    + ['goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
       'goalkeeping_positioning', 'goalkeeping_reflexes']
)

## Converting string position ability to numbers

- Calculates and converts position ability in the form of strings such as '90+2' into numbers

- Some values have no number after the '+', such as '90+'; in that case the missing modifier is treated as 0.

In [None]:
def _parse_position_rating(value):
    """Convert one rating such as '90+2', '90+', '88-1' or '70' to a number.

    Non-string values (NaN, or numbers produced by an earlier run) are
    returned unchanged so the conversion can be applied more than once.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    for sign, separator in ((1, '+'), (-1, '-')):
        base, found, modifier = text.partition(separator)
        if found:
            # An empty modifier ('90+') counts as zero.
            return int(base) + sign * int(modifier or 0)
    return int(text)


def calculating_position(col):
    """Convert a column of position-rating strings to numbers.

    Each entry has the form 'base+modifier' (for example '90+2'); the result
    is ``base + modifier``.  A missing modifier ('90+') is treated as 0, a
    '-' modifier is subtracted, a plain number string is parsed as-is, and
    non-string entries pass through untouched.

    Parameters
    ----------
    col : pandas.Series or mutable sequence
        The ratings to convert.

    Returns
    -------
    pandas.Series or the input sequence
        For a Series, a new Series with the same index and name (the input
        is not modified, and the conversion does not depend on the index
        labels).  Any other sequence is updated in place and returned.
    """
    parsed = [_parse_position_rating(value) for value in col]
    if isinstance(col, pd.Series):
        return pd.Series(parsed, index=col.index, name=col.name)
    col[:] = parsed
    return col

In [None]:
# Convert every column from position 80 onward, one column at a time, and
# write the converted values back into the same slots.
position_block = data.iloc[:, 80:]
data.iloc[:, 80:] = position_block.apply(calculating_position)

In [None]:
# Display the columns from position 80 onward to inspect the conversion result.
data.iloc[:,80:]

## All abilities to 0 ~ 100 score
- The weak_foot and skill_moves columns are scored on a 0 ~ 5 scale.
- Therefore, multiply them by 20 to convert them to a 0 ~ 100 scale.

In [None]:
# Rescale the 5-point columns by a factor of 20 so they share the 0-100 range
# of the other ability columns.
for five_point_col in ('weak_foot', 'skill_moves'):
    data[five_point_col] = data[five_point_col] * 20

## Calculation of activity by position

- The work_rate column is in the form of 'Attack Activity/Defense Activity', and each has a value of High/Medium/Low.
- Therefore, convert the work_rate string into attack and defense activity scores (High = 100, Medium = 67, Low = 34); the midfielder activity score is the average of the two.

In [None]:
def calculating_FW_work_rate(s):
    """Score the attacking half of an 'Attack/Defense' work-rate string.

    The text before the first '/' is mapped to 100 for 'High' and 67 for
    'Medium'; any other value scores 34.
    """
    attack_level = s.split('/')[0]
    named_scores = {'High': 100, 'Medium': 67}
    return named_scores.get(attack_level, 34)
    
def calculating_MF_work_rate(s):
    """Score a midfielder's work rate from an 'Attack/Defense' string.

    Both halves are mapped to a score (High = 100, Medium = 67, Low = 34)
    and the two scores are averaged.  An unrecognised level scores 34, the
    same fallback used by calculating_FW_work_rate and
    calculating_DF_work_rate, rather than being left as a string and
    failing with a TypeError during the addition.

    Parameters
    ----------
    s : str
        Work rate in the form 'Attack/Defense', e.g. 'High/Medium'.

    Returns
    -------
    float
        Mean of the attack and defense scores.

    Raises
    ------
    IndexError
        If ``s`` contains no '/'.
    """
    level_scores = {'High': 100, 'Medium': 67, 'Low': 34}
    parts = s.split('/')
    attack_score = level_scores.get(parts[0], 34)
    defense_score = level_scores.get(parts[1], 34)
    return (attack_score + defense_score) / 2
    
def calculating_DF_work_rate(s):
    """Score the defensive half of an 'Attack/Defense' work-rate string.

    The text after the first '/' is mapped to 100 for 'High' and 67 for
    'Medium'; any other value scores 34.
    """
    defense_level = s.split('/')[1]
    return 100 if defense_level == 'High' else 67 if defense_level == 'Medium' else 34

In [None]:
# Derive one numeric work-rate column per position group from `work_rate`.
for target_col, scorer in (
    ('FW_work_rate', calculating_FW_work_rate),
    ('MF_work_rate', calculating_MF_work_rate),
    ('DF_work_rate', calculating_DF_work_rate),
):
    data[target_col] = data['work_rate'].map(scorer)

## Ability score calculation by position

- Calculate the ability score for each position as the average of the abilities that matter for that position.
- Then print the top 5 players by ability score for each position, along with the top 5 by overall score.

In [None]:
# For each position group, average that group's ability columns row by row
# and store the result as '<group>_ability_score'.
for group, group_cols in (
    ('FW', FW_ability),
    ('MF', MF_ability),
    ('DF', DF_ability),
    ('GK', GK_ability),
):
    data[group + '_ability_score'] = data[group_cols].apply(lambda row: row.mean(), axis=1)

In [None]:
# Top 5 players by `overall`, with name and listed positions.
(
    data
    .sort_values(by='overall', ascending=False)
    .loc[:, ['short_name', 'overall', 'player_positions']]
    .head(5)
)

In [None]:
# Top 5 players by forward ability score.
(
    data
    .sort_values(by='FW_ability_score', ascending=False)
    .loc[:, ['short_name', 'FW_ability_score', 'player_positions']]
    .head(5)
)

In [None]:
# Top 5 players by midfielder ability score.
(
    data
    .sort_values(by='MF_ability_score', ascending=False)
    .loc[:, ['short_name', 'MF_ability_score', 'player_positions']]
    .head(5)
)

In [None]:
# Top 5 players by defender ability score.
(
    data
    .sort_values(by='DF_ability_score', ascending=False)
    .loc[:, ['short_name', 'DF_ability_score', 'player_positions']]
    .head(5)
)

In [None]:
# Top 5 players by goalkeeper ability score.
(
    data
    .sort_values(by='GK_ability_score', ascending=False)
    .loc[:, ['short_name', 'GK_ability_score', 'player_positions']]
    .head(5)
)

## World Best 11 
- World Best 11 by Position

                   Cristiano Ronaldo		                   L. Messi	

         Neymar Jr		         K. De Bruyne	    Bruno Fernandes	       M. Salah

         Fabinho		         Fabinho		    Sergio Ramos	       N. Kanté	
			
                                          M. Neuer	

# Ranking by ability

## Overall ability Top 10

In [None]:
# Top 10 players by `overall`, with their age.
(
    data
    .sort_values(by='overall', ascending=False)
    .loc[:, ['short_name', 'age', 'overall']]
    .head(10)
)

## Potential ability Top 10

In [None]:
# Top 10 players by `potential`, with their age.
(
    data
    .sort_values(by='potential', ascending=False)
    .loc[:, ['short_name', 'age', 'potential']]
    .head(10)
)

## Speed Top 10

In [None]:
# Speed index: the mean of pace, acceleration and sprint speed.
speed_total = data['pace'] + data['movement_acceleration'] + data['movement_sprint_speed']
data['overall_speed'] = speed_total / 3

In [None]:
# Top 10 players by the speed index.
(
    data
    .sort_values(by='overall_speed', ascending=False)
    .loc[:, ['short_name', 'age', 'overall_speed']]
    .head(10)
)

# Non-ability attributes to consider

- Age
- Height
- BMI
- Nationality
- Wage_eur
- Weak_foot
- Real face
- Prospect

In [None]:
# Display the non-ability column names as an array.
np.array(non_ability_cols)

# Age

In [None]:
# Plot the distribution of player ages.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['age'])

## 
Most of them are aged 20 to 30, and the number decreases rapidly after the age of 30.


In [None]:
# Scatter plot of overall rating against age.
sns.scatterplot(x='age', y='overall', data=data)

In [None]:
# Names and ages of the 10 players with the highest `overall`.
(
    data
    .sort_values(by='overall', ascending=False)
    .loc[:, ['short_name', 'age']]
    .head(10)
)

In [None]:
# Rows for players older than 50.
older_than_50 = data['age'] > 50
data[older_than_50]

##  
- Of course, individuals may lose physical ability as they get older, but as Messi, Ronaldo, and Lewandowski show (still ranked among the world's best at the ages of 33, 35, and 31), ability varies widely regardless of age.

- The data even contains a player who is still active past the age of 50. I looked this up to check whether it is plausible: the player is a legendary Japanese footballer who is still playing and setting longevity records.

# Height

In [None]:
# Plot the distribution of player heights (cm).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['height_cm'])

In [None]:
# Mean height (cm) of the 20 players with the highest `overall`.  Only the
# numeric column is selected before .mean(): averaging a frame that still
# holds the string column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='overall', ascending=False)[['height_cm']].head(20).mean()

In [None]:
# Mean height (cm) of the 10 best forwards.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='FW_ability_score', ascending=False)[['height_cm']].head(10).mean()

In [None]:
# Mean height (cm) of the 10 best midfielders.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='MF_ability_score', ascending=False)[['height_cm']].head(10).mean()

In [None]:
# Mean height (cm) of the 10 best defenders.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='DF_ability_score', ascending=False)[['height_cm']].head(10).mean()

In [None]:
# Mean height (cm) of the 10 best goalkeepers.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='GK_ability_score', ascending=False)[['height_cm']].head(10).mean()

##  
- By position, the top-10 defenders and goalkeepers are taller on average than the top-10 strikers and midfielders. In particular, taller goalkeepers tend to have longer limbs, and reach is an important factor for the goalkeeper position.

# BMI

In [None]:
# BMI = weight (kg) divided by the square of height (m).
height_m = data['height_cm'] / 100
data['BMI'] = data['weight_kg'] / (height_m * height_m)

In [None]:
# Plot the distribution of the computed BMI values.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['BMI'])

In [None]:
# Mean BMI of the 100 players with the highest `overall`.
(
    data
    .sort_values(by='overall', ascending=False)['BMI']
    .head(100)
    .mean()
)

In [None]:
# Mean BMI of the 100 players with the lowest `overall`.
(
    data
    .sort_values(by='overall', ascending=False)['BMI']
    .tail(100)
    .mean()
)

##  
- Calculate BMI by Height and Weight
- Most soccer players have values between 20 and 25, and the distribution is roughly normal.
- Given the small difference in BMI figures between the top 100 and the bottom 100 players based on their abilities, the BMI figure itself is not thought to have a significant impact on their ability.

# Nationality

In [None]:
# Top 10 'Korea Republic' players by forward ability score.
(
    data[data['nationality'] == 'Korea Republic']
    .sort_values(by='FW_ability_score', ascending=False)
    .loc[:, ['long_name', 'player_positions']]
    .head(10)
)

In [None]:
# Top 10 'Korea Republic' players by midfielder ability score.
(
    data[data['nationality'] == 'Korea Republic']
    .sort_values(by='MF_ability_score', ascending=False)
    .loc[:, ['long_name', 'player_positions']]
    .head(10)
)

In [None]:
# Top 10 'Korea Republic' players by defender ability score.
(
    data[data['nationality'] == 'Korea Republic']
    .sort_values(by='DF_ability_score', ascending=False)
    .loc[:, ['long_name', 'player_positions']]
    .head(10)
)

In [None]:
# Top 10 'Korea Republic' players by goalkeeper ability score.
(
    data[data['nationality'] == 'Korea Republic']
    .sort_values(by='GK_ability_score', ascending=False)
    .loc[:, ['long_name', 'player_positions']]
    .head(10)
)

## 
- Top 10 by Position of South Korean Players
- South Korea best 11 consisting of the best by position
 
               손흥민	                 황희찬
           
        이강인	       김보경		이재성	      권창훈
	
        최철순	       김영권		홍정호	      이용
	 
                            조현우

# International reputation 

In [None]:
# International reputation of the 5 players with the highest `overall`.
(
    data
    .sort_values(by='overall', ascending=False)
    .loc[:, ['short_name', 'international_reputation']]
    .head(5)
)

In [None]:
# Box plot of overall rating for each international-reputation level.
sns.boxplot(x='international_reputation', y='overall', data=data)

## 
- International reputation tends to rise with the player's overall ability.

In [None]:
# Box plot of wage (EUR) for each international-reputation level.
sns.boxplot(x='international_reputation', y='wage_eur', data=data)

In [None]:
# Players with reputation 5 whose wage exceeds 500,000 EUR.
has_top_reputation = data['international_reputation'] == 5
has_very_high_wage = data['wage_eur'] > 500000
data[has_top_reputation & has_very_high_wage]

## 
- Even among the players with the highest reputation, Messi stands out as being paid more than the rest.
- However, he is considered the best player in the history of soccer, beyond the world's best, so it is understandable.

In [None]:
# Upper wage outliers among reputation-3 players, using the
# Q3 + 1.5 * IQR rule on that group's wages.
is_reputation_3 = data['international_reputation'] == 3
Q1, Q3 = np.percentile(data[is_reputation_3]['wage_eur'], [25, 75])
IQR = Q3 - Q1
upper_fence = Q3 + 1.5 * IQR
data[is_reputation_3 & (data['wage_eur'] > upper_fence)][['short_name', 'wage_eur']]

## 
- Among players with an intermediate reputation, six players are paid noticeably more than the others, but considering their careers and abilities, this is also understandable.


# Weak_foot

In [None]:
# Count of players at each weak_foot score.  The column is passed by keyword:
# a positional vector is interpreted as `data` (not `x`) by seaborn >= 0.12,
# while the keyword form works across seaborn versions.
sns.countplot(x='weak_foot', data=data)

In [None]:
# Mean weak_foot score of the 20 players with the highest `overall`.  Only
# the numeric column is selected before .mean(): averaging a frame that still
# holds the string column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='overall', ascending=False)[['weak_foot']].head(20).mean()

In [None]:
# Mean weak_foot score of the 20 best forwards.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='FW_ability_score', ascending=False)[['weak_foot']].head(20).mean()

In [None]:
# Mean weak_foot score of the 20 best midfielders.  Only the numeric column
# is selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='MF_ability_score', ascending=False)[['weak_foot']].head(20).mean()

In [None]:
# Mean weak_foot score of the 20 best defenders.  Only the numeric column is
# selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='DF_ability_score', ascending=False)[['weak_foot']].head(20).mean()

In [None]:
# Mean weak_foot score of the 20 best goalkeepers.  Only the numeric column
# is selected before .mean(): averaging a frame that still holds the string
# column `short_name` raises TypeError on pandas >= 2.0.
data.sort_values(by='GK_ability_score', ascending=False)[['weak_foot']].head(20).mean()

## 
- In terms of top-class players by position, the striker has the highest weak_foot score.
- I think this is due to the fact that being able to shoot from various positions and angles is an important ability for the striker.

# real face

In [None]:
# Count how many players fall under each `real_face` value.
data['real_face'].value_counts()

In [None]:
# Overlay the `overall` distributions of players with real_face 'Yes' and 'No'.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data[data['real_face']=='Yes']['overall'])
sns.distplot(data[data['real_face']=='No']['overall'])
# Legend labels follow the plotting order above.
plt.legend(['Real Face','No Real Face'])

In [None]:
# Player counts per international-reputation level, split by real_face.
# Columns are passed by keyword: a positional vector is interpreted as `data`
# (not `x`) by seaborn >= 0.12, while this form works across seaborn versions.
sns.countplot(x='international_reputation', hue='real_face', data=data)
plt.legend(loc='upper right')

In [None]:
# Same count plot, restricted to players whose reputation is not 1.
# The rows are filtered once so that `x` and `hue` come from the same subset
# (the hue vector was previously taken from the unfiltered frame), and the
# columns are passed by keyword because seaborn >= 0.12 interprets a
# positional vector as `data`, not `x`.
reputation_above_1 = data[data['international_reputation'] != 1]
sns.countplot(x='international_reputation', hue='real_face', data=reputation_above_1)
plt.legend(loc='upper right')

## 
- Whether or not to implement the real face in the game is considered to be determined by the player's ability and reputation.

# Prospect(Rookie)

## 
- The value_eur is considered to be a Euro conversion of the value of the player, taking into account the potential of the player in addition to its current capabilities.
- The potential is thought to represent only the potential of the player.
- Therefore, we first scale the two numbers, which have different ranges of values, and then select the top 10 world prospects (rookies) aged 25 or under using a prospect_index calculated with a 2:8 weighting between value_eur and potential.

In [None]:
# Plot the distribution of player market value (EUR).
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['value_eur'])

In [None]:
# Plot the distribution of the `potential` rating.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['potential'])

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale value_eur and potential so that the two columns, which have
# very different ranges, become comparable.
scaler = MinMaxScaler()
df = pd.DataFrame(
    scaler.fit_transform(data[['value_eur', 'potential']]),
    columns=['scaled_value', 'scaled_potential'],
    # fit_transform returns a bare array; reuse data's index so the scaled
    # rows line up with their players when the two frames are combined, even
    # if `data` does not carry a default 0..n-1 index.
    index=data.index,
)
df

In [None]:
# Attach the scaled columns to `data`.  Any copies left by an earlier run of
# this cell are dropped first: otherwise re-running would create duplicate
# column names, and data['scaled_value'] would then return a DataFrame
# instead of a Series.
data = pd.concat([data.drop(columns=df.columns, errors='ignore'), df], axis=1)

In [None]:
# Overlay the distributions of the two scaled columns on one axis.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases
# (histplot/displot replace it) - confirm the installed version before migrating.
sns.distplot(data['scaled_value'])
sns.distplot(data['scaled_potential'])

In [None]:
# Prospect index: 20% scaled market value plus 80% scaled potential.
data['prospect_index'] = 0.2 * data['scaled_value'] + 0.8 * data['scaled_potential']

In [None]:
# Top 10 players aged 25 or younger by prospect index.
(
    data[data['age'] <= 25]
    .sort_values(by='prospect_index', ascending=False)
    .loc[:, ['short_name', 'age', 'prospect_index']]
    .head(10)
)