In [1]:
import pandas as pd

In [2]:
def load_data(path):
    """Read the CSV at ``path`` into a DataFrame using pandas' default options."""
    frame = pd.read_csv(path)
    return frame

def filter_leagues(df):
    """Return the rows of ``df`` whose ``league_level`` equals 1."""
    is_level_one = df['league_level'] == 1
    return df[is_level_one]

def filter_columns(df):
    """Return ``df`` without a fixed set of columns that the notebook does not use.

    Every listed column must be present; pandas raises ``KeyError`` otherwise.
    """
    unused_columns = [
        'player_url', 'long_name', 'potential', 'league_level',
        'club_loaned_from', 'club_contract_valid_until',
        'international_reputation', 'work_rate', 'body_type', 'real_face',
        'release_clause_eur', 'player_tags',
        # lower-case per-position columns
        'ls', 'st', 'rs', 'lw', 'lf', 'cf', 'rf', 'rw', 'lam', 'cam', 'ram',
        'lm', 'lcm', 'cm', 'rcm', 'rm', 'lwb',
        'ldm', 'cdm', 'rwb', 'lb', 'lcb', 'cb', 'rcb', 'rb', 'gk',
        'player_face_url', 'club_flag_url', 'nation_flag_url',
        'nation_team_id', 'rdm', 'nationality_id', 'value_eur', 'wage_eur',
        'dob', 'club_team_id', 'nation_position',
        'nation_logo_url', 'nation_jersey_number', 'club_joined',
        'player_traits',
    ]
    # `columns=` already selects the axis, so no separate `axis` argument is needed.
    return df.drop(columns=unused_columns)

def replace_nan_values(df):
    """Fill missing values in numeric columns with the column mean.

    Only numeric columns are touched: taking the mean of a text column is
    meaningless and raises in pandas, so NaNs in non-numeric columns are left
    as they are. Column labels are used as-is (no ``str()`` conversion), so
    non-string labels work too.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to clean. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for use in ``.pipe`` chains.
    """
    for col in df.select_dtypes(include='number').columns:
        missing = df[col].isna()
        if missing.any():
            # Series.mean() skips NaN, so this is the mean of the observed values.
            df.loc[missing, col] = df[col].mean()
    return df

def update_dtypes(df):
    """Cast every ``float64`` column of ``df`` to ``int`` in place and return ``df``.

    ``astype(int)`` truncates fractional parts toward zero and raises if a
    column still contains NaN or infinite values.
    """
    float_column_names = df.select_dtypes(include='float64').columns.tolist()
    for name in float_column_names:
        df[name] = df[name].astype(int)
    return df

def filter_top_leagues(df):
    """Return the rows of ``df`` whose ``league_name`` is one of the selected leagues."""
    selected_leagues = [
        'English Premier League',
        'Spain Primera Division',
        'French Ligue 1',
        'German 1. Bundesliga',
        'Italian Serie A',
        'Indian Super League',
    ]
    in_selected_league = df['league_name'].isin(selected_leagues)
    return df[in_selected_league]

def reset_indices(df):
    """Replace the index of ``df`` with a fresh 0..n-1 range and return ``df``.

    The old index is discarded directly (``drop=True``) rather than being
    turned into a column and dropped afterwards. That two-step approach breaks
    when the index has a name (the new column is not called ``'index'``) or
    when ``df`` already has a column named ``'index'``.

    The frame is modified in place and the same object is returned, so the
    function keeps working both in ``.pipe`` chains and when called for its
    side effect.
    """
    df.reset_index(drop=True, inplace=True)
    return df

def split_player_positions(df):
    """Add one 0/1 indicator column per position found in ``player_positions``.

    ``player_positions`` holds comma-separated position codes (e.g. ``"ST, LW"``).
    For every distinct code a column of that name is added, holding 1 for rows
    that list the code and 0 otherwise. New columns appear in order of first
    occurrence; an existing column with the same name is overwritten.

    Compared with writing cell by cell through ``df[col][row] = 1`` this:
    * does not switch off pandas' chained-assignment warning globally,
    * does not rely on chained assignment (which does not write through under
      pandas Copy-on-Write),
    * works for any index, not only a 0..n-1 range.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``player_positions`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the indicator columns added.
    """
    # str() mirrors the original handling of non-string cells (e.g. NaN -> 'nan').
    position_lists = [str(value).split(', ') for value in df['player_positions']]

    # dict.fromkeys de-duplicates while keeping first-seen order.
    ordered_positions = list(dict.fromkeys(
        position for positions in position_lists for position in positions
    ))

    indicators = {
        position: [int(position in positions) for positions in position_lists]
        for position in ordered_positions
    }
    return df.assign(**indicators)

def remove_reserves(df):
    """Return the rows of ``df`` whose ``club_position`` is not ``'RES'``."""
    not_reserve = df['club_position'] != 'RES'
    return df[not_reserve]
   

# Season files to load; each name doubles as the key under which the cleaned
# frame is stored in `df_list`.
filename = [
    'players_22',
    'players_21',
    'players_20',
    'players_19',
    'players_18',
]

# Run every season file through the same cleaning pipeline.
# (Two stray `return pd.read_csv(path)` lines that followed this loop were
# removed: `return` outside a function is a SyntaxError.)
df_list = {}
for name in filename:
    path = '../data/{}.csv'.format(name)
    res = (
        load_data(path)
        .pipe(filter_leagues)
        .pipe(filter_columns)
        .pipe(replace_nan_values)
        .pipe(update_dtypes)
        .pipe(filter_top_leagues)
        .pipe(reset_indices)
        .pipe(split_player_positions)
        .pipe(remove_reserves)
    )
    df_list[name] = res


In [3]:
# Show the keys under which the cleaned season frames were stored.
df_list.keys()

dict_keys(['players_22', 'players_21', 'players_20', 'players_19', 'players_18'])

In [4]:
# Season frames, newest first; `df_l` keeps them in that order and each one
# also gets a short alias.
df_l = [
    df_list[key]
    for key in ('players_22', 'players_21', 'players_20', 'players_19', 'players_18')
]
m_22, m_21, m_20, m_19, m_18 = df_l

In [5]:
# Column names that are copied from each season frame into the per-player
# `growth` table (this list is used as `attrs` when that table is built).
# The trailing upper-case entries (RW ... LWB) are presumably the per-position
# indicator columns added by split_player_positions — verify against the data.
columns = ['sofifa_id', 'short_name', 'player_positions', 'overall', 'age',
       'height_cm', 'weight_kg', 'club_name', 'league_name', 'club_position',
       'club_jersey_number', 'nationality_name', 'preferred_foot', 'weak_foot',
       'skill_moves', 'pace', 'shooting', 'passing', 'dribbling', 'defending',
       'physic', 'attacking_crossing', 'attacking_finishing',
       'attacking_heading_accuracy', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance', 'power_shot_power',
       'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
       'mentality_aggression', 'mentality_interceptions',
       'mentality_positioning', 'mentality_vision', 'mentality_penalties',
       'mentality_composure', 'defending_marking_awareness',
       'defending_standing_tackle', 'defending_sliding_tackle',
       'goalkeeping_diving', 'goalkeeping_handling', 'goalkeeping_kicking',
       'goalkeeping_positioning', 'goalkeeping_reflexes', 'goalkeeping_speed',
       'club_logo_url', 'RW', 'ST', 'CF', 'LW', 'CAM', 'CM', 'GK', 'CDM', 'LM',
       'CB', 'RB', 'RM', 'LB', 'RWB', 'LWB']

In [6]:
# Player whose season-by-season attributes are collected (matched on short_name).
# s_name = 'J. Grealish'
# s_name = 'H. Kane'
s_name = 'Cristiano Ronaldo'
attrs = columns
# One label per frame in `df_l`, in the same (newest-first) order.
years = ['2022', '2021', '2020', '2019', '2018']

# Look the player up once per season instead of once per attribute per season.
player_rows = [df[df['short_name'] == s_name] for df in df_l]

growth = pd.DataFrame({'Year': years})
for at in attrs:
    # First matching row wins; seasons without a match contribute None.
    growth[at] = [
        rows[at].values[0] if rows.shape[0] > 0 else None
        for rows in player_rows
    ]

# Drop seasons in which the player was not found. The explicit copy makes the
# in-place dtype conversion below operate on an independent frame, not a slice.
growth = growth[growth['overall'].notna()].copy()
# Columns that held None were inferred as float; cast them back to int.
growth = update_dtypes(growth)
growth.columns

Index(['Year', 'sofifa_id', 'short_name', 'player_positions', 'overall', 'age',
       'height_cm', 'weight_kg', 'club_name', 'league_name', 'club_position',
       'club_jersey_number', 'nationality_name', 'preferred_foot', 'weak_foot',
       'skill_moves', 'pace', 'shooting', 'passing', 'dribbling', 'defending',
       'physic', 'attacking_crossing', 'attacking_finishing',
       'attacking_heading_accuracy', 'attacking_short_passing',
       'attacking_volleys', 'skill_dribbling', 'skill_curve',
       'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control',
       'movement_acceleration', 'movement_sprint_speed', 'movement_agility',
       'movement_reactions', 'movement_balance', 'power_shot_power',
       'power_jumping', 'power_stamina', 'power_strength', 'power_long_shots',
       'mentality_aggression', 'mentality_interceptions',
       'mentality_positioning', 'mentality_vision', 'mentality_penalties',
       'mentality_composure', 'defending_marking_awareness',
    

In [7]:
# Individual skill columns, one line per name prefix (attacking_, skill_,
# movement_, power_, mentality_, defending_). Plotting cells take slices of
# this list.
p_skill = ['attacking_crossing','attacking_finishing','attacking_heading_accuracy','attacking_short_passing','attacking_volleys',
           'skill_dribbling','skill_curve','skill_fk_accuracy','skill_long_passing','skill_ball_control',
           'movement_acceleration','movement_sprint_speed','movement_agility','movement_reactions','movement_balance',
           'power_shot_power','power_jumping','power_stamina','power_strength','power_long_shots',
           'mentality_aggression','mentality_interceptions','mentality_positioning','mentality_vision','mentality_penalties','mentality_composure',
           'defending_marking_awareness','defending_standing_tackle','defending_sliding_tackle']

In [8]:
# Display the per-season table collected for the selected player.
growth

Unnamed: 0,Year,sofifa_id,short_name,player_positions,overall,age,height_cm,weight_kg,club_name,league_name,...,CM,GK,CDM,LM,CB,RB,RM,LB,RWB,LWB
0,2022,20801,Cristiano Ronaldo,"ST, LW",91,36,187,83,Manchester United,English Premier League,...,0,0,0,0,0,0,0,0,0,0
1,2021,20801,Cristiano Ronaldo,"ST, LW",92,35,187,83,Juventus,Italian Serie A,...,0,0,0,0,0,0,0,0,0,0
2,2020,20801,Cristiano Ronaldo,"ST, LW",93,34,187,83,Juventus,Italian Serie A,...,0,0,0,0,0,0,0,0,0,0
3,2019,20801,Cristiano Ronaldo,"ST, LW",94,33,187,83,Juventus,Italian Serie A,...,0,0,0,0,0,0,0,0,0,0
4,2018,20801,Cristiano Ronaldo,"LW, ST",94,32,185,80,Real Madrid CF,Spain Primera Division,...,0,0,0,0,0,0,0,0,0,0


In [9]:
# Rebind `years` to the seasons actually present in `growth`, in reverse row
# order (`growth` rows run newest-first, so this list runs oldest-first).
years = growth['Year'].tolist()[::-1]
print(years)

['2018', '2019', '2020', '2021', '2022']


In [10]:
# Skills to plot, plus a y-axis window around their values: 15 points of
# padding on each side, with the upper bound capped at 100.
selected_skills = p_skill[:5]
df_selected = growth[selected_skills]
min_range = min(df_selected.min(axis='columns')) - 15
max_range = min(max(df_selected.max(axis='columns')) + 15, 100)

In [11]:
# Hex colour palette used by the bar and funnel charts in later cells.
colors = ['#FE5D26', '#F2C078', '#FAEDCA', '#C1DBB3', '#7EBC89']

In [12]:
# Print "skill index:colour index" pairs to check how skills map onto the palette.
# Modulo wraps around correctly however many skills there are; a single
# subtraction only covers up to twice the palette size.
for i in range(len(selected_skills)):
    color_p = i % len(colors)
    print(i, color_p, sep=':')


0:0
1:1
2:2
3:3
4:4


In [13]:
# How many more skills than colours there are (a value <= 0 means every skill
# can get its own colour).
len(selected_skills) - len(colors)

0

In [14]:
import plotly.graph_objects as go

# Grouped bar chart: one trace per skill, one bar per season.
# x and y are both taken from the same year-sorted frame. `growth` rows run
# newest-first while `years` is reversed by an earlier cell, so pairing
# `years` with `growth[skill]` directly would label each bar with the wrong
# season.
growth_by_year = growth.sort_values('Year')
season_labels = list(growth_by_year['Year'])

fig = go.Figure()
for i, skill in enumerate(selected_skills):
    skill_values = list(growth_by_year[skill])
    fig.add_trace(go.Bar(
        x=season_labels, y=skill_values, name=skill,
        text=skill_values,
        # Cycle through the palette when there are more skills than colours.
        marker_color=colors[i % len(colors)],
    ))
fig.update_layout(template='plotly_dark', yaxis_range=[min_range, max_range])
fig.show()

In [15]:
# Skills as rows, seasons as columns.
growth_2 = growth[selected_skills].transpose()
# Label each column with the season of the row it came from. Taking the labels
# from `growth['Year']` stays correct when some seasons were filtered out of
# `growth`; a fixed position -> year mapping does not.
growth_2.columns = list(growth['Year'])

growth_2

Unnamed: 0,2022,2021,2020,2019,2018
attacking_crossing,87,84,84,84,85
attacking_finishing,95,95,94,94,94
attacking_heading_accuracy,90,90,89,89,88
attacking_short_passing,80,82,83,81,83
attacking_volleys,86,86,87,87,88


In [16]:
# First row of growth_2 (the first selected skill) as a plain list, one value per column.
list(growth_2.iloc[0])

[87, 84, 84, 84, 85]

In [17]:
# Value at row position 0, column position 1. A single positional lookup
# avoids integer `[]` indexing on a label-indexed Series, which pandas has
# deprecated.
growth_2.iloc[0, 1]

84

In [18]:
# Peek at the first two entries of `years`.
years[:2]

['2018', '2019']

In [19]:
# Print each pair of neighbouring column values for the first selected skill.
# `.iloc[row, col]` is used for the positional lookups; integer `[]` indexing
# on a label-indexed Series is deprecated in pandas.
ye = years; se = selected_skills[:1]
for j in range(len(se)):
    for i in range(1, len(ye)):
        print(growth_2.iloc[j, i - 1], growth_2.iloc[j, i], sep=', ')

87, 84
84, 84
84, 84
84, 85


In [20]:
import plotly.graph_objects as go

# Grouped bar chart: one trace per season, one bar per selected skill.
fig = go.Figure()

for i, year in enumerate(years):
    fig.add_trace(go.Bar(
        y=list(growth_2[year].values),
        x=list(growth_2.index),
        # Cycle through the palette so extra seasons do not raise IndexError.
        marker_color=colors[i % len(colors)],
        name=str(year),
    ))

# Also read by the funnel chart cell further down.
bar_count = len(years)

fig.update_layout(
    template='plotly_dark',
    yaxis_range=[min_range, max_range],
    # Fewer traces -> wider gap between skill groups.
    bargap=1-(0.1*bar_count),
    showlegend=False,
)
fig.show()

In [21]:
# Display the skills-by-season table again.
growth_2

Unnamed: 0,2022,2021,2020,2019,2018
attacking_crossing,87,84,84,84,85
attacking_finishing,95,95,94,94,94
attacking_heading_accuracy,90,90,89,89,88
attacking_short_passing,80,82,83,81,83
attacking_volleys,86,86,87,87,88


In [22]:
# Values of every selected skill for the first entry of `years` (column looked up by label).
list(growth_2[years[0]].values)

[85, 94, 88, 83, 88]

In [23]:
from plotly import graph_objects as go

# Funnel chart: one trace per season, with the selected skills as the y categories.
fig = go.Figure()

for year in years:
    fig.add_trace(go.Funnel(
        x=list(growth_2[year].values),
        y=list(growth_2.index),
        # `colors` is passed as a list, i.e. one colour per entry of the trace.
        marker=dict(
            color=colors,
            line=dict(color='black', width=2),
        ),
        connector=dict(
            line=dict(color='wheat', width=4),
        ),
        hoverinfo='skip',
    ))

# NOTE: `bar_count` is defined in the per-season bar chart cell above.
fig.update_layout(
    template='plotly_dark',
    bargap=1-(0.1*bar_count),
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    showlegend=False,
)

fig.show()

In [24]:
# `passing` value in the row whose index label is 0 (label lookup, not position).
# NOTE(review): rows are filtered out of `growth` without resetting the index,
# so this raises KeyError if row 0 was dropped — use `.iloc[0]` if "first
# remaining row" is what is meant.
growth['passing'][0]

80

In [25]:
# Preview the first English Premier League rows of the players_22 frame.
m_22[m_22['league_name']=='English Premier League'].head()

Unnamed: 0,sofifa_id,short_name,player_positions,overall,age,height_cm,weight_kg,club_name,league_name,club_position,...,CM,GK,CDM,LM,CB,RB,RM,LB,RWB,LWB
2,20801,Cristiano Ronaldo,"ST, LW",91,36,187,83,Manchester United,English Premier League,ST,...,0,0,0,0,0,0,0,0,0,0
4,192985,K. De Bruyne,"CM, CAM",91,30,181,70,Manchester City,English Premier League,RCM,...,1,0,0,0,0,0,0,0,0,0
9,202126,H. Kane,ST,90,27,188,89,Tottenham Hotspur,English Premier League,ST,...,0,0,0,0,0,0,0,0,0,0
10,215914,N. Kanté,"CDM, CM",90,30,168,70,Chelsea,English Premier League,RCM,...,1,0,1,0,0,0,0,0,0,0
13,200104,H. Son,"LM, CF, LW",89,28,183,78,Tottenham Hotspur,English Premier League,LW,...,0,0,0,1,0,0,0,0,0,0


In [26]:
# English Premier League rows from the players_22 frame, excluding rows whose
# club_position is 'SUB'.
in_premier_league = m_22['league_name'] == 'English Premier League'
not_substitute = m_22['club_position'] != 'SUB'
league_df = m_22[in_premier_league & not_substitute]

# Keep rows flagged with at least one of the CAM / CM / CDM indicator columns.
has_midfield_flag = league_df[['CAM', 'CM', 'CDM']].eq(1).any(axis='columns')
pos_league_df = league_df[has_midfield_flag]
pos_league_df.head()

Unnamed: 0,sofifa_id,short_name,player_positions,overall,age,height_cm,weight_kg,club_name,league_name,club_position,...,CM,GK,CDM,LM,CB,RB,RM,LB,RWB,LWB
4,192985,K. De Bruyne,"CM, CAM",91,30,181,70,Manchester City,English Premier League,RCM,...,1,0,0,0,0,0,0,0,0,0
10,215914,N. Kanté,"CDM, CM",90,30,168,70,Chelsea,English Premier League,RCM,...,1,0,1,0,0,0,0,0,0,0
28,212198,Bruno Fernandes,CAM,88,26,179,69,Manchester United,English Premier League,CAM,...,0,0,0,0,0,0,0,0,0,0
37,195864,P. Pogba,"CM, LM",87,28,191,84,Manchester United,English Premier League,RDM,...,1,0,0,1,0,0,0,0,0,0
51,189509,Thiago,"CM, CDM",86,30,174,70,Liverpool,English Premier League,LCM,...,1,0,1,0,0,0,0,0,0,0


In [27]:
# Select the row(s) whose short_name is 'Thiago' from the filtered subset and display them.
player = pos_league_df[pos_league_df['short_name']=='Thiago']
player

Unnamed: 0,sofifa_id,short_name,player_positions,overall,age,height_cm,weight_kg,club_name,league_name,club_position,...,CM,GK,CDM,LM,CB,RB,RM,LB,RWB,LWB
51,189509,Thiago,"CM, CDM",86,30,174,70,Liverpool,English Premier League,LCM,...,1,0,1,0,0,0,0,0,0,0


In [28]:
import plotly.express as px

# Scatter plot of passing (x) against mentality_vision (y) for the rows in
# pos_league_df. Marker size follows mentality_composure (largest marker
# capped by size_max=15) and short_name is added to the hover tooltip.
fig = px.scatter(
    pos_league_df, x='passing', y='mentality_vision',
    hover_data=['short_name'], 
    size='mentality_composure', size_max=15,
    template='plotly_dark')
fig.show()