# FIFA Player Recommender

In [1]:
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform
from sklearn.decomposition import PCA

## Data cleaning: remove unnecessary columns and make everything numerical

In [2]:
def convert_position(position):
    if position == 'GK':
        return 'Goalkeeper'
    elif position in ["RB", "RWB"]:
        return "Right Back"
    elif position in ["LB", "LWB"]:
        return "Left Back"
    elif position == 'CB':
        return "Center Back"
    elif position in ['RM', "RW"]:
        return 'Right Winger'
    elif position in ['LM', 'LW']:
        return 'Left Winger'
    elif position == 'CDM':
        return 'Defensive Midfielder'
    elif position == 'CM':
        return 'Central Midfielder'
    elif position == 'CAM':
        return 'Attacking Midfielder'
    elif position == 'CF':
        return 'Supporting Striker'
    elif position == 'ST':
        return 'Center Forward'

### Feature engineering

In [3]:
df = pd.read_csv("data/players_20.csv")
df.style.hide_index()

# Splits positions on comma and extract preferred position
positions = df.player_positions.str.split(',', expand=True)
positions.fillna('NA', inplace=True)
positions = positions.apply(lambda x: [e.strip() for e in x])
positions = positions.iloc[:, 0]
positions = positions.apply(lambda position: convert_position(position))
df['Position'] = positions

# Split work rate on '/' and put in two columns
df[['atk_wr', 'def_wr']] = df.work_rate.str.split('/', expand=True)

names = df.short_name

with open("data/drop.csv") as f:
    to_drop = [line.rstrip() for line in f]

df.drop(to_drop, inplace=True, axis=1)

categoricals = ['preferred_foot', 'body_type', 'team_position', 'Position', 'atk_wr', 'def_wr']
df = pd.concat([df, pd.get_dummies(df[categoricals])], axis=1)
#df.drop(categoricals, axis=1, inplace=True)




for col, content in zip(df.columns, df.iloc[0]):
    print(f"{col}: \t\t\t {content}")

short_name: 			 L. Messi
age: 			 32
height_cm: 			 170
weight_kg: 			 72
club: 			 FC Barcelona
overall: 			 94
potential: 			 94
preferred_foot: 			 Left
weak_foot: 			 4
skill_moves: 			 4
body_type: 			 Messi
team_position: 			 RW
team_jersey_number: 			 10.0
attacking_crossing: 			 88
attacking_finishing: 			 95
attacking_heading_accuracy: 			 70
attacking_short_passing: 			 92
attacking_volleys: 			 88
skill_dribbling: 			 97
skill_curve: 			 93
skill_fk_accuracy: 			 94
skill_long_passing: 			 92
skill_ball_control: 			 96
movement_acceleration: 			 91
movement_sprint_speed: 			 84
movement_agility: 			 93
movement_reactions: 			 95
movement_balance: 			 95
power_shot_power: 			 86
power_jumping: 			 68
power_stamina: 			 75
power_strength: 			 68
power_long_shots: 			 94
mentality_aggression: 			 48
mentality_interceptions: 			 40
mentality_positioning: 			 94
mentality_vision: 			 94
mentality_penalties: 			 75
mentality_composure: 			 96
defending_marking: 			 33
defending_st

In [4]:
features = ['distribution', 'shot_stopping', 'dominance', 'tenacity', 'awareness', 'power', 'mobility', 'composure', 'passing', 'dribbling', 'shooting',
           'height_cm', 'preferred_foot_Left', 'preferred_foot_Right', 'overall']
gk = ['distribution', 'shot_stopping', 'dominance']
outfield = ['tenacity', 'awareness', 'power', 'mobility', 'composure', 'passing', 'dribbling', 'shooting']

df['distribution'] = df[['goalkeeping_kicking', 'mentality_vision', 'mentality_composure', 'attacking_short_passing', 'skill_long_passing']].sum(axis=1).where(df['Position_Goalkeeper'] == 1.0, 0)
df['shot_stopping'] = df[['goalkeeping_diving', 'goalkeeping_reflexes']].sum(axis=1).where(df['Position_Goalkeeper'] == 1.0, 0)
df['dominance'] = df[['mentality_composure', 'goalkeeping_handling', 'goalkeeping_positioning', 'height_cm']].sum(axis=1).where(df['Position_Goalkeeper'] == 1.0, 0)
df['tenacity'] = df[['movement_agility', 'movement_acceleration', 'mentality_aggression', 'defending_standing_tackle', 'defending_sliding_tackle', 'mentality_interceptions']].sum(axis=1)
df['awareness'] = df[['defending_marking', 'mentality_composure', 'movement_reactions', 'defending_standing_tackle', 'defending_sliding_tackle']].sum(axis=1)
df['power'] = df[['power_strength', 'height_cm', 'attacking_heading_accuracy', 'mentality_aggression']].sum(axis=1)
df['mobility'] = df[['power_stamina', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'atk_wr_High', 'atk_wr_Low', 'atk_wr_Medium', 'def_wr_High', 'def_wr_Low', 'def_wr_Medium']].sum(axis=1)
df['composure'] = df[['skill_dribbling', 'skill_ball_control', 'movement_agility', 'mentality_composure', 'movement_reactions', 'movement_balance']].sum(axis=1)
df['passing'] = df[['mentality_vision', 'skill_long_passing', 'attacking_short_passing', 'attacking_crossing', 'mentality_composure']].sum(axis=1)
df['dribbling'] = df[['skill_dribbling', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_balance']].sum(axis=1)
df['shooting'] = df[['attacking_finishing', 'attacking_volleys', 'power_long_shots', 'power_shot_power', 'skill_curve', 'mentality_positioning']].sum(axis=1)


In [5]:
attributes = ['attacking_crossing', 'attacking_finishing', 'attacking_heading_accuracy',
              'attacking_short_passing', 'attacking_volleys', 'skill_dribbling', 'skill_curve',
              'skill_fk_accuracy', 'skill_long_passing', 'skill_ball_control', 
              'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 
              'movement_reactions', 'movement_balance', 'power_shot_power', 'power_jumping',
              'power_stamina', 'power_strength', 'power_long_shots', 'mentality_aggression', 
              'mentality_interceptions', 'mentality_positioning', 'mentality_vision', 
              'mentality_penalties', 'mentality_composure', 'defending_marking', 
              'defending_standing_tackle', 'defending_sliding_tackle', 'goalkeeping_diving',
              'goalkeeping_handling', 'goalkeeping_kicking', 'goalkeeping_positioning', 'goalkeeping_reflexes']

df_norm = df.copy()
#df_norm.drop(['short_name'], axis=1, inplace=True)
df_norm = (df_norm - df_norm.min()) / (df_norm.max() - df_norm.min())
#df_norm.fillna(0.5, inplace=True)


features = ['distribution', 'shot_stopping', 'dominance', 'tenacity', 'awareness', 'power', 'mobility', 'composure', 'passing', 'dribbling', 'shooting',
           'height', 'left_foot', 'right_foot', 'overall']
gk = ['distribution', 'shot_stopping', 'dominance']
outfield = ['tenacity', 'awareness', 'power', 'mobility', 'composure', 'passing', 'dribbling', 'shooting']
distribution = df_norm[['goalkeeping_kicking', 'mentality_vision', 'mentality_composure', 'attacking_short_passing', 'skill_long_passing']].sum(axis=1).where(df_norm['pos1_GK'] == 1.0, 0).to_list()
shot_stopping = df_norm[['goalkeeping_diving', 'goalkeeping_reflexes']].sum(axis=1).where(df_norm['pos1_GK'] == 1.0, 0).to_list()
dominance = df_norm[['mentality_composure', 'goalkeeping_handling', 'goalkeeping_positioning', 'height_cm']].sum(axis=1).where(df_norm['pos1_GK'] == 1.0, 0).to_list()
tenacity = df_norm[['movement_agility', 'movement_acceleration', 'mentality_aggression', 'defending_standing_tackle', 'defending_sliding_tackle', 'mentality_interceptions']].sum(axis=1).to_list()
awareness = df_norm[['defending_marking', 'mentality_composure', 'movement_reactions', 'defending_standing_tackle', 'defending_sliding_tackle']].sum(axis=1).to_list()
power = df_norm[['power_strength', 'height_cm', 'attacking_heading_accuracy', 'mentality_aggression']].sum(axis=1).to_list()
mobility = df_norm[['power_stamina', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'atk_wr_High', 'atk_wr_Low', 'atk_wr_Medium', 'def_wr_High', 'def_wr_Low', 'def_wr_Medium']].sum(axis=1).to_list()
composure = df_norm[['skill_dribbling', 'skill_ball_control', 'movement_agility', 'mentality_composure', 'movement_reactions', 'movement_balance']].sum(axis=1).to_list()
passing = df_norm[['mentality_vision', 'skill_long_passing', 'attacking_short_passing', 'attacking_crossing', 'mentality_composure']].sum(axis=1).to_list()
dribbling = df_norm[['skill_dribbling', 'movement_acceleration', 'movement_sprint_speed', 'movement_agility', 'movement_balance']].sum(axis=1).to_list()
shooting = df_norm[['attacking_finishing', 'attacking_volleys', 'power_long_shots', 'power_shot_power', 'skill_curve', 'mentality_positioning']].sum(axis=1).to_list()
feature_values = [distribution, shot_stopping, dominance, tenacity, awareness, power, mobility, composure, passing, dribbling, shooting,
                  df_norm.height_cm.to_list(), df_norm.preferred_foot_Left.to_list(), df_norm.preferred_foot_Right.to_list(), df_norm.overall.to_list()]

data = dict(zip(features, feature_values))
df_custom = pd.DataFrame(data)
df_std = (df_custom - df_custom.mean()) / df_custom.std()
df_std.height /= 4
df_std.left_foot /= 4
df_std.right_foot /= 4
#df_std.overall /= 10
df_std.loc[df_norm['Position_Goalkeeper'] == 1.0, outfield] /= 10

TypeError: unsupported operand type(s) for -: 'str' and 'float'

In [5]:
features = ['distribution', 'shot_stopping', 'dominance', 'tenacity', 'awareness', 'power', 'mobility', 'composure', 'passing', 'dribbling', 'shooting',
           'height_cm', 'preferred_foot_Left', 'preferred_foot_Right', 'overall']
class PlayerRecommender():
    def train(self, df, metric='cosine'):
        self.df = df
        self.df_norm = self.df[features].copy()
        self.df_norm = (self.df_norm - self.df_norm.min()) / (self.df_norm.max() - self.df_norm.min())
        self.matrix = squareform(pdist(self.df_norm.values, metric=metric))
        return self
    
    def predict(self, p_name, n_results=20, lwr=0, upr=99):
        # Locate row in similarity matrix and display most similar entries in that row
        idx = self.df.short_name[df.short_name.str.contains(p_name, case=False)].index[0]
        most_similar_idxs = np.argsort(self.matrix[idx, :]) 
        return self.df.loc[most_similar_idxs, ['short_name', 'age', 'preferred_foot', 'Position', 'club', 'overall']][1:n_results]

In [6]:
df

Unnamed: 0,short_name,age,height_cm,weight_kg,club,overall,potential,preferred_foot,weak_foot,skill_moves,...,shot_stopping,dominance,tenacity,awareness,power,mobility,composure,passing,dribbling,shooting
0,L. Messi,32,170,72,FC Barcelona,94,94,Left,4,4,...,0,0,335,287,356,345,572,462,460,550
1,Cristiano Ronaldo,34,187,83,Juventus,93,93,Right,4,5,...,0,0,324,275,417,354,530,421,427,545
2,Neymar Jr,27,175,68,Paris Saint-Germain,92,92,Right,5,5,...,0,0,332,268,337,362,557,439,459,513
3,J. Oblak,26,188,87,Atlético Madrid,91,93,Right,3,1,...,176,438,193,213,315,213,314,229,231,119
4,E. Hazard,28,175,74,Real Madrid,91,91,Right,4,4,...,0,0,333,264,353,363,559,433,466,499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18273,Shao Shuai,22,186,79,Beijing Renhe FC,48,56,Right,3,2,...,0,0,297,228,319,221,258,150,256,147
18274,Xiao Mingjie,22,177,66,Shanghai SIPG FC,48,56,Right,2,2,...,0,0,299,228,305,216,258,166,267,148
18275,Zhang Wei,19,186,75,Hebei China Fortune FC,48,56,Right,2,2,...,0,0,300,235,336,213,289,217,258,209
18276,Wang Haijian,18,185,74,Shanghai Greenland Shenhua FC,48,54,Right,2,2,...,0,0,314,231,344,217,299,229,274,204


In [14]:
#a = np.load('./data/matrix.npy')

In [9]:
np.save('./data/matrix.npy', rec.matrix)

In [20]:
#df_norm = df.copy()
#df_norm = (df_norm - df_norm.min()) / (df_norm.max() - df_norm.min())

In [6]:
rec = PlayerRecommender()
rec.train(df)

<__main__.PlayerRecommender at 0x7fa1bb16b760>

In [7]:
rec.predict('kant', 30) # KEEP PLAYING AROUND WITH FOOT, HEIGHT ETC.

Unnamed: 0,short_name,age,preferred_foot,Position,club,overall
94,Carvajal,27,Right,Right Back,Real Madrid,85
244,L. Torreira,23,Right,Defensive Midfielder,Arsenal,82
107,Allan,28,Right,Central Midfielder,Napoli,85
226,I. Gueye,29,Right,Defensive Midfielder,Paris Saint-Germain,83
58,M. Verratti,26,Right,Central Midfielder,Paris Saint-Germain,86
618,Jonny,25,Right,Left Back,Wolverhampton Wanderers,79
326,Rosberto Dourado,31,Right,Defensive Midfielder,Atlético Mineiro,82
728,Roque Mesa,30,Right,Defensive Midfielder,CD Leganés,79
265,Ricardo Pereira,25,Right,Right Back,Leicester City,82
637,R. Aguilar,26,Right,Right Back,AS Monaco,79


In [10]:
df.to_pickle('./data/df.pkl')
#df_std.to_pickle('./data/df_std.pkl')

In [11]:
df.head()

Unnamed: 0,short_name,age,height_cm,weight_kg,club,overall,potential,preferred_foot,weak_foot,skill_moves,...,shot_stopping,dominance,tenacity,awareness,power,mobility,composure,passing,dribbling,shooting
0,L. Messi,32,170,72,FC Barcelona,94,94,Left,4,4,...,0,0,335,287,356,345,572,462,460,550
1,Cristiano Ronaldo,34,187,83,Juventus,93,93,Right,4,5,...,0,0,324,275,417,354,530,421,427,545
2,Neymar Jr,27,175,68,Paris Saint-Germain,92,92,Right,5,5,...,0,0,332,268,337,362,557,439,459,513
3,J. Oblak,26,188,87,Atlético Madrid,91,93,Right,3,1,...,176,438,193,213,315,213,314,229,231,119
4,E. Hazard,28,175,74,Real Madrid,91,91,Right,4,4,...,0,0,333,264,353,363,559,433,466,499


In [2]:
df = pd.read_pickle('./data/df.pkl')

In [3]:
df

Unnamed: 0,short_name,age,height_cm,weight_kg,club,overall,potential,preferred_foot,weak_foot,skill_moves,...,shot_stopping,dominance,tenacity,awareness,power,mobility,composure,passing,dribbling,shooting
0,L. Messi,32,170,72,FC Barcelona,94,94,Left,4,4,...,0,0,335,287,356,345,572,462,460,550
1,Cristiano Ronaldo,34,187,83,Juventus,93,93,Right,4,5,...,0,0,324,275,417,354,530,421,427,545
2,Neymar Jr,27,175,68,Paris Saint-Germain,92,92,Right,5,5,...,0,0,332,268,337,362,557,439,459,513
3,J. Oblak,26,188,87,Atlético Madrid,91,93,Right,3,1,...,176,438,193,213,315,213,314,229,231,119
4,E. Hazard,28,175,74,Real Madrid,91,91,Right,4,4,...,0,0,333,264,353,363,559,433,466,499
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18273,Shao Shuai,22,186,79,Beijing Renhe FC,48,56,Right,3,2,...,0,0,297,228,319,221,258,150,256,147
18274,Xiao Mingjie,22,177,66,Shanghai SIPG FC,48,56,Right,2,2,...,0,0,299,228,305,216,258,166,267,148
18275,Zhang Wei,19,186,75,Hebei China Fortune FC,48,56,Right,2,2,...,0,0,300,235,336,213,289,217,258,209
18276,Wang Haijian,18,185,74,Shanghai Greenland Shenhua FC,48,54,Right,2,2,...,0,0,314,231,344,217,299,229,274,204
