In [331]:
import os
import pickle
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [332]:
def sibling_data_folder(code_folder):
    """Return `code_folder` with its rightmost path component named "code" replaced by "data".

    Only a path component that is exactly "code" is swapped, so folders that
    merely contain the substring (e.g. "vscode") are left alone. A plain
    `str.replace("code", "data")` would rewrite every occurrence in the path.
    If no component is named "code", the path is returned unchanged.
    """
    parts = code_folder.split(os.sep)
    # Walk from the end so the component closest to the working directory wins.
    for position in range(len(parts) - 1, -1, -1):
        if parts[position] == "code":
            parts[position] = "data"
            break
    return os.sep.join(parts)


# The notebook is expected to run from the "code" folder; data lives in the sibling "data" folder.
CODE_FOLDER = os.getcwd()
DATA_FOLDER = sibling_data_folder(CODE_FOLDER)

"ATTACK":["%_successfulLinkupPlays", "attackingActions_P90", "foulsSuffered_P90", 
              "linkupPlays_P90", "offsides_P90", "receivedPass_P90", "successfulAttackingActions_P90",
              "successfulLinkupPlays_P90", "touchInBox_P90"],

In [352]:
# Maps each macro-area label to the list of stat column names that belong to it.
# The keys are used as checkbox labels further down, and the value lists are used
# to select feature columns when comparing players within one macro area.
# Column naming follows the stat tables built below: names ending in "_P90" come
# from the "average" stats, names starting with "%_" come from the "percent" stats.
MACRO_AREAS = {
    "ATTACK":["attackingActions_P90", "foulsSuffered_P90", "offsides_P90", 
              "receivedPass_P90", "successfulAttackingActions_P90", "touchInBox_P90"],
    "CARDS":["%_yellowCardsPerFoul", "directRedCards_P90", "redCards_P90", "yellowCards_P90"],
    "CROSS":["%_successfulCrosses", "crosses_P90", "successfulCrosses_P90"],
    "DEFENSE":["%_dribblesAgainstWon", "%_successfulSlidingTackles", "ballLosses_P90", 
               "ballRecoveries_P90", "clearances_P90", "counterpressingRecoveries_P90",
               "dangerousOpponentHalfRecoveries_P90", "dangerousOwnHalfLosses_P90",
               "defensiveActions_P90", "dribblesAgainst_P90", "dribblesAgainstWon_P90",
               "interceptions_P90", "losses_P90", "missedBalls_P90", "opponentHalfRecoveries_P90",
               "ownHalfLosses_P90", "shotsBlocked_P90", "slidingTackles_P90", 
               "successfulDefensiveAction_P90", "successfulSlidingTackles_P90"],
    "DRIBBLING":["%_newSuccessfulDribbles", "%_successfulDribbles", "dribbles_P90",
                 "newSuccessfulDribbles_P90", "successfulDribbles_P90"],
    "DUELS":["%_aerialDuelsWon", "%_defensiveDuelsWon", "%_duelsWon",
             "%_fieldAerialDuelsWon", "%_newDefensiveDuelsWon", "%_newDuelsWon",
             "%_newOffensiveDuelsWon", "%_offensiveDuelsWon", "aerialDuels_P90",
             "defensiveDuels_P90", "defensiveDuelsWon_P90", "duels_P90",
             "duelsWon_P90", "fieldAerialDuels_P90", "fieldAerialDuelsWon_P90",
             "fouls_P90", "looseBallDuels_P90", "looseBallDuelsWon_P90",
             "newDefensiveDuelsWon_P90", "newDuelsWon_P90", "newOffensiveDuelsWon_P90",
             "offensiveDuels_P90", "offensiveDuelsWon_P90"],
    "GOALKEEPER":["%_gkAerialDuelsWon", "%_gkSaves", "%_gkSuccessfulExits", 
                  "%_successfulGoalKicks", "gkAerialDuels_P90", "gkAerialDuelsWon_P90",
                  "gkConcededGoals_P90", "gkExits_P90", "gkSaves_P90", "gkShotsAgainst_P90",
                  "gkSuccessfulExits_P90", "goalKicks_P90", "goalKicksLong_P90",
                  "goalKicksShort_P90", "successfulGoalKicks_P90", "xgSave_P90"],
    "PASSES":["%_successfulBackPasses", "%_successfulForwardPasses", "%_successfulKeyPasses",
              "%_successfulLongPasses", "%_successfulPasses", "%_successfulPassesToFinalThird",
              "%_successfulProgressivePasses", "%_successfulShotAssists", 
              "%_successfulSmartPasses", "%_successfulThroughPasses", 
              "%_successfulVerticalPasses", "assists_P90", "backPasses_P90",
              "dribbleDistanceFromOpponentGoal_P90", "forwardPasses_P90", "keyPasses_P90",
              "lateralPasses_P90", "longPasses_P90", "longPassLength_P90", "passes_P90",
              "passesToFinalThird_P90", "passLength_P90", "progressivePasses_P90",
              "secondAssists_P90", "shotAssists_P90", "shotOnTargetAssists_P90", 
              "smartPasses_P90", "successfulBackPasses_P90", "successfulForwardPasses_P90",
              "successfulKeyPasses_P90", "successfulLateralPasses_P90", "successfulLongPasses_P90",    
              "successfulPasses_P90", "successfulPassesToFinalThird_P90", "successfulProgressivePasses_P90",
              "successfulSmartPasses_P90", "successfulThroughPasses_P90", "successfulVerticalPasses_P90",
              "thirdAssists_P90", "throughPasses_P90", "verticalPasses_P90",
              "xgAssist_P90", "%_successfulLateralPasses"],
    "PHYSICS":["accelerations_P90", "progressiveRun_P90"],
    "SET_PIECES":["%_directFreeKicksOnTarget", "%_penaltiesConversion", "corners_P90", 
                  "directFreeKicks_P90", "directFreeKicksOnTarget_P90", "freeKicks_P90",
                  "freeKicksOnTarget_P90", "penalties_P90", "successfulPenalties_P90"],
    "SHOTS":["%_goalConversion", "%_headShotsOnTarget", "%_shotsOnTarget", "goals_P90", 
             "headShots_P90", "shots_P90", "shotsOnTarget_P90", "xgShot_P90"]
}

In [353]:
# Load the two pickled inputs from DATA_FOLDER. The files are opened in `with`
# blocks so the handles are closed as soon as each object is deserialised.
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only load pickles that come from a trusted source.
with open(os.path.join(DATA_FOLDER, "filter_dataset"), "rb") as dataset_file:
    df = pickle.load(dataset_file)
with open(os.path.join(DATA_FOLDER, "players_advancedstats_Serie_A_2020_2021"), "rb") as stats_file:
    advanced_stats = pickle.load(stats_file)

# Transpose so that the keys of the loaded object become the row index, then
# discard rows that hold no data at all.
advanced_stats = pd.DataFrame(advanced_stats).transpose().dropna(how="all")

In [354]:
# Flatten the nested "average" records into one column per stat, tag every
# column with the "_P90" suffix and restore the index of advanced_stats.
average_stats = pd.json_normalize(advanced_stats["average"])
average_stats.columns = average_stats.columns + "_P90"
average_stats.index = advanced_stats.index

# Keep only the rows whose stats do not sum to zero. A single boolean mask
# replaces the row-by-row drop: it avoids rebuilding the frame once per removed
# row, and it never removes a non-zero row that happens to share its index
# label with a zero row.
average_stats = average_stats.loc[average_stats.sum(axis=1) != 0]

In [355]:
# Flatten the nested "percent" records into one column per stat, tag every
# column with the "%_" prefix and restore the index of advanced_stats.
percent_stats = pd.json_normalize(advanced_stats["percent"])
percent_stats.columns = "%_" + percent_stats.columns
percent_stats.index = advanced_stats.index

# Keep only the rows whose stats do not sum to zero. A single boolean mask
# replaces the row-by-row drop: it avoids rebuilding the frame once per removed
# row, and it never removes a non-zero row that happens to share its index
# label with a zero row.
percent_stats = percent_stats.loc[percent_stats.sum(axis=1) != 0]

In [356]:
# Raw stats: the "_P90" and "%_" tables joined on their shared index
# (only rows present in both tables survive the default inner join).
stats = average_stats.merge(percent_stats, left_index=True, right_index=True)

# Rescale every stat column to the [0, 1] range, keeping the labels of `stats`.
scaler = MinMaxScaler()
scaled_stats = pd.DataFrame(
    scaler.fit_transform(stats),
    index=stats.index,
    columns=stats.columns,
)
# Put the competition / season identifiers in front of the scaled stats.
scaled_stats = advanced_stats[["competitionId", "seasonId"]].merge(
    scaled_stats, left_index=True, right_index=True
)

# Attach the scaled stats to the player table and index the result by player
# name, keeping "player_name" available as a regular column as well.
final_df = df.merge(scaled_stats, left_on="player_id", right_index=True, how="inner")
final_df = final_df.set_index("player_name", drop=False)

In [357]:
# Preview the first ten rows of the merged player / scaled-stats table.
final_df.head(10)

Unnamed: 0_level_0,team_id,team_name,main_scheme,player_id,player_name,height,weight,age,birth_area,passport_area,...,%_dribblesAgainstWon,%_fieldAerialDuelsWon,%_gkSaves,%_gkSuccessfulExits,%_gkAerialDuelsWon,%_newDuelsWon,%_newDefensiveDuelsWon,%_newOffensiveDuelsWon,%_newSuccessfulDribbles,%_successfulLateralPasses
player_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Theo Hernández,3157.0,Milan,4-2-3-1,344132,Theo Hernández,184,81,23,France,Spain,...,0.4607,0.5663,0.0,0.0,0.0,0.5643,0.6129,0.5583,0.6061,0.8704
R. Calafiori,3158.0,Roma,3-4-3,542200,R. Calafiori,183,78,19,Italy,Italy,...,0.125,0.6667,0.0,0.0,0.0,0.4615,0.3333,0.6,0.5,1.0
Bruno Peres,3158.0,Roma,3-4-3,44251,Bruno Peres,178,72,31,Brazil,Brazil,...,0.3947,0.3,0.0,0.0,0.0,0.4502,0.4941,0.4495,0.5614,0.8349
L. Spinazzola,3158.0,Roma,3-4-3,190167,L. Spinazzola,186,75,28,Italy,Italy,...,0.381,0.4706,0.0,0.0,0.0,0.5373,0.6421,0.5176,0.5904,0.799
Alex Sandro,3159.0,Juventus,4-4-2,70084,Alex Sandro,181,81,30,Brazil,Brazil,...,0.5714,0.5645,0.0,0.0,0.0,0.595,0.6172,0.569,0.7447,0.9256
G. Frabotta,3159.0,Juventus,4-4-2,489124,G. Frabotta,176,70,21,Italy,Italy,...,0.5238,0.4839,0.0,0.0,0.0,0.48,0.5893,0.4,0.4444,0.8836
R. Gagliolo,3160.0,Parma,4-3-3,220377,R. Gagliolo,182,75,31,Italy,Sweden,...,0.3671,0.4828,0.0,0.0,0.0,0.5258,0.5222,0.597,0.5,0.8981
G. Pezzella,3160.0,Parma,4-3-3,388398,G. Pezzella,187,81,23,Italy,Italy,...,0.44,0.5,0.0,0.0,0.0,0.519,0.5643,0.4831,0.6386,0.8228
A. Young,3161.0,Internazionale,3-5-2,7939,A. Young,175,65,35,England,Jamaica,...,0.4359,0.35,0.0,0.0,0.0,0.4709,0.5065,0.5763,0.6875,0.8176
I. Perišić,3161.0,Internazionale,3-5-2,14812,I. Perišić,186,80,32,Croatia,Croatia,...,0.4815,0.6333,0.0,0.0,0.0,0.5786,0.617,0.6,0.7037,0.7689


In [358]:
# Take the first row whose flagReplace column equals True as the reference
# player, and keep its id and name for the cells that follow.
flagged_players = final_df.loc[final_df["flagReplace"] == True]
playerToReplace = flagged_players.iloc[0]
player_id = playerToReplace["player_id"]
player_name = playerToReplace["player_name"]
player_id, player_name

(295176, 'R. Gosens')

In [359]:
# Show the reference player's scaled values for the ATTACK macro-area columns.
playerToReplace[MACRO_AREAS["ATTACK"]]

attackingActions_P90              0.251422
foulsSuffered_P90                 0.158621
offsides_P90                      0.144796
receivedPass_P90                   0.61164
successfulAttackingActions_P90    0.216216
touchInBox_P90                    0.253904
Name: R. Gosens, dtype: object

In [341]:
# One unticked checkbox per macro area, stacked vertically and rendered so
# the areas to compare on can be picked by hand.
checkboxes = []
for label in MACRO_AREAS.keys():
    checkboxes.append(widgets.Checkbox(value=False, description=label))
macroareas = widgets.VBox(children=checkboxes)
display(macroareas)

VBox(children=(Checkbox(value=False, description='ATTACK'), Checkbox(value=False, description='CARDS'), Checkb…

In [364]:
# Collect the labels of the ticked checkboxes, in display order.
# NOTE: the result depends on the manual widget state at the time this cell runs.
macroareas_selected = [box.description for box in checkboxes if box.value]
macroareas_selected

['ATTACK', 'CROSS', 'DEFENSE', 'DUELS', 'PHYSICS', 'SHOTS']

In [367]:
# Cosine similarity (as a 0-100 score, rounded to 2 decimals) between the
# reference player and every player in final_df, computed separately on the
# scaled columns of each selected macro area, plus the mean across areas.
if not macroareas_selected:
    raise ValueError(
        "No macro area selected: tick at least one checkbox and re-run the selection cell."
    )

# Look the reference player up by id rather than by name: names are only a
# display label and are not guaranteed to be unique.
start_rows = final_df[final_df.player_id == player_id]

similarity_columns = {}
for macroarea in macroareas_selected:
    features = MACRO_AREAS[macroarea]
    # Shape (1, n_features): the reference player's feature vector.
    start_player = start_rows[features].iloc[[0]].to_numpy(dtype=float)
    # Shape (n_players, n_features): every candidate, scored in one call
    # instead of one cosine_similarity call per row.
    all_players = final_df[features].to_numpy(dtype=float)
    scores = cosine_similarity(start_player, all_players)[0]
    similarity_columns[macroarea + "_Similarity"] = np.round(scores * 100, 2)

# Rows are labelled with the player names from final_df's index. Players that
# share a name keep one row each instead of silently overwriting one another.
similarity_df = pd.DataFrame(similarity_columns, index=final_df.index.rename(None))
similarity_df["MEAN_Similarity"] = similarity_df.mean(axis=1).round(2)
similarity_df.sort_values(by="MEAN_Similarity", ascending=False)

Unnamed: 0,ATTACK_Similarity,CROSS_Similarity,DEFENSE_Similarity,DUELS_Similarity,PHYSICS_Similarity,SHOTS_Similarity,MEAN_Similarity
R. Gosens,100.0,100.0,100.0,100.0,100.0,100.0,100.0
A. Marušić,99.22,97.11,93.04,98.91,99.66,90.46,96.4
I. Perišić,89.25,97.84,95.57,98.47,100.0,92.33,95.58
Alex Sandro,95.66,99.5,93.89,98.54,99.96,84.32,95.31
C. Lykogiannis,95.71,99.16,94.09,98.9,99.29,82.42,94.93
F. Dimarco,97.13,97.48,95.7,97.55,99.44,79.83,94.52
D. Lazović,95.78,98.92,93.81,98.11,99.87,79.22,94.29
A. Reca,91.74,98.94,95.77,97.5,99.5,82.0,94.24
Bruno Peres,96.29,99.66,97.01,96.01,98.18,77.64,94.13
S. Bastoni,94.8,99.75,91.26,96.82,99.81,79.58,93.67


In [350]:
# Unscaled stats of the reference player for one macro area.
# NOTE(review): the output recorded below this cell lists linkup-play columns
# that the current MACRO_AREAS["ATTACK"] no longer contains, so it appears to
# come from an earlier run; re-run to refresh it.
macroarea = "ATTACK"
print("Gosens")
stats.loc[player_id, MACRO_AREAS[macroarea]]

Gosens


%_successfulLinkupPlays           50.00
attackingActions_P90               7.07
foulsSuffered_P90                  0.92
linkupPlays_P90                    0.07
offsides_P90                       0.32
receivedPass_P90                  38.57
successfulAttackingActions_P90     3.04
successfulLinkupPlays_P90          0.04
touchInBox_P90                     3.74
Name: 295176, dtype: float64

In [351]:
# Unscaled stats of a hand-picked comparison player, for the same macro area
# that the previous cell stored in `macroarea`.
compare = "G. Kyriakopoulos"
compare_rows = final_df.loc[final_df["player_name"] == compare]
compare_player_id = compare_rows.iloc[0]["player_id"]
stats.loc[compare_player_id, MACRO_AREAS[macroarea]]

%_successfulLinkupPlays            0.00
attackingActions_P90               8.90
foulsSuffered_P90                  0.36
linkupPlays_P90                    0.00
offsides_P90                       0.12
receivedPass_P90                  40.46
successfulAttackingActions_P90     4.45
successfulLinkupPlays_P90          0.00
touchInBox_P90                     1.26
Name: 272673, dtype: float64