In [90]:
import os
import pickle
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display

In [91]:
# Directory the notebook is executed from.
# NOTE(review): presumably this is the project's "code" folder — confirm.
CODE_FOLDER = os.getcwd()

# The data directory is the same path with the "code" folder swapped for "data".
# Only the LAST path component named exactly "code" is swapped, so unrelated
# occurrences of the substring (e.g. ".../vscode/project/code") stay untouched.
_path_parts = CODE_FOLDER.split(os.sep)
if "code" in _path_parts:
    _last_code = len(_path_parts) - 1 - _path_parts[::-1].index("code")
    _path_parts[_last_code] = "data"
    DATA_FOLDER = os.sep.join(_path_parts)
else:
    # No folder is named exactly "code": keep the historical substring replacement.
    DATA_FOLDER = CODE_FOLDER.replace("code", "data")

In [92]:
# Load the pickled dataset produced by the preprocessing step. The file is opened
# in a `with` block so the handle is closed as soon as unpickling finishes.
# NOTE(review): pickle.load can execute arbitrary code — only load files created by this project.
with open(DATA_FOLDER + "/preprocessing_dataset", "rb") as dataset_file:
    df = pickle.load(dataset_file)
df.shape

(751, 13)

In [93]:
# List the column names of the loaded dataset.
df.columns

Index(['team_id', 'team_name', 'main_scheme', 'player_id', 'player_name',
       'height', 'weight', 'age', 'birth_area', 'passport_area', 'foot',
       'main_role', 'contractExpiration'],
      dtype='object')

In [94]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,team_id,team_name,main_scheme,player_id,player_name,height,weight,age,birth_area,passport_area,foot,main_role,contractExpiration
0,3157.0,Milan,4-2-3-1,582906,P. Kalulu,179,69,20,France,Congo DR,right,Right Centre Back,2025
1,3157.0,Milan,4-2-3-1,558720,D. Maldini,181,70,19,Italy,Venezuela,right,Left Attacking Midfielder,2024
2,3157.0,Milan,4-2-3-1,20420,Z. Ibrahimović,195,95,39,Sweden,Bosnia-Herzegovina,right,Striker,2021
3,3157.0,Milan,4-2-3-1,20433,S. Kjær,190,86,32,Denmark,Denmark,right,Right Centre Back,2022
4,3157.0,Milan,4-2-3-1,83574,C. Tătărușanu,198,90,35,Romania,Romania,right,Goalkeeper,2023


In [95]:
# Label shown in the dropdown: "<player_name> | <team_name>".
df["label"] = df["player_name"] + " | " + df["team_name"]
playerToSub = widgets.Dropdown(
    options=df["label"].sort_values(),
    description="player to replace",
)
display(playerToSub)

# Columns exposed as filters, grouped by the kind of widget used for them.
WIDGET_TYPE = {
    "category": ["team_name", "main_scheme", "birth_area", "passport_area", "foot", "main_role"],
    "numeric": ["age", "height", "weight", "contractExpiration"],
}

# One widget per filterable column, keyed by column name.
widgets_dict = {}
for w_type, columns in WIDGET_TYPE.items():
    for col in columns:
        if w_type == "category":
            # Multi-select over the distinct values, skipping the "N/A" placeholder and None.
            choices = sorted(v for v in df[col].unique() if v != "N/A" and v is not None)
            widgets_dict[col] = widgets.SelectMultiple(options=choices, description=col)
        elif w_type == "numeric":
            # Slider bounds come from the strictly positive values of the column.
            positive_values = df.loc[df[col] > 0, col]
            lower, upper = positive_values.min(), positive_values.max()
            if col == "contractExpiration":
                # Contract expiration lower bound is pinned to 2021.
                lower = 2021
            widgets_dict[col] = widgets.IntRangeSlider(
                value=[lower, upper],
                min=lower,
                max=upper,
                step=1,
                description=col,
            )
        display(widgets_dict[col])

Dropdown(description='player to replace', options=('A. Adorante | N/A', 'A. Arnofoli | Bologna', 'A. Baldursso…

SelectMultiple(description='team_name', options=('Atalanta', 'Benevento', 'Bologna', 'Cagliari', 'Crotone', 'F…

SelectMultiple(description='main_scheme', options=('3-4-1-2', '3-4-2-1', '3-4-3', '3-5-2', '4-2-3-1', '4-3-2-1…

SelectMultiple(description='birth_area', options=('Albania', 'Angola', 'Argentina', 'Armenia', 'Austria', 'Bel…

SelectMultiple(description='passport_area', options=('Albania', 'Algeria', 'Angola', 'Argentina', 'Armenia', '…

SelectMultiple(description='foot', options=('both', 'left', 'right'), value=())

SelectMultiple(description='main_role', options=('Attacking Midfielder', 'Centre Back', 'Defensive Midfielder'…

IntRangeSlider(value=(15, 43), description='age', max=43, min=15)

IntRangeSlider(value=(163, 202), description='height', max=202, min=163)

IntRangeSlider(value=(58, 96), description='weight', max=96, min=58)

IntRangeSlider(value=(2021, 2026), description='contractExpiration', max=2026, min=2021)

In [96]:
# The dropdown value is the full "<player_name> | <team_name>" label stored in df["label"].
# Identify the player by the WHOLE label rather than by the name alone, so that two
# players sharing a name are not both treated as the player to replace.
label_to_replace = playerToSub.value
playerToReplace = label_to_replace.split(" | ")[0]  # bare player name, kept for later cells
playerToReplace_df = df[df["label"] == label_to_replace]

# Combine every widget constraint into a single boolean mask over df.
# Boolean masks (isin / between) replace string-built `query` expressions, which
# depend on how the selected values happen to be rendered by str().
# NOTE(review): slider bounds are derived from values > 0, so rows whose numeric value
# is <= 0 never pass the range filters — confirm that is intended.
keep = np.ones(len(df), dtype=bool)
for col, widget in widgets_dict.items():
    if col in WIDGET_TYPE["category"]:
        values = list(widget.value)
        # An empty selection means "no constraint" on this column.
        if len(values) > 0:
            print(col + " in " + str(values))
            keep &= df[col].isin(values).to_numpy()
    elif col in WIDGET_TYPE["numeric"]:
        low, high = widget.value
        print(col + " >= " + str(low) + " and " + col + " <= " + str(high))
        # between() is inclusive on both ends, matching ">= low and <= high".
        keep &= df[col].between(low, high).to_numpy()

# Explicit copy: the new column below is written to an independent frame.
selection = df[keep].copy()

# Make sure the player to replace is always part of the output, even when the
# filters exclude them; in that case the row is appended at the end.
if not (selection["label"] == label_to_replace).any():
    selection = pd.concat([selection, playerToReplace_df], axis=0)

# True only for the player to replace, False for every candidate.
selection["flagReplace"] = selection["label"] == label_to_replace

main_role in ['Left Back', 'Left Back (5 at the back)', 'Left Wingback']
age >= 15 and age <= 43
height >= 163 and height <= 202
weight >= 58 and weight <= 96
contractExpiration >= 2021 and contractExpiration <= 2026


In [97]:
# Show the first row flagged as the player to replace.
selection.loc[selection["flagReplace"] == True].iloc[0]

team_id                               3172
team_name                         Atalanta
main_scheme                        3-4-1-2
player_id                           295176
player_name                      R. Gosens
height                                 183
weight                                  76
age                                     26
birth_area                     Netherlands
passport_area                  Netherlands
foot                                  left
main_role                    Left Wingback
contractExpiration                    2022
label                 R. Gosens | Atalanta
flagReplace                           True
Name: 264, dtype: object

In [98]:
# Count the rows that are not flagged as the player to replace.
n_candidates = len(selection.loc[selection["flagReplace"] == False])
print(f"There are {n_candidates} players selected!")

There are 49 players selected!


In [99]:
# Preview the first rows of the filtered selection.
selection.head()

Unnamed: 0,team_id,team_name,main_scheme,player_id,player_name,height,weight,age,birth_area,passport_area,foot,main_role,contractExpiration,label,flagReplace
17,3157.0,Milan,4-2-3-1,344132,Theo Hernández,184,81,23,France,Spain,left,Left Back,2024,Theo Hernández | Milan,False
26,3158.0,Roma,3-4-3,542200,R. Calafiori,183,78,19,Italy,Italy,left,Left Back,2025,R. Calafiori | Roma,False
37,3158.0,Roma,3-4-3,44251,Bruno Peres,178,72,31,Brazil,Brazil,right,Left Wingback,2021,Bruno Peres | Roma,False
39,3158.0,Roma,3-4-3,190167,L. Spinazzola,186,75,28,Italy,Italy,right,Left Wingback,2024,L. Spinazzola | Roma,False
62,3159.0,Juventus,4-4-2,70084,Alex Sandro,181,81,30,Brazil,Brazil,left,Left Back,2023,Alex Sandro | Juventus,False


In [100]:
# Persist the filtered selection. The `with` block guarantees the file is flushed
# and closed once the dump completes.
with open(DATA_FOLDER + "/filter_dataset", "wb") as output_file:
    pickle.dump(selection, output_file)