In [6]:
import os
import pickle
import numpy as np
import ipywidgets as widgets
from IPython.display import display

In [7]:
def data_folder_for(code_folder):
    """Return the data folder path derived from ``code_folder``.

    Only the last occurrence of "code" in the path is swapped for "data", so
    an earlier directory that merely contains that substring (e.g. "vscode")
    is left alone. The path is returned unchanged when "code" does not occur.
    """
    head, sep, tail = code_folder.rpartition("code")
    return head + "data" + tail if sep else code_folder


# The notebook's working directory, and the data directory derived from it.
CODE_FOLDER = os.getcwd()
DATA_FOLDER = data_folder_for(CODE_FOLDER)

In [8]:
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# files you trust.
# The context manager closes the file handle as soon as the load finishes.
with open(DATA_FOLDER + "/preprocessing_dataset", "rb") as dataset_file:
    df = pickle.load(dataset_file)
df.shape

(752, 13)

In [9]:
# Show the column labels of the loaded dataset.
df.columns

Index(['team_id', 'team_name', 'main_scheme', 'player_id', 'player_name',
       'height', 'weight', 'age', 'birth_area', 'passport_area', 'foot',
       'main_role', 'contractExpiration'],
      dtype='object')

In [10]:
# Preview the first rows of the loaded dataset.
df.head()

Unnamed: 0,team_id,team_name,main_scheme,player_id,player_name,height,weight,age,birth_area,passport_area,foot,main_role,contractExpiration
0,3157.0,Milan,4-2-3-1,582906,P. Kalulu,179,69,20,France,Congo DR,right,Right Centre Back,2025.0
1,3157.0,Milan,4-2-3-1,558720,D. Maldini,181,70,19,Italy,Venezuela,right,Left Attacking Midfielder,2024.0
2,3157.0,Milan,4-2-3-1,20420,Z. Ibrahimović,195,95,39,Sweden,Bosnia-Herzegovina,right,Striker,2021.0
3,3157.0,Milan,4-2-3-1,20433,S. Kjær,190,86,32,Denmark,Denmark,right,Right Centre Back,2022.0
4,3157.0,Milan,4-2-3-1,83574,C. Tătărușanu,198,90,35,Romania,Romania,right,Goalkeeper,2023.0


In [11]:
# Columns exposed as filters, grouped by the kind of widget each one gets.
WIDGET_TYPE = {
    "category":["team_name", "main_scheme", "birth_area", "passport_area", "foot", "main_role"],
    "numeric":["age", "height", "weight", "contractExpiration"]
}
# Lower bound forced on the contractExpiration slider, whatever the data holds.
CONTRACT_EXPIRATION_MIN = 2021

# Maps each filterable column name to the widget that controls it.
widgets_dict = {}
for w_type, columns in WIDGET_TYPE.items():
    for col in columns:
        if w_type == "category":
            # `x == x` is False only for NaN, so this drops NaN and None
            # before sorting the distinct values into the option list.
            options_list = [x for x in df[col].unique() if x == x and x is not None]
            widgets_dict[col] = widgets.SelectMultiple(options=sorted(options_list), description=col)
        elif w_type == "numeric":
            # Only strictly positive values define the slider bounds
            # (presumably 0 marks a missing value -- TODO confirm).
            positive_values = df.loc[df[col] > 0, col]
            # Cast explicitly: the slider works on ints, while a column may be
            # stored as float (contractExpiration is shown as e.g. 2025.0).
            min_, max_ = int(positive_values.min()), int(positive_values.max())
            if col == "contractExpiration":
                min_ = CONTRACT_EXPIRATION_MIN
            widgets_dict[col] = widgets.IntRangeSlider(value=[min_, max_], min=min_, max=max_, step=1, description=col)
        display(widgets_dict[col])

SelectMultiple(description='team_name', options=('Atalanta', 'Benevento', 'Bologna', 'Cagliari', 'Crotone', 'F…

SelectMultiple(description='main_scheme', options=('3-4-1-2', '3-4-2-1', '3-4-3', '3-5-2', '4-2-3-1', '4-3-2-1…

SelectMultiple(description='birth_area', options=('Albania', 'Angola', 'Argentina', 'Armenia', 'Austria', 'Bel…

SelectMultiple(description='passport_area', options=('Albania', 'Algeria', 'Angola', 'Argentina', 'Armenia', '…

SelectMultiple(description='foot', options=('both', 'left', 'right'), value=())

SelectMultiple(description='main_role', options=('Attacking Midfielder', 'Centre Back', 'Defensive Midfielder'…

IntRangeSlider(value=(15, 43), description='age', max=43, min=15)

IntRangeSlider(value=(163, 202), description='height', max=202, min=163)

IntRangeSlider(value=(58, 96), description='weight', max=96, min=58)

IntRangeSlider(value=(2021, 2026), description='contractExpiration', max=2026, min=2021)

In [15]:
# Work on a copy of the loaded frame and narrow it down one widget at a time.
aux = df.copy()
for col, widget in widgets_dict.items():
    if col in WIDGET_TYPE["category"]:
        values = list(widget.value)
        if not values:
            # Nothing selected for this column: it imposes no restriction.
            continue
        query = f"{col} in {values}"
    elif col in WIDGET_TYPE["numeric"]:
        # The slider value is a (lower, upper) pair; both ends are inclusive.
        lower, upper = widget.value
        query = f"{col} >= {lower} and {col} <= {upper}"
    else:
        continue
    # Echo each applied condition so the active filters are visible in the output.
    print(query)
    aux = aux.query(query)

team_name in ['Atalanta', 'Juventus', 'Lazio', 'Milan', 'Napoli', 'Roma', 'Sassuolo']
main_role in ['Attacking Midfielder', 'Defensive Midfielder', 'Left Centre Midfielder', 'Left Defensive Midfielder', 'Right Centre Midfielder', 'Right Defensive Midfielder']
age >= 25 and age <= 32
height >= 163 and height <= 202
weight >= 58 and weight <= 96
contractExpiration >= 2021 and contractExpiration <= 2023


In [16]:
# Report how many rows survived the filters.
print(f"There are {len(aux)} players selected!")

There are 13 players selected!


In [17]:
# Preview the first rows of the filtered selection.
aux.head()

Unnamed: 0,team_id,team_name,main_scheme,player_id,player_name,height,weight,age,birth_area,passport_area,foot,main_role,contractExpiration
7,3157.0,Milan,4-2-3-1,15524,H. Çalhanoğlu,178,76,27,Turkey,Turkey,right,Attacking Midfielder,2021.0
8,3157.0,Milan,4-2-3-1,25504,S. Meïté,187,91,27,France,Côte d'Ivoire,right,Right Centre Midfielder,2023.0
27,3158.0,Roma,3-4-3,25440,J. Pastore,187,78,31,Argentina,Italy,right,Attacking Midfielder,2023.0
31,3158.0,Roma,3-4-3,105338,H. Mkhitaryan,177,75,32,Armenia,Armenia,right,Attacking Midfielder,2021.0
66,3159.0,Juventus,4-4-2,209117,A. Rabiot,188,71,26,France,France,left,Left Centre Midfielder,2023.0


In [19]:
# Persist the filtered selection; the context manager guarantees the file is
# flushed and closed once the dump completes.
with open(DATA_FOLDER + "/filter_dataset", "wb") as output_file:
    pickle.dump(aux, output_file)