In [1]:
from pathlib import Path
from models.transfersdataset import TransfersDataset
from models.xgboost import Xgboost
import warnings
import pandas as pd

In [2]:
# --- Run 1: model built from the prepared dataset, no explicit feature list ---
# Load the prepared transfers CSV and run the dataset preparation steps.
# The call order below is kept exactly as in the original cell.
td = TransfersDataset(Path.cwd()/'../prepped/final-2.csv')
td.encode_last_positions()
td.encode()
td.drop()

# Build the model wrapper on the prepared data and run the fit pipeline.
m = Xgboost(td.data)
m.train_test_split()
m.scale()
m.tune_hp()
m.train()
m.inverse_scale()
m.score(thresh=10)
m.plot_predictions()

# Jupyter renders only the value of a cell's final expression. As bare
# statements in the middle of the cell these two tables were computed and then
# silently discarded, so they are passed to display() to actually show them.
display(m.top_n_predictions(20))
display(m.top_n_predictions(20, worst=True))

# Final expression: rendered as the cell's output value.
m.best_params

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 186, 'learning_rate': 0.09935958414524583, 'n_estimators': 2290, 'subsample': 0.9111204963210666, 'colsample_bytree': 0.8747350774498337, 'gamma': 0.1658689154916186, 'reg_alpha': 0.04733955920304034, 'reg_lambda': 0.07078337648871202}
thresh = 10
mae: 2.9005781617861115
mape: 0.5838131986276466
mae_below_thresh: 1.579952129933142
mape_above_thresh: 0.255899325536429


{'max_depth': 186,
 'learning_rate': 0.09935958414524583,
 'n_estimators': 2290,
 'subsample': 0.9111204963210666,
 'colsample_bytree': 0.8747350774498337,
 'gamma': 0.1658689154916186,
 'reg_alpha': 0.04733955920304034,
 'reg_lambda': 0.07078337648871202}

In [3]:
# Read the stored feature-importance table and keep the names of the features
# whose importance is strictly greater than 1.
importance_table = pd.read_csv('feature_importance_xgboost.csv')
is_important = importance_table['importance'] > 1
features = importance_table.loc[is_important, 'feature'].to_list()

# Show the selected feature names (a plain list of strings).
features

['marketval',
 'fee',
 'league_to_elo',
 'goals90',
 'kp90',
 'posRW',
 'season',
 'club_from_elo',
 'succssdribblespct',
 'posLAMF',
 'age',
 'prevgoals90',
 'goalcnvrsnpct',
 'rcvdpasses90',
 'deepcomp90',
 'league_from_elo',
 'padjsltackles',
 'acccrossesrightpct',
 'dribbles90',
 'accpassespct',
 'hdrgoals90',
 'sltackles90',
 'accthrpassespct',
 'thrpasses90',
 'xa90',
 'accprpassespct']

In [4]:
# --- Run 2: same pipeline as the first run, restricted via `features` ---
# Reload and re-prepare the dataset from the CSV so this run does not depend
# on the state left behind by the previous `td`.
td = TransfersDataset(Path.cwd()/'../prepped/final-2.csv')
td.encode_last_positions()
td.encode()
td.drop()
# `features` is the list selected from feature_importance_xgboost.csv above.
# NOTE(review): how `full_feature_set=True` interacts with an explicit
# `features` list is not visible in this notebook — confirm in models/xgboost.
m = Xgboost(td.data, features=features, full_feature_set=True)
m.train_test_split()
m.scale()
m.tune_hp()
m.train()
m.inverse_scale()
m.score(thresh=10)
m.plot_predictions()
# NOTE(review): the next two calls are not the cell's last expression, so
# their return values are not rendered here; the same tables are shown by the
# dedicated cells further down.
m.top_n_predictions(20)
m.top_n_predictions(20, worst=True)
# Final expression: rendered as the cell's output value.
m.best_params

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 8, 'learning_rate': 0.09812670821763003, 'n_estimators': 846, 'subsample': 0.9014920903871387, 'colsample_bytree': 0.9857092241943303, 'gamma': 0.4898498477923379, 'reg_alpha': 6.3658392865400035, 'reg_lambda': 6.828605534517097}
thresh = 10
mae: 2.780458031611496
mape: 0.5783292868666918
mae_below_thresh: 1.5297491323563361
mape_above_thresh: 0.2629197238017326


{'max_depth': 8,
 'learning_rate': 0.09812670821763003,
 'n_estimators': 846,
 'subsample': 0.9014920903871387,
 'colsample_bytree': 0.9857092241943303,
 'gamma': 0.4898498477923379,
 'reg_alpha': 6.3658392865400035,
 'reg_lambda': 6.828605534517097}

In [5]:
# Top predictions selected with criteria='error_pct'; in the recorded output
# the rows appear in ascending order of the `error_pct` column.
m.top_n_predictions(20, criteria='error_pct')

Unnamed: 0,name,age,season,country_from,league_from,club_from,country_to,league_to,club_to,window,marketval,marketval_0,fee,marketval_0.1,marketval_0_pred,error,error_pct
1207,Rémy Cabella,32,21/22,Russia,RU1,FK Krasnodar,France,FR1,Montpellier HSC,0,9.0,6.5,0.0,6.5,6.498903,0.001097,0.000169
1805,Randal Kolo Muani,24,23/24,Germany,L1,Eintracht Frankfurt,France,FR1,Paris Saint-Germain,1,80.0,70.0,95.0,70.0,70.35321,0.35321,0.005046
303,Bart Ramselaar,23,19/20,Netherlands,NL1,PSV Eindhoven,Netherlands,NL1,FC Utrecht,1,2.5,2.4,0.0,2.4,2.413014,0.013014,0.005422
70,Dennis Praet,25,19/20,Italy,IT1,UC Sampdoria,England,GB1,Leicester City,1,24.0,19.0,19.2,19.0,19.144943,0.144943,0.007629
1780,Davie Selke,27,22/23,Germany,L1,Hertha BSC,Germany,L1,1.FC Köln,0,1.8,2.5,0.0,2.5,2.480513,0.019487,0.007795
1933,Mark Flekken,30,23/24,Germany,L1,SC Freiburg,England,GB1,Brentford FC,1,10.0,12.0,13.0,12.0,11.884003,0.115997,0.009666
1457,Guus Til,24,22/23,Russia,RU1,Spartak Moscow,Netherlands,NL1,PSV Eindhoven,1,7.5,6.0,3.0,6.0,6.067336,0.067336,0.011223
233,Alexis Blin,22,19/20,France,FR1,FC Toulouse,France,FR1,Amiens SC,1,5.0,4.0,1.65,4.0,3.949785,0.050215,0.012554
282,Deniz Türüç,26,19/20,Türkiye,TR1,Kayserispor,Türkiye,TR1,Fenerbahce,1,3.0,2.8,2.5,2.8,2.761861,0.038139,0.013621
1894,Raphaël Guerreiro,29,23/24,Germany,L1,Borussia Dortmund,Germany,L1,Bayern Munich,1,20.0,15.0,0.0,15.0,15.241243,0.241243,0.016083


In [6]:
# Top predictions with the default criteria; in the recorded output the rows
# are mostly in ascending order of the `error` column, though not strictly.
m.top_n_predictions(20)

Unnamed: 0,name,age,season,country_from,league_from,club_from,country_to,league_to,club_to,window,marketval,marketval_0,fee,marketval_0.1,marketval_0_pred,error,error_pct
1207,Rémy Cabella,32,21/22,Russia,RU1,FK Krasnodar,France,FR1,Montpellier HSC,0,9.0,6.5,0.0,6.5,6.498903,0.001097,0.000169
303,Bart Ramselaar,23,19/20,Netherlands,NL1,PSV Eindhoven,Netherlands,NL1,FC Utrecht,1,2.5,2.4,0.0,2.4,2.413014,0.013014,0.005422
1780,Davie Selke,27,22/23,Germany,L1,Hertha BSC,Germany,L1,1.FC Köln,0,1.8,2.5,0.0,2.5,2.480513,0.019487,0.007795
282,Deniz Türüç,26,19/20,Türkiye,TR1,Kayserispor,Türkiye,TR1,Fenerbahce,1,3.0,2.8,2.5,2.8,2.761861,0.038139,0.013621
233,Alexis Blin,22,19/20,France,FR1,FC Toulouse,France,FR1,Amiens SC,1,5.0,4.0,1.65,4.0,3.949785,0.050215,0.012554
1564,Edinson Cavani,35,22/23,England,GB1,Manchester United,Spain,ES1,Valencia CF,1,4.0,3.0,0.0,3.0,2.93621,0.06379,0.021263
1666,Kévin Rodrigues,28,22/23,Spain,ES1,Real Sociedad,Türkiye,TR1,Adana Demirspor,1,1.8,2.3,0.0,2.3,2.235162,0.064838,0.02819
1457,Guus Til,24,22/23,Russia,RU1,Spartak Moscow,Netherlands,NL1,PSV Eindhoven,1,7.5,6.0,3.0,6.0,6.067336,0.067336,0.011223
2075,Florian Lejeune,32,23/24,Spain,ES1,Deportivo Alavés,Spain,ES1,Rayo Vallecano,1,4.0,3.5,2.5,3.5,3.416079,0.083921,0.023977
307,Ander Iturraspe,30,19/20,Spain,ES1,Athletic Bilbao,Spain,ES1,RCD Espanyol Barcelona,1,2.5,2.0,0.0,2.0,1.91588,0.08412,0.04206


In [7]:
# Worst predictions (worst=True); in the recorded output the rows appear in
# descending order of the `error` column.
m.top_n_predictions(20, worst=True)

Unnamed: 0,name,age,season,country_from,league_from,club_from,country_to,league_to,club_to,window,marketval,marketval_0,fee,marketval_0.1,marketval_0_pred,error,error_pct
121,Diego Carlos,26,19/20,France,FR1,FC Nantes,Spain,ES1,Sevilla FC,1,13.0,50.0,15.0,50.0,13.806778,36.193222,0.723864
508,Emiliano Martínez,28,20/21,England,GB1,Arsenal FC,England,GB1,Aston Villa,1,8.0,35.0,17.4,35.0,12.333734,22.666266,0.647608
1316,Morgan Gibbs-White,22,22/23,England,GB1,Wolverhampton Wanderers,England,GB1,Nottingham Forest,1,11.0,40.0,29.5,40.0,17.697262,22.302738,0.557568
1454,Loïs Openda,22,22/23,Belgium,BE1,Club Brugge KV,France,FR1,RC Lens,1,7.5,35.0,15.39,35.0,12.851352,22.148648,0.632819
1812,Sandro Tonali,23,23/24,Italy,IT1,AC Milan,England,GB1,Newcastle United,1,50.0,40.0,64.0,40.0,57.795937,17.795937,0.444898
73,Theo Hernández,21,19/20,Spain,ES1,Real Madrid,Italy,IT1,AC Milan,1,15.0,32.0,22.8,32.0,14.589581,17.410419,0.544076
1718,Leandro Trossard,28,22/23,England,GB1,Brighton & Hove Albion,England,GB1,Arsenal FC,0,20.0,35.0,24.0,35.0,17.904024,17.095976,0.488456
96,Christopher Nkunku,21,19/20,France,FR1,Paris Saint-Germain,Germany,L1,RB Leipzig,1,12.0,33.0,19.5,33.0,15.909696,17.090304,0.517888
1271,Antony,22,22/23,Netherlands,NL1,Ajax Amsterdam,England,GB1,Manchester United,1,35.0,60.0,95.0,60.0,43.925037,16.074963,0.267916
1825,Rasmus Højlund,20,23/24,Italy,IT1,Atalanta BC,England,GB1,Manchester United,1,45.0,65.0,73.9,65.0,49.18412,15.81588,0.243321


In [8]:
# Compute the effectiveness columns on the current model, drop every row of the
# predictions table that still contains a missing value, then report the
# correlation between true and predicted effectiveness (pandas' default
# correlation method).
m.calculate_effectiveness()
t = m.predictions.dropna()
eff_true, eff_pred = t["eff_true"], t["eff_pred"]
eff_true.corr(eff_pred)

0.9139195761830462

In [9]:
# --- Run 3: same pipeline with `marketval` as the only listed feature ---
# Reuses `td` as prepared in an earlier cell and overwrites `m`, so every
# later cell that reads `m` sees this model rather than the previous one.
# NOTE(review): the output saved with this cell is a SyntaxError on line 1,
# which the current source cannot raise — the saved output looks stale;
# re-run from a fresh kernel to confirm.
m = Xgboost(td.data, features=['marketval'], full_feature_set=True)
m.train_test_split()
m.scale()
m.tune_hp()
m.train()
m.inverse_scale()
m.score(thresh=10)
m.plot_predictions()

SyntaxError: invalid syntax (2798333454.py, line 1)

In [None]:
# Same effectiveness check as for the previous model, applied to whatever `m`
# currently refers to: compute the effectiveness columns, drop rows with any
# missing value, and report the correlation between true and predicted values.
# NOTE(review): this needs `m.predictions` to exist, so the pipeline cell
# directly above must have completed successfully before this one is run.
m.calculate_effectiveness()
t = m.predictions.dropna()
eff_true, eff_pred = t["eff_true"], t["eff_pred"]
eff_true.corr(eff_pred)

AttributeError: 'Xgboost' object has no attribute 'predictions'