In [21]:
from pathlib import Path
from models.transfersdataset import TransfersDataset
from models.xgboost import Xgboost as Model

In [22]:
import pandas as pd
import warnings

In [23]:
warnings.filterwarnings("ignore")

In [24]:
# Load the prepared transfers CSV (path is relative to the current working
# directory) and run the dataset's preprocessing steps in their original order.
# NOTE(review): no later cell visible in this notebook reads this module-level
# `td`; `pipeline` builds its own dataset. Confirm whether this cell is still needed.
td = TransfersDataset(Path.cwd().joinpath('../prepped/final-2.csv'))
td.encode_last_positions()
td.encode()
td.drop()

In [25]:
# Hand-picked feature columns for the 'DEF' run of the pipeline.
# Rows are grouped by column-name theme only; order is unchanged.
# NOTE: 'acclongpassesct' (no 'p' before 'ct') is spelled the same way in the
# recorded feature lists further down, so it is kept verbatim here.
def_features = [
    'duels90', 'duelswonpct', 'succssdefact90', 'defduels90', 'defduelswpct',
    'aerduels90', 'aerduelspct',
    'sltackles90', 'padjsltackles', 'shotblocks90', 'intrcpt90', 'padjintrcpt',
    'fouls90', 'ycards90', 'rcards90',
    'passes90', 'accpassespct',
    'fwdpasses90', 'accfwdpassespct',
    'bpasses90', 'accbpassespct',
    'latpasses90', 'acclatpassespct',
    'shortmedpasses90', 'accshortmedpassespct',
    'longpasses90', 'acclongpassesct',
    'avgpasslen', 'avglongpasslen',
]

In [26]:
# One row of scores is appended here per `pipeline` call.
# Re-running this cell resets the table to empty.
model_scores = pd.DataFrame()


def pipeline(pos, features=None, full_feature_set=False, thresh=2.5, top_n=20):
    """Train and score one XGBoost model wrapper for a single position group.

    Loads the prepared transfers CSV, filters it to ``pos``, runs the model
    wrapper's split -> scale -> tune -> train -> inverse-scale sequence, and
    appends the resulting score row(s) to the module-level ``model_scores``.

    Parameters
    ----------
    pos
        Position key forwarded to ``TransfersDataset.filter_postion``
        (the notebook uses ``'DEF'``).
    features
        Feature column names forwarded to the model wrapper, or ``None``.
    full_feature_set
        Forwarded unchanged to the model wrapper.
    thresh
        Threshold forwarded to ``m.score``. Defaults to the previously
        hard-coded 2.5.
    top_n
        Row count forwarded to both ``m.top_n_predictions`` calls. Defaults
        to the previously hard-coded 20.

    Returns
    -------
    None
        The function works through side effects: whatever the model wrapper
        prints, plus the update of ``model_scores``.
    """
    global model_scores

    td = TransfersDataset(Path.cwd()/'../prepped/final-2.csv')
    td.filter_postion(pos)  # spelling follows the dataset class's method name
    td.encode()
    td.drop()

    m = Model(data=td.data, features=features, full_feature_set=full_feature_set)
    m.train_test_split()
    m.scale()
    m.tune_hp()
    m.train()
    m.inverse_scale()

    run_score = m.score(thresh=thresh)
    if model_scores.empty:
        # Skip concatenating onto an empty frame: newer pandas deprecates
        # that path, and the result is just the first score row anyway.
        model_scores = run_score.reset_index(drop=True)
    else:
        # ignore_index gives each run its own label (0, 1, 2, ...) in call
        # order, instead of every row sharing label 0.
        model_scores = pd.concat([model_scores, run_score], ignore_index=True)

    # Optional diagnostics, left disabled:
    # display(m.feature_importance())
    # m.plot_predictions()

    # NOTE(review): the return values of the calls below are discarded; if
    # these methods return frames rather than printing, wrap them in display().
    m.top_n_predictions(top_n)
    m.top_n_predictions(top_n, worst=True)
    m.calculate_effectiveness()

In [27]:
# Baseline run: defenders with the hand-picked feature list.
pipeline(pos='DEF', features=def_features)

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 94, 'learning_rate': 0.1918819068983684, 'n_estimators': 233, 'subsample': 0.6005442877240614, 'colsample_bytree': 0.9751125583665675, 'gamma': 0.3439158795357194, 'reg_alpha': 0.6454740674083349, 'reg_lambda': 0.8504878402805004}
thresh = 2.5
mae: 2.22129552313622
mape: 0.5688524566536844
mae_below_thresh: 0.9259822902856052
mape_above_thresh: 0.3569713907707334


In [28]:
# Read the saved feature-selection results, drop the first CSV column and
# order the rows by ascending 'MSE'.
scores = pd.read_csv('xgboost_def.csv')
scores = scores.iloc[:, 1:].sort_values(by='MSE')

# 'Features' is stored as one ', '-separated string per row; turn it into lists.
scores['Features'] = scores['Features'].str.split(', ')

# Keep the feature lists of the 10 lowest-MSE rows, keyed by row label.
feature_sets = scores['Features'].head(10).to_dict()

In [29]:
# Re-run the pipeline once per saved feature set, in the dict's stored order.
# Iterating over the values avoids binding the unused keys to `_`, which in
# IPython is the last-output variable and stops updating once user code
# assigns to it.
for features in feature_sets.values():
    pipeline(pos='DEF', features=features, full_feature_set=True)

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 53, 'learning_rate': 0.17432632890896935, 'n_estimators': 153, 'subsample': 0.6351538999595564, 'colsample_bytree': 0.9973006453851432, 'gamma': 0.05468007705437293, 'reg_alpha': 0.22287237445047553, 'reg_lambda': 0.16484309979107659}
thresh = 2.5
mae: 2.5263125231925474
mape: 0.564262718943062
mae_below_thresh: 0.8761115484767492
mape_above_thresh: 0.37918407033226365


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 18, 'learning_rate': 0.1976347470883176, 'n_estimators': 993, 'subsample': 0.6150458262197802, 'colsample_bytree': 0.8581506413348367, 'gamma': 0.30044666852885527, 'reg_alpha': 0.850283557581109, 'reg_lambda': 0.2758874086791958}
thresh = 2.5
mae: 2.2880827873311143
mape: 0.5698097506266887
mae_below_thresh: 0.8981097676135876
mape_above_thresh: 0.34403971516160525


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 44, 'learning_rate': 0.18978871911336978, 'n_estimators': 832, 'subsample': 0.6011702222463879, 'colsample_bytree': 0.7452336263657826, 'gamma': 0.03876770710239455, 'reg_alpha': 0.17252729291374702, 'reg_lambda': 0.3184651868708194}
thresh = 2.5
mae: 2.4373304545456635
mape: 0.5957295358861013
mae_below_thresh: 0.9790454398702693
mape_above_thresh: 0.3779646247973898


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 48, 'learning_rate': 0.17402625633654742, 'n_estimators': 280, 'subsample': 0.8705552855787554, 'colsample_bytree': 0.8603736834329954, 'gamma': 0.4405044988984207, 'reg_alpha': 0.7791806137474301, 'reg_lambda': 0.4689213807561758}
thresh = 2.5
mae: 2.515388716028092
mape: 0.6260462961212504
mae_below_thresh: 1.0776250450699418
mape_above_thresh: 0.3474750830804128


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 83, 'learning_rate': 0.19488207714980968, 'n_estimators': 1312, 'subsample': 0.6560429733501637, 'colsample_bytree': 0.7420315035203238, 'gamma': 0.22790382710015494, 'reg_alpha': 0.14408453126537266, 'reg_lambda': 0.5583594873336344}
thresh = 2.5
mae: 2.3065867944812095
mape: 0.5275652650952393
mae_below_thresh: 0.8423574915638677
mape_above_thresh: 0.3367953936447681


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 153, 'learning_rate': 0.19937953138424164, 'n_estimators': 959, 'subsample': 0.6423672259065571, 'colsample_bytree': 0.8762354543068036, 'gamma': 0.03165081603318565, 'reg_alpha': 0.4206950403384676, 'reg_lambda': 0.7153935404910939}
thresh = 2.5
mae: 2.4496371359689864
mape: 0.5328117162933877
mae_below_thresh: 0.7889241167792568
mape_above_thresh: 0.36353696971531485


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 133, 'learning_rate': 0.19929644525634643, 'n_estimators': 905, 'subsample': 0.6525073966087442, 'colsample_bytree': 0.8497544609308417, 'gamma': 0.4441765384336127, 'reg_alpha': 0.8893438476953577, 'reg_lambda': 0.9595472352797864}
thresh = 2.5
mae: 2.477569599692703
mape: 0.5979436499308947
mae_below_thresh: 0.9841050183331527
mape_above_thresh: 0.3776923751414572


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 138, 'learning_rate': 0.19139405470321882, 'n_estimators': 1457, 'subsample': 0.8529300341106317, 'colsample_bytree': 0.9802692930754129, 'gamma': 0.4862868305337867, 'reg_alpha': 0.5926113153114856, 'reg_lambda': 0.7695649351163693}
thresh = 2.5
mae: 2.44825827517408
mape: 0.5962911148302465
mae_below_thresh: 0.9486674551610595
mape_above_thresh: 0.32857082741146665


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 166, 'learning_rate': 0.19784512330044368, 'n_estimators': 791, 'subsample': 0.6060635008193335, 'colsample_bytree': 0.9068418489988622, 'gamma': 0.47847994569977653, 'reg_alpha': 0.49693675871722226, 'reg_lambda': 0.7030948919492146}
thresh = 2.5
mae: 2.406534659947064
mape: 0.5820143387339298
mae_below_thresh: 0.9534594641791451
mape_above_thresh: 0.33947461647959815


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 20, 'learning_rate': 0.19390115660279528, 'n_estimators': 1141, 'subsample': 0.8501866184190362, 'colsample_bytree': 0.8140399065117935, 'gamma': 0.21375959035973524, 'reg_alpha': 0.9430507076471171, 'reg_lambda': 0.31321072750364914}
thresh = 2.5
mae: 2.624212546044207
mape: 0.5955513234546788
mae_below_thresh: 0.9221950760594123
mape_above_thresh: 0.37322983304942026


In [32]:
# Show all recorded runs ordered by ascending 'mape_above_thresh'. The column
# width limit is lifted only inside this block so the feature lists are
# printed in full.
with pd.option_context('display.max_colwidth', None):
    display(model_scores.sort_values(by='mape_above_thresh'))

Unnamed: 0,mae,mape,mae_below_thresh,mape_above_thresh,features
0,2.448258,0.596291,0.948667,0.328571,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90, xa90, acclr90, bpasses90, rcards90, deepcompcrosses90]"
0,2.306587,0.527565,0.842357,0.336795,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90, xa90, acclr90]"
0,2.406535,0.582014,0.953459,0.339475,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct]"
0,2.288083,0.56981,0.89811,0.34404,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90]"
0,2.515389,0.626046,1.077625,0.347475,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90, xa90]"
0,2.221296,0.568852,0.925982,0.356971,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, duels90, duelswonpct, succssdefact90, defduels90, defduelswpct, aerduels90, aerduelspct, sltackles90, padjsltackles, shotblocks90, intrcpt90, padjintrcpt, fouls90, ycards90, rcards90, passes90, accpassespct, fwdpasses90, accfwdpassespct, bpasses90, accbpassespct, latpasses90, acclatpassespct, shortmedpasses90, accshortmedpassespct, longpasses90, acclongpassesct, avgpasslen, avglongpasslen]"
0,2.449637,0.532812,0.788924,0.363537,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90, xa90, acclr90, bpasses90]"
0,2.624213,0.595551,0.922195,0.37323,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90]"
0,2.47757,0.597944,0.984105,0.377692,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90, xa90, acclr90, bpasses90, rcards90]"
0,2.43733,0.59573,0.979045,0.377965,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, latpasses90, shotassist90, aerduelspct, directfk90, defduelswpct, acccrossesrightpct, padjintrcpt, pens, directfkontrgtpct, pencnvrspct, acccrossesleftpct, fk90, npgoals90, accprpassespct, crossesleft90, corners90, acclongpassesct, assists90, goals90, shots90, crossesright90, deepcomp90, crosses6yard90]"
