In [1]:
from pathlib import Path
from models.transfersdataset import TransfersDataset
from models.xgboost import Xgboost as Model

In [2]:
import pandas as pd
import warnings

In [3]:
# Blanket-suppress all warnings for the remainder of the kernel session.
# NOTE(review): this also hides deprecation/convergence warnings from the
# modelling code — consider narrowing by category once the noisy source is known.
warnings.filterwarnings(action="ignore")

In [4]:
# Load the prepared transfers CSV and run the dataset's preprocessing steps.
# The path is resolved against the current working directory, so the kernel
# must be started from a directory whose parent contains prepped/final-2.csv.
# NOTE(review): `pipeline` builds its own TransfersDataset from the same file,
# so this module-level `td` is independent of the runs it performs.
td = TransfersDataset(Path.cwd()/'../prepped/final-2.csv')
td.encode_last_positions()
td.encode()
td.drop()

In [5]:
# Full list of attacking metric names (46 entries, original order preserved).
# NOTE(review): presumably these are column names of the prepared dataset — verify.
att_features = [
    'sattact90', 'goals90', 'npgoals90', 'xg90', 'hdrgoals90',
    'shots90', 'ontrgtpct', 'goalcnvrsnpct', 'assists90', 'crosses90',
    'acccrossespct', 'crossesleft90', 'acccrossesleftpct', 'crossesright90', 'acccrossesrightpct',
    'crosses6yard90', 'dribbles90', 'succssdribblespct', 'offduels90', 'offduelspct',
    'touchesbox90', 'prgruns90', 'acclr90', 'rcvdpasses90', 'rcvdlongpasses90',
    'foulsa90', 'fwdpasses90', 'accfwdpassespct', 'xa90', 'shotassist90',
    'secassist90', 'thirdassist90', 'smartpasses90', 'accsmartpassespct', 'kp90',
    'final3rdpasses90', 'accfinal3rdpassespct', 'penareapasses90', 'accpenareapassespct', 'thrpasses90',
    'accthrpassespct', 'directfk90', 'directfkontrgtpct', 'corners90', 'pens',
    'pencnvrspct',
]
# Hand-picked 14-entry subset of `att_features`.
att_features2 = [
    'goals90', 'npgoals90', 'xg90', 'hdrgoals90', 'shots90',
    'xa90', 'shotassist90', 'touchesbox90', 'prgruns90', 'acclr90',
    'rcvdpasses90', 'smartpasses90', 'dribbles90', 'succssdribblespct',
]

In [6]:
# Accumulates one score row per `pipeline` call; re-running this cell resets it.
model_scores = pd.DataFrame()
def pipeline(pos, features=None, full_feature_set=False, thresh=10):
    """Train, tune and score one model for a single position.

    Loads the prepared transfers CSV, filters it to `pos`, fits the model and
    appends the resulting score row(s) to the module-level `model_scores`.

    Args:
        pos: Position code forwarded to `TransfersDataset.filter_postion`
            (e.g. 'ATT').
        features: Feature names forwarded to the model; `None` leaves the
            choice to the model's own default.
        full_feature_set: Forwarded unchanged to the model constructor.
        thresh: Threshold forwarded to `Model.score`. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        None. Results are recorded in `model_scores`, and the model's own
        reporting methods are invoked for their side effects.
    """
    global model_scores
    # The path is relative to the current working directory of the kernel.
    dataset = TransfersDataset(Path.cwd()/'../prepped/final-2.csv')
    dataset.filter_postion(pos)  # (sic) spelling is the dataset class's method name
    dataset.encode()
    dataset.drop()
    model = Model(data=dataset.data, features=features, full_feature_set=full_feature_set)
    model.train_test_split()
    model.scale()
    model.tune_hp()
    model.train()
    model.inverse_scale()
    # ignore_index=True renumbers the accumulated rows 0..n-1; without it every
    # appended row keeps the label it had in the per-run score frame, so all
    # rows ended up with the same index label.
    model_scores = pd.concat([model_scores, model.score(thresh=thresh)], ignore_index=True)
    # Optional diagnostics, currently disabled:
    # display(model.feature_importance())
    # model.plot_predictions()
    model.top_n_predictions(20)
    model.top_n_predictions(20, worst=True)
    model.calculate_effectiveness()

In [7]:
# Baseline run for position 'ATT' with the model's default feature selection.
pipeline(pos='ATT')

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 150, 'learning_rate': 0.09995678790556223, 'n_estimators': 2068, 'subsample': 0.9844929567753955, 'colsample_bytree': 0.8146465384243842, 'gamma': 0.4441628054344121, 'reg_alpha': 7.384559317399929, 'reg_lambda': 4.703836913709459}
thresh = 10
mae: 4.892823459460503
mape: 0.6435050486892064
mae_below_thresh: 2.2701618892552915
mape_above_thresh: 0.440853326261909


In [8]:
# Same position, restricted to the hand-picked `att_features2` list.
pipeline(pos='ATT', features=att_features2, full_feature_set=False)

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 77, 'learning_rate': 0.09610824764703817, 'n_estimators': 2200, 'subsample': 0.9191796286878126, 'colsample_bytree': 0.9302840384058307, 'gamma': 0.2015296840008286, 'reg_alpha': 0.7438020472939169, 'reg_lambda': 1.6927846909453748}
thresh = 10
mae: 5.327533937803366
mape: 0.7703574184609308
mae_below_thresh: 2.433827652273792
mape_above_thresh: 0.5115902796403468


In [9]:
# Load previously saved feature-selection results, drop the first column
# (positional slice) and order rows by ascending 'MSE'.
scores = (
    pd.read_csv('xgboost_att.csv')
    .iloc[:, 1:]
    .sort_values('MSE')
)
# 'Features' is stored as a ', '-joined string; split it into a list per row.
scores['Features'] = scores['Features'].str.split(', ')
# Keep the ten lowest-MSE feature lists, keyed by their original row label.
feature_sets = scores['Features'].head(10).to_dict()

In [10]:
# Re-run the pipeline once per stored feature list. Iterating over .values()
# avoids binding `_` at notebook scope, which would shadow IPython's
# last-output variable for the rest of the session; the keys are not needed.
for features in feature_sets.values():
    pipeline(pos='ATT', features=features, full_feature_set=True)

  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 7, 'learning_rate': 0.0994460255534429, 'n_estimators': 2317, 'subsample': 0.6703323215897187, 'colsample_bytree': 0.9696553571079854, 'gamma': 0.11849669002604307, 'reg_alpha': 0.14991454498998813, 'reg_lambda': 5.931494310540202}
thresh = 10
mae: 5.556196027676989
mape: 0.9097199746011282
mae_below_thresh: 2.9265700559963888
mape_above_thresh: 0.48075083581905415


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 193, 'learning_rate': 0.09966671245375215, 'n_estimators': 1784, 'subsample': 0.8211115196705746, 'colsample_bytree': 0.9773084996767483, 'gamma': 0.024577863353113464, 'reg_alpha': 7.308010898513091, 'reg_lambda': 0.9118293916170728}
thresh = 10
mae: 5.397407050426636
mape: 0.7393404336629813
mae_below_thresh: 2.572236141947205
mape_above_thresh: 0.4775872864524656


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 43, 'learning_rate': 0.094722831316539, 'n_estimators': 220, 'subsample': 0.8150009405926598, 'colsample_bytree': 0.9265852175668986, 'gamma': 0.09583577141755883, 'reg_alpha': 1.3983434918701498, 'reg_lambda': 0.011751852341120028}
thresh = 10
mae: 5.6350827916313175
mape: 0.8327809208511591
mae_below_thresh: 2.707132450456838
mape_above_thresh: 0.5062156416045557


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 169, 'learning_rate': 0.09424529215812119, 'n_estimators': 2254, 'subsample': 0.8415823550684063, 'colsample_bytree': 0.9880589644865965, 'gamma': 0.21450501301518884, 'reg_alpha': 2.6091520420169543, 'reg_lambda': 0.41691788169376365}
thresh = 10
mae: 5.443369690070491
mape: 0.7978291111015684
mae_below_thresh: 2.6028697424588754
mape_above_thresh: 0.49848143511754417


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 26, 'learning_rate': 0.09912752329628216, 'n_estimators': 1320, 'subsample': 0.8241957309746002, 'colsample_bytree': 0.9292069498011922, 'gamma': 0.14962932545535595, 'reg_alpha': 1.7132116652313856, 'reg_lambda': 0.6291456313400092}
thresh = 10
mae: 5.479008220644255
mape: 0.7942597206900811
mae_below_thresh: 2.5337296709830524
mape_above_thresh: 0.5136861277797157


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 189, 'learning_rate': 0.0910103547972209, 'n_estimators': 2198, 'subsample': 0.9840800164859206, 'colsample_bytree': 0.7139474168382589, 'gamma': 0.24479794513031147, 'reg_alpha': 0.6631740620262697, 'reg_lambda': 0.5633281388303897}
thresh = 10
mae: 5.482758552284083
mape: 0.8134293334872716
mae_below_thresh: 2.7242720436625425
mape_above_thresh: 0.5012578108873329


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 74, 'learning_rate': 0.08753264432376852, 'n_estimators': 146, 'subsample': 0.9954265659601024, 'colsample_bytree': 0.8257523190347631, 'gamma': 0.33610490578669655, 'reg_alpha': 0.12861406980896462, 'reg_lambda': 1.2819359379299133}
thresh = 10
mae: 5.2759385655810025
mape: 0.7636454594339611
mae_below_thresh: 2.569375943525343
mape_above_thresh: 0.47520248940905213


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 170, 'learning_rate': 0.09481870085563883, 'n_estimators': 2311, 'subsample': 0.9794336032465534, 'colsample_bytree': 0.6688842506126151, 'gamma': 0.33092064283171696, 'reg_alpha': 0.43923964716852226, 'reg_lambda': 1.1520766906780837}
thresh = 10
mae: 5.202049248798026
mape: 0.7506293214658508
mae_below_thresh: 2.5057710624864846
mape_above_thresh: 0.47344734126697763


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 170, 'learning_rate': 0.09909568485220556, 'n_estimators': 2676, 'subsample': 0.7536633950663953, 'colsample_bytree': 0.809934484806943, 'gamma': 0.39946017549142837, 'reg_alpha': 4.192645569083583, 'reg_lambda': 0.05124315114551903}
thresh = 10
mae: 5.168764088523782
mape: 0.699451298600674
mae_below_thresh: 2.510889933205622
mape_above_thresh: 0.45099652443351274


  0%|          | 0/50 [00:00<?, ?it/s]

Best parameters: {'max_depth': 166, 'learning_rate': 0.09996402664949106, 'n_estimators': 2081, 'subsample': 0.6704765897349877, 'colsample_bytree': 0.9336207934441351, 'gamma': 0.0919018995845214, 'reg_alpha': 1.2063726205994378, 'reg_lambda': 0.9915362267923705}
thresh = 10
mae: 5.85605155516002
mape: 0.8801005513631092
mae_below_thresh: 2.8860268630209513
mape_above_thresh: 0.5444116578097302


In [11]:
# Show every accumulated run, best (lowest) 'mape_above_thresh' first, with
# column truncation disabled so the full feature lists stay readable.
with pd.option_context('display.max_colwidth', None):
    display(model_scores.sort_values(by='mape_above_thresh', ascending=True))

Unnamed: 0,mae,mape,mae_below_thresh,mape_above_thresh,features
0,4.892823,0.643505,2.270162,0.440853,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, duels90, duelswonpct, foot, height, weight, succssdefact90, defduels90, defduelswpct, aerduels90, aerduelspct, sltackles90, padjsltackles, shotblocks90, intrcpt90, padjintrcpt, fouls90, ycards90, rcards90, sattact90, goals90, npgoals90, xg90, hdrgoals90, shots90, ontrgtpct, goalcnvrsnpct, assists90, crosses90, acccrossespct, crossesleft90, acccrossesleftpct, crossesright90, acccrossesrightpct, crosses6yard90, dribbles90, succssdribblespct, offduels90, offduelspct, touchesbox90, prgruns90, acclr90, rcvdpasses90, rcvdlongpasses90, foulsa90, passes90, accpassespct, fwdpasses90, accfwdpassespct, bpasses90, accbpassespct, latpasses90, acclatpassespct, shortmedpasses90, accshortmedpassespct, longpasses90, acclongpassesct, avgpasslen, avglongpasslen, xa90, shotassist90, secassist90, thirdassist90, smartpasses90, accsmartpassespct, kp90, final3rdpasses90, accfinal3rdpassespct, penareapasses90, accpenareapassespct, thrpasses90, accthrpassespct, deepcomp90, deepcompcrosses90, prpasses90, accprpassespct, fk90, directfk90, directfkontrgtpct, corners90, pens, pencnvrspct]"
0,5.168764,0.699451,2.51089,0.450997,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90]"
0,5.202049,0.750629,2.505771,0.473447,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct]"
0,5.275939,0.763645,2.569376,0.475202,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90, rcards90, avglongpasslen, directfkontrgtpct, corners90, defduels90, hdrgoals90, pencnvrspct, secassist90]"
0,5.397407,0.73934,2.572236,0.477587,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90, rcards90, avglongpasslen, directfkontrgtpct, corners90, defduels90, hdrgoals90]"
0,5.556196,0.90972,2.92657,0.480751,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90, rcards90, avglongpasslen, directfkontrgtpct, corners90]"
0,5.44337,0.797829,2.60287,0.498481,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt]"
0,5.482759,0.813429,2.724272,0.501258,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90, rcards90, avglongpasslen, directfkontrgtpct, corners90, defduels90, hdrgoals90, pencnvrspct, secassist90, final3rdpasses90, acccrossespct, fk90]"
0,5.635083,0.832781,2.707132,0.506216,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, aerduels90, longpasses90, padjintrcpt, touchesbox90, acccrossesleftpct, accprpassespct, penareapasses90, rcards90, avglongpasslen]"
0,5.327534,0.770357,2.433828,0.51159,"[age, season, window, fee, club_from_elo, club_to_elo, league_from_elo, league_to_elo, marketval, matchesplayed, minsplayed, foot, height, weight, goals90, npgoals90, xg90, hdrgoals90, shots90, xa90, shotassist90, touchesbox90, prgruns90, acclr90, rcvdpasses90, smartpasses90, dribbles90, succssdribblespct]"
