In [5]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import precision_score, f1_score

import xgboost as xgb

from src.helper import get_split_data

In [6]:
# Unpack the six objects returned by the project helper: feature/target pairs
# that the cells below use for fitting (trn), evaluation during fit (val)
# and final scoring (tst).
# NOTE(review): the meaning of the argument `6` is not visible in this
# notebook — confirm against src.helper and consider naming it.
(
    X_trn, y_trn,
    X_val, y_val,
    X_tst, y_tst,
) = get_split_data.split_data_for_training(6)

In [7]:
# Fixed XGBoost hyper-parameters for this run.
# NOTE(review): presumably the result of an earlier tuning run — the search
# itself is not shown in this notebook.
params = {
    'colsample_bytree': 0.8500456771841122,
    'gamma': 3.523272196606738,
    'learning_rate': 0.28680136482199026,
    'max_depth': 7,
    'n_estimators': 550,
    'reg_alpha': 0.017890822660235197,
    'reg_lambda': 0.048046389257160545,
    'subsample': 0.8687964543256418,
}

# Class-imbalance weight: count of label 0 divided by count of label 1 in the
# training target, passed to XGBoost as scale_pos_weight.
# NOTE(review): assumes y_trn is a Series holding integer labels 0 and 1 — confirm.
y_train_value_counts = y_trn.value_counts()
ratio = y_train_value_counts[0] / y_train_value_counts[1]

# `use_label_encoder` is intentionally not passed: XGBoost deprecated the
# argument and recent releases ignore it with a "parameters not used" warning.
model = xgb.XGBClassifier(
    **params,
    random_state=42,
    eval_metric="logloss",
    scale_pos_weight=ratio,
)

# The eval_set only records per-round logloss on train and validation data;
# no early stopping is configured, so all n_estimators rounds are trained.
model.fit(X_trn, y_trn, eval_set=[(X_trn, y_trn), (X_val, y_val)], verbose=False)
y_pred = model.predict(X_tst)

# Built-in round() works whether f1_score returns a NumPy scalar or a plain
# Python float; calling `.round()` on the result only works for the former.
round(f1_score(y_tst, y_pred, average='binary'), 3)

np.float64(0.333)

In [8]:
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
import xgbfir

# Write feature-interaction statistics for the fitted model to 'fir.xlsx',
# which the following cells read back with pandas.
# NOTE(review): xgbfir documents a default cap on the number of trees it
# parses (MaxTrees=100); with n_estimators=550 that would cover only part of
# the ensemble — confirm and pass MaxTrees explicitly if full coverage is wanted.
xgbfir.saveXgbFI(
    model,
    feature_names=X_trn.columns,
    OutputXlsxFile='fir.xlsx',
)

In [9]:
# Display the training feature names — the same Index that was handed to
# xgbfir as `feature_names` when exporting the interaction workbook.
X_trn.columns

Index(['points_home', 'points_away', 'home_last_team_goal',
       'home_last_team_shoton', 'home_last_team_possession',
       'away_last_team_goal', 'away_last_team_shoton',
       'away_last_team_possession', 'team_strength_home', 'team_strength_away',
       'strength_difference', 'team_aggression_home', 'team_aggression_away',
       'aggression_difference', 'team_acceleration_home',
       'team_acceleration_away', 'acceleration_difference',
       'points_difference', 'goal_conversion_rate_home',
       'goal_conversion_rate_away', 'rolling_avg_goals_home',
       'rolling_goal_stability_home', 'rolling_avg_goals_away',
       'rolling_goal_stability_away', 'possession_strength_ratio_home',
       'possession_strength_ratio_away', 'rolling_avg_goals_ratio_away',
       'points_diff_accel_home', 'points_diff_accel_away',
       'accel_home_strength_home', 'accel_home_strength_away',
       'home_possession_strength_home', 'away_possession_strength_away',
       'aggression_physic

In [10]:
# Load the first sheet of the xgbfir workbook (sheet_name=0 is the pandas
# default, spelled out here) and keep it in `fir`.
# NOTE(review): presumably this is the single-feature (depth 0) sheet — confirm.
fir = pd.read_excel('fir.xlsx', sheet_name=0)

# Show the five rows with the best (lowest) 'Average Rank', rounded to one decimal.
(
    fir
    .sort_values('Average Rank')
    .head(5)
    .round(1)
)

Unnamed: 0,Interaction,Gain,FScore,wFScore,Average wFScore,Average Gain,Expected Gain,Gain Rank,FScore Rank,wFScore Rank,Avg wFScore Rank,Avg Gain Rank,Expected Gain Rank,Average Rank,Average Tree Index,Average Tree Depth
0,points_diff_accel_home,430.7,20,7.0,0.3,21.5,345.8,1,7,1,2,1,1,2.2,7.6,3.6
2,rolling_avg_goals_ratio_away,187.7,25,6.2,0.2,7.5,68.4,3,3,3,7,5,2,3.8,17.6,3.6
6,points_diff_accel_away,130.7,15,4.8,0.3,8.7,45.8,7,19,6,4,3,4,7.2,6.2,2.9
3,rolling_goal_stability_away,177.5,30,6.6,0.2,5.9,36.5,4,2,2,14,18,5,7.5,10.4,3.7
8,acceleration_difference,126.0,19,5.3,0.3,6.6,33.9,9,9,5,6,9,7,7.5,6.9,3.8


In [11]:
# From the 'Interaction Depth 1' sheet, take the first 20 rows in file order,
# re-rank them by 'Average Rank' and show the ten best, rounded to one decimal.
(
    pd.read_excel('fir.xlsx', sheet_name='Interaction Depth 1')
    .iloc[:20]
    .sort_values(by='Average Rank')
    .head(10)
    .round(1)
)

Unnamed: 0,Interaction,Gain,FScore,wFScore,Average wFScore,Average Gain,Expected Gain,Gain Rank,FScore Rank,wFScore Rank,Avg wFScore Rank,Avg Gain Rank,Expected Gain Rank,Average Rank,Average Tree Index,Average Tree Depth
1,points_diff_accel_home|team_acceleration_home,200.7,3,1.2,0.4,66.9,64.4,2,17,5,9,4,3,6.7,10.3,2.7
4,points_diff_accel_away|rolling_avg_goals_ratio...,87.8,3,1.3,0.4,29.3,35.9,5,18,4,8,9,6,8.3,2.7,2.7
5,rolling_avg_goals_away|rolling_avg_goals_ratio...,68.0,3,0.9,0.3,22.7,20.2,6,19,10,20,16,10,13.5,48.3,1.7
0,points_diff_accel_home|rolling_avg_goals_ratio...,205.8,2,0.8,0.4,102.9,144.9,1,46,16,14,3,1,13.5,0.5,3.5
9,rolling_avg_goals_home|rolling_goal_stability_...,62.7,5,2.0,0.4,12.5,27.9,10,1,1,11,74,8,17.5,9.2,2.6
7,points_diff_accel_home|rolling_avg_goals_home,65.0,4,0.6,0.2,16.2,13.3,8,4,21,38,30,15,19.3,5.5,3.5
17,away_possession_strength_away|team_acceleratio...,53.0,4,1.5,0.4,13.3,18.8,18,10,2,15,62,11,19.7,6.5,2.8
13,points_diff_accel_home|rolling_goal_stability_...,60.2,4,0.8,0.2,15.0,11.6,14,6,11,32,41,21,20.8,9.8,5.2
3,away_last_team_shoton|points_diff_accel_home,115.8,1,0.6,0.6,115.8,69.7,4,92,24,2,2,2,21.0,1.0,1.0
11,points_diff_accel_home|rolling_avg_goals_away,60.6,1,0.6,0.6,60.6,36.3,12,94,25,3,6,5,24.2,2.0,1.0


In [13]:
# Summary statistics (two decimals) for the training column
# 'points_diff_accel_home', kept as a one-column DataFrame.
(
    X_trn
    .loc[:, ['points_diff_accel_home']]
    .describe()
    .round(2)
)

Unnamed: 0,points_diff_accel_home
count,2391.0
mean,541.04
std,71472.56
min,-286997.56
25%,-34512.67
50%,0.0
75%,30597.05
max,308874.64
