In [3]:
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import precision_score

import xgboost as xgb
import xgbfir

from src.helper import get_split_data

In [4]:
# Unpack the six objects returned by the project's split helper into the
# X_*/y_* names used by the following cells.
# NOTE(review): the meaning of the argument 6 is not visible from this
# notebook — confirm against src.helper.get_split_data.
(X_trn, y_trn,
 X_val, y_val,
 X_tst, y_tst) = get_split_data.split_data_for_training(6)

In [5]:
# Fixed hyper-parameters forwarded verbatim to the XGBoost classifier.
params = dict(
    colsample_bytree=0.8829682348067726,
    gamma=2.153140019195803,
    learning_rate=0.2839181641252695,
    max_depth=10,
    n_estimators=750,
    reg_alpha=0.05456053939633371,
    reg_lambda=0.014211434927705319,
    subsample=0.8413541436147373,
)

# Fit on X_trn / y_trn with a fixed seed. The (X_trn, y_trn) and
# (X_val, y_val) pairs are handed over as evaluation sets and the
# per-round log is silenced.
xg = xgb.XGBClassifier(random_state=42, **params)
xg.fit(
    X_trn,
    y_trn,
    eval_set=[(X_trn, y_trn), (X_val, y_val)],
    verbose=False,
)

# Predict on X_tst; the binary precision against y_tst, rounded to three
# decimals, is the value this cell displays.
y_pred = xg.predict(X_tst)
precision_score(y_true=y_tst, y_pred=y_pred, average='binary').round(3)

np.float64(0.333)

In [6]:
# Write xgbfir's feature-interaction report for the fitted model `xg` to
# 'fir.xlsx', labelling features with the X_trn column names. The cells
# below read that workbook back with pandas.
# `xgbfir` is imported in the notebook's import cell so that every
# dependency is declared in one place.
# NOTE(review): xgbfir limits how many trees it parses through its MaxTrees
# argument — confirm the default covers enough of the 750 estimators.
xgbfir.saveXgbFI(xg, feature_names=X_trn.columns, OutputXlsxFile='fir.xlsx')

In [11]:
# Display X_trn's column labels (the same labels passed to xgbfir as
# feature_names).
X_trn.columns

Index(['points_home', 'points_away', 'home_last_team_goal',
       'home_last_team_shoton', 'home_last_team_possession',
       'away_last_team_goal', 'away_last_team_shoton',
       'away_last_team_possession', 'team_strength_home', 'team_strength_away',
       'strength_difference', 'team_aggression_home', 'team_aggression_away',
       'aggression_difference', 'team_acceleration_home',
       'team_acceleration_away', 'acceleration_difference',
       'points_difference', 'goal_conversion_rate_home',
       'goal_conversion_rate_away', 'rolling_avg_goals_home',
       'rolling_avg_goals_away', 'season_2009/2010', 'season_2010/2011',
       'season_2011/2012', 'season_2012/2013', 'season_2013/2014',
       'season_2014/2015', 'season_2015/2016'],
      dtype='object')

In [9]:
# Load the workbook's default (first) sheet and display the five rows with
# the lowest 'Average Rank', rounded to one decimal place.
fir = pd.read_excel('fir.xlsx')
(
    fir
    .sort_values('Average Rank')
    .head(5)
    .round(1)
)

Unnamed: 0,Interaction,Gain,FScore,wFScore,Average wFScore,Average Gain,Expected Gain,Gain Rank,FScore Rank,wFScore Rank,Avg wFScore Rank,Avg Gain Rank,Expected Gain Rank,Average Rank,Average Tree Index,Average Tree Depth
0,points_difference,541.6,52,15.9,0.3,10.4,375.2,1,6,1,2,1,1,2.0,8.4,4.3
1,team_acceleration_home,341.9,78,8.9,0.1,4.4,48.2,2,1,2,14,8,2,4.8,6.5,5.8
4,team_acceleration_away,273.3,63,8.3,0.1,4.3,34.8,5,4,4,9,9,3,5.7,6.6,5.9
5,away_last_team_possession,232.8,54,8.5,0.2,4.3,32.2,6,5,3,6,10,5,5.8,7.8,5.2
2,team_aggression_away,280.6,67,7.8,0.1,4.2,27.1,3,2,5,13,13,6,7.0,11.1,5.5


In [10]:
# From the 'Interaction Depth 1' sheet, keep the first 20 rows in stored
# order, then display the 10 of those with the lowest 'Average Rank',
# rounded to one decimal place.
(
    pd.read_excel('fir.xlsx', sheet_name='Interaction Depth 1')
    .head(20)
    .sort_values('Average Rank')
    .head(10)
    .round(1)
)

Unnamed: 0,Interaction,Gain,FScore,wFScore,Average wFScore,Average Gain,Expected Gain,Gain Rank,FScore Rank,wFScore Rank,Avg wFScore Rank,Avg Gain Rank,Expected Gain Rank,Average Rank,Average Tree Index,Average Tree Depth
1,points_difference|team_acceleration_home,290.0,10,2.8,0.3,29.0,116.2,2,3,2,8,2,2,3.2,2.0,4.0
0,points_difference|points_difference,394.0,8,2.9,0.4,49.3,200.4,1,17,1,1,1,1,3.7,16.0,2.5
2,points_difference|rolling_avg_goals_away,142.8,6,1.6,0.3,23.8,62.3,3,38,8,9,3,3,10.7,1.8,5.2
12,points_difference|team_acceleration_away,88.2,8,1.7,0.2,11.0,26.5,13,18,6,14,15,4,11.7,13.5,4.8
6,team_acceleration_home|team_aggression_home,104.0,12,1.9,0.2,8.7,11.8,7,2,5,21,69,12,19.3,3.8,6.4
15,team_acceleration_home|team_strength_home,81.5,8,1.3,0.2,10.2,13.6,16,19,18,20,34,10,19.5,6.9,3.6
10,goal_conversion_rate_away|team_acceleration_home,90.7,10,1.3,0.1,9.1,14.1,11,6,15,27,61,8,21.3,6.8,7.0
17,team_aggression_home|team_aggression_home,78.2,7,0.8,0.1,11.2,10.0,18,29,24,29,13,18,21.8,2.6,6.3
9,team_acceleration_away|team_aggression_away,95.4,9,0.6,0.1,10.6,5.0,10,10,33,53,25,39,28.3,11.8,5.7
19,away_last_team_possession|points_difference,76.4,6,0.4,0.1,12.7,12.7,20,39,51,50,6,11,29.5,2.8,5.3


In [13]:
# Spearman rank correlations of every X_trn column, plus y_trn attached as
# 'match_result', restricted to the six columns listed below. Rendered as a
# red-blue gradient pinned to the [-1, 1] range with two-decimal labels.
(
    X_trn
    .assign(match_result=y_trn)
    .corr(method='spearman')
    .loc[
        :,
        [
            'points_difference',
            'team_acceleration_home',
            'rolling_avg_goals_away',
            'team_acceleration_away',
            'team_strength_home',
            'rolling_avg_goals_home',
        ],
    ]
    .style
    .background_gradient(cmap='RdBu', vmin=-1, vmax=1)
    .format(formatter='{:.2f}')
)

Unnamed: 0,points_difference,team_acceleration_home,rolling_avg_goals_away,team_acceleration_away,team_strength_home,rolling_avg_goals_home
points_home,0.28,0.12,0.0,-0.03,0.07,0.18
points_away,-0.31,-0.04,0.19,0.13,0.03,-0.01
home_last_team_goal,0.13,0.12,0.01,0.02,-0.03,0.55
home_last_team_shoton,0.14,0.13,0.03,-0.01,0.06,0.15
home_last_team_possession,-0.1,-0.1,-0.01,0.01,0.0,-0.14
away_last_team_goal,-0.16,-0.01,0.56,0.12,0.02,-0.01
away_last_team_shoton,-0.1,0.0,0.12,0.13,0.02,0.04
away_last_team_possession,0.12,0.02,-0.12,-0.09,0.02,-0.02
team_strength_home,0.06,-0.11,0.03,-0.0,1.0,0.03
team_strength_away,-0.1,-0.06,0.07,-0.11,0.01,0.01
