In [1]:
import pandas as pd
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from joblib import load
import warnings
warnings.filterwarnings("ignore")

### User-defined functions (UDFs)

In [13]:
def create_inputs(path):
    """Read a match-level CSV and build the feature frame passed to the models.

    Steps performed:
      1. Read `path` with the first CSV column as the index.
      2. Drop every row where any of the three odds columns equals 0
         (the odds are inverted below, so a 0 would produce inf).
      3. Add `<side>_team_relative_point_roll<k>_sum` for side in
         {home, away} and k in 1..4, computed as the rolled point sum
         minus the rolled expected point sum.
      4. Add `prob_home_win`, `prob_draw`, `prob_away_win` as 1 / odds.
      5. Drop the outcome flags and the identifier columns from the features.

    Parameters
    ----------
    path : str or path-like
        Location of the input CSV. It must contain `match_id`, the three
        `odds_*` columns and the rolled point / expected point columns.

    Returns
    -------
    (pandas.DataFrame, list)
        The feature frame and the `match_id` values of the retained rows,
        in the same row order.

    Notes
    -----
    Feature columns are returned in a deterministic order: the CSV's own
    column order, followed by the derived columns in creation order. The
    previous set-difference produced an order that could change between
    interpreter sessions.
    NOTE(review): the persisted models must have been trained with this same
    column order unless they validate feature names themselves - confirm
    against the training notebook.
    """
    non_feature_columns = ('home_win_flag', 'draw_flag', 'away_win_flag',
                           'match_id', 'season', 'div')
    odds_columns = ['odds_home_team_win', 'odds_draw', 'odds_away_team_win']

    raw = pd.read_csv(path, index_col=0)

    # Keep only rows where all three odds are non-zero. The explicit copy makes
    # the column assignments below act on an independent frame rather than on
    # a slice of `raw` (which is what triggers SettingWithCopyWarning).
    df = raw[(raw[odds_columns] != 0).all(axis=1)].copy()

    # Relative points = realised rolled points minus expected rolled points.
    for side in ('home', 'away'):
        for window in range(1, 5):
            actual = df['{}_team_point_roll{}_sum'.format(side, window)]
            expected = df['{}_team_expected_point_roll{}_sum'.format(side, window)]
            df['{}_team_relative_point_roll{}_sum'.format(side, window)] = actual - expected

    # Inverse of the quoted odds (not normalised to sum to 1).
    df['prob_home_win'] = 1 / df.odds_home_team_win
    df['prob_draw'] = 1 / df.odds_draw
    df['prob_away_win'] = 1 / df.odds_away_team_win

    # Preserve the frame's column order instead of going through a set.
    x_variables = [column for column in df.columns if column not in non_feature_columns]

    return df[x_variables], df.match_id.tolist()

In [9]:
def _positive_class_proba(model_path, dataframe):
    """Load one persisted classifier and return column 1 of its predict_proba output."""
    # joblib.load unpickles arbitrary objects: only point this at trusted files.
    model = load(model_path)

    # Estimators that recorded their training column names expose them as
    # `feature_names_in_`. Align the frame to that order so each value reaches
    # the feature it was trained on. Estimators without the attribute receive
    # the frame unchanged.
    trained_columns = getattr(model, 'feature_names_in_', None)
    if trained_columns is not None:
        dataframe = dataframe[list(trained_columns)]

    return model.predict_proba(dataframe)[:, 1]


def predict(dataframe, matchid):
    """Score every match with the home, away and draw models.

    The three model files are read from the notebook-level variables
    `model_home_path`, `model_away_path` and `model_draw_path`, which must be
    defined before this function is called.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Feature frame, one row per match.
    matchid : list
        Match identifiers, one per row of `dataframe`, in the same order.

    Returns
    -------
    pandas.DataFrame
        Columns `match_id`, `home_win_p1`, `draw_p1`, `away_win_p1`. Each
        probability is column 1 of the corresponding model's `predict_proba`
        output (presumably the positive class - confirm against each model's
        `classes_`). The three values come from independent models and are
        not normalised to sum to 1.

    Raises
    ------
    KeyError
        If a model exposes `feature_names_in_` and `dataframe` lacks one of
        those columns.
    """
    home_proba = _positive_class_proba(model_home_path, dataframe)
    away_proba = _positive_class_proba(model_away_path, dataframe)
    draw_proba = _positive_class_proba(model_draw_path, dataframe)

    df_predict = pd.DataFrame(
        {'match_id': matchid,
         'home_win_p1': home_proba,
         'draw_p1': draw_proba,
         'away_win_p1': away_proba})

    return df_predict
    

### Predict

In [14]:
# CSV holding the matches to score; consumed by create_inputs().
input_path = "competition_table.csv"

In [5]:
# Persisted classifiers, one per match outcome; predict() reads these names.
model_home_path, model_away_path, model_draw_path = (
    'gbm_home_cv.joblib',
    'gbm_away_cv.joblib',
    'gbm_draw_cv.joblib',
)

In [None]:
# Build the feature frame and the matching list of match ids from the input CSV.
df_input, ls_matchid = create_inputs(path=input_path)

In [10]:
# Score every retained match with the three outcome models.
df_predict = predict(dataframe=df_input, matchid=ls_matchid)

In [12]:
# Persist the predictions; the frame's index is written as the first CSV column.
df_predict.to_csv(path_or_buf='t_model.csv', encoding='utf-8')