In [30]:
# Standard Imports
import numpy as np

# Model 
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Model selection
import statistics
from sklearn.model_selection import KFold

# Modules
from data_gather_ff import make_dataset
import predict_ff as predff

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [32]:
# Seasons passed to the project's dataset builder; the cells below read
# columns such as `goals`, `team`, `opponent` and `home` from the result.
SEASONS = [2019, 2020, 2021]
X = make_dataset(SEASONS)

# Cross Validation 

In [46]:
def mse(y_true, y_pred):
    """Return the mean squared error between observed and predicted values.

    Both inputs are converted to plain float NumPy arrays before subtracting,
    so elements are always paired by position. Without the conversion, passing
    two pandas Series makes the NumPy ufunc align them on index labels, which
    silently yields NaN (or mis-paired errors) when the indexes differ -- e.g.
    a shuffled validation slice versus freshly indexed predictions.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    numpy.float64
        Mean of the squared element-wise differences.
    """
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.square(observed - predicted).mean()

def abs_error(y_true, y_pred):
    """Return the mean absolute error between observed and predicted values.

    Both inputs are converted to plain float NumPy arrays before subtracting,
    so elements are always paired by position. Without the conversion, passing
    two pandas Series makes the NumPy ufunc align them on index labels, which
    silently yields NaN (or mis-paired errors) when the indexes differ.

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    numpy.float64
        Mean of the absolute element-wise differences.
    """
    observed = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.abs(observed - predicted).mean()

def _predict_team_goals(data, pos, model):
    """Predict goals for the row's `team` at integer position `pos` of `data`.

    The row is written from the point of view of `team`; `predff.create_X`
    wants home/away inputs, so the team/opponent columns are mapped onto the
    home/away arguments according to the row's `home` flag.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the match rows (the same frame the model was fitted on).
    pos : int
        Positional (iloc-style) row number, as produced by ``KFold.split``.
    model :
        Fitted model handed through to ``predff.predict_score``.

    Returns
    -------
    Element ``[0][0]`` of ``predff.predict_score(...)`` when the row's team is
    at home, element ``[0][1]`` when it is away (presumably a home/away score
    pair -- TODO confirm against predict_ff).

    Raises
    ------
    ValueError
        If the row's `home` flag is neither 0 nor 1.
    """
    def at(column):
        # Column-wise positional lookup keeps the scalar dtypes of each column.
        return data[column].iloc[pos]

    home_flag = at("home")
    if home_flag == 1:
        home_team, away_team = at("team"), at("opponent")
        home_side, away_side = "team", "oppo"
        score_pos = 0
    elif home_flag == 0:
        home_team, away_team = at("opponent"), at("team")
        home_side, away_side = "oppo", "team"
        score_pos = 1
    else:
        # Previously such a row silently reused the prediction of the row
        # before it (or hit a NameError on the very first row).
        raise ValueError(
            f"row at position {pos} has home={home_flag!r}; expected 0 or 1"
        )

    X_home, X_away = predff.create_X(
        home_team=home_team,
        away_team=away_team,
        home_odds=at(f"{home_side}_odds"),
        draw_odds=at("draw_odds"),
        away_odds=at(f"{away_side}_odds"),
        home_fan_atk_str=at(f"{home_side}_fantasy_atk_str"),
        home_fan_def_str=at(f"{home_side}_fantasy_def_str"),
        away_fan_atk_str=at(f"{away_side}_fantasy_atk_str"),
        away_fan_def_str=at(f"{away_side}_fantasy_def_str"),
        home_goals_last5=at(f"{home_side}_goals_last5"),
        away_goals_last5=at(f"{away_side}_goals_last5"),
    )
    return predff.predict_score(X_home, X_away, model)[0][score_pos]


kf = KFold(n_splits=5,
           shuffle=True,
           random_state=42)

# KFold.split yields *positional* train/validation indices. Materialise the
# folds once so the size report and the fitting loop share the same splits.
folds = list(kf.split(X, X.goals))

for k, (train_idx, val_idx) in enumerate(folds):
    print(f'Fold:{k}, Train set: {len(train_idx)}, Test set:{len(val_idx)}')

# Right-hand-side terms of the Poisson regression for `goals`.
model_terms = [
    "team", "opponent",
    "team_odds", "draw_odds", "oppo_odds",
    "team_fantasy_atk_str", "team_fantasy_def_str",
    "oppo_fantasy_atk_str", "oppo_fantasy_def_str",
    "team_goals_last5", "oppo_goals_last5",
    "home",
]
formula = "goals ~ " + " + ".join(model_terms)

mse_list = list()
error_vec_list = list()

for k, (train_idx, val_idx) in enumerate(folds):
    # Fit on the training rows of this fold only.
    poisson_model = smf.glm(
        formula=formula,
        data=X.iloc[train_idx, :],
        family=sm.families.Poisson()
    ).fit()

    # Predict every validation row. Rows are addressed by position (iloc) to
    # match how the training rows were selected above; label-based lookups
    # such as X.goals[val_idx] are only correct for a default RangeIndex.
    validation_predictions = [
        _predict_team_goals(X, i, poisson_model) for i in val_idx
    ]
    observed_goals = X.goals.iloc[val_idx].values

    # Signed per-row errors (observed - predicted), kept for later inspection.
    error_vec = np.subtract(observed_goals, np.array(validation_predictions))
    error_vec_list.append(error_vec)

    # Mean squared error for the current fold.
    val_mse = mse(y_true=observed_goals, y_pred=validation_predictions)

    print(f"Fold {k} MSE: {val_mse:0.5f}")
    mse_list.append(val_mse)


# NOTE(review): the label says "repeated CV", but a single 5-fold pass is run
# above; the text is left unchanged so recorded outputs stay comparable.
print(f"repeated CV MSE: {np.mean(mse_list):0.5f} (std={np.std(mse_list):0.5f})")

Fold:0, Train set: 1492, Test set:374
Fold:1, Train set: 1493, Test set:373
Fold:2, Train set: 1493, Test set:373
Fold:3, Train set: 1493, Test set:373
Fold:4, Train set: 1493, Test set:373
Fold 0 MSE: 1.73529
Fold 1 MSE: 1.59249
Fold 2 MSE: 1.98123
Fold 3 MSE: 1.39946
Fold 4 MSE: 1.62735
repeated CV MSE: 1.66717 (std=0.19084)
