# Model Training

In [1]:
import sys
import pandas as pd
import sqlite3
import pathlib
import numpy as np

sys.path.append("model-training") 
import modelling_functions as mf
import training_config as tc

In [2]:
# Resolve the project root: two directory levels above the notebook's
# current working directory.
project_root = pathlib.Path().absolute().parent.parent

# Path to the SQLite database, relative to the project root
db_path = project_root / "data" / "footy-tipper-db.sqlite"

# sqlite3.connect() silently creates an empty database when the file is
# missing, which would surface later as a confusing "no such table" error.
# Fail early with the actual cause instead.
if not db_path.exists():
    raise FileNotFoundError(f"SQLite database not found at {db_path}")

# Read the query text before connecting so that a missing SQL file cannot
# leave a database connection open.
with open('footy_tipping_data.sql', 'r') as file:
    query = file.read()

# Connect, run the query, and always release the connection, even if the
# query raises.
con = sqlite3.connect(str(db_path))
try:
    footy_tipping_data = pd.read_sql_query(query, con)
finally:
    con.close()

footy_tipping_data

Unnamed: 0,game_id,round_id,round_name,game_number,game_state_name,start_time,start_time_utc,venue_name,city,crowd,...,start_hour,game_day,matchup_form,state_of_origin,home_elo,away_elo,home_elo_prob,away_elo_prob,draw_prob,home_ground_advantage
0,2.012111e+10,1.0,Round 1,1.0,Final,1.330600e+09,1.330560e+09,McDonald Jones Stadium,Newcastle,29189.0,...,11,Thursday,0.0,0.0,1500.000000,1500.000000,0.482209,0.475102,0.042689,
1,2.012111e+10,1.0,Round 1,2.0,Final,1.330686e+09,1.330646e+09,Bankwest Stadium,Sydney,11399.0,...,11,Friday,0.0,0.0,1500.000000,1500.000000,0.482209,0.475102,0.042689,
2,2.012111e+10,1.0,Round 1,3.0,Final,1.330772e+09,1.330733e+09,Canberra Stadium,Canberra,7862.0,...,11,Saturday,0.0,0.0,1500.000000,1500.000000,0.482209,0.475102,0.042689,
3,2.012111e+10,1.0,Round 1,4.0,Final,1.330772e+09,1.330733e+09,Panthers Stadium,Penrith,9585.0,...,11,Saturday,0.0,0.0,1500.000000,1500.000000,0.482209,0.475102,0.042689,
4,2.012111e+10,1.0,Round 1,5.0,Final,1.330769e+09,1.330733e+09,1300SMILES Stadium,Townsville,16311.0,...,10,Saturday,0.0,0.0,1500.000000,1500.000000,0.482209,0.475102,0.042689,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2378,2.023111e+10,27.0,Round 27,4.0,Pre Game,1.693667e+09,1.693631e+09,Suncorp Stadium,Brisbane,,...,15,Saturday,0.0,0.0,1486.538358,1507.795919,0.458027,0.510018,0.031955,
2379,2.023111e+10,27.0,Round 27,5.0,Pre Game,1.693676e+09,1.693640e+09,BlueBet Stadium,Penrith,,...,17,Saturday,3.0,0.0,1519.532326,1505.086448,0.502092,0.455218,0.042689,
2380,2.023111e+10,27.0,Round 27,6.0,Pre Game,1.693683e+09,1.693647e+09,Netstrata Jubilee Stadium,Sydney,,...,19,Saturday,3.0,0.0,1493.680009,1490.370732,0.486767,0.470543,0.042689,
2381,2.023111e+10,27.0,Round 27,7.0,Pre Game,1.693750e+09,1.693714e+09,Cbus Super Stadium,Gold Coast,,...,14,Sunday,1.0,0.0,1502.449746,1478.795931,0.517834,0.445253,0.036913,


## Modelling

In [3]:
# Run model training/selection with the settings taken from training_config.
# NOTE(review): the role of each returned item is inferred from the names it
# is unpacked into — confirm against modelling_functions.
training_outputs = mf.train_and_select_best_model(
    footy_tipping_data,
    tc.predictors,
    tc.outcome_var,
    tc.use_rfe,
    tc.num_folds,
    tc.opt_metric,
)
best_model, X_inference, label_encoder, game_id_inference = training_outputs

# Display the selected model as the cell output
best_model

Fitting 5 folds for each of 729 candidates, totalling 3645 fits
{'colsample_bytree': 0.5, 'gamma': 0, 'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 100, 'subsample': 0.9}
0.7809388033164562
Fitting 5 folds for each of 432 candidates, totalling 2160 fits
{'bootstrap': True, 'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 5, 'n_estimators': 200}
0.7723592654415138
Fitting 5 folds for each of 1458 candidates, totalling 7290 fits
{'learning_rate': 0.01, 'max_depth': 4, 'max_features': 'log2', 'min_samples_leaf': 4, 'min_samples_split': 5, 'n_estimators': 50, 'subsample': 1.0}
0.7899428816219805


## Make Predictions

In [4]:
# Produce predictions from the selected model and the inference artefacts
# returned by the training step, then display them.
predictions_df = mf.model_predictions(
    best_model,
    X_inference,
    label_encoder,
    game_id_inference,
)
predictions_df

Unnamed: 0,game_id,home_team_result,home_team_win_prob,home_team_lose_prob
0,20231110000.0,Win,0.708443,0.291557
1,20231110000.0,Win,0.553274,0.446726
2,20231110000.0,Win,0.552467,0.447533
3,20231110000.0,Win,0.652813,0.347187
4,20231110000.0,Win,0.710377,0.289623
5,20231110000.0,Win,0.697508,0.302492
6,20231110000.0,Win,0.553493,0.446507
7,20231110000.0,Win,0.660299,0.339701


## Write predictions back to the database

In [5]:
# Read both SQL statements before connecting so that a missing SQL file
# cannot leave a database connection open.
with open('create_table.sql', 'r') as file:
    create_table_query = file.read()

with open('insert_into_table.sql', 'r') as file:
    insert_into_table_query = file.read()

# Build the parameter tuples in the positional order the insert statement
# receives them: game_id, result, win probability, lose probability.
prediction_rows = list(
    predictions_df[
        ['game_id', 'home_team_result', 'home_team_win_prob', 'home_team_lose_prob']
    ].itertuples(index=False, name=None)
)

con = sqlite3.connect(str(db_path))
try:
    # `with con` commits when the block succeeds and rolls back if it raises,
    # so a failure part-way through does not leave a half-written batch.
    # It does not close the connection; that happens in `finally`.
    with con:
        con.execute(create_table_query)
        # One executemany call replaces the per-row iterrows() loop
        con.executemany(insert_into_table_query, prediction_rows)
finally:
    con.close()