### This script takes about 1 1/2 minutes to execute

The idea of this notebook is to create a first (simple) DL model using all the same features used in the multinomial logit model.

Much inspiration was derived from https://towardsdatascience.com/use-machine-learning-to-predict-horse-racing-4f1111fb6ced.

In [None]:
import math
import os
from importlib import reload
import deeplearninglib
reload(deeplearninglib)
from deeplearninglib import *

import wandb

# Training is pinned to the CPU. To pick a GPU automatically when one is present, use instead:
#   device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"
print("Using {} device".format(device))

In [None]:
# select model to train

# Feature-column groups shared by several inventory entries. Every entry receives
# its own copy of the lists (see _model_spec), so no two entries alias one list.
_POSITION_COLUMNS = [f"position{i}_{j}" for i in (1, 2, 3) for j in (1, 2, 3, 4)]
_CORE_CONTINUOUS = ["age", "trainer_sr", "daysLTO"]
_CORE_COLUMNS = (_CORE_CONTINUOUS
                 + _POSITION_COLUMNS
                 + ["entire", "gelding", "blinkers", "cheekpieces", "tonguetie"])
_V0_CONTINUOUS = ["age", "sire_sr", "dam_sr", "trainer_sr", "daysLTO"]
_V0_COLUMNS = (_V0_CONTINUOUS
               + _POSITION_COLUMNS
               + ["entire", "gelding", "blinkers", "visor", "cheekpieces", "tonguetie"])
_COURSE_GOING_DIRECTION_COLUMNS = ["course_Kempton", "course_Lingfield", "course_Southwell", "course_Wolverhampton",
                                   "going_Firm", "going_Good", "going_Good to Firm", "going_Good to Soft",
                                   "going_Slow", "going_Soft", "going_Standard", "going_Standard to Slow",
                                   "direction_Left Handed", "direction_Right Handed"]


def _model_spec(XZ_columns, continuous_features, learning_rate, epochs, bias, model_architecture,
                vacant_stall_indicator=False):
    """Return one model_inventory entry.

    The keys (and their order) are the ones read back when a model is selected
    at the bottom of this cell. Column lists are copied so entries stay independent.
    """
    return {'XZ_columns': list(XZ_columns),
            'continuous_features': list(continuous_features),
            'learning_rate': learning_rate,
            'epochs': epochs,
            'vacant_stall_indicator': vacant_stall_indicator,
            'bias': bias,
            'model_architecture': model_architecture}


# Learning rates were previously written as 10e-1 and 10e-3, which equal 1.0 and 0.01.
# They are spelled out here with the same values.
# NOTE(review): if 1e-1 / 1e-3 were intended, these are ten times too large - confirm.
model_inventory = {
    'mktprob': _model_spec(["mkt_prob"], ["mkt_prob"], learning_rate=1.0, epochs=50, bias=True, model_architecture=LinSig),
    'mktprob_soft': _model_spec(["mkt_prob"], ["mkt_prob"], learning_rate=1.0, epochs=50, bias=True, model_architecture=LinSoft),
    'mktprob_MLR': _model_spec(["mkt_prob"], ["mkt_prob"], learning_rate=1.0, epochs=50, bias=True, model_architecture=MLR),
    'AlunOwen_v0': _model_spec(_V0_COLUMNS, _V0_CONTINUOUS, learning_rate=0.01, epochs=100, bias=True, model_architecture=LinSig),
    'AlunOwen_v1': _model_spec(_CORE_COLUMNS, _CORE_CONTINUOUS, learning_rate=0.01, epochs=100, bias=True, model_architecture=LinSig),
    'AlunOwen_v2': _model_spec(_CORE_COLUMNS, _CORE_CONTINUOUS, learning_rate=0.01, epochs=100, bias=True, model_architecture=LinDropReluLinSoft),
    'AlunOwen_v3': _model_spec(_CORE_COLUMNS, _CORE_CONTINUOUS, learning_rate=0.01, epochs=100, bias=False, model_architecture=MLR),
    # v4 and test_v1 list no continuous features
    'AlunOwen_v4': _model_spec(_CORE_COLUMNS, [], learning_rate=0.01, epochs=100, bias=False, model_architecture=MLR),
    'test_v1': _model_spec(_CORE_COLUMNS + _COURSE_GOING_DIRECTION_COLUMNS, [], learning_rate=0.01, epochs=100, bias=False, model_architecture=LinDropReluLinSoft),
}

# Inventory key of the model to train; the settings below are unpacked from its entry.
model = 'AlunOwen_v3'
_selected = model_inventory[model]
XZ_columns = _selected['XZ_columns']
continuous_features = _selected['continuous_features']
learning_rate = _selected['learning_rate']
epochs = _selected['epochs']
vacant_stall_indicator = _selected['vacant_stall_indicator']
bias = _selected['bias']
model_architecture = _selected['model_architecture']

In [None]:
# read in data

y_columns = ["win"] # assumed to be contained in runners files

# os.path.join keeps these paths valid on any OS (on Windows it yields the same "data\\..." strings as before)
runners_train_data_fn = os.path.join("data", "runners_train.csv")
races_train_data_fn = os.path.join("data", "races_train.csv")
runners_validate_data_fn = os.path.join("data", "runners_validate.csv")
races_validate_data_fn = os.path.join("data", "races_validate.csv")

# options that must be identical for the training and validation datasets
dataset_options = dict(vacant_stall_indicator=vacant_stall_indicator, continuous_features=continuous_features)

train_data = RacesDataset(runners_train_data_fn, races_train_data_fn, XZ_columns, y_columns, **dataset_options)
# the validation set is given train_data.scalar rather than building its own
# (presumably the scaler fitted on the training data - confirm in RacesDataset)
validate_data = RacesDataset(runners_validate_data_fn, races_validate_data_fn, XZ_columns, y_columns, scalar=train_data.scalar, **dataset_options)

train_dataloader = DataLoader(train_data, batch_size=64)
validate_dataloader = DataLoader(validate_data, batch_size=64)

In [None]:
# prepare and save test data for use by Dividend Project Model; then delete them to ensure they're not used during training or validation

# note, test data is model-specific (built from the selected model's columns and saved under the model's name)

# os.path.join keeps these paths valid on any OS (on Windows it yields the same "data\\..." strings as before)
runners_test_data_fn = os.path.join("data", "runners_test.csv")
races_test_data_fn = os.path.join("data", "races_test.csv")

# the test set is given train_data.scalar, exactly as the validation set is
test_data = RacesDataset(runners_test_data_fn, races_test_data_fn, XZ_columns, y_columns, vacant_stall_indicator=vacant_stall_indicator, scalar=train_data.scalar, continuous_features=continuous_features)

torch.save(test_data, os.path.join("data", model + "_test_data.pt"))

# drop the name so later cells cannot touch the test set by accident
del test_data

In [None]:
# widen the column limit so every feature column is shown in the preview
pd.set_option("display.max_columns", 1000)  # was 20
feature_view = train_data.runners_wide.loc[:, train_data.X_columns]
feature_view.head()

In [None]:
# model_architecture holds a class (see model_inventory), so it never equals the string "MLR";
# compare on the class name instead so this guard can actually fire.
uses_mlr = getattr(model_architecture, "__name__", model_architecture) == "MLR"
assert not uses_mlr or train_data.races is None, "Use of MLR with race-specific variables isn't yet supported"

# preview the races table only when there is one; train_data.races may be None (the case the assert allows)
train_data.races.head() if train_data.races is not None else None

In [None]:
# build the neural network

# one output node per column of y; one input node per column of X, plus the columns of Z when Z is present
output_layer_nodes = train_data.y.shape[1]
input_layer_nodes = train_data.X.shape[1]
if train_data.Z is not None:
    input_layer_nodes += train_data.Z.shape[1]

# seed torch's RNG before the network is constructed
torch.manual_seed(0)
# the architecture is whichever class the selected model_inventory entry names
net = model_architecture(input_layer_nodes, output_layer_nodes, bias=bias).to(device)
# print the network itself (previously this printed the inventory key held in `model`)
print(f"Model structure: {net}")

for name, param in net.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param} \n")

In [None]:
# example showing how the model is used for prediction

# a single random input row, as wide as the network's input layer
inputs = torch.rand(1, input_layer_nodes, device=device)
logits = net(inputs)
# index of the largest output along dimension 1
y_pred = torch.argmax(logits, dim=1)
print(f"Predicted class: {y_pred}")
print(inputs.shape)

In [None]:
# Set the WANDB_NOTEBOOK_NAME environment variable to this notebook's path before wandb.init() runs.
# NOTE(review): %env appears to take the rest of the line literally, so the surrounding
# quotes likely end up inside the value - confirm.
# NOTE(review): this is an absolute path on one machine; it must be edited when the notebook moves.
%env WANDB_NOTEBOOK_NAME 'C:\Users\gille\OneDrive\1-Projects\_Horse Racing 2H22\New Framework\3b_Deep Learning.ipynb'

# start a new wandb run to track this script
wandb.init(
    # set the wandb project where this run will be logged
    project="horse-racing-project",

    # track hyperparameters and run metadata
    config={
        "device": device,
        "model": model,
        "XZ_columns": XZ_columns,
        # key was previously misspelled "continuous_featurs"; runs logged before this
        # fix carry the old key
        "continuous_features": continuous_features,
        "learning_rate": learning_rate,
        "epochs": epochs,
        "vacant_stall_indicator": vacant_stall_indicator,
        "bias": bias,
        # every module of the built network, as returned by net.modules()
        "model_architecture": list(net.modules())
    }
)

In [None]:
# optimizing model parameters

# loss function and optimizer shared by every epoch
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=learning_rate)

for t in range(epochs):
    print(f"Epoch {t+1}", "-------------------------------", sep="\n")
    train_loop(train_dataloader, net, loss_fn, optimizer, device)
    # validate_loop hands back an (acc, loss) pair, which is logged to wandb once per epoch
    acc, loss = validate_loop(validate_dataloader, net, loss_fn, device)
    wandb.log(dict(acc=acc, loss=loss))
print("Done!")

In [None]:
# Finish the wandb run started by wandb.init(); this has to be done explicitly in notebooks.
wandb.finish()

In [None]:
# Write each named parameter of the trained network to its own CSV file.
weights_dir = "weights and biases"
# create the folder up front so a missing directory does not fail the export after training
os.makedirs(weights_dir, exist_ok=True)

for para_name, para_vals in net.named_parameters():
    # detach().cpu() yields a plain CPU tensor, so the export also works if the net is not on the CPU
    np.savetxt(os.path.join(weights_dir, para_name + ".csv"), para_vals.detach().cpu().numpy(), fmt='%6.3f', delimiter=",")

In [None]:
# save trained model for use by Dividend Project Model
# (the whole network object is saved, under the selected model's inventory key)
os.makedirs("models", exist_ok=True)  # make sure the output folder exists before saving
torch.save(net, os.path.join("models", model + ".pt"))