In [1]:
import pickle
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import set_config
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import InputLayer, Dense, Dropout
from tensorflow.keras.models import Sequential
from keras.utils import get_custom_objects

from Preprocessing.data_clean import get_data, dropping_no_betting_data

# NOTE(review): with no category or module argument this silences every warning for the
# rest of the kernel session, including pandas chained-assignment warnings raised by later
# cells. Consider narrowing the filter once the notebook is stable.
warnings.filterwarnings('ignore')

In [2]:

"""1)  Getting the data and first round of preprocessing"""
### Receiving all columns of data for the user-selected rows (by track and time)

# Read the raw CSV through the project loader
data = get_data("../../../raw_data/hr_data_0409_221rem.csv") ### CHANGE THIS PATH to get data from the bucket
print("data loaded")

# Project helper; judging by its name and the shape it logs, it removes rows
# without betting data rather than filling them in - TODO confirm
data = dropping_no_betting_data(data)

# Row filters, applied in one chain:
#   1. drop rows whose stall_position is missing
#   2. keep rows whose f_pm_05m value is at most 50 (rows with a missing value are dropped too)
#   3. renumber the index from 0 so later positional slices line up
data = (
    data
    .dropna(subset=['stall_position'])
    .loc[lambda frame: frame['f_pm_05m'] <= 50]
    .reset_index(drop=True)
)

data acquired with shape (120675, 116)
data loaded
Cleaned up missing odds. New shape = (120414, 116)


In [3]:
"""2)  Filling the null L16 columns with 0s"""

# Pick the four trainer/jockey L16 columns and replace their missing values with 0 in one step
X_preproc = (
    data[['iv_trainer_l16r', 'iv_jockey_l16r', 'ae_trainer_l16r', 'ae_jockey_l16r']]
    .fillna(0)
)


In [4]:
"""3)  Scaling the numerical values and defining X"""

# Append f_runners and stall_position so that six columns in total go through the scaler
X_preproc = X_preproc.assign(
    f_runners=data['f_runners'],
    stall_position=data['stall_position'],
)

# Make scikit-learn transformers return DataFrames, so column names and index survive the transform
set_config(transform_output="pandas")

# Restore the previously saved scaler and apply it to the six columns.
# NOTE(review): pickle.load can execute arbitrary code - only load scaler files from a trusted source.
with open('../../../Models/weights-JStone2609/scaler_updated2.pkl', 'rb') as f: # CHANGE THIS PATH to get the saved scalar
    loaded_scaler = pickle.load(f)
X = loaded_scaler.transform(X_preproc)

# Add the one feature that bypasses the scaler (reciprocal of pred_isp), then put the
# seven columns in the order the saved weights were trained with
X = X.assign(pred_isp_prob=1 / data['pred_isp'])[
    [
        'stall_position',
        'iv_trainer_l16r',
        'iv_jockey_l16r',
        'ae_trainer_l16r',
        'ae_jockey_l16r',
        'pred_isp_prob',
        'f_runners',
    ]
]

In [5]:
"""5) Defining backtest and changing commision to 2%"""

# Build the '01m_profit' column with a 2% commission applied to winning profit
def fill_01m_profit(data):
    """Add a '01m_profit' column to ``data`` and return the same frame.

    Per-row rule:
      * ``f_place == 0``  -> -1
      * ``f_place == 1``  -> ``(f_pm_01m - 1) * 0.98`` (profit net of 2% commission)
      * anything else, including a missing ``f_place`` -> the row's ``f_pm_01m_p_back`` value

    The computation is vectorised (no row-wise ``apply``) and also works on an
    empty frame, where it simply adds an empty float column.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs 'f_place'. 'f_pm_01m' is only read when at least one row has
        f_place == 1, and 'f_pm_01m_p_back' only when at least one row falls
        into the "anything else" case.

    Returns
    -------
    pandas.DataFrame
        The input frame itself, modified in place, with a float '01m_profit' column.
    """
    lost = (data['f_place'] == 0).to_numpy(dtype=bool)
    won = (data['f_place'] == 1).to_numpy(dtype=bool)
    other = ~(lost | won)

    profit = np.full(len(data), np.nan, dtype=float)
    profit[lost] = -1.0
    if won.any():
        profit[won] = (data['f_pm_01m'].to_numpy(dtype=float)[won] - 1) * 0.98
    if other.any():
        # Rows that are neither 0 nor 1 fall back to the pre-computed column
        profit[other] = data['f_pm_01m_p_back'].to_numpy(dtype=float)[other]

    data['01m_profit'] = profit
    return data

# Rebind `data` to the frame returned by fill_01m_profit, which now carries the '01m_profit' column
data = fill_01m_profit(data)

# Column subset used for backtesting: race/horse identifiers, the f_pm_01m price, the profit column and f_place
backtest = data[['f_ko', 'f_track', 'f_id', 'id','f_horse','f_pm_01m', '01m_profit', 'f_place']]

In [6]:
"""6)  Model Architecture"""

# Feed-forward classifier: 7 input features -> 32 ReLU units -> 2-way softmax
NN = Sequential([
    InputLayer(input_shape=(7, )),     # input layer
    Dense(32, activation='relu'),      # hidden layer 1
    Dense(2, activation='softmax'),    # output layer
])

In [7]:

"""7)  Loading Weights"""

# Restore previously saved weights into the network defined in the preceding cell.
# As the last expression of the cell, the returned load-status object is echoed in the output.
NN.load_weights("../../../Models/weights-JStone2609/custom_scorer0.05_7input_l16_05mfilter_01mplace") ##CHANGE PATH TO LOAD MODEL WEIGHTS

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x28c289450>

In [8]:
# Evaluation features: every row from position 70000 onward.
# The same cut-off is applied to `backtest` in the following cell, so the two slices stay row-aligned.
X_test = X.iloc[70000:]

In [9]:
# Backtest rows matching X_test (same positional cut-off of 70000).
# An explicit copy is taken because a later cell adds a column to this frame; without it the
# assignment targets a slice of a slice of `data`, which pandas reports as chained assignment.
backtest_test = backtest.iloc[70000:].copy()

In [10]:
# Run the network on the evaluation features; the output layer is a 2-unit softmax,
# so each row of y_pred holds two values
y_pred = NN.predict(X_test)



In [11]:
"""9) Creating backtest table"""

# Attach the first softmax output (column 0 of y_pred), rounded to 2 decimals.
# The values are aligned on X_test's index rather than by position: this cell re-sorts
# `backtest_test` below, so a positional assignment would pair predictions with the wrong
# rows if the cell were executed a second time.
# NOTE(review): which outcome column 0 represents is not visible here - confirm against training code.
backtest_test['model_preds'] = pd.Series(y_pred[:, 0], index=X_test.index).round(2)
backtest_test = backtest_test.sort_values(['model_preds'], ascending = False)

# Presentation table: drop the identifier and placing columns
backtest_live = backtest_test.drop(columns=['f_id', 'id', 'f_place'])


def bet_or_nobet(x, threshold=0.5):
    """Return "BET" when the (rounded) prediction ``x`` is at least ``threshold``, else "NO BET"."""
    return "BET" if x >= threshold else "NO BET"


# NOTE(review): the decision is taken on the rounded prediction, so raw values in
# [0.495, 0.5) are flagged as "BET"; this matches the later `model_preds >= 0.5` filter.
backtest_live['bet'] = backtest_live['model_preds'].apply(bet_or_nobet)

backtest_live

Unnamed: 0,f_ko,f_track,f_horse,f_pm_01m,01m_profit,model_preds,bet
99801,24/07/2023 19:40,BALLINROBE,Beer With The Boys,46.0,-1.000,1.0,BET
94941,24/06/2023 17:05,DOWN ROYAL,Universally,70.0,-1.000,1.0,BET
104497,22/08/2023 20:15,ROSCOMMON,Blackstone Cliff,13.0,-1.000,1.0,BET
72424,26/10/2022 16:35,CURRAGH,Shur Lookit,42.0,-1.000,1.0,BET
72425,26/10/2022 16:35,CURRAGH,Flier,38.0,-1.000,1.0,BET
...,...,...,...,...,...,...,...
78064,21/01/2023 14:47,LINGFIELD,Makinmedoit,10.0,-1.000,0.0,NO BET
78065,21/01/2023 14:47,LINGFIELD,Tequilamockingbird,15.5,-1.000,0.0,NO BET
89998,26/05/2023 19:30,CURRAGH,Rahmi,25.0,23.520,0.0,NO BET
89997,26/05/2023 19:30,CURRAGH,Earls,28.0,-1.000,0.0,NO BET


In [16]:
# Rows whose rounded prediction is at least 0.5 (the same cut-off used for the "BET" label)
bets50 = backtest_live[backtest_live['model_preds'] >= 0.5]

In [19]:
# Write the selected bets to bets50.csv, a path relative to the kernel's working directory
bets50.to_csv("bets50.csv")

In [None]:
# import matplotlib.pyplot as plt
# def calculate_cumulative_profit(df, threshold):
#     df = backtest_live[backtest_live['model_preds'] >= threshold]
#     return df['01m_profit'].cumsum()
# thresholds = [0.5, 0.9]

# for threshold in thresholds:
#     plt.plot(calculate_cumulative_profit(backtest_live, threshold), label=f"Threshold {threshold}")
# plt.title("Cumulative Profits based on Model's Predictions")
# plt.legend()
# plt.show()

In [None]:
# Open a 10x6 figure; the plt.plot calls later in this cell draw onto it
plt.figure(figsize=(10,6))

def calculate_cumulative_profit_over_time(backtest_live, threshold):
    """Return the running profit of the selected bets in chronological order.

    Parameters
    ----------
    backtest_live : pandas.DataFrame
        Must contain 'model_preds', 'f_ko' and '01m_profit'. It is not modified.
    threshold : float
        Minimum 'model_preds' value for a row to be counted as a bet.

    Returns
    -------
    pandas.DataFrame
        Two columns, 'f_ko' (parsed to datetime) and 'cumulative_profit', with one
        row per selected bet, ordered by 'f_ko'.
    """
    # Work on a copy of the filtered rows so the caller's frame is left untouched
    selected = backtest_live[backtest_live['model_preds'] >= threshold].copy()

    # f_ko is displayed as day-first text (e.g. "24/07/2023 19:40"); sorting such strings
    # orders by day-of-month, not by date, so parse it before sorting.
    selected['f_ko'] = pd.to_datetime(selected['f_ko'], dayfirst=True)
    selected = selected.sort_values(by='f_ko', kind='mergesort')  # stable for races with equal times

    # Accumulate over the selected, time-ordered bets only - not over the full input frame
    selected['cumulative_profit'] = selected['01m_profit'].cumsum()
    return selected[['f_ko', 'cumulative_profit']]

# Plot profits for each threshold
# One line per prediction cut-off, all drawn on the figure opened at the top of this cell
thresholds = [0.5, 0.9]
for threshold in thresholds:
    result = calculate_cumulative_profit_over_time(backtest_live, threshold)
    plt.plot(result['f_ko'], result['cumulative_profit'], label=f"Threshold {threshold}")

# Figure furniture: title, legend keyed by the labels above, axis names and grid
plt.title("Cumulative Profits Over Time based on Model's Predictions")
plt.legend()
plt.xlabel("Time")
plt.ylabel("Cumulative Profit")
plt.grid(True)
plt.tight_layout()
plt.show()