In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import pickle


In [None]:
def _load_pickled_model(model_path):
    """Open ``model_path`` in binary mode and return the unpickled object."""
    with open(model_path, "rb") as model_file:
        return pickle.load(model_file)


# NOTE: pickle.load can execute arbitrary code while deserialising, so these
# files must come from a trusted source.
lasso = _load_pickled_model("../model/regression/race_speed_lasso_model.pkl")
ridge = _load_pickled_model("../model/regression/race_speed_ridge_model.pkl")
gbr = _load_pickled_model("../model/regression/race_speed_gbr_model.pkl")
rfr = _load_pickled_model("../model/regression/race_speed_rfr_model.pkl")


In [None]:
# Read the test CSV into a DataFrame, report its dimensions and preview the first rows.
test_csv_path = "../eda/test.csv"
test_df = pd.read_csv(test_csv_path)
print(test_df.shape)
test_df.head()


In [None]:
# Columns of test_df passed to the regressors as the input matrix.
FEATURES = [
    'draw_segments',
    'horse_number_segments',
    'race_distance',
    'temperature_2m_max',
    'wind_speed_10m_max',
    'proportion_of_additional_weight',
    'track_width',
    'track_moisture',
    'colour_segment',
    'sex_segment',
    'country_segment',
    'win_freq',
    'place_freq',
    'average_placing',
    'average_speed',
    'average_rating',
    'average_race_class',
]

# Select the feature columns, in the order listed above.
X = test_df.loc[:, FEATURES]
X.shape


#### Use the previously trained models to predict race speed, then derive finishing positions by ranking the predicted speeds within each race

In [None]:
# Run every loaded regressor on the same feature matrix.
y_pred_lasso, y_pred_ridge, y_pred_gbr, y_pred_rfr = (
    regressor.predict(X) for regressor in (lasso, ridge, gbr, rfr)
)

# Store each model's predicted speed next to the original test rows.
speed_by_model = {
    'pred_speed_lasso': y_pred_lasso,
    'pred_speed_ridge': y_pred_ridge,
    'pred_speed_gbr': y_pred_gbr,
    'pred_speed_rfr': y_pred_rfr,
}
for speed_column, predicted_speed in speed_by_model.items():
    test_df[speed_column] = predicted_speed
test_df.head()


In [None]:
# Within each race, rank runners by predicted speed in descending order, so the
# highest predicted speed gets position 1. Dense ranking: tied speeds share a
# position and no position numbers are skipped.
position_from_speed = {
    'pred_position_lasso': 'pred_speed_lasso',
    'pred_position_ridge': 'pred_speed_ridge',
    'pred_position_gbr': 'pred_speed_gbr',
    'pred_position_rfr': 'pred_speed_rfr',
}
for position_column, source_speed_column in position_from_speed.items():
    speed_by_race = test_df.groupby('race_id')[source_speed_column]
    test_df[position_column] = speed_by_race.rank(method='dense', ascending=False)
test_df.head()


# Making Betting Decisions

In [None]:
# Probability implied by the quoted odds: the reciprocal of win_odds.
test_df['implied_win_probability'] = 1 / test_df['win_odds']

# Equal-weight blend of the four models: each contributes
# 0.25 * (1 / predicted position).
MODEL_WEIGHT = 0.25
lasso_term = 1 / test_df['pred_position_lasso'] * MODEL_WEIGHT
ridge_term = 1 / test_df['pred_position_ridge'] * MODEL_WEIGHT
gbr_term = 1 / test_df['pred_position_gbr'] * MODEL_WEIGHT
rfr_term = 1 / test_df['pred_position_rfr'] * MODEL_WEIGHT
# NOTE(review): this score is not normalised within a race (a runner ranked
# first by every model scores 1.0), so it does not sum to 1 per race -- confirm
# that comparing it directly with the odds-implied probability is intended.
test_df['pred_win_probability'] = lasso_term + ridge_term + gbr_term + rfr_term

# Flag a bet wherever the blended score exceeds the odds-implied probability.
test_df['bet_action'] = test_df['pred_win_probability'].gt(test_df['implied_win_probability'])
test_df.head()


In [None]:
# Side-by-side view of the predicted speeds, derived positions, betting signal
# and actual finishing position for the first 20 rows.
review_columns = (
    ['race_id', 'race_date']
    + ['pred_speed_lasso', 'pred_speed_ridge', 'pred_speed_gbr', 'pred_speed_rfr']
    + ['pred_position_lasso', 'pred_position_ridge', 'pred_position_gbr', 'pred_position_rfr']
    + ['implied_win_probability', 'pred_win_probability', 'bet_action', 'finishing_position']
)
test_df.loc[:, review_columns].head(20)


In [None]:
# Flat stake recorded on every row.
test_df['cost'] = 100

# A bet on the winner (finishing_position == 1) returns win_odds * stake; every
# placed bet also pays the stake. Rows where bet_action is False contribute 0.
is_winner = test_df['finishing_position'] == 1
gross_return = test_df['win_odds'] * test_df['cost'] * is_winner
test_df['actual_profit'] = test_df['bet_action'] * (gross_return - test_df['cost'])


In [None]:
# Total profit per race. Select the profit column BEFORE aggregating: calling
# .sum() on the whole grouped frame also "sums" every other column (string
# columns get concatenated, and non-summable dtypes can raise), only for the
# result to be thrown away.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in ascending race_id order (groupby sorts its keys by default).
# NOTE(review): this assumes race_id order matches chronological order, since
# the result is later plotted against race_date -- confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Attach each race's date to its cumulative P&L row.
# test_df can hold several rows per race_id, so reduce it to unique
# (race_id, race_date) pairs first instead of joining against every row and
# de-duplicating afterwards. Re-selecting the two P&L columns keeps this cell
# safe to re-run (no 'race_date' overlap) and the result has a clean 0..n-1 index.
race_dates = test_df[['race_id', 'race_date']].drop_duplicates()
cumulative_pnl_df = cumulative_pnl_df[['race_id', 'actual_profit']].merge(
    race_dates, on='race_id', how='inner'
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Convert race_date to datetime before plotting it on the x-axis.
cumulative_pnl_df['race_date'] = pd.to_datetime(cumulative_pnl_df['race_date'])

# Line chart of the running profit against race date.
fig = px.line(
    data_frame=cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title='Cumulative Profit and Loss with Race Speed Regressors',
)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Profit and Loss ($)')
fig.show()


#### Only bet on the top-performing horse (highest predicted win probability) in each race

In [None]:
# Place exactly one bet per race: the runner with the highest blended win score.
# Comparing each score against the per-race maximum would flag EVERY runner tied
# for the top score, producing several bets in one race. Ranking with
# method='first' gives unique ranks (ties broken by row order), so exactly one
# runner per race has rank 1. Rows with a missing score get a NaN rank and are
# never selected.
top_pick_rank = test_df.groupby('race_id')['pred_win_probability'].rank(
    method='first', ascending=False
)
test_df['bet_action'] = top_pick_rank == 1

# Recompute profit for the new betting signal: a winning bet returns
# win_odds * stake, every placed bet pays the stake, non-bets contribute 0.
test_df['actual_profit'] = test_df['bet_action'] * (
    test_df['win_odds'] * test_df['cost'] * (test_df['finishing_position'] == 1) - test_df['cost']
)


In [None]:
# Total profit per race for the top-pick strategy. Select the profit column
# BEFORE aggregating: calling .sum() on the whole grouped frame also "sums"
# every other column (string columns get concatenated, and non-summable dtypes
# can raise), only for the result to be thrown away.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in ascending race_id order (groupby sorts its keys by default).
# NOTE(review): this assumes race_id order matches chronological order, since
# the result is later plotted against race_date -- confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Attach each race's date to its cumulative P&L row.
# test_df can hold several rows per race_id, so reduce it to unique
# (race_id, race_date) pairs first instead of joining against every row and
# de-duplicating afterwards. Re-selecting the two P&L columns keeps this cell
# safe to re-run (no 'race_date' overlap) and the result has a clean 0..n-1 index.
race_dates = test_df[['race_id', 'race_date']].drop_duplicates()
cumulative_pnl_df = cumulative_pnl_df[['race_id', 'actual_profit']].merge(
    race_dates, on='race_id', how='inner'
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Convert race_date to datetime before plotting it on the x-axis.
cumulative_pnl_df['race_date'] = pd.to_datetime(cumulative_pnl_df['race_date'])

# Line chart of the running profit against race date.
fig = px.line(
    data_frame=cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title='Cumulative Profit and Loss with Race Speed Regressors',
)
fig.update_xaxes(title_text='Date')
fig.update_yaxes(title_text='Profit and Loss ($)')
fig.show()


In [None]:
# Last five rows of the running total, i.e. where the cumulative P&L ends up.
cumulative_pnl_df.tail(n=5)


In [None]:
# Count the bets that actually won.
# The comparison must be parenthesised: `&` binds tighter than `==`, so
# `bet_action & finishing_position == 1` is evaluated as
# `(bet_action & finishing_position) == 1`, which matches bets on ANY odd
# finishing position (1, 3, 5, ...) rather than only on winners.
winning_bets = test_df['bet_action'] & (test_df['finishing_position'] == 1)
int(winning_bets.sum())
