In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import pickle


In [None]:
# Pickled finish-position classifiers, keyed by the notebook variable each one
# is bound to further down.
# NOTE(review): pickle.load can run arbitrary code while deserialising, so these
# files must come from a trusted source.
model_paths = {
    "lr": "../model/classification/finish_position_lr_model.pkl",
    "rfc": "../model/classification/finish_position_rfc_model.pkl",
    "xgbc": "../model/classification/finish_position_xgbc_model.pkl",
    "cbc": "../model/classification/finish_position_cbc_model.pkl",
}

loaded_models = {}
for model_key, model_path in model_paths.items():
    with open(model_path, "rb") as model_file:
        loaded_models[model_key] = pickle.load(model_file)

lr = loaded_models["lr"]
rfc = loaded_models["rfc"]
xgbc = loaded_models["xgbc"]
cbc = loaded_models["cbc"]


In [None]:
# Read the test-set CSV, report its dimensions and preview the first rows.
test_csv_path = "../eda/test.csv"
test_df = pd.read_csv(test_csv_path)
print(test_df.shape)
test_df.head()


In [None]:
# Binary label: 1 when finishing_position is 3 or lower, otherwise 0.
in_top_three = test_df['finishing_position'] <= 3
test_df['finishing_position_class'] = in_top_three.astype('int64')

# Class balance of the label.
test_df['finishing_position_class'].value_counts()


In [None]:
# Columns passed to the classifiers' predict_proba calls, in this order.
FEATURES = [
    'draw_segments',
    'horse_number_segments',
    'race_distance',
    'temperature_2m_max',
    'wind_speed_10m_max',
    'proportion_of_additional_weight',
    'track_width',
    'track_moisture',
    'colour_segment',
    'sex_segment',
    'country_segment',
    'win_freq',
    'place_freq',
    'average_placing',
    'average_speed',
    'average_rating',
    'average_race_class',
]

# Feature matrix restricted to exactly those columns.
X = test_df.loc[:, FEATURES]
X.shape



In [None]:
# Class-probability matrices from each of the four loaded models.
y_pred_lr, y_pred_rfc, y_pred_xgbc, y_pred_cbc = (
    model.predict_proba(X) for model in (lr, rfc, xgbc, cbc)
)

# Keep only column 1 of each matrix.
# NOTE(review): presumably column 1 is the positive class — confirm against
# each model's classes_ ordering.
proba_by_column = {
    'pred_class_proba_lr': y_pred_lr,
    'pred_class_proba_rfc': y_pred_rfc,
    'pred_class_proba_xgb': y_pred_xgbc,
    'pred_class_proba_cbc': y_pred_cbc,
}
for column_name, proba_matrix in proba_by_column.items():
    test_df[column_name] = proba_matrix[:, 1]

test_df.head()


In [None]:
# Probability implied by the odds: the reciprocal of win_odds.
test_df['implied_win_probability'] = 1 / test_df['win_odds']

# Equal-weight blend of the rfc, xgb and cbc probabilities; the lr
# probability is not part of the blend.
# NOTE(review): the label built earlier in this notebook is
# finishing_position <= 3. If the models were trained on that label, this blend
# is a top-3 probability being compared with a *win* implied probability —
# confirm that this is intended.
ensemble_weight = 1 / 3
test_df['pred_win_probability'] = (
    (test_df['pred_class_proba_rfc'] * ensemble_weight)
    + (test_df['pred_class_proba_xgb'] * ensemble_weight)
    + (test_df['pred_class_proba_cbc'] * ensemble_weight)
)

# Bet whenever the blended probability exceeds the odds-implied one.
test_df['bet_action'] = (
    test_df['implied_win_probability'] < test_df['pred_win_probability']
)
test_df.head()


In [None]:
# Side-by-side view of model probabilities, implied probability, the betting
# decision and the actual result for the first 20 rows.
preview_columns = [
    'race_id',
    'race_date',
    'pred_class_proba_lr',
    'pred_class_proba_rfc',
    'implied_win_probability',
    'pred_win_probability',
    'bet_action',
    'finishing_position',
]
test_df.loc[:, preview_columns].head(20)


In [None]:
# Every bet uses the same stake of 100.
test_df['cost'] = 100

# Payout is win_odds * stake when the horse finished first, else 0. The stake
# is always subtracted, and rows without a bet contribute 0 profit.
finished_first = test_df['finishing_position'] == 1
gross_payout = test_df['win_odds'] * test_df['cost'] * finished_first
test_df['actual_profit'] = test_df['bet_action'] * (gross_payout - test_df['cost'])


In [None]:
# Profit per race. Select the column *before* summing: summing the whole frame
# also aggregates every other column (including the string race_date column),
# which is wasted work and, with numeric_only=False being the default in
# pandas >= 2.0, is attempted on non-numeric columns as well.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in race_id order (groupby sorts its keys).
# NOTE(review): this assumes race_id order is chronological — confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Attach race_date to each race's cumulative profit. test_df can hold several
# rows per race_id, so the join repeats races; the repeats are dropped afterwards.
race_dates = test_df[['race_date', 'race_id']].set_index('race_id')
cumulative_pnl_df = (
    cumulative_pnl_df
    .set_index('race_id')
    .join(race_dates, how='inner')
    .reset_index()
    .drop_duplicates()
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Convert race_date to datetime before plotting.
cumulative_pnl_df = cumulative_pnl_df.assign(
    race_date=pd.to_datetime(cumulative_pnl_df['race_date'])
)

# Line chart of the running profit against race date.
plot_title = 'Cumulative Profit and Loss with Finish Position Classifiers'
fig = px.line(
    cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title=plot_title,
)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Profit and Loss ($)')
fig.show()


#### Only bet on the horse with the highest predicted probability in each race

In [None]:
# Highest blended probability within each race, broadcast back to every row.
race_best_probability = test_df.groupby('race_id')['pred_win_probability'].transform('max')

# Bet only on the row(s) that reach that maximum; ties select several rows.
test_df['bet_action'] = race_best_probability == test_df['pred_win_probability']

# Profit with the stake currently stored in `cost`: win_odds * stake when the
# horse finished first, minus the stake; 0 on rows without a bet.
finished_first = test_df['finishing_position'] == 1
gross_payout = test_df['win_odds'] * test_df['cost'] * finished_first
test_df['actual_profit'] = test_df['bet_action'] * (gross_payout - test_df['cost'])


In [None]:
# Profit per race for the top-horse strategy. Select the column *before*
# summing: summing the whole frame also aggregates every other column
# (including the string race_date column), which is wasted work and, with
# numeric_only=False being the default in pandas >= 2.0, is attempted on
# non-numeric columns as well.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in race_id order (groupby sorts its keys).
# NOTE(review): this assumes race_id order is chronological — confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Look up race_date for every race_id. test_df can contain multiple rows per
# race, so the inner join yields repeated rows that are removed right after.
date_lookup = test_df[['race_date', 'race_id']].set_index('race_id')
pnl_by_race = cumulative_pnl_df.set_index('race_id')
cumulative_pnl_df = (
    pnl_by_race
    .join(date_lookup, how='inner')
    .reset_index()
    .drop_duplicates()
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Convert race_date to datetime before plotting.
cumulative_pnl_df = cumulative_pnl_df.assign(
    race_date=pd.to_datetime(cumulative_pnl_df['race_date'])
)

# Line chart of the running profit for the flat-stake, top-horse strategy.
plot_title = 'Cumulative Profit and Loss with Finish Position Classifiers using Flat Betting'
fig = px.line(
    cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title=plot_title,
)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Profit and Loss ($)')
fig.show()


In [None]:
# Last five rows: where the cumulative profit ends up.
cumulative_pnl_df.tail(n=5)


In [None]:
# Number of placed bets that landed on the race winner.
# The comparison must be parenthesised: `&` binds tighter than `==`, so
# `bet_action & finishing_position == 1` is evaluated as
# `(bet_action & finishing_position) == 1` and does not test for first place.
is_winning_bet = test_df['bet_action'] & (test_df['finishing_position'] == 1)
is_winning_bet.sum()


In [None]:
def print_stats(profit, cost, scale = 365):
    """Print summary statistics for a sequence of betting results.

    Parameters
    ----------
    profit : pandas.Series
        Net profit per entry (the callers in this notebook pass one entry per
        race), in the order the running total should be accumulated.
    cost : pandas.Series
        Amount staked per entry, aligned with ``profit``.
    scale : int or float, default 365
        The Sharpe ratio (mean / std of the per-entry return) is multiplied by
        ``sqrt(scale)``.

    Raises
    ------
    ValueError
        If ``profit`` is empty, since there is no final return to report.
    """
    if len(profit) == 0:
        raise ValueError("print_stats needs at least one profit entry")

    # Per-entry return on stake. Entries with 0 profit on 0 stake become NaN
    # and are skipped by mean/std/median.
    pnl = profit/cost
    mean = pnl.mean()
    std = pnl.std()
    median = pnl.median()

    cumsum_pnl = profit.cumsum()
    # Positional access: the Series is indexed by label (e.g. race_id), so
    # `cumsum_pnl[-1]` would look up the label -1 instead of the last entry.
    total_return = cumsum_pnl.iloc[-1]

    # Distance below the running peak; taking the min with the lowest running
    # total also counts a dip below the starting balance of 0.
    drawdown = (cumsum_pnl - cumsum_pnl.cummax())
    max_drawdown = min(drawdown.min(),cumsum_pnl.min())
    sharpe_ratio = mean / std * np.sqrt(scale)

    # NaN != 0 is True, so NaN entries are excluded explicitly to avoid
    # counting entries without a stake as lost bets.
    settled = pnl.notna() & (pnl != 0)
    win_rate = (pnl > 0).sum() / settled.sum()

    total_bets =  ((profit != 0)*cost).sum()
    total_pct_return = total_return/total_bets

    print("Win Rate:", round(win_rate*100,2), "%")
    print("Total Bets:", round(total_bets,2))
    print("Total Return:", round(total_return,2))
    print("Total Pct Return:", round(total_pct_return*100,2), "%")
    print("Mean: ", round(mean*100,2),"%")
    print("Standard Deviation: ", round(std*100,2),"%")
    print("Median: ", round(median*100,2),"%")
    print("Max Drawdown: ", round(max_drawdown,2))
    print("Sharpe Ratio: ", round(sharpe_ratio,4))

# Per-race profit and stake for the bets that were actually placed.
# Summing `cost` over every row of a race would also include the stake of
# horses that were never bet on, which inflates the cost basis, so rows are
# filtered to placed bets with a positive stake before aggregating.
placed_bets = test_df[test_df["bet_action"] & (test_df["cost"] > 0)]
bets = placed_bets.groupby("race_id")[["actual_profit", "cost"]].sum()
print_stats(
    bets["actual_profit"],
    bets["cost"],
    scale = 1
)


#### Optimal betting using Kelly Criterion

In [None]:
# Size each stake with the Kelly criterion: fraction = (b * p - q) / b, where
# b = win_odds - 1, p = pred_win_probability and q = 1 - p. The fraction is
# applied to a fixed principal of 100.
betting_principal = 100
p = test_df['pred_win_probability']
q = 1 - p
b = test_df['win_odds'] - 1
kelly_fraction = (b * p - q) / b

# Negative stakes are replaced by 0.
test_df['cost'] = (kelly_fraction * betting_principal).clip(lower=0)

# Expected payoff = win_odds * stake * predicted probability; expected profit
# subtracts the stake from that.
kelly_stake = test_df['cost']
test_df['expected_payoff'] = (
    test_df["win_odds"] * kelly_stake * test_df['pred_win_probability']
)
test_df['expected_profit'] = test_df['expected_payoff'] - test_df["cost"]


In [None]:
# Per-race maximum of the blended probability, aligned with every row.
top_probability_in_race = test_df.groupby('race_id')['pred_win_probability'].transform('max')

# Flag the row(s) matching that maximum as the bet; ties flag several rows.
test_df['bet_action'] = top_probability_in_race == test_df['pred_win_probability']

# Realised profit using the stake stored in `cost`: win_odds * stake when the
# horse finished first, minus the stake; 0 on rows without a bet.
is_winner = test_df['finishing_position'] == 1
winnings = test_df['win_odds'] * test_df['cost'] * is_winner
test_df['actual_profit'] = test_df['bet_action'] * (winnings - test_df['cost'])


#### First 20 rows: bets placed and race winners

In [None]:
# Rows where a bet was placed or the horse finished first, limited to the
# columns needed to compare expected and realised outcomes.
bet_or_winner = (test_df['bet_action'] == True) | (test_df['finishing_position'] == 1)
outcome_columns = [
    'race_id',
    'pred_win_probability',
    'win_odds',
    'cost',
    'expected_payoff',
    'expected_profit',
    'actual_profit',
    'bet_action',
    'finishing_position',
]
test_df[bet_or_winner][outcome_columns].head(20)


In [None]:
# Profit per race for the Kelly-sized bets. Select the column *before*
# summing: summing the whole frame also aggregates every other column
# (including the string race_date column), which is wasted work and, with
# numeric_only=False being the default in pandas >= 2.0, is attempted on
# non-numeric columns as well.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in race_id order (groupby sorts its keys).
# NOTE(review): this assumes race_id order is chronological — confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Bring race_date alongside the cumulative profit of each race. Because test_df
# may list a race_id on several rows, the joined frame is de-duplicated.
dates_by_race = test_df[['race_date', 'race_id']].set_index('race_id')
cumulative_pnl_df = (
    cumulative_pnl_df
    .set_index('race_id')
    .join(dates_by_race, how='inner')
    .reset_index()
    .drop_duplicates()
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Convert race_date to datetime before plotting.
cumulative_pnl_df = cumulative_pnl_df.assign(
    race_date=pd.to_datetime(cumulative_pnl_df['race_date'])
)

# Line chart of the running profit for the Kelly-sized strategy.
plot_title = 'Cumulative Profit and Loss with Finish Position Classifiers using Kelly Criterion'
fig = px.line(
    cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title=plot_title,
)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='Profit and Loss ($)')
fig.show()


In [None]:
# Last five rows: the final cumulative profit of the Kelly-sized strategy.
cumulative_pnl_df.tail(n=5)


In [None]:
# NOTE(review): this re-defines the print_stats function that already appears
# earlier in this notebook; keeping a single definition would avoid drift.
def print_stats(profit, cost, scale = 365):
    """Print summary statistics for a sequence of betting results.

    Parameters
    ----------
    profit : pandas.Series
        Net profit per entry (the callers in this notebook pass one entry per
        race), in the order the running total should be accumulated.
    cost : pandas.Series
        Amount staked per entry, aligned with ``profit``.
    scale : int or float, default 365
        The Sharpe ratio (mean / std of the per-entry return) is multiplied by
        ``sqrt(scale)``.

    Raises
    ------
    ValueError
        If ``profit`` is empty, since there is no final return to report.
    """
    if len(profit) == 0:
        raise ValueError("print_stats needs at least one profit entry")

    # Per-entry return on stake. Entries with 0 profit on 0 stake become NaN
    # and are skipped by mean/std/median.
    pnl = profit/cost
    mean = pnl.mean()
    std = pnl.std()
    median = pnl.median()

    cumsum_pnl = profit.cumsum()
    # Positional access: the Series is indexed by label (e.g. race_id), so
    # `cumsum_pnl[-1]` would look up the label -1 instead of the last entry.
    total_return = cumsum_pnl.iloc[-1]

    # Distance below the running peak; taking the min with the lowest running
    # total also counts a dip below the starting balance of 0.
    drawdown = (cumsum_pnl - cumsum_pnl.cummax())
    max_drawdown = min(drawdown.min(),cumsum_pnl.min())
    sharpe_ratio = mean / std * np.sqrt(scale)

    # NaN != 0 is True, so NaN entries are excluded explicitly to avoid
    # counting entries without a stake as lost bets.
    settled = pnl.notna() & (pnl != 0)
    win_rate = (pnl > 0).sum() / settled.sum()

    total_bets =  ((profit != 0)*cost).sum()
    total_pct_return = total_return/total_bets

    print("Win Rate:", round(win_rate*100,2), "%")
    print("Total Bets:", round(total_bets,2))
    print("Total Return:", round(total_return,2))
    print("Total Pct Return:", round(total_pct_return*100,2), "%")
    print("Mean: ", round(mean*100,2),"%")
    print("Standard Deviation: ", round(std*100,2),"%")
    print("Median: ", round(median*100,2),"%")
    print("Max Drawdown: ", round(max_drawdown,2))
    print("Sharpe Ratio: ", round(sharpe_ratio,4))

# Per-race profit and stake for the bets that were actually placed.
# `cost` holds a Kelly stake for every horse, not only the one bet on, so
# summing it over all rows of a race would overstate the amount wagered.
# Rows are therefore filtered to placed bets with a positive stake first.
placed_bets = test_df[test_df["bet_action"] & (test_df["cost"] > 0)]
bets = placed_bets.groupby("race_id")[["actual_profit", "cost"]].sum()
print_stats(
    bets["actual_profit"],
    bets["cost"],
    scale = 1
)
