In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import pickle


In [None]:
def _load_pickle(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Load the four previously trained finish-time regressors.
# NOTE: pickle.load can execute arbitrary code; only load trusted artefacts.
lasso = _load_pickle("../model/regression/finish_time_lasso_model.pkl")
ridge = _load_pickle("../model/regression/finish_time_ridge_model.pkl")
gbr = _load_pickle("../model/regression/finish_time_gbr_model.pkl")
rfr = _load_pickle("../model/regression/finish_time_rfr_model.pkl")


In [None]:
# Read the test-set CSV, report its (rows, columns) and preview the first rows.
test_df = pd.read_csv("../eda/test.csv")
print((len(test_df.index), len(test_df.columns)))
test_df.head()


In [None]:
# Store finishing positions as integers so they can be compared with 1 later on.
test_df = test_df.astype({'finishing_position': int})


In [None]:
# Columns passed to the regressors' predict(), in this order.
FEATURES = [
    'draw_segments',
    'horse_number_segments',
    'race_distance',
    'temperature_2m_max',
    'wind_speed_10m_max',
    'proportion_of_additional_weight',
    'track_width',
    'track_moisture',
    'colour_segment',
    'sex_segment',
    'country_segment',
    'win_freq',
    'place_freq',
    'average_placing',
    'average_speed',
]

X = test_df.loc[:, FEATURES]
X.shape


#### Use the previously trained models to predict finish time, then derive positions by ranking the predicted finish times within each race

In [None]:
# Predict a finish time per runner with each regressor, in a fixed model order.
y_pred_lasso, y_pred_ridge, y_pred_gbr, y_pred_rfr = (
    model.predict(X) for model in (lasso, ridge, gbr, rfr)
)

# Attach the predictions to the test frame, one column per model.
test_df = test_df.assign(
    pred_time_lasso=y_pred_lasso,
    pred_time_ridge=y_pred_ridge,
    pred_time_gbr=y_pred_gbr,
    pred_time_rfr=y_pred_rfr,
)
test_df.head()


In [None]:
# Within each race, rank runners by predicted time (smallest time -> position 1).
# Dense ranking means tied times share a position and no position is skipped.
for time_col, position_col in [
    ('pred_time_lasso', 'pred_position_lasso'),
    ('pred_time_ridge', 'pred_position_ridge'),
    ('pred_time_gbr', 'pred_position_gbr'),
    ('pred_time_rfr', 'pred_position_rfr'),
]:
    test_df[position_col] = (
        test_df.groupby('race_id')[time_col].rank(method='dense', ascending=True)
    )
test_df.head()


# Making Betting Decisions

In [None]:
# Probability implied by the market odds.
test_df['implied_win_probability'] = 1 / test_df['win_odds']

# Model score: equally weighted average of the reciprocal predicted positions.
# NOTE(review): this score is not normalised within a race, so it need not sum
# to 1 across runners - confirm before treating it as a probability.
ENSEMBLE_WEIGHT = 0.25
POSITION_COLUMNS = [
    'pred_position_lasso',
    'pred_position_ridge',
    'pred_position_gbr',
    'pred_position_rfr',
]
test_df['pred_win_probability'] = sum(
    1 / test_df[position_col] * ENSEMBLE_WEIGHT for position_col in POSITION_COLUMNS
)

# Flat staking: the same stake on every candidate bet.
test_df['cost'] = 100


#### Only bet on the top performing horse in each race

In [None]:
def place_bet(row):
    """Return the realised profit of one row's bet.

    ``row`` must provide ``bet_action``, ``finishing_position``, ``win_odds``
    and ``cost``. Rows that are not bet on yield 0; a bet on the winner yields
    ``win_odds * cost - cost``; any other bet loses the stake.
    """
    if not row['bet_action']:
        return 0
    if row['finishing_position'] != 1:
        return -row['cost']
    return row['win_odds'] * row['cost'] - row['cost']

# Bet on every runner whose score equals the best score in its race
# (runners tied for the best score are all bet on).
best_in_race = test_df.groupby('race_id')['pred_win_probability'].transform('max')
test_df['max_race_probability'] = best_in_race
test_df['bet_action'] = test_df['max_race_probability'].eq(test_df['pred_win_probability'])
test_df['actual_profit'] = test_df.apply(place_bet, axis=1)


In [None]:
# Per-race profit. Select the column BEFORE aggregating so that only
# 'actual_profit' is summed; summing the whole frame also aggregates every
# other column (including non-numeric ones) just to throw the result away.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in race_id order (groupby sorts its keys).
# NOTE(review): assumes race_id order is chronological - confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Attach each race's date. test_df has one row per runner, so the join repeats
# every race; the duplicates are dropped straight afterwards.
race_dates = test_df[['race_date', 'race_id']].set_index('race_id')
cumulative_pnl_df = (
    cumulative_pnl_df
    .set_index('race_id')
    .join(race_dates, how='inner')
    .reset_index()
    .drop_duplicates()
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
cumulative_pnl_df['race_date'] = pd.to_datetime(cumulative_pnl_df['race_date'])

# Values reused by the reference line and the annotation below.
first_date = cumulative_pnl_df['race_date'].iloc[0]
last_date = cumulative_pnl_df['race_date'].iloc[-1]
final_pnl = cumulative_pnl_df['actual_profit'].iloc[-1]

fig = px.line(
    cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title='Cumulative P/L from Finish Time Regressors with Flat Betting',
)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='P/L ($)')

# Dashed horizontal line at the final cumulative P/L.
fig.add_shape(
    type="line",
    x0=first_date,
    x1=last_date,
    y0=final_pnl,
    y1=final_pnl,
    line={"color": "red", "width": 1, "dash": "dash"},
)

# Label the last point with its dollar value.
fig.add_annotation(
    x=last_date,
    y=final_pnl,
    text=f"${final_pnl:,.2f}",
    showarrow=True,
    arrowhead=1,
)

fig.update_layout(template='plotly_dark', width=1000, height=500)

fig.show()


In [None]:
# Last five rows of the cumulative P/L table.
cumulative_pnl_df.tail(5)


In [None]:
# Bets that actually won. The comparison must be parenthesised: `&` binds more
# tightly than `==`, so `a & b == 1` is evaluated as `(a & b) == 1`.
winning_bets = test_df['bet_action'] & (test_df['finishing_position'] == 1)

# Number of winning bets, then the share of all placed bets that won.
print(winning_bets.sum())
winning_bets.sum() / test_df['bet_action'].sum()


In [None]:
def print_stats(profit,cost, scale = 365):
    """Print summary statistics for a series of per-period betting results.

    Parameters
    ----------
    profit : pandas.Series
        Net profit per period; a value of 0 is treated as "no bet placed".
    cost : pandas.Series
        Amount staked per period, aligned with ``profit`` by index.
    scale : int or float, default 365
        Number of periods used to scale the Sharpe ratio
        (``mean / std * sqrt(scale)``).

    Returns
    -------
    None
        The statistics are written to stdout only.
    """
    # Return on stake per period. Periods with no stake (0 / 0) become NaN and
    # are skipped by mean/std/median.
    pnl = profit/cost
    mean = pnl.mean()
    std = pnl.std()
    median = pnl.median()
    cumsum_pnl = profit.cumsum()
    drawdown = (cumsum_pnl - cumsum_pnl.cummax())
    # Also taking cumsum_pnl.min() is equivalent to starting the curve from a
    # balance of 0, so an initial losing streak counts as drawdown.
    max_drawdown = min(drawdown.min(),cumsum_pnl.min())
    sharpe_ratio = mean / std * np.sqrt(scale)

    # Count wins and bets from `profit` itself: NaN returns from zero-stake
    # periods compare unequal to 0 and would otherwise inflate the denominator.
    bet_placed = profit != 0
    n_bets = bet_placed.sum()
    win_rate = (profit > 0).sum() / n_bets if n_bets else float("nan")

    total_bets = (bet_placed*cost).sum()
    # Positional access: the index holds labels (e.g. race ids), so [-1] would
    # be a label lookup rather than "last element".
    total_return = cumsum_pnl.iloc[-1] if len(cumsum_pnl) else 0.0
    total_pct_return = total_return/total_bets if total_bets else float("nan")

    print("Win Rate:", round(win_rate*100,2), "%")
    print("Total Bets:", round(total_bets,2))
    print("Total Return:", round(total_return,2))
    print("Total Pct Return:", round(total_pct_return*100,2), "%")
    print("Mean: ", round(mean*100,2),"%")
    print("Standard Deviation: ", round(std*100,2),"%")
    print("Median: ", round(median*100,2),"%")
    print("Max Drawdown: ", round(max_drawdown,2))
    print("Sharpe Ratio: ", round(sharpe_ratio,4))

# Stake actually placed per race: sum `cost` over the rows that were bet on.
# (Summing `cost` over every runner in the race and multiplying by the number
# of bets overstates the stake, which distorts every per-stake statistic.)
bets = (
    test_df.loc[test_df["bet_action"], ["race_id", "cost"]]
    .groupby("race_id")
    .sum()
    .reindex(pnl.index, fill_value=0)  # keep one row per race, aligned with pnl
)
print_stats(
    pnl, 
    bets["cost"], 
    scale = 1
)


#### Optimal bet sizing using the Kelly criterion

In [None]:
# set cost using kelley criterion
# Size each stake with the Kelly criterion: fraction = (b*p - q) / b, where
# b is the net odds, p the estimated win probability and q = 1 - p.
# NOTE(review): assumes pred_win_probability is a usable probability - confirm.
b = test_df['win_odds'] - 1
p = test_df['pred_win_probability']
q = 1 - p
betting_principal = 100
kelly_fraction = (b * p - q) / b

# A negative Kelly fraction means "do not bet": replace negative stakes with 0.
kelly_stake = kelly_fraction * betting_principal
test_df['cost'] = kelly_stake.mask(kelly_stake < 0, 0)

# Payoff and profit expected under the model's own probability estimate.
test_df['expected_payoff'] = (
    test_df["win_odds"].mul(test_df['cost']).mul(test_df['pred_win_probability'])
)
test_df['expected_profit'] = test_df['expected_payoff'].sub(test_df["cost"])


In [None]:
def place_bet(row):
    """Return the realised profit of one row's bet.

    ``row`` must provide ``bet_action``, ``finishing_position``, ``win_odds``
    and ``cost``. Rows that are not bet on yield 0; a bet on the winner yields
    ``win_odds * cost - cost``; any other bet loses the stake.
    """
    if not row['bet_action']:
        return 0
    if row['finishing_position'] != 1:
        return -row['cost']
    return row['win_odds'] * row['cost'] - row['cost']

# Bet on every runner whose score equals the best score in its race
# (runners tied for the best score are all bet on), now with Kelly stakes.
best_in_race = test_df.groupby('race_id')['pred_win_probability'].transform('max')
test_df['max_race_probability'] = best_in_race
test_df['bet_action'] = test_df['max_race_probability'].eq(test_df['pred_win_probability'])
test_df['actual_profit'] = test_df.apply(place_bet, axis=1)


#### First 20 rows of placed bets and actual winners

In [None]:
# Show runners that were either bet on or actually won, with the sizing and
# outcome columns side by side.
outcome_columns = [
    'race_id',
    'pred_win_probability',
    'win_odds',
    'cost',
    'expected_payoff',
    'expected_profit',
    'actual_profit',
    'bet_action',
    'finishing_position',
]
bet_or_winner = test_df['bet_action'] | (test_df['finishing_position'] == 1)
test_df.loc[bet_or_winner, outcome_columns].head(20)


In [None]:
# Per-race profit. Select the column BEFORE aggregating so that only
# 'actual_profit' is summed; summing the whole frame also aggregates every
# other column (including non-numeric ones) just to throw the result away.
pnl = test_df.groupby('race_id')['actual_profit'].sum()

# Running total in race_id order (groupby sorts its keys).
# NOTE(review): assumes race_id order is chronological - confirm.
cumulative_pnl = pnl.cumsum()
cumulative_pnl_df = cumulative_pnl.reset_index()
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
# Attach each race's date. test_df has one row per runner, so the join repeats
# every race; the duplicates are dropped straight afterwards.
race_dates = test_df[['race_date', 'race_id']].set_index('race_id')
cumulative_pnl_df = (
    cumulative_pnl_df
    .set_index('race_id')
    .join(race_dates, how='inner')
    .reset_index()
    .drop_duplicates()
)
print(cumulative_pnl_df.shape)
cumulative_pnl_df.head()


In [None]:
cumulative_pnl_df['race_date'] = pd.to_datetime(cumulative_pnl_df['race_date'])

# Values reused by the reference line and the annotation below.
first_date = cumulative_pnl_df['race_date'].iloc[0]
last_date = cumulative_pnl_df['race_date'].iloc[-1]
final_pnl = cumulative_pnl_df['actual_profit'].iloc[-1]

fig = px.line(
    cumulative_pnl_df,
    x='race_date',
    y='actual_profit',
    title='Cumulative P/L from Finish Time Regressors with Kelly Criterion',
)
fig.update_xaxes(title='Date')
fig.update_yaxes(title='P/L ($)')

# Dashed horizontal line at the final cumulative P/L.
fig.add_shape(
    type="line",
    x0=first_date,
    x1=last_date,
    y0=final_pnl,
    y1=final_pnl,
    line={"color": "red", "width": 1, "dash": "dash"},
    name="Last Data Point",
)

# Label the last point with its dollar value.
fig.add_annotation(
    x=last_date,
    y=final_pnl,
    text=f"${final_pnl:,.2f}",
    showarrow=True,
    arrowhead=1,
)

fig.update_layout(template='plotly_dark', width=1000, height=500)

fig.show()


In [None]:
# Last five rows of the cumulative P/L table.
cumulative_pnl_df.tail(5)


In [None]:
def print_stats(profit,cost, scale = 365):
    """Print summary statistics for a series of per-period betting results.

    Parameters
    ----------
    profit : pandas.Series
        Net profit per period; a value of 0 is treated as "no bet placed".
    cost : pandas.Series
        Amount staked per period, aligned with ``profit`` by index.
    scale : int or float, default 365
        Number of periods used to scale the Sharpe ratio
        (``mean / std * sqrt(scale)``).

    Returns
    -------
    None
        The statistics are written to stdout only.
    """
    # Return on stake per period. Periods with no stake (0 / 0) become NaN and
    # are skipped by mean/std/median.
    pnl = profit/cost
    mean = pnl.mean()
    std = pnl.std()
    median = pnl.median()
    cumsum_pnl = profit.cumsum()
    drawdown = (cumsum_pnl - cumsum_pnl.cummax())
    # Also taking cumsum_pnl.min() is equivalent to starting the curve from a
    # balance of 0, so an initial losing streak counts as drawdown.
    max_drawdown = min(drawdown.min(),cumsum_pnl.min())
    sharpe_ratio = mean / std * np.sqrt(scale)

    # Count wins and bets from `profit` itself: NaN returns from zero-stake
    # periods compare unequal to 0 and would otherwise inflate the denominator.
    bet_placed = profit != 0
    n_bets = bet_placed.sum()
    win_rate = (profit > 0).sum() / n_bets if n_bets else float("nan")

    total_bets = (bet_placed*cost).sum()
    # Positional access: the index holds labels (e.g. race ids), so [-1] would
    # be a label lookup rather than "last element".
    total_return = cumsum_pnl.iloc[-1] if len(cumsum_pnl) else 0.0
    total_pct_return = total_return/total_bets if total_bets else float("nan")

    print("Win Rate:", round(win_rate*100,2), "%")
    print("Total Bets:", round(total_bets,2))
    print("Total Return:", round(total_return,2))
    print("Total Pct Return:", round(total_pct_return*100,2), "%")
    print("Mean: ", round(mean*100,2),"%")
    print("Standard Deviation: ", round(std*100,2),"%")
    print("Median: ", round(median*100,2),"%")
    print("Max Drawdown: ", round(max_drawdown,2))
    print("Sharpe Ratio: ", round(sharpe_ratio,4))

# Stake actually placed per race: sum `cost` over the rows that were bet on.
# (Summing `cost` over every runner in the race and multiplying by the number
# of bets overstates the stake, which distorts every per-stake statistic.)
bets = (
    test_df.loc[test_df["bet_action"], ["race_id", "cost"]]
    .groupby("race_id")
    .sum()
    .reindex(pnl.index, fill_value=0)  # keep one row per race, aligned with pnl
)
print_stats(
    pnl, 
    bets["cost"], 
    scale = 1
)
