# Predicts with SP (Model must have been trained with it)

In [1]:
import joblib
import pandas as pd

model = joblib.load("../models/model_with_sp.pkl")

path = r'../utils/futures/MOONEE_VALLEY_2025-10-24.csv'

new_data = pd.read_csv(path)

# The same feature set as used during training
features = [
    'Barrier', 'SP', 'Race Distance', 'Class', 'Track Condition', 'Weather',
    'Total Runners', 'JockeyWinRate', 'TrainerWinRate', 'Jockey_ID', 'Trainer_ID',
    'HorseWinRate', 'Horse_ID'
]

# Ensure categorical features have the correct dtype
categorical_features = ['Class', 'Track Condition', 'Weather', 'Jockey_ID', 'Trainer_ID', 'Horse_ID']
for col in categorical_features:
    new_data[col] = new_data[col].astype('category')

# Extract feature subset for prediction
X_new = new_data[features]

# Predict probabilities
new_data['Predicted_Prob'] = model.predict(X_new, num_iteration=model.best_iteration)

# Convert probabilities to binary win prediction if desired
new_data["Predicted_Win"] = (new_data["Predicted_Prob"] > 0.5).astype(int)

# Rank horses per race (1 = highest probability)
new_data['Predicted_Rank'] = new_data.groupby(
    ["Date", "Track", "Race Number"]
)["Predicted_Prob"].rank(ascending=False, method="first").astype(int)

# Sort for readability
new_data = new_data.sort_values(["Date", "Track", "Race Number", "Predicted_Rank"])

# Round probabilities
new_data['Predicted_Prob'] = new_data['Predicted_Prob'].round(3) * 100

# Select only the columns you care about for the final output
output_columns = ['Race Number', 'Predicted_Rank', 'Predicted_Prob', 'SP', 'Horse', 'Barrier', 'Jockey', 'Trainer']
output_data = new_data[output_columns].copy()

# Save to CSV
track_name = new_data["Track"].iloc[0].replace(" ", "_").upper()
race_date = str(new_data["Date"].iloc[0])
output_path = f"../utils/predictions/{track_name}_{race_date}_predictions.csv"
output_data.to_csv(output_path, index=False)

print(f"Predictions saved to {output_path}")
output_data

Predictions saved to ../utils/predictions/MOONEE_VALLEY_2025-10-24_predictions.csv


Unnamed: 0,Race Number,Predicted_Rank,Predicted_Prob,SP,Horse,Barrier,Jockey,Trainer
7,1,1,88.0,4.20,YACHIYO,4,CRAIG WILLIAMS,M PRICE & M KENT JNR
3,1,2,87.2,7.50,SISTER SHAY,6,EMILY POZMAN,CRAIG WIDDISON
5,1,3,87.0,5.00,COCO JEN,5,JAMIE MOTT,M M LAURIE
8,1,4,86.5,4.80,DANCING DOLLY,2,ANTHONY ALLEN,SIMON & KATRINA ALEXANDER
4,1,5,86.0,5.00,A DIVA,9,ETHAN BROWN,RICHARD LAMING
...,...,...,...,...,...,...,...,...
92,8,9,7.6,1.01,HAARACAINE,2,NOT NOTIFIED,SIMON RYAN
98,8,10,7.6,1.01,ASHAU VALLEY,4,BEN MELHAM,CRAIG WEEDING
101,8,11,7.5,1.01,MASTERFUL,8,NOT NOTIFIED,ENVER JUSUFOVIC
95,8,12,7.4,1.01,OPENING ADDRESS,7,THOMAS STOCKDALE,L HO


In [2]:
# --- Now save top 3 horses per race with only selected columns ---
top3 = new_data[new_data['Predicted_Rank'] <= 3]
top3_output = top3[['Race Number', 'Predicted_Rank', 'Predicted_Prob', 'SP', 'Horse', 'Barrier', 'Jockey', 'Trainer']].copy()

# Sort by rank for readability
# top3_output = top3_output.sort_values('Predicted_Rank')
top3_output = top3_output.sort_values('Race Number')

# Save top 3 CSV
output_path_top3 = f"../utils/predictions/{track_name}_{race_date}_top3.csv"
top3_output.to_csv(output_path_top3, index=False)
print(f"Top 3 horses per race saved to {output_path_top3}")
top3_output

Top 3 horses per race saved to ../utils/predictions/MOONEE_VALLEY_2025-10-24_top3.csv


Unnamed: 0,Race Number,Predicted_Rank,Predicted_Prob,SP,Horse,Barrier,Jockey,Trainer
7,1,1,88.0,4.2,YACHIYO,4,CRAIG WILLIAMS,M PRICE & M KENT JNR
3,1,2,87.2,7.5,SISTER SHAY,6,EMILY POZMAN,CRAIG WIDDISON
5,1,3,87.0,5.0,COCO JEN,5,JAMIE MOTT,M M LAURIE
9,2,1,88.5,4.6,SONOFKIRK,4,JAMIE MOTT,A & S FREEDMAN
12,2,2,88.5,4.4,INVINCIBLE WOMAN,5,DAMIAN LANE,LLOYD KENNEWELL
13,2,3,87.5,4.8,BOLD SECRET,6,LACHLAN NEINDORF,PHILLIP STOKES
23,3,1,90.2,2.25,AMLETO,4,ETHAN BROWN,C MAHER & D EUSTACE
31,3,2,88.3,5.0,HOT TOO GO,6,CRAIG WILLIAMS,DANNY O'BRIEN
29,3,3,84.4,7.0,FAREWELL TO EIRE,11,ZAC SPAIN,P G MOODY
35,4,1,90.5,3.5,MYTEMPTATION,5,CRAIG WILLIAMS,ENVER JUSUFOVIC


# Model predicts without SP

In [5]:
model = joblib.load("../models/model_no_sp.pkl")

path = r'../utils/futures/MOONEE_VALLEY_2025-10-24.csv'

new_data = pd.read_csv(path)

# The same feature set as used during training
features = [
    'Barrier', 'Race Distance', 'Class', 'Track Condition', 'Weather',
    'Total Runners', 'JockeyWinRate', 'TrainerWinRate', 'Jockey_ID', 'Trainer_ID',
    'HorseWinRate', 'Horse_ID'
]

# Ensure categorical features have the correct dtype
categorical_features = ['Class', 'Track Condition', 'Weather', 'Jockey_ID', 'Trainer_ID', 'Horse_ID']
for col in categorical_features:
    new_data[col] = new_data[col].astype('category')

# Extract feature subset for prediction
X_new = new_data[features]

# Predict probabilities
new_data['Predicted_Prob'] = model.predict(X_new, num_iteration=model.best_iteration)

# Convert probabilities to binary win prediction if desired
new_data["Predicted_Win"] = (new_data["Predicted_Prob"] > 0.5).astype(int)

# Rank horses per race (1 = highest probability)
new_data['Predicted_Rank'] = new_data.groupby(
    ["Date", "Track", "Race Number"]
)["Predicted_Prob"].rank(ascending=False, method="first").astype(int)

# Sort for readability
new_data = new_data.sort_values(["Date", "Track", "Race Number", "Predicted_Rank"])

# Round probabilities
new_data['Predicted_Prob'] = new_data['Predicted_Prob'].round(5)

# Select only the columns you care about for the final output
output_columns = ['Race Number', 'Predicted_Rank', 'Predicted_Prob', 'SP', 'Horse', 'Barrier', 'Jockey', 'Trainer']
output_data = new_data[output_columns].copy()

# Save to CSV
track_name = new_data["Track"].iloc[0].replace(" ", "_").upper()
race_date = str(new_data["Date"].iloc[0])
output_path = f"../utils/predictions/{track_name}_{race_date}_predictions_no_sp.csv"
output_data.to_csv(output_path, index=False)

print(f"Predictions saved to {output_path}")
output_data

Predictions saved to ../utils/predictions/MOONEE_VALLEY_2025-10-24_predictions_no_sp.csv


Unnamed: 0,Race Number,Predicted_Rank,Predicted_Prob,SP,Horse,Barrier,Jockey,Trainer
1,1,1,0.90570,8.50,SAVILLA,3,ANTHONY ALLEN,"M, W & J HAWKES"
7,1,2,0.90236,4.20,YACHIYO,4,CRAIG WILLIAMS,M PRICE & M KENT JNR
8,1,3,0.89590,4.80,DANCING DOLLY,2,ANTHONY ALLEN,SIMON & KATRINA ALEXANDER
5,1,4,0.89334,5.00,COCO JEN,5,JAMIE MOTT,M M LAURIE
4,1,5,0.87615,5.00,A DIVA,9,ETHAN BROWN,RICHARD LAMING
...,...,...,...,...,...,...,...,...
92,8,9,0.86187,1.01,HAARACAINE,2,NOT NOTIFIED,SIMON RYAN
93,8,10,0.84802,4.80,HE'LL RIP,4,ANTHONY ALLEN,L & T CORSTENS
101,8,11,0.83568,1.01,MASTERFUL,8,NOT NOTIFIED,ENVER JUSUFOVIC
96,8,12,0.83193,1.01,DOUBLE MARKET,11,M DEE,"M, W & J HAWKES"


In [6]:
# --- Now save top 3 horses per race with only selected columns ---
top3 = new_data[new_data['Predicted_Rank'] <= 3]
top3_output = top3[['Race Number', 'Predicted_Rank', 'Predicted_Prob', 'SP', 'Horse', 'Barrier', 'Jockey', 'Trainer']].copy()

# Sort by rank for readability
top3_output = top3_output.sort_values('Race Number')

# Save top 3 CSV
output_path_top3 = f"../utils/predictions/{track_name}_{race_date}_top3_no_sp.csv"
top3_output.to_csv(output_path_top3, index=False)
print(f"Top 3 horses per race saved to {output_path_top3}")
top3_output

Top 3 horses per race saved to ../utils/predictions/MOONEE_VALLEY_2025-10-24_top3_no_sp.csv


Unnamed: 0,Race Number,Predicted_Rank,Predicted_Prob,SP,Horse,Barrier,Jockey,Trainer
1,1,1,0.9057,8.5,SAVILLA,3,ANTHONY ALLEN,"M, W & J HAWKES"
7,1,2,0.90236,4.2,YACHIYO,4,CRAIG WILLIAMS,M PRICE & M KENT JNR
8,1,3,0.8959,4.8,DANCING DOLLY,2,ANTHONY ALLEN,SIMON & KATRINA ALEXANDER
13,2,1,0.89471,4.8,BOLD SECRET,6,LACHLAN NEINDORF,PHILLIP STOKES
11,2,2,0.89438,13.0,ETERNAL DARKNESS,4,ANTHONY ALLEN,ANTHONY CHIBNALL
14,2,3,0.8877,4.4,SISSTAINABLE,2,BLAKE SHINN,T & C MCEVOY
31,3,1,0.87263,5.0,HOT TOO GO,6,CRAIG WILLIAMS,DANNY O'BRIEN
21,3,2,0.86839,11.0,EXPEETEE,2,LOGAN MCNEIL,GERALD EGAN
23,3,3,0.8604,2.25,AMLETO,4,ETHAN BROWN,C MAHER & D EUSTACE
35,4,1,0.86795,3.5,MYTEMPTATION,5,CRAIG WILLIAMS,ENVER JUSUFOVIC


# Simulate Betting

In [27]:
import pandas as pd
import random

# Load updated predictions CSV
predictions = pd.read_csv("../utils/predictions/MOONEE_VALLEY_2025-10-25_predictions.csv")

# --- Betting simulation ---
results = []

min_bet, max_bet = 10, 100

# Loop over each race
for race_num in predictions['Race Number'].unique():
    race_data = predictions[predictions['Race Number'] == race_num]
    
    # Get top-ranked horse for the race
    top_horse = race_data.loc[race_data['Predicted_Rank'] == 1].iloc[0]
    
    # Simulate race outcome using predicted probability
    predicted_prob = top_horse['Predicted_Prob']
    win = random.random() < predicted_prob
    
    # Determine bet amount based on confidence
    bet_amount = min_bet + predicted_prob * (max_bet - min_bet)
    bet_amount = round(bet_amount, 2)
    
    # Calculate payout using SP
    payout = bet_amount * top_horse['SP'] if win else -bet_amount
    
    # Record results
    results.append({
        'Race Number': race_num,
        'Horse': top_horse['Horse'],
        'Jockey': top_horse['Jockey'],
        'Predicted_Rank': top_horse['Predicted_Rank'],
        'Predicted_Prob': predicted_prob,
        'SP': top_horse['SP'],
        'Win': win,
        'Bet': bet_amount,
        'Payout': payout
    })

# Convert to DataFrame and add cumulative payout
sim_df = pd.DataFrame(results)
sim_df['Cumulative'] = sim_df['Payout'].cumsum()

sim_df


Unnamed: 0,Race Number,Horse,Jockey,Predicted_Rank,Predicted_Prob,SP,Win,Bet,Payout,Cumulative
0,1,TONKIN,ZOE LLOYD,1,89.5,2.4,True,8065.0,19356.0,19356.0
1,2,PRINCE TYCOON,DAMIAN LANE,1,89.5,3.6,True,8065.0,29034.0,48390.0
2,3,ROHESIA,ETHAN BROWN,1,89.0,4.2,True,8020.0,33684.0,82074.0
3,4,SALTY PEARL,ANTHONY ALLEN,1,89.4,2.5,True,8056.0,20140.0,102214.0
4,5,CASINO SEVENTEEN,DAMIAN LANE,1,88.4,4.2,True,7966.0,33457.2,135671.2
5,6,SHE'S UNUSUAL,ZOE LLOYD,1,87.5,3.1,True,7885.0,24443.5,160114.7
6,7,OH TOO GOOD,DAMIAN LANE,1,89.6,4.2,True,8074.0,33910.8,194025.5
7,8,CHARM STONE,BLAKE SHINN,1,89.6,2.3,True,8074.0,18570.2,212595.7
8,9,OBSERVER,MARK ZAHRA,1,90.2,2.0,True,8128.0,16256.0,228851.7
9,10,VIA SISTINA,J MCDONALD,1,90.1,2.2,True,8119.0,17861.8,246713.5
