Total_Turnover: 3422065.0586887286  
Total_PnL: 147777.01029799972  
Total_Commission: 72974.63452212434  
Total_Net_PnL: 74802.37577587538  

In [1]:
import pandas as pd, numpy as np

In [2]:
# Load the race data from horses.csv into the working DataFrame used by the cells below.
df = pd.read_csv('horses.csv')

In [3]:
# Preview the first rows of the data set.
df.head()

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner
0,1,4,1.7353,1.7098,1
1,1,1,6.0313,6.0914,0
2,1,5,7.6923,7.5101,0
3,1,6,20.3325,20.4978,0
4,1,2,23.9991,23.471,0


In [4]:
# Preview the last rows of the data set.
df.tail()

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner
86643,10000,7,14.4772,13.8269,0
86644,10000,5,29.0062,29.8636,0
86645,10000,3,50.8005,48.9715,0
86646,10000,9,310.1959,305.6448,0
86647,10000,2,621.193,601.6791,0


In [5]:
# Show the dtype pandas inferred for each column.
df.dtypes

race_number             int64
saddle_number           int64
win_fair_price        float64
win_starting_price    float64
winner                  int64
dtype: object

In [6]:
# Number of rows in the data set (one row per runner per race).
len(df)

86648

### Calculate overlay
As an example, if *win_starting_price* is the price the bookmaker offers on the horse as the race starts, and *win_fair_price* is our own fair price for the horse (so our estimated probability of it winning is `1 / win_fair_price`), we calculate the value (overlay) as:  
`((1 / win_fair_price) × win_starting_price) - 1`, i.e. (estimated win probability × starting price) - 100%.

In [7]:
# Overlay = (our estimated win probability * offered odds) - 1.
# The estimated probability is the reciprocal of our fair price.
estimated_prob_winning = df["win_fair_price"].rdiv(1)
odds = df["win_starting_price"]
df["overlay"] = estimated_prob_winning.mul(odds).sub(1)
df

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner,overlay
0,1,4,1.7353,1.7098,1,-0.014695
1,1,1,6.0313,6.0914,0,0.009965
2,1,5,7.6923,7.5101,0,-0.023686
3,1,6,20.3325,20.4978,0,0.008130
4,1,2,23.9991,23.4710,0,-0.022005
...,...,...,...,...,...,...
86643,10000,7,14.4772,13.8269,0,-0.044919
86644,10000,5,29.0062,29.8636,0,0.029559
86645,10000,3,50.8005,48.9715,0,-0.036004
86646,10000,9,310.1959,305.6448,0,-0.014672


### Kelly Criterion

The formula used to calculate the fraction for a lay bet is:
Full Lay Kelly = $ f = (q - p × (d - 1)) / (d - 1) $ where 
- f = fraction of the current bank to wager.  
- d = decimal betting odds.
- p = perceived probability of selection winning.
- q = perceived probability of selection losing, which is 1 – p.

The formula used to calculate the fraction for a back bet is:
Full Back Kelly = $ f = (p × (d - 1) - q) / (d - 1) $ where

- f = fraction of the current bank to wager.
- d = decimal betting odds.
- p = perceived probability of selection winning.
- q = perceived probability of selection losing, which is 1 – p.  

Betting full Kelly, we can also calculate the stake as `bankroll * (abs(overlay) / (odds - 1))`, i.e. using the absolute value of the overlay so that the stake is positive for both back and lay bets.

In [8]:
# Sanity check: count missing vs. present overlay values.
overlay_is_null = df["overlay"].isna()
overlay_is_not_null = ~overlay_is_null
for mask in (overlay_is_null, overlay_is_not_null):
    print(mask.value_counts())

False    86648
Name: overlay, dtype: int64
True    86648
Name: overlay, dtype: int64


In [9]:
# A negative overlay means the offered price is below our fair price, so we
# lay the horse; otherwise we back it.
is_lay = df["overlay"] < 0
df["side"] = np.where(is_lay, "lay", "back")

# Full-Kelly stake on a bankroll of 10000: overlay / (odds - 1), with the sign
# of the overlay flipped for lay bets so the stake is not negative.
unsigned_overlay = df["overlay"].where(~is_lay, df["overlay"] * (-1))
df["kelly"] = 10000 * (unsigned_overlay / (odds - 1))

### Turnover

In [10]:
# Only Kelly stakes above 2 are actually placed; every other row contributes
# zero turnover.
kelly_stake = df["kelly"]
df["turnover"] = kelly_stake.where(kelly_stake > 2, 0)
df

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner,overlay,side,kelly,turnover
0,1,4,1.7353,1.7098,1,-0.014695,lay,207.028254,207.028254
1,1,1,6.0313,6.0914,0,0.009965,back,19.571600,19.571600
2,1,5,7.6923,7.5101,0,-0.023686,lay,36.383502,36.383502
3,1,6,20.3325,20.4978,0,0.008130,back,4.169620,4.169620
4,1,2,23.9991,23.4710,0,-0.022005,lay,9.792618,9.792618
...,...,...,...,...,...,...,...,...,...
86643,10000,7,14.4772,13.8269,0,-0.044919,lay,35.019301,35.019301
86644,10000,5,29.0062,29.8636,0,0.029559,back,10.240995,10.240995
86645,10000,3,50.8005,48.9715,0,-0.036004,lay,7.505203,7.505203
86646,10000,9,310.1959,305.6448,0,-0.014672,lay,0.481600,0.000000


### Expected Value

In [11]:
# Expected value of each bet at its Kelly stake, using our estimated win
# probability. Note the stake is df["kelly"], not df["turnover"], so rows whose
# stake was filtered out of the turnover still get an EV.
stake = df["kelly"]
expected_if_horse_wins = estimated_prob_winning * ((odds - 1) * stake)
expected_if_horse_loses = (1 - estimated_prob_winning) * stake

back_bets = df["side"] == "back"
lay_bets = df["side"] == "lay"

# A backer gains when the horse wins; a layer gains when it loses.
EV_backers = expected_if_horse_wins - expected_if_horse_loses
EV_layers = expected_if_horse_loses - expected_if_horse_wins
df["EV"] = np.where(back_bets, EV_backers, EV_layers)
df

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner,overlay,side,kelly,turnover,EV
0,1,4,1.7353,1.7098,1,-0.014695,lay,207.028254,207.028254,3.042252
1,1,1,6.0313,6.0914,0,0.009965,back,19.571600,19.571600,0.195025
2,1,5,7.6923,7.5101,0,-0.023686,lay,36.383502,36.383502,0.861780
3,1,6,20.3325,20.4978,0,0.008130,back,4.169620,4.169620,0.033898
4,1,2,23.9991,23.4710,0,-0.022005,lay,9.792618,9.792618,0.215486
...,...,...,...,...,...,...,...,...,...,...
86643,10000,7,14.4772,13.8269,0,-0.044919,lay,35.019301,35.019301,1.573029
86644,10000,5,29.0062,29.8636,0,0.029559,back,10.240995,10.240995,0.302716
86645,10000,3,50.8005,48.9715,0,-0.036004,lay,7.505203,7.505203,0.270214
86646,10000,9,310.1959,305.6448,0,-0.014672,lay,0.481600,0.000000,0.007066


### PnL

In [12]:
# Settle every bet against the recorded result.
horse_won = df["winner"] == 1
horse_lost = df["winner"] == 0
is_back = df["side"] == "back"
is_lay = df["side"] == "lay"

back_winners = horse_won & is_back
back_losers = horse_lost & is_back
lay_winners = horse_lost & is_lay
lay_losers = horse_won & is_lay

# Payouts: a winning back bet earns stake * (odds - 1) and a losing one costs
# the stake; a winning lay bet earns the stake and a losing one costs
# stake * (odds - 1). Rows matching no condition get np.select's default of 0.
stake = df["turnover"]
conditions = [back_winners, back_losers, lay_winners, lay_losers]
choices = [stake * (odds - 1), -stake, stake, -stake * (odds - 1)]
df["PnL"] = np.select(conditions, choices)

# Sanity check: summed over the four masks, the True count should equal the
# number of rows (each bet falls into exactly one outcome).
outcome_counts = back_winners.value_counts()
for mask in (back_losers, lay_winners, lay_losers):
    outcome_counts = outcome_counts + mask.value_counts()
print(outcome_counts)
df

False    259944
True      86648
dtype: int64


Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner,overlay,side,kelly,turnover,EV,PnL
0,1,4,1.7353,1.7098,1,-0.014695,lay,207.028254,207.028254,3.042252,-146.948654
1,1,1,6.0313,6.0914,0,0.009965,back,19.571600,19.571600,0.195025,-19.571600
2,1,5,7.6923,7.5101,0,-0.023686,lay,36.383502,36.383502,0.861780,36.383502
3,1,6,20.3325,20.4978,0,0.008130,back,4.169620,4.169620,0.033898,-4.169620
4,1,2,23.9991,23.4710,0,-0.022005,lay,9.792618,9.792618,0.215486,9.792618
...,...,...,...,...,...,...,...,...,...,...,...
86643,10000,7,14.4772,13.8269,0,-0.044919,lay,35.019301,35.019301,1.573029,35.019301
86644,10000,5,29.0062,29.8636,0,0.029559,back,10.240995,10.240995,0.302716,-10.240995
86645,10000,3,50.8005,48.9715,0,-0.036004,lay,7.505203,7.505203,0.270214,7.505203
86646,10000,9,310.1959,305.6448,0,-0.014672,lay,0.481600,0.000000,0.007066,0.000000


### Commission, Net PnL & RoI

In [13]:
# Commission is 5% of the race PnL, charged only in races whose race PnL is
# positive. It is spread evenly over the rows of the race so that summing the
# per-row commission gives the race-level amount.
pnl_by_race = df.groupby("race_number")["PnL"]
race_pnl = pnl_by_race.transform("sum")
race_size = pnl_by_race.transform("count")
commission = race_pnl * 0.05
df["commission"] = np.where(race_pnl > 0, commission / race_size, 0)

df["Net PnL"] = df["PnL"] - df["commission"]

# Return on investment per bet; rows with no turnover are reported as 0
# instead of dividing by zero.
has_turnover = df["turnover"] != 0
df["RoI"] = (df["Net PnL"] / df["turnover"]).where(has_turnover, 0)
df

Unnamed: 0,race_number,saddle_number,win_fair_price,win_starting_price,winner,overlay,side,kelly,turnover,EV,PnL,commission,Net PnL,RoI
86618,9997,1,2.5353,2.6036,0,0.02694,back,167.994591,167.994591,4.525709,-167.994591,0.0,-167.994591,-1.0
86619,9997,10,7.0958,6.7215,1,-0.05275,lay,92.195253,92.195253,4.863255,-527.495138,0.0,-527.495138,-5.7215
86620,9997,4,7.2525,7.1381,0,-0.015774,lay,25.698296,25.698296,0.405362,25.698296,0.0,25.698296,1.0
86621,9997,8,9.4685,9.2764,0,-0.020288,lay,24.513465,24.513465,0.497337,24.513465,0.0,24.513465,1.0
86622,9997,7,15.8223,15.9724,0,0.009487,back,6.336065,6.336065,0.060108,-6.336065,0.0,-6.336065,-1.0
86623,9997,6,18.336,17.7262,0,-0.033257,lay,19.883166,19.883166,0.661254,19.883166,0.0,19.883166,1.0
86624,9997,2,22.5553,21.2111,0,-0.059596,lay,29.486643,29.486643,1.757279,29.486643,0.0,29.486643,1.0
86625,9997,3,32.0015,32.8333,0,0.025993,back,8.165202,8.165202,0.212234,-8.165202,0.0,-8.165202,-1.0
86626,9997,9,60.6745,61.3768,0,0.011575,back,1.917107,0.0,0.02219,-0.0,0.0,-0.0,0.0
86627,9997,5,88.2501,85.6967,0,-0.028934,lay,3.416152,3.416152,0.098842,3.416152,0.0,3.416152,1.0


### Total Turnover, Total EV, Total PnL, Total Net PnL, Total RoI


In [14]:
# Aggregate the per-bet columns over the whole data set and report them.
Total_Turnover, Total_PnL, Total_Commission, Total_Net_PnL = (
    df[column].sum() for column in ("turnover", "PnL", "commission", "Net PnL")
)
print(
    f'Total_Turnover: {Total_Turnover}',
    f'Total_PnL: {Total_PnL}',
    f'Total_Commission: {Total_Commission}',
    f'Total_Net_PnL: {Total_Net_PnL}',
    sep="\n",
)

Total_Turnover: 3422065.0586887286
Total_PnL: 147777.01029799972
Total_Commission: 72974.63452212434
Total_Net_PnL: 74802.37577587538


### Running a Monte Carlo simulation

In [15]:
import random

This is a very simplistic way of running a Monte Carlo simulation. The idea is to generate random race outcomes from our estimated win probabilities and check how they impact our PnL. For each simulation run we:
- generate a random number between 0 and 1 for each bet
- if the random number associated with a bet is less than the estimated win probability of the horse, the horse is marked as a winner (winner = 1); otherwise it is marked as a loser (winner = 0).

In [16]:
# Monte Carlo re-run of the strategy. Each simulation draws one uniform random
# number per runner, marks the runner a winner when its estimated win
# probability (1 / win_fair_price) exceeds the draw, and re-settles every bet.
# The Net PnL total of each simulation is stored in randomly_generated_results
# under the key 'Net_PnL_<k>'.
#
# NOTE: runners are sampled independently of each other, so a simulated race
# can end up with no winner or with several winners.
N_SIMULATIONS = 10
BANKROLL = 10000
MIN_STAKE = 2  # Kelly stakes of 2 or less are not placed
COMMISSION_RATE = 0.05  # share of the race PnL charged when the race PnL is positive

rng = np.random.default_rng()
# The input file does not change between simulations: read it once and give
# every simulation its own copy.
source_data = pd.read_csv('horses.csv')

randomly_generated_results = {}
for i in range(N_SIMULATIONS):
    df = source_data.copy()

    # Derive the inputs from this frame instead of relying on Series left over
    # from earlier cells.
    estimated_prob_winning = 1 / df["win_fair_price"]
    odds = df["win_starting_price"]
    df["random_prob_winning"] = rng.random(len(df))
    df["estimated_prob_winning"] = estimated_prob_winning

    # start the process again
    df["overlay"] = (estimated_prob_winning * odds) - 1
    # abs() covers both sides and also gives a stake of 0 (not NaN) at overlay == 0.
    df["kelly"] = BANKROLL * (df["overlay"].abs() / (odds - 1))
    df["turnover"] = np.where(df["kelly"] > MIN_STAKE, df["kelly"], 0)

    df["side"] = np.where(df["overlay"] < 0, "lay", "back")
    back_bets = df["side"] == "back"
    lay_bets = df["side"] == "lay"

    # Simulated result: overwrite the historical winner flag on every row, so
    # no recorded result leaks into the simulation.
    df["winner"] = (estimated_prob_winning > df["random_prob_winning"]).astype(int)

    back_winners = (df["winner"] == 1) & back_bets
    back_losers = (df["winner"] == 0) & back_bets
    lay_winners = (df["winner"] == 0) & lay_bets
    lay_losers = (df["winner"] == 1) & lay_bets

    expected_if_horse_wins = estimated_prob_winning * ((odds - 1) * df["kelly"])
    expected_if_horse_loses = (1 - estimated_prob_winning) * df["kelly"]
    df["EV"] = np.where(
        back_bets,
        expected_if_horse_wins - expected_if_horse_loses,
        expected_if_horse_loses - expected_if_horse_wins,
    )

    stake = df["turnover"]
    df["PnL"] = np.select(
        [back_winners, back_losers, lay_winners, lay_losers],
        [stake * (odds - 1), -stake, stake, -stake * (odds - 1)],
    )

    # Commission follows the race-level rule: a share of the race PnL in every
    # race whose race PnL is positive, spread evenly over the rows of the race.
    pnl_by_race = df.groupby("race_number")["PnL"]
    race_pnl = pnl_by_race.transform("sum")
    race_size = pnl_by_race.transform("count")
    df["commission"] = np.where(race_pnl > 0, race_pnl * COMMISSION_RATE / race_size, 0)

    df["Net PnL"] = df["PnL"] - df["commission"]
    # Rows without turnover are reported with an RoI of 0 instead of dividing by zero.
    df["RoI"] = (df["Net PnL"] / df["turnover"]).where(df["turnover"] != 0, 0)
    randomly_generated_results[f'Net_PnL_{i+1}'] = df["Net PnL"].sum()

print(randomly_generated_results)


{'Net_PnL_1': 61884.33312982467, 'Net_PnL_2': 69950.45950790982, 'Net_PnL_3': 60364.31264668525, 'Net_PnL_4': 21327.360984510775, 'Net_PnL_5': 6486.86270558596, 'Net_PnL_6': 23269.32170650651, 'Net_PnL_7': 63438.47977949769, 'Net_PnL_8': 28985.992274403652, 'Net_PnL_9': -52618.68697264405, 'Net_PnL_10': 100342.86084470681}
