In [31]:
"""Generate requirements file"""
# Snapshot the currently installed packages into requirements.txt.
# NOTE(review): `!pip3` runs whichever pip3 is first on the shell PATH, which
# may not belong to the kernel's environment — confirm, or consider `%pip`.
!pip3 freeze > requirements.txt

In [18]:
"""Import libraries"""
import numpy as np
import pandas as pd
import random

In [33]:
def implied_to_decimal(implied: float) -> float:
    """Converts an implied probability to decimal odds.

    The builtin ``float`` replaces ``np.float``, which was only an alias of it
    and no longer exists in current NumPy releases.

    Parameters
    ----------
    implied : float
        implied probability on a 0-100 scale (the function divides 100 by it)

    Returns
    -------
    float
        100 / implied
    """
    return 100.0 / implied

def implied_to_fractional(implied: float) -> float:
    """Converts an implied probability to fractional odds.

    Computed as the decimal odds from ``implied_to_decimal`` minus one.
    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    implied : float
        implied probability on a 0-100 scale

    Returns
    -------
    float
        implied_to_decimal(implied) - 1
    """
    return implied_to_decimal(implied) - 1.0

def implied_to_moneyline(implied: float) -> float:
    """Converts an implied probability to moneyline odds.

    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    implied : float
        implied probability on a 0-100 scale

    Returns
    -------
    float
        a negative value when ``implied`` is at least 50, a positive one otherwise
    """
    if implied >= 50.0:
        return -(implied / (100.0 - implied)) * 100.0
    return (100.0 - implied) / implied * 100.0
    
def kelly(odds: float, prob_of_success: float, bank: float, min_bet: float) -> float:
    """Calculates the amount of money to bet according to the Kelly criterion.

    The stake is ``bank * (p - (1 - p) / (odds - 1))``. A negative stake is
    returned unchanged; callers in this notebook interpret its sign.

    Parameters
    ----------
    odds : float
        odds offered; the formula subtracts 1 from it
    prob_of_success : float
        probability that the bet wins
    bank : float
        money available
    min_bet : float
        stakes whose absolute value is below this are dropped

    Returns
    -------
    float
        the signed stake, or 0.0 when its magnitude is below ``min_bet``
    """
    bet = bank * (prob_of_success - (1.0 - prob_of_success) / (odds - 1.0))
    # One magnitude test replaces the former pair of >= / <= branches, and the
    # fallback is a float so every path returns the same type.
    if abs(bet) >= min_bet:
        return bet
    return 0.0
     
def back_to_lay(back: float) -> float:
    """Converts the odds to back an outcome into the odds to lay said outcome.

    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    back : float
        back odds; must differ from 1 because the formula divides by ``back - 1``

    Returns
    -------
    float
        1 / (back - 1) + 1
    """
    return 1.0 / (back - 1.0) + 1.0

def profit(turnover: float, money_won: float) -> float:
    """Calculates the profit made based on money won and the turnover.

    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    turnover : float
        amount staked
    money_won : float
        amount received back

    Returns
    -------
    float
        money_won - turnover
    """
    return money_won - turnover

def return_on_investment(turnover: float, profit: float) -> float:
    """Calculates the RoI as a plain ratio of the profit to the turnover.

    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    turnover : float
        amount staked; must be non-zero
    profit : float
        profit made on that turnover

    Returns
    -------
    float
        profit / turnover (a ratio, not a percentage)
    """
    return profit / turnover

def price_to_prob(price: float) -> float:
    """Calculates the probability of the outcome from the win fair price.

    The builtin ``float`` replaces the removed ``np.float`` alias. Because the
    body is a single division it also works element-wise on a NumPy array or
    pandas Series, which is how some cells of this notebook call it.

    Parameters
    ----------
    price : float
        fair price of the outcome

    Returns
    -------
    float
        1 / price
    """
    return 1 / price

def estimated_value(odds: float, prob_of_success: float, turnover: float) -> float:
    """Calculates the estimated value of the investment.

    The builtin ``float`` replaces the removed ``np.float`` alias; the
    arithmetic is unchanged.

    Parameters
    ----------
    odds : float
        odds of the bet
    prob_of_success : float
        probability that the bet wins
    turnover : float
        amount staked

    Returns
    -------
    float
        absolute value of ``p * odds * turnover - (1 - p) * turnover``
    """
    # NOTE(review): the winning term multiplies by `odds`, whereas kelly()
    # works with `odds - 1`; the sign is also discarded by abs(). Confirm both
    # are intended before relying on this figure.
    prob_of_failure = 1 - prob_of_success
    expected_gain = prob_of_success * odds * turnover
    expected_loss = prob_of_failure * turnover
    return abs(expected_gain - expected_loss)

def PnL(turnover: float, odds: float, outcome: float, back_or_lay: str) -> float:
    """Calculates the profit (loss if negative) of a single bet.

    The builtin ``float`` replaces the removed ``np.float`` alias, and the
    result is no longer held in a local named ``profit``, which shadowed the
    module-level function of that name.

    Parameters
    ----------
    turnover : float
        amount staked
    odds : float
        odds of the bet
    outcome : float
        1 when the selection won, anything else when it lost
    back_or_lay : str
        'lay' flips the position; any other value is settled as a back bet

    Returns
    -------
    float
        ``stake * odds`` when the selection won, ``-stake`` otherwise, where
        ``stake`` is ``turnover`` or, for a lay bet, ``-turnover / odds``
    """
    stake = -turnover / odds if back_or_lay == 'lay' else turnover
    if outcome == 1:
        return stake * odds
    return -stake

def calc_commission(profit: float, rate: float = 0.05) -> float:
    """Calculates the commission on the profit.

    The builtin ``float`` replaces the removed ``np.float`` alias, and the
    previously hard-coded 5% is now the default of ``rate``.

    Parameters
    ----------
    profit : float
        gross profit of a bet
    rate : float, optional
        fraction of a positive profit taken as commission (default 0.05)

    Returns
    -------
    float
        ``profit * rate`` for a positive profit, otherwise 0.0
    """
    # Losses and break-even results carry no commission.
    return profit * rate if profit > 0 else 0.0
    
def calc_net_profit(profit: float, commission: float) -> float:
    """Calculates the net profit (profit - commission).

    The builtin ``float`` replaces the removed ``np.float`` alias.

    Parameters
    ----------
    profit : float
        gross profit of a bet
    commission : float
        commission charged on that bet

    Returns
    -------
    float
        ``profit - commission`` for a positive profit; a loss or zero is
        returned unchanged and ``commission`` is ignored
    """
    if profit > 0:
        return profit - commission
    return profit
        
def calc_roi(profit: float, turnover: float) -> float:
    """Calculates the Return on Investment in percentages.

    The builtin ``float`` and ``np.nan`` replace ``np.float`` and ``np.NaN``,
    both of which current NumPy releases no longer provide.

    Parameters
    ----------
    profit : float
        profit made
    turnover : float
        amount staked

    Returns
    -------
    float
        ``profit / turnover * 100``, or NaN when the turnover is (close to) zero
    """
    # No stake means the ratio is undefined rather than zero.
    if np.isclose(turnover, 0.0):
        return np.nan
    return (profit / turnover) * 100.0

def monte_carlo(probs: np.ndarray) -> list:
    """Runs a Monte Carlo simulation based on all the probabilities.

    Walks through ``probs`` keeping a running total. Whenever the current
    uniform draw is at or below that total, the current position is recorded
    as a winner, 1 is subtracted from the total and a new number is drawn.

    The recorded positions are 0-based. The earlier version incremented its
    counter before recording, so it returned 1-based values even though the
    results are used as list indices elsewhere in this notebook.

    Parameters
    ----------
    probs : iterable of float
        win probabilities; any iterable works (the notebook passes a Series).
        Presumably ordered race by race, each race summing to 1 — confirm
        against the data set.

    Returns
    -------
    list of int
        0-based positions in ``probs`` of the simulated winners
    """
    # Draw before building the result list, keeping the RNG call order stable.
    rand_num = random.random()
    races_won = []
    total_prob = 0.0
    for index, prob in enumerate(probs):
        total_prob += prob
        if rand_num <= total_prob:
            races_won.append(index)
            # Consume one unit of probability mass and draw for the next winner.
            total_prob -= 1
            rand_num = random.random()
    return races_won

In [34]:
# potential improvement: if a race is not valid, remove all data associated with it
def data_valid(df: pd.DataFrame, tolerance: float) -> bool:
    """Asserts if the dataset we receive is valid.

    One thing we know for sure is that the win probabilities (1 / win fair
    price) of all runners in a race must add up to 1, so that sum is checked
    for every race.

    The per-race sums come from one vectorised group-by. The earlier version
    stacked per-race Series of different lengths into ``np.array``, which
    current NumPy rejects, and re-masked the whole frame once per race.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        the dataframe we are checking; must have the columns ``race_number``
        and ``win_fair_price``
    tolerance : float
        the relative tolerance for the probabilities adding up to something
        slightly different than 1

    Returns
    -------
    bool
        True when every race's probabilities sum to 1 within the tolerance
    """
    fair_prices = df["win_fair_price"]
    race_ids = df["race_number"]
    # A missing price or race id can never satisfy the check. Decide this
    # explicitly, because the group-by below would silently skip such rows.
    if fair_prices.isna().any() or race_ids.isna().any():
        return False
    prob_sums_per_race = (1.0 / fair_prices).groupby(race_ids).sum()
    return bool(np.allclose(prob_sums_per_race.to_numpy(), 1.0, rtol=tolerance))

In [35]:
"""Declare bank and minimum stake"""
# Money available for staking and the smallest stake worth placing. Both are
# passed to kelly() as its `bank` and `min_bet` arguments.
# Plain float literals are used because np.float no longer exists in NumPy.
bank = 10000.0
min_stake = 2.0

In [36]:
"""Read the dataset"""
# Load the race data. Later cells read the columns race_number,
# win_fair_price, win_starting_price and winner from this frame.
horses = pd.read_csv("horses.csv")

In [37]:
"""Sanity check on data set"""
# Stop the notebook here unless the fair-price probabilities of every race
# add up to 1 within a relative tolerance of 1e-4.
assert data_valid(horses, tolerance=0.0001)

In [38]:
"""Update table with the type of bet (back or lay) and the turnover values"""
# Kelly stake per runner: the sign decides the side of the bet, the magnitude
# becomes the turnover.
prob_of_success = price_to_prob(horses["win_fair_price"])
odds = horses['win_starting_price']
stakes = [kelly(o, p, bank, min_stake) for o, p in zip(odds, prob_of_success)]
# Positive stake -> back, negative -> lay, zero (below the minimum) -> no bet.
back_or_lay = np.array(
    ['back' if stake > 0 else 'lay' if stake < 0 else 'neither' for stake in stakes]
)
bets = np.abs(np.array(stakes))
horses['back_or_lay'] = back_or_lay
horses['turnover'] = bets

In [39]:
"""Update table with the Estimated Values"""
# Estimated value of every bet from its starting price, fair-price
# probability and stake.
odds = horses['win_starting_price']
turnover = horses['turnover']
prob_of_success = [price_to_prob(price) for price in horses['win_fair_price']]
ev = np.array(list(map(estimated_value, odds, prob_of_success, turnover)))
horses['EV'] = ev

In [40]:
"""Update table with the Profit and Loss"""
# Settle every bet against the recorded winner.
# NOTE(review): assigning to `profit` rebinds the module-level function of
# the same name defined earlier in this notebook.
turnover = horses['turnover']
odds = horses['win_starting_price']
outcome = horses['winner']
back_or_lay = horses['back_or_lay']
profit = np.array(list(map(PnL, turnover, odds, outcome, back_or_lay)))
horses['PnL'] = profit

In [41]:
"""Update table with the commission"""
# Commission is charged row by row on the gross PnL.
profit = horses['PnL']
commission = np.array(list(map(calc_commission, profit)))
horses['commission'] = commission

In [42]:
"""Update table with the net profit"""
# Net result per row: gross PnL with the commission taken off winning bets.
profit = horses['PnL']
commission = horses['commission']
net_profit = np.array(list(map(calc_net_profit, profit, commission)))
horses['net_profit'] = net_profit

In [43]:
"""Update table with the Return on Investment"""
# Per-row return on investment in percent (NaN where nothing was staked).
profit = horses['net_profit']
turnover = horses['turnover']
RoI = np.array(list(map(calc_roi, profit, turnover)))
horses['RoI'] = RoI

In [44]:
"""Calculate the sums of the recently computed metrics"""
# Column totals of the metrics computed above.
sum_turnover = horses['turnover'].sum()
sum_ev = horses['EV'].sum()
sum_pnl = horses['PnL'].sum()
sum_commission = horses['commission'].sum()
sum_net_pnl = horses['net_profit'].sum()
# NOTE(review): this adds up per-row percentages; the Monte Carlo cell instead
# reports total net profit / total turnover * 100 — confirm which is intended.
sum_RoI = horses['RoI'].sum()

# Print in the same order as before: turnover, EV, PnL, commission, net, RoI.
for total in (sum_turnover, sum_ev, sum_pnl, sum_commission, sum_net_pnl, sum_RoI):
    print(total)

3422065.058688728
1026675.972384035
293211.0064365475
100376.32309211753
192834.68334442997
296006.0070564557


In [45]:
"""Update table with the probability of winning"""
# Win probability is the reciprocal of the fair price, computed for the whole
# column at once.
win_fair_price = horses['win_fair_price']
prob_of_win = 1 / win_fair_price.to_numpy()
horses['prob_of_win'] = prob_of_win

In [46]:
"""Save updated table to csv file"""
# Persist the frame with the derived columns added by the cells above.
# to_csv is called with its defaults, so the row index is written as well.
horses.to_csv('horses_new.csv')

In [47]:
"""Sanity check for Monte Carlo"""
# One simulated pass over the data set should yield exactly one winner per race.
num_races = 10000
simulated_winners = monte_carlo(horses['prob_of_win'])
assert len(simulated_winners) == num_races

In [49]:
"""Running some Monte Carlo experiments and logging the results, plotting histograms."""

# Per-experiment totals; each simulated run appends one entry.
# sum_turnover and sum_ev are reset as before but this cell never fills them.
sum_turnover = []
sum_ev = []
sum_pnl = []
sum_commission = []
sum_net_pnl = []
sum_RoI = []

num_experiments = 100

# Declare bank and minimum stake (plain floats: np.float no longer exists).
bank = 10000.0
min_stake = 2.0

# Stakes, bet sides and estimated values depend only on the prices, never on
# the simulated winners, so they are computed once rather than per experiment.
odds = horses['win_starting_price']
prob_of_success = [price_to_prob(price) for price in horses['win_fair_price']]
stakes = [kelly(o, p, bank, min_stake) for o, p in zip(odds, prob_of_success)]
horses['back_or_lay'] = np.array(
    ['back' if stake > 0 else 'lay' if stake < 0 else 'neither' for stake in stakes]
)
horses['turnover'] = np.abs(np.array(stakes))
turnover = horses['turnover']
back_or_lay = horses['back_or_lay']
ev = np.array([estimated_value(o, p, t) for o, p, t in zip(odds, prob_of_success, turnover)])
horses['EV'] = ev
total_turnover = np.sum(turnover)

for experiment in range(num_experiments):

    # Generate new winners with Monte Carlo.
    # NOTE(review): the values returned by monte_carlo are used directly as
    # 0-based row positions here — confirm that is what it returns.
    monte_carlo_winner = [0] * len(horses['prob_of_win'])
    for winner_position in monte_carlo(horses['prob_of_win']):
        monte_carlo_winner[winner_position] = 1
    horses['winner'] = monte_carlo_winner
    outcome = horses['winner']

    # Update table with the Profit and Loss
    profit = np.array([PnL(t, od, ou, b) for t, od, ou, b in zip(turnover, odds, outcome, back_or_lay)])
    horses['PnL'] = profit

    # Update table with the commission
    commission = np.array([calc_commission(p) for p in profit])
    horses['commission'] = commission

    # Update table with the net profit
    net_profit = np.array([calc_net_profit(p, c) for p, c in zip(profit, commission)])
    horses['net_profit'] = net_profit

    # Update table with the Return on Investment
    RoI = np.array([calc_roi(p, t) for p, t in zip(net_profit, turnover)])
    horses['RoI'] = RoI

    # Record this experiment's totals; RoI is total net profit over total
    # turnover, in percent.
    total_net_profit = np.sum(horses['net_profit'])
    sum_pnl.append(np.sum(horses['PnL']))
    sum_commission.append(np.sum(horses['commission']))
    sum_net_pnl.append(total_net_profit)
    sum_RoI.append(total_net_profit / total_turnover * 100.0)

# Append the per-experiment totals to the log. The with-block closes the file
# even when one of the writes raises.
with open("experiments.txt", "a+") as logfile:
    logfile.write("\n profit: " + str(sum_pnl) + " GBP")
    logfile.write("\n commission: " + str(sum_commission) + " GBP")
    logfile.write("\n net profit: " + str(sum_net_pnl) + " GBP")
    logfile.write("\n return on investment: " + str(sum_RoI) + " %")

import matplotlib.pyplot as plt

# One histogram per metric: (values, figure title, output file).
histograms = (
    (sum_pnl, 'Total Profit (GBP)', 'pnl_histogram.png'),
    (sum_commission, 'Total Commission (GBP)', 'commission_histogram.png'),
    (sum_net_pnl, 'Total Net Profit (GBP)', 'net_pnl_histogram.png'),
    (sum_RoI, 'Total Return On Investment (%)', 'roi_histogram.png'),
)

for position, (values, title, filename) in enumerate(histograms):
    # Clear the shared figure before every plot except the first.
    if position:
        plt.clf()
    plt.hist(values)
    plt.title(title)
    plt.savefig(filename)
