In [1]:
import pandas as pd
import numpy as np
import math
import ast
import warnings
warnings.filterwarnings("ignore")

def get_price_volume(row):
    best_bv = row.atb_ladder['v'][0]
    best_lv = row.atl_ladder['v'][0]
    return pd.Series([best_bv, best_lv])

def get_EP(row):
    best_BV = row.best_BV
    best_LV = row.best_LV
    best_BP = row.back_best
    best_LP = row.lay_best
    P_up = best_BV/(best_BV+best_LV)
    xprice = (best_BP *(1 - P_up)) + (best_LP * P_up)
    return pd.Series([xprice])

def get_spread(row):
    amt_on_market = row.traded_volume
    total_traded = row.total_volume
    traded_prices = row.traded_volume_ladder['p']
    actual_spread = row.best_LV - row.best_BV
    tick = get_tick((row.back_best+row.lay_best)/2)
    xprice = row.expected_price
    liquidity_ratio = amt_on_market/total_traded * 100
    stdev = np.std(traded_prices)
    spread = max(1, round(pow(math.e, -liquidity_ratio)*actual_spread/2*(1+stdev)))
    spread = (spread + 1)/2
    # for horses we want to back more
    back_BP = float('%.2f'%(tick_round(xprice + spread * tick))) # back price (7)
    back_LP = float('%.2f'%(tick_round(xprice - ((spread + 1) * tick)))) # lay price (8)
    # for horses we want to lay more
    lay_BP = float('%.2f'%(tick_round(xprice + ((spread + 1) * tick)))) # back price (9)
    lay_LP = float('%.2f'%(tick_round(xprice - (spread * tick)))) # lay price (10)
    
    return pd.Series([back_BP, back_LP, lay_BP, lay_LP])
# Function to calculate total liability on the k-th horse

def get_liability(row, tradebook):
    selection = row.selection_id
    selection = tradebook.loc[tradebook['selection_id'] == selection].iloc[0]
    tradebook['lay_v_sum'] = tradebook['lay_trades'].apply(sum_v_values)
    sum_X = tradebook['lay_v_sum'].sum() # Amount layed on all horses(X_i)
    tradebook['back_v_sum'] = tradebook['back_trades'].apply(sum_v_values)
    sum_Y = sum_X + tradebook['back_v_sum'].sum() # Amount backed and layed on all horses (Y_i)

    Y_k = selection.back_trades['v']  # Amounts betted on each back-order (Y_{k,l}) on the K'th horse
    BP_k = selection.back_trades['p'] # Back prices (BP_{k,l}) on the K'th horse
    X_k = selection.lay_trades['v']  # Amounts betted on each lay-order (X_{k,j}) on the K'th horse
    LP_k = selection.lay_trades['p']  # Lay prices (LP_{k,j}) on the K'th horse

    # Indicator function for the k-th horse win
    # I_k = 1/row.expected_price
    I_k = 1

    #changing ik gives interesting results, keeping it at 1 for now to assume if all horses will win we need to match their liability
    # Total liability calculation
    TL_k = sum_X - sum_Y + I_k * (sum(np.multiply(Y_k, BP_k)) - sum(np.multiply(X_k, LP_k)))
    #TL_K = sum(volume layed) - sum(volume layed and backed)
    # + W or Loss * (sum(if kth horse wins, return from lay and back) - sum(amount payable if kth horse wins))   
    '''
    if horse loses then liablity is how much is backed -b
    else if horse wins then liablity is backed -b +profit back -loss lay
    '''
    # print(TL_k)
    return pd.Series([TL_k])

def sum_p_values(row):
    return sum(row['p'])

def sum_v_values(row):
    return sum(row['v'])

def tick_round(price):
    tick = get_tick(price)
    return tick * round(price/tick)

def get_tick(price):
    if price <= 2:
        return 0.01
    elif price <= 3:
        return 0.02
    elif price <=4:
        return 0.05
    elif price <= 6:
        return 0.1
    elif price <= 10:
        return 0.2
    elif price <= 20:
        return 0.5
    elif price <=30:
        return 1
    elif price <= 50:
        return 2
    elif price <= 100:
        return 5
    else:
        return 10
    
def init_tradebook():
    df = pd.read_csv('test_data.csv') # 1 race only (distinct market_id)
    
    # Initialize the tradebook DataFrame
    tradebook = df[['selection_id', 'selection_name', 'win']].drop_duplicates()
    tradebook['back_orders'] = None
    tradebook['lay_orders'] = None
    tradebook['back_trades'] = None
    tradebook['lay_trades'] = None

    for col in ['back_orders', 'lay_orders', 'back_trades', 'lay_trades']:
        tradebook[col] = tradebook[col].apply(lambda x: {'p': [], 'v': []})
    return tradebook

def bet_apply_commission(df, com = 0.05):

    # Total Market GPL
    df['market_gpl'] = df.groupby('market_id')['gpl'].transform(sum)

    # Apply 5% commission
    df['market_commission'] = np.where(df['market_gpl'] <= 0, 0, 0.05 * df['market_gpl'])

    # Sum of Market Winning Bets
    df['floored_gpl'] = np.where(df['gpl'] <= 0, 0, df['gpl'])
    df['market_netwinnings'] = df.groupby('market_id')['floored_gpl'].transform(sum)

    # Partition Commission According to Selection GPL
    df['commission'] = np.where(df['market_netwinnings'] == 0, 0, (df['market_commission'] * df['floored_gpl']) / (df['market_netwinnings']))

    # Calculate Selection NPL
    df['npl'] = df['gpl'] - df['commission']

    # Drop excess columns
    df = df.drop(columns = ['floored_gpl', 'market_netwinnings', 'market_commission', 'market_gpl'])

    return(df)

In [61]:
def stream(input, tradebook):
    # print(input.atb_ladder)
    input['atb_ladder'] = [ast.literal_eval(x) for x in input['atb_ladder']]
    input['atl_ladder'] = [ast.literal_eval(x) for x in input['atl_ladder']]
    input['traded_volume_ladder'] = [ast.literal_eval(x) for x in input['traded_volume_ladder']]

    input[['best_BV', 'best_LV']] = input.apply(lambda row: get_price_volume(row), axis=1, result_type='expand')
    input[['expected_price']] = input.apply(lambda row: get_EP(row), axis=1, result_type='expand')
    input[['total_volume']] = input['traded_volume'].sum()
    input[['lay_BP', 'lay_LP', 'back_BP', 'back_LP']] = input.apply(lambda row: get_spread(row), axis=1, result_type='expand')
    
    input['favorites'] = input['bsp'].rank().astype(int)
    # print(input)
    # print(np.sum(1/input['expected_price'].to_numpy()))
    capital = 1
    input.apply(lambda row: trade(row, tradebook, capital), axis=1)

    # limit:= multiplying the overround with the wagered amount

def trade(row, tradebook, capital):
    '''trade/row'''
    n_horses = len(tradebook)
    # do not trade for least favorite horse
    if row.favorites == n_horses:
        # print(f"not trading: {row.selection_id}")
        return
    
    row['liability'] = get_liability(row, tradebook)

    selection = row['selection_id']
    selection = tradebook.loc[tradebook['selection_id'] == selection].iloc[0]

    # table 18 model behaviour
    # liability_k = get_liability(_____, tradebook) # not sure 

    # overround = np.sum(1/input['expected_price'].to_numpy()) - 1 # should be the implied probability, not sure to use expected price or bsp?
    # ^ probably use XP, bsp is unknown until the start of the race
    # limit = overround * X # wagered amount, not sure how to get this

    # behaviour = 'backing' if liability_k < 0 else ('laying' if liability_k > limit else 'do not trade')
    
    # ORDER SIZING
    # top 3 favorite horses get 75% of the capital
    if row.favorites <= 3:
        proportion = 0.75 / 3
    else:
        # rest: allocate the remaining 25% with decreasing sequence
        remaining_horses = n_horses - 3
        proportion = (0.25 / remaining_horses) * (n_horses - row.favorites)
    # print(row.favorites, proportion)
    stake = capital * proportion

    if row.liability.values[0] < 0:
        # print('prefer to back')  # Submit the order
        selection.back_orders['p'].append(row.back_BP)
        selection.back_orders['v'].append(stake)
    elif row.liability.values[0] > 1000:
        # if greater than limit lay
        # print('prefer to lay')
        selection.lay_orders['p'].append(row.lay_LP)
        selection.lay_orders['v'].append(stake)
    else:
        # print('trade')
        # otherwise trade both sides with largest spread  # Submit the order
        selection.back_orders['p'].append(row.back_BP)
        selection.back_orders['v'].append(stake)

        selection.lay_orders['p'].append(row.lay_LP)
        selection.lay_orders['v'].append(stake)

    for idx, back in enumerate(selection.back_orders['p']):
        if row.back_best >= back:
            selection.back_trades['p'].append(back)
            selection.back_trades['v'].append(selection.back_orders['v'][idx])
            selection.back_orders['p'].pop(idx)
            selection.back_orders['v'].pop(idx)
            #append to trade book

    for idx, lay in enumerate(selection.lay_orders['p']):
        if row.lay_best <= lay:
            selection.lay_trades['p'].append(lay)
            selection.lay_trades['v'].append(selection.lay_orders['v'][idx])
            selection.lay_orders['p'].pop(idx)
            selection.lay_orders['v'].pop(idx)


In [62]:
# initialise trade book
tradebook = init_tradebook()
chunksize = len(tradebook['selection_id'].unique().tolist()) # get num runners in market

# start trading for each tick in the market stream
for chunk in pd.read_csv('test_data.csv', chunksize=chunksize):
    stream(chunk, tradebook)
    # print(chunk)
    # print(tradebook)
    # break
tradebook = tradebook.drop('back_orders', axis=1)
tradebook = tradebook.drop('lay_orders', axis=1)
# tradebook.to_csv('test_tradebook.csv')

In [67]:
tradebook.to_csv('test_tradebook_with_order_sizing.csv', index=False)

In [47]:
def trade2(df, stake = 1, back_odds = 'bsp', lay_odds = 'bsp'):

    """
    Betting DF should always contain: expected_price, and win (binary encoded), and the specified odds column columns
    """
    df['atb_ladder'] = [ast.literal_eval(x) for x in df['atb_ladder']]
    df['atl_ladder'] = [ast.literal_eval(x) for x in df['atl_ladder']]
    df['traded_volume_ladder'] = [ast.literal_eval(x) for x in df['traded_volume_ladder']]
    df[['best_BV', 'best_LV']] = df.apply(lambda row: get_price_volume(row), axis=1, result_type='expand')
    df[['expected_price']] = df.apply(lambda row: get_EP(row), axis=1, result_type='expand')
    df[['total_volume']] = df['traded_volume'].sum()
    df[['lay_BP', 'lay_LP', 'back_BP', 'back_LP']] = df.apply(lambda row: get_spread(row), axis=1, result_type='expand')
    
    df['bet_side'] = np.where((df["lay_BP"] >= df[back_odds]) & (df["lay_LP"] <= df[lay_odds]),
                            "P", # PUSH
                            np.where(
                                df["lay_BP"] < df[back_odds],
                                "B",
                                "L"
                            )
                       )

    df['stake'] = np.where(df['bet_side'] == "P", # PUSH
                           0,
                           np.where(
                             df['bet_side'] == "B",
                             ( (1 / df['back_BP']) - (1 / df[back_odds]) ) / (1 - (1 / df[back_odds])),
                             ( (1 / df[lay_odds]) - (1 / df['back_LP']) ) / (1 - (1 / df[lay_odds])),
                           )
                          )

    df['gpl'] = np.where(df['bet_side'] == "B", 
                         np.where(df['win'] == 1, df['stake'] * (df[back_odds]-1), -df['stake']), # PL for back bets
                         np.where(df['win'] == 1, -df['stake'] * (df[lay_odds]-1), df['stake']) # PL for lay bets
                        )

    # Apply commission and NPL
    # df = bet_apply_commission(df, com=0)

    return(df)

# Create simple PL and POT table
def bet_eval_metrics(d, side = False):

    if side:
        metrics = (d
         .groupby('bet_side', as_index=False)
         .agg({"npl": "sum", "stake": "sum"})
         .assign(pot=lambda x: x['npl'] / x['stake'])
        )
    else:
        metrics = pd.DataFrame(d
         .agg({"npl": "sum", "stake": "sum"})
        ).transpose().assign(pot=lambda x: x['npl'] / x['stake'])

    return(metrics[metrics['stake'] != 0])

# Cumulative PL by market to visually see trend and consistency
def bet_eval_chart_cPl(d):

    d = (
        d
        .groupby('market_id')
        .agg({'npl': 'sum'})
    )

    d['market_number'] = np.arange(len(d))
    d['cNpl'] = d.npl.cumsum()

    chart = px.line(d, x="market_number", y="cNpl", title='Cumulative Net Profit', template='simple_white')

    return(chart)

# Initialize an empty DataFrame to hold the results
bets = pd.DataFrame()

# Iterate over each group (each race)
for chunk in pd.read_csv('test_data.csv', chunksize=chunksize):
    # print(len(race_df['selection_id'].unique()))
    # Process the race using bet_random
    processed_race = trade2(chunk, stake=1)
    # Append the processed race to the master result DataFrame
    bets = pd.concat([bets, processed_race], ignore_index=True)

bet_eval_metrics(bets, side = True)

Unnamed: 0,bet_side,npl,stake,pot
0,B,-31.392976,32.615748,-0.96251
1,L,6.800781,25.761925,0.263986


In [63]:
def calculate_back_lay_profit_liability(df):
    '''
    if horse loses then liablity is how much is backed -b + lay profit
    else if horse wins then liablity is +back profit -loss lay
    '''

    #changed label from profit to return because we are summing that column

    df['back_liability'] = 0
    df['back_return'] = 0
    df['lay_liability'] = 0
    df['lay_return'] = 0
    
    for index, row in df.iterrows():
        back_prices = np.array(row['back_trades']['p'])
        back_volumes = np.array(row['back_trades']['v'])
        
        lay_prices = np.array(row['lay_trades']['p'])
        lay_volumes = np.array(row['lay_trades']['v'])
        
        back_liability = sum(back_volumes)
        if row['win'] == 1:
            back_return = np.dot(back_prices, back_volumes) - sum(back_volumes)
            #profit does not include outlay, ie. $1@2.00 = $2 return - $1 stake => $1 profit
        else:
            back_return = -back_liability # lose your backing stake
        
        lay_return = sum(lay_volumes)
        if row['win'] == 1:
            lay_liability = np.dot(lay_volumes, (lay_prices - 1))
        else:
            lay_liability = 0 # if horse loses then no liablity
        
        df.at[index, 'back_liability'] = back_liability
        df.at[index, 'back_return'] = back_return
        df.at[index, 'lay_liability'] = lay_liability
        df.at[index, 'lay_return'] = lay_return
    
    return df
# print(tradebook)
summary = calculate_back_lay_profit_liability(tradebook)
summary

Unnamed: 0,selection_id,selection_name,win,back_trades,lay_trades,lay_v_sum,back_v_sum,back_liability,back_return,lay_liability,lay_return
0,64318416,1. Border Leicester,1,"{'p': [8.4, 9.4, 9.4, 9.2, 9.4, 9.0, 9.0, 8.6,...","{'p': [8.6, 8.6, 9.6, 9.4, 9.4, 9.4, 8.6, 8.6,...",7.5,44.25,44.25,279.15,55.6,7.5
1,26982063,8. Serenade The Stars,0,"{'p': [], 'v': []}","{'p': [], 'v': []}",0.0,0.0,0.0,0.0,0.0,0.0
2,28092060,7. Guns Ablazing,0,"{'p': [8.2, 8.2, 8.2, 8.2], 'v': [0.25, 0.25, ...","{'p': [7.8, 7.8, 7.6, 7.6, 7.6, 7.2], 'v': [0....",1.5,1.0,1.0,-1.0,0.0,1.5
3,6465237,6. Bon Appetit,0,"{'p': [5.9, 5.8, 5.9, 5.8, 5.9, 5.9, 6.0, 6.0,...","{'p': [7.0, 6.8, 6.8, 6.8, 6.8, 6.8, 6.8, 6.6,...",14.8,91.6,91.6,-91.6,0.0,14.8
4,23947555,4. Flambeur,0,"{'p': [13.5, 13.5, 13.5, 13.5, 13.5, 13.5, 13....","{'p': [17.0, 15.5, 15.0, 15.0, 15.0, 15.0, 15....",30.15,15.3,15.3,-15.3,0.0,30.15
5,36894802,3. Mr Blackwood,0,"{'p': [26.0, 26.0, 26.0, 26.0, 21.0, 18.0, 19....","{'p': [24.0, 24.0, 24.0, 24.0, 22.0, 20.0, 20....",1.1,26.6,26.6,-26.6,0.0,1.1
6,66294934,2. Galilaeus,0,"{'p': [2.18, 2.22, 2.2, 2.24, 2.1, 2.14, 2.14,...","{'p': [2.24, 2.22, 2.14, 2.2, 2.24, 2.1, 2.2, ...",30.0,125.5,125.5,-125.5,0.0,30.0
7,41510961,9. Sir Peter,0,"{'p': [55.0, 60.0, 60.0, 70.0, 70.0, 80.0, 70....","{'p': [75.0, 75.0, 70.0, 65.0, 70.0, 70.0, 70....",3.15,5.9,5.9,-5.9,0.0,3.15


In [64]:
back_return = summary['back_return'].sum()
lay_return = summary['lay_return'].sum()
total_return = back_return + lay_return
total_return

101.44999999999908

In [65]:
total_trades = (summary['back_v_sum'] + summary['lay_v_sum']).sum()
total_trades

398.35000000000076

In [66]:
total_return/total_trades * 100

25.467553658842444