In [37]:
import json
import os
import numpy as np
import pandas as pd

def get_file_paths(directory_path):
    """Collect the path of every file found beneath ``directory_path``.

    The tree is walked recursively with ``os.walk``; sub-directories
    themselves are not returned, only the files inside them.

    Args:
        directory_path: Root directory to search.

    Returns:
        list[str]: ``os.path.join(root, name)`` for each file, in the order
        ``os.walk`` reports them. Empty when ``directory_path`` does not exist.
    """
    if os.path.exists(directory_path):
        return [
            os.path.join(folder, name)
            for folder, _subdirs, names in os.walk(directory_path)
            for name in names
        ]
    return []

import json
import numpy as np
import pandas as pd

def open_data(filepath):
    """Load and return the JSON document stored at ``filepath``.

    The file is decoded explicitly as UTF-8 instead of the platform's default
    text encoding, so non-ASCII text (e.g. accented team names) is read the
    same way on every machine.

    Args:
        filepath: Path of the JSON file to read.

    Returns:
        The parsed JSON value (whatever ``json.load`` produces for the file).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(filepath, 'r', encoding='utf-8') as file:
        return json.load(file)

def find_best_odds(df):
    """Pick, for every match, the highest team-1 and team-2 price on offer.

    Candidate columns are those whose name contains ``'team1'`` / ``'team2'``.
    Missing prices (``None`` or NaN) are ignored; when every candidate is
    missing for a row, both the best price and its source are ``None``.

    Note: as before, the four ``best_*`` columns are also written onto ``df``
    itself. They are excluded from the candidate columns, so calling this
    function again on the same frame is safe.

    Args:
        df: DataFrame with a ``'match'`` column plus per-bookmaker
            ``*team1*`` / ``*team2*`` price columns.

    Returns:
        pandas.DataFrame: columns ``match``, ``best_team1_odds``,
        ``best_team1_source``, ``best_team2_odds``, ``best_team2_source``.
        The ``*_source`` values are the names of the winning columns; on a
        tie the earliest column wins.
    """
    derived_cols = ['best_team1_odds', 'best_team1_source',
                    'best_team2_odds', 'best_team2_source']
    # The output column names themselves contain 'team1'/'team2', so they must
    # be filtered out or a second call would treat them as bookmaker prices.
    team1_cols = [col for col in df.columns if 'team1' in col and col not in derived_cols]
    team2_cols = [col for col in df.columns if 'team2' in col and col not in derived_cols]

    def max_with_source(lst, source_lst):
        max_value, max_source = None, None
        for value, source in zip(lst, source_lst):
            # pd.isna covers both None and NaN. NaN must be skipped explicitly:
            # every comparison against it is False, so once picked it could
            # never be replaced by a real price.
            if pd.isna(value):
                continue
            if max_value is None or value > max_value:
                max_value, max_source = value, source
        return max_value, max_source

    def best_per_row(cols):
        # Plain lists (rather than zip(*apply(...))) so an empty frame simply
        # produces two empty columns instead of an unpacking error.
        pairs = [max_with_source([row[col] for col in cols], cols)
                 for _, row in df.iterrows()]
        return [odds for odds, _ in pairs], [source for _, source in pairs]

    df['best_team1_odds'], df['best_team1_source'] = best_per_row(team1_cols)
    df['best_team2_odds'], df['best_team2_source'] = best_per_row(team2_cols)

    return df[['match', 'best_team1_odds', 'best_team1_source', 'best_team2_odds', 'best_team2_source']]

def arbitrage(row):
    """Return the arbitrage margin, in percent, for one row of best odds.

    Each price is converted to its implied probability (``1 / odds``). When
    the two implied probabilities sum to less than 1, the shortfall is
    returned as a percentage rounded to two decimals; otherwise 0 is returned.

    Args:
        row: Mapping-like object with ``'best_team1_odds'`` and
            ``'best_team2_odds'`` entries, each convertible with ``float``.

    Returns:
        float | int: ``round((1 - total) * 100, 2)`` when ``total < 1``,
        else ``0``.
    """
    inverse_team1 = 1 / float(row['best_team1_odds'])
    inverse_team2 = 1 / float(row['best_team2_odds'])
    book_sum = inverse_team1 + inverse_team2

    # Written as "< 1" (not ">= 1") so a NaN sum also falls through to 0.
    if book_sum < 1:
        return round((1 - book_sum) * 100, 2)
    return 0


def get_names(data):
    """Return the distinct match ids found across all bookmaker datasets.

    Args:
        data: Iterable of datasets; each dataset is an iterable of records
            that have an ``'id'`` key.

    Returns:
        numpy.ndarray: the unique ids, sorted, as produced by ``np.unique``.
    """
    all_ids = [record['id'] for dataset in data for record in dataset]
    return np.unique(all_ids)


In [40]:

# Load every odds file; each file's base name (text before the first '.') is
# used as the bookmaker label for its columns.
bookie_names = []
data = []
filepaths = get_file_paths('data/rugby_union')

for path in filepaths:
    data.append(open_data(path))
    # os.path.basename honours the platform's path separator, whereas
    # splitting on '/' breaks for the '\\'-joined paths os.path.join
    # produces on Windows.
    bookie_names.append(os.path.basename(path).split('.')[0])

match_names = get_names(data)

# One row per match id; two price columns per bookmaker are added below.
df = pd.DataFrame({'match': match_names})

for d, name in zip(data, bookie_names):
    # odds[0] is taken as the team-1 price and odds[2] as the team-2 price
    # (index 1 is presumably the draw price -- TODO confirm against the data files).
    team1_dict = {x['id']: x['odds'][0] for x in d}
    team2_dict = {x['id']: x['odds'][2] for x in d}

    # .get() leaves None where this bookmaker does not list the match.
    team1 = [team1_dict.get(match) for match in match_names]
    team2 = [team2_dict.get(match) for match in match_names]

    df[f'{name}_team1'] = team1
    df[f'{name}_team2'] = team2

best_odds = find_best_odds(df).copy()
best_odds['%_profit'] = best_odds.apply(arbitrage, axis=1)

# arbitrage() returns 0 when there is no opportunity, so keep the non-zero rows.
mask = best_odds['%_profit'] != 0
best_odds[mask]



Unnamed: 0,match,best_team1_odds,best_team1_source,best_team2_odds,best_team2_source,%_profit
16,Hurricanes Reds,1.3,TAB_team1,4.4,sportsbet_team2,0.35


In [None]:
def payout(s1, s2, odds):


In [125]:
# implied_probs = [0.7692307692307692, 0.22727272727272727]
# odds = [1.3, 4.4]

# Return stake pairs that are arbitrage opportunities and are whole-number multiples of 5;
# round stakes are used to avoid possible account suspension.
# Start at 5, 5, then keep incrementing the stake on the more likely outcome. Switch to incrementing
# the other stake once the ratio of that stake to the total reaches the implied probability of the likely outcome.
def real_bets(implied_probs, odds, step=5, max_total=150):
    """Search for round-number stake pairs that lock in an arbitrage.

    Starting from ``[step, step]``, one stake is raised by ``step`` per
    iteration: the stake on the more likely outcome while its share of the
    total is below that outcome's implied probability, otherwise the other
    stake. Every pair visited whose payout covers the total outlay whichever
    outcome wins is recorded. This follows a single greedy path, so it does
    not enumerate every possible qualifying pair.

    Args:
        implied_probs: Two implied probabilities, in the same order as ``odds``.
        odds: Two decimal odds, ``[outcome_0, outcome_1]``.
        step: Stake increment; every stake is a multiple of it. Defaults to 5.
        max_total: The search stops once the combined stake reaches this
            value (pairs are only examined while the total is below it).
            Defaults to 150.

    Returns:
        list[list]: ``[stake_0, stake_1]`` pairs in the same order as
        ``odds``, in the order they were found. Empty if none qualify.

    Raises:
        ValueError: If ``step`` is not positive (the search could not advance).
    """
    if step <= 0:
        raise ValueError('step must be positive')

    # Index of the more likely outcome; a tie resolves to index 1.
    if implied_probs[0] > implied_probs[1]:
        likely, unlikely = 0, 1
    else:
        likely, unlikely = 1, 0

    # Stakes are kept in the same order as `odds`, so stakes[i] * odds[i] is
    # the payout if outcome i wins and no re-ordering is needed at the end.
    stakes = [step, step]
    winning_stakes = []
    while sum(stakes) < max_total:
        total = sum(stakes)

        # Arbitrage: whichever outcome wins, the payout covers the total outlay.
        if all(stake * price >= total for stake, price in zip(stakes, odds)):
            winning_stakes.append(stakes.copy())

        # Steer the likely outcome's share of the total towards its implied
        # probability by choosing which stake to raise next.
        if stakes[likely] / total < implied_probs[likely]:
            stakes[likely] += step
        else:
            stakes[unlikely] += step

    return winning_stakes


In [127]:
real_bets([0.7692307692307692, 0.22727272727272727], [1.3, 4.4])

[[50, 15], [85, 25], [100, 30]]