In [5]:
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

### Procedure for getting relevant files
Go to Microsoft Storage Explorer and navigate to the following filepath: fbg-quants-core > Storage Accounts > quantsdata > Blob Containers > models-blending > PreGameDeriverAudits > {select relevant version} > Groups

There should be 45 files for each blender version; download all of them as CSVs. Create a folder called PreGameDeriverAudits and, within it, a subfolder for each model version that you want to test. Add the CSVs for each model version to the corresponding subfolder.

To run this notebook on Windows, the folder filepath needs to be changed to a Windows path and all `/` path separators need to be replaced with `\`.

In [17]:
# Root folder that holds one sub-folder of downloaded audit CSVs per model version.
# NOTE(review): this is a machine-specific absolute path; edit it before running elsewhere.
folder_filepath = "/Users/sara.hippmann/Documents/PreGameDeriverAudits"
# Sub-folder names (under folder_filepath) of the two model versions to load.
model1_version = "NFLWithBets"
model2_version = "NFLNoBets"

In [18]:
# Load every model-1 audit CSV, keyed by the group name(s) encoded in its file name:
#   PreGameBlenderOnBets-<group>.csv            -> group_data1[group]
#   PreGameBlenderOnBets-<group1>-<group2>.csv  -> pair_data1[(group1, group2)]
# os.path.join / os.path.basename keep this working with both "/" and "\" separators,
# and sorting makes the dict insertion order independent of the file-system listing order.
file_paths1 = sorted(
    glob.glob(os.path.join(folder_filepath, model1_version, "PreGameBlenderOnBets-*.csv"))
)
if not file_paths1:
    # Fail here with a clear message instead of with a KeyError in a later cell.
    raise FileNotFoundError(
        f"No PreGameBlenderOnBets-*.csv files found in {os.path.join(folder_filepath, model1_version)}"
    )
group_data1 = {}
pair_data1 = {}

for path in file_paths1:
    filename = os.path.basename(path)
    # Drop the extension, then the "PreGameBlenderOnBets" prefix; what remains is the group name(s).
    parts = os.path.splitext(filename)[0].split("-")[1:]

    if len(parts) == 1:
        group = parts[0]
        group_data1[group] = pd.read_csv(path)
    elif len(parts) == 2:
        g1, g2 = parts
        pair_data1[(g1, g2)] = pd.read_csv(path)
    # File names with any other number of "-"-separated parts are ignored.

In [19]:
# Load every model-2 audit CSV, keyed by the group name(s) encoded in its file name:
#   PreGameBlenderOnBets-<group>.csv            -> group_data2[group]
#   PreGameBlenderOnBets-<group1>-<group2>.csv  -> pair_data2[(group1, group2)]
# os.path.join / os.path.basename keep this working with both "/" and "\" separators,
# and sorting makes the dict insertion order independent of the file-system listing order.
file_paths2 = sorted(
    glob.glob(os.path.join(folder_filepath, model2_version, "PreGameBlenderOnBets-*.csv"))
)
if not file_paths2:
    # Fail here with a clear message instead of with a KeyError in a later cell.
    raise FileNotFoundError(
        f"No PreGameBlenderOnBets-*.csv files found in {os.path.join(folder_filepath, model2_version)}"
    )
group_data2 = {}
pair_data2 = {}

for path in file_paths2:
    filename = os.path.basename(path)
    # Drop the extension, then the "PreGameBlenderOnBets" prefix; what remains is the group name(s).
    parts = os.path.splitext(filename)[0].split("-")[1:]

    if len(parts) == 1:
        group = parts[0]
        group_data2[group] = pd.read_csv(path)
    elif len(parts) == 2:
        g1, g2 = parts
        pair_data2[(g1, g2)] = pd.read_csv(path)
    # File names with any other number of "-"-separated parts are ignored.

In [20]:
def kelly_value(implied_prob, predicted_prob):
    """Return the implied-probability-weighted mean log-ratio of implied to predicted.

    With ``p = implied_prob`` and ``q = predicted_prob`` this evaluates
    ``p * ln(p / q) + (1 - p) * ln((1 - p) / (1 - q))``.

    Returns ``np.nan`` when ``predicted_prob`` is exactly 0 or 1, where one of
    the two ratios would divide by zero.
    """
    if predicted_prob in (0, 1):
        return np.nan

    outcome_term = implied_prob * np.log(implied_prob / predicted_prob)
    complement_term = (1 - implied_prob) * np.log((1 - implied_prob) / (1 - predicted_prob))
    return outcome_term + complement_term

In [21]:
def kelly_value_std_dev(implied_prob, predicted_prob):
    """Return the standard deviation of the log-ratio whose mean is ``kelly_value``.

    With ``p = implied_prob`` and ``q = predicted_prob``, the two log-ratios
    ``ln(p / q)`` and ``ln((1 - p) / (1 - q))`` are weighted by ``p`` and
    ``1 - p`` respectively; this returns the square root of their weighted
    variance around the weighted mean.

    Returns ``np.nan`` when ``predicted_prob`` is exactly 0 or 1, where one of
    the two ratios would divide by zero.
    """
    if predicted_prob == 0 or predicted_prob == 1:
        return np.nan

    log_ratio = np.log(implied_prob / predicted_prob)
    log_ratio_complement = np.log((1 - implied_prob) / (1 - predicted_prob))

    # The mean must be a number: it is evaluated here with the same formula as
    # kelly_value(), rather than subtracting the function object itself (which
    # raises TypeError).
    mean = implied_prob * log_ratio + (1 - implied_prob) * log_ratio_complement

    variance = (
        implied_prob * np.power(log_ratio - mean, 2)
        + (1 - implied_prob) * np.power(log_ratio_complement - mean, 2)
    )
    return np.sqrt(variance)

In [22]:
def kelly_sharpe(implied_prob, predicted_prob, mean, std_dev):
    """Return ``mean / std_dev`` signed by which probability is larger.

    The ratio is positive when ``predicted_prob`` is strictly greater than
    ``implied_prob`` and negated otherwise (including when the two are equal).
    """
    ratio = mean / std_dev
    return ratio if predicted_prob > implied_prob else -1 * ratio

In [23]:
# Labels applied positionally to every model-1 group table (16 columns expected).
# TODO: columns need to be adapted to include errors
model1_columns = [
    'Market Type', 'Line', 'Minutes Until Off', 'Update Type',
    'Consecutive Bets', 'Consecutive Prices', 'Bookmakers', 'Bet Count',
    'Risk Limit', '% of Overs', 'Probability Band', 'Actual', 'Expected',
    'Weight-1', 'Implied-1', 'Predicted-1',
]
model1_metrics = ['Weight-1', 'Implied-1', 'Predicted-1']

for key, df in group_data1.items():
    df.columns = model1_columns
    # Keep only the column named after this table's key plus the three metric columns.
    group_data1[key] = df[[key] + model1_metrics]

In [24]:
# Labels applied positionally to every model-1 pair table (16 columns expected).
# TODO: columns need to be adapted to include errors
model1_columns = [
    'Market Type', 'Line', 'Minutes Until Off', 'Update Type',
    'Consecutive Bets', 'Consecutive Prices', 'Bookmakers', 'Bet Count',
    'Risk Limit', '% of Overs', 'Probability Band', 'Actual', 'Expected',
    'Weight-1', 'Implied-1', 'Predicted-1',
]
model1_metrics = ['Weight-1', 'Implied-1', 'Predicted-1']

for key, df in pair_data1.items():
    df.columns = model1_columns
    # Keep the two columns named by the key's two entries plus the three metric columns.
    group_columns = [key[0], key[1]]
    pair_data1[key] = df[group_columns + model1_metrics]

In [25]:
# Labels applied positionally to every model-2 group table (16 columns expected).
# TODO: columns need to be adapted to include errors
model2_columns = [
    'Market Type', 'Line', 'Minutes Until Off', 'Update Type',
    'Consecutive Bets', 'Consecutive Prices', 'Bookmakers', 'Bet Count',
    'Risk Limit', '% of Overs', 'Probability Band', 'Actual', 'Expected',
    'Weight-2', 'Implied-2', 'Predicted-2',
]
model2_metrics = ['Weight-2', 'Implied-2', 'Predicted-2']

for key, df in group_data2.items():
    df.columns = model2_columns
    # Keep only the column named after this table's key plus the three metric columns.
    group_data2[key] = df[[key] + model2_metrics]

In [26]:
# Labels applied positionally to every model-2 pair table (16 columns expected).
# TODO: columns need to be adapted to include errors
model2_columns = [
    'Market Type', 'Line', 'Minutes Until Off', 'Update Type',
    'Consecutive Bets', 'Consecutive Prices', 'Bookmakers', 'Bet Count',
    'Risk Limit', '% of Overs', 'Probability Band', 'Actual', 'Expected',
    'Weight-2', 'Implied-2', 'Predicted-2',
]
model2_metrics = ['Weight-2', 'Implied-2', 'Predicted-2']

for key, df in pair_data2.items():
    df.columns = model2_columns
    # Keep the two columns named by the key's two entries plus the three metric columns.
    group_columns = [key[0], key[1]]
    pair_data2[key] = df[group_columns + model2_metrics]

In [27]:
# Show the model-1 table stored under the 'Bet Count' group key.
print(group_data1['Bet Count'])

   Bet Count      Weight-1  Implied-1  Predicted-1
0          0  2.543241e+07   0.476567     0.476600
1          1  2.555621e+06   0.498120     0.475407
2      [2-3]  2.853749e+06   0.472810     0.476837
3      [4-6]  2.614423e+06   0.502261     0.476165
4     [7-10]  2.143540e+06   0.495631     0.477768
5    [11-16]  2.116426e+06   0.489643     0.477788
6    [17-25]  2.003949e+06   0.497631     0.478342
7    [26-41]  1.901589e+06   0.506584     0.480014
8    [42-78]  1.867072e+06   0.509076     0.482847
9  [79-4687]  1.804154e+06   0.512516     0.487422


In [28]:
# Show the model-2 table stored under the 'Bet Count' group key.
print(group_data2['Bet Count'])

   Bet Count      Weight-2  Implied-2  Predicted-2
0          0  2.543241e+07   0.476567     0.476600
1          1  2.555621e+06   0.498120     0.475338
2      [2-3]  2.853749e+06   0.472811     0.476656
3      [4-6]  2.614423e+06   0.502261     0.475890
4     [7-10]  2.143540e+06   0.495631     0.477298
5    [11-16]  2.116427e+06   0.489643     0.477131
6    [17-25]  2.004162e+06   0.497579     0.477044
7    [26-41]  1.901990e+06   0.506477     0.478122
8    [42-77]  1.835765e+06   0.508895     0.479982
9  [78-4687]  1.835637e+06   0.512578     0.481669
