In [1]:
import numpy as np
import pandas as pd
from joblib import load
import os
DIR_NAME = os.getcwd()
TEST_YR = '18-19'
BET_COLS = ['B365H','B365D','B365A']

In [2]:
clf = load(DIR_NAME + '/Mods/mod.joblib')
full_df = pd.read_csv(DIR_NAME + '/Data/' + 'Feats.csv', index_col=[0,1])
X = full_df.iloc[:,1:5]
y = full_df['FTR']
mask = full_df.index.get_level_values(0) != TEST_YR
X_test = X[~mask]
y_test = pd.Series(y[~mask].values, name='Result')
odds_df = full_df[~mask].loc[:,BET_COLS]

In [3]:
def gen_betting_df(clf, X_test, y_test, odds_df):
    '''
    Computes the probabilities, predictions and amount wagered, according to the kelly criterion, for the test set.
    Stores results in DataFrame, along with the bookmakers' odds associated with the outcome bet on.
    Parameters:
    clf (estimator): the best classifier
    X_test (np Array): the test features
    y_test (np Array): the test labels
    odds_df (DataFrame): Dataframe containing the odds associated with the 3 possible outcomes of a match
    Returns:
    betting_df (Dataframe): Dataframe as described above
    '''
    probs = pd.Series(clf.predict_proba(X_test).max(axis=1), name='Prob')
    preds = pd.Series(clf.predict(X_test), name='Prediction')
    odds = pd.Series(np.where(preds == 'H', odds_df['B365H'], odds_df['B365A']), name='Odds')
    kelly_wager = pd.Series(probs - (1-probs)/(odds-1), name='Wager')
    return pd.concat((probs, preds, odds, kelly_wager, y_test),axis=1)
    
    

In [4]:
bet_df = gen_betting_df(clf, X_test, y_test, odds_df)

In [5]:
mask = (bet_df['Wager'] > 0) & (bet_df['Prob'] > .75)
total_wagered = bet_df[mask]['Wager'].sum()
final_value = sum(np.where(bet_df[mask]['Prediction']==bet_df[mask]['Result'], bet_df[mask]['Wager']*bet_df[mask]['Odds'], -bet_df[mask]['Wager']))

In [6]:
(final_value - total_wagered) / total_wagered

0.42096616131499115