In [1]:
import numpy as np
import pandas as pd 

# custom pckgs (sourced from 229 ps1): 
from logreg import LogisticRegression
from util   import load_dataset

In [2]:
# load logreg data:
x_train, y_train = load_dataset('../data/xy_train_data.csv', add_intercept=True)
x_test, y_test   = load_dataset('../data/xy_test_data.csv', add_intercept=True)

In [3]:
# instantiate and fit model: 
model = LogisticRegression(step_size=1e-1, eps=1e-5, verbose=False)
model.fit(x_train, y_train) 

# generate predictions using model: 
preds = model.predict(x_test)

# bin preds using decision threshold = 0.5:
picks = ['away' if pred >= 0.5 else 'home' for pred in preds]

In [4]:
test = pd.read_csv('../data/test_data.csv')

# add `probs` column to dataframe:
test['preds'] = preds

# add the picks column to the dataframe:
test['strategy_picks'] = picks

# data preview:
test.head(5)

Unnamed: 0,home_avg_odds_hr_0,home_avg_odds_hr_1,home_avg_odds_hr_2,home_avg_odds_hr_3,home_avg_odds_hr_4,home_avg_odds_hr_5,home_avg_odds_hr_6,home_avg_odds_hr_7,home_avg_odds_hr_8,home_avg_odds_hr_9,...,away_avg_odds_hr_68,away_avg_odds_hr_69,away_avg_odds_hr_70,away_avg_odds_hr_71,winner,favorite,has_favorite_for_winner,closely_contested,preds,strategy_picks
0,110.0,-110.0,100.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,...,190.0,190.0,190.0,190.0,away,home,False,True,0.191402,home
1,-200.0,-200.0,-200.0,-200.0,-190.0,-160.0,-150.0,-170.0,-200.0,-200.0,...,280.0,270.0,270.0,280.0,home,home,True,False,0.549023,away
2,160.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,...,150.0,150.0,140.0,140.0,away,away,False,True,0.46458,home
3,-190.0,-190.0,-180.0,-180.0,-170.0,-170.0,-170.0,-170.0,-180.0,-190.0,...,420.0,420.0,420.0,400.0,home,home,True,False,0.295706,home
4,-140.0,-140.0,-140.0,-140.0,-140.0,-140.0,-150.0,-140.0,-140.0,-140.0,...,400.0,400.0,400.0,400.0,home,home,True,False,0.221232,home


In [5]:
# drop any unnecessary columns: 
test = test[['home_avg_odds_hr_71', 'away_avg_odds_hr_71', 'winner', 'favorite', 'has_favorite_for_winner', 'closely_contested', 'strategy_picks', 'preds']]

# create a winner_avg_odds column: 
test['winner_avg_odds_hr_71'] = np.where(
    test['winner'] == 'home', test['home_avg_odds_hr_71'], test['away_avg_odds_hr_71']
)

# create 'relative_underdog' column: 
test['relative_underdog'] = np.where(
    test['favorite'] == 'home', 'away', 'home'
)

# preview:
test.head(3)

Unnamed: 0,home_avg_odds_hr_71,away_avg_odds_hr_71,winner,favorite,has_favorite_for_winner,closely_contested,strategy_picks,preds,winner_avg_odds_hr_71,relative_underdog
0,110.0,190.0,away,home,False,True,home,0.191402,190.0,away
1,-140.0,280.0,home,home,True,False,away,0.549023,-140.0,away
2,150.0,140.0,away,away,False,True,home,0.46458,140.0,home


In [8]:
N_PROFITABLE_BETS = 0
N_TOTAL_BETS      = 0

NET_VALUE_OF_BETS_WON  = []
NET_VALUE_OF_BETS_LOST = []

UNDERDOG_BETS_HITS  = []
N_UNDERDOG_BETS_TOTAL = 0

CLEAR_WINNER_BETS_HITS = []
N_CLEAR_WINNER_BETS_TOTAL = 0

CLOSELY_CONTESTED_BETS_HITS = []
N_CLOSELY_CONTESTED_BETS_TOTAL = 0

balance = 1000

# iterate through dataframe: 
for row in test.itertuples(index=False):
    
    # if there is balance remaining: 
    if balance >= 0:
    
        # extract elements: 
        final_home_odds, final_away_odds, winner, favorite, has_favorite_for_winner, \
            closely_contested, strategy_pick, pred_prob, final_winner_odds, relative_underdog = row
            
         # update counts:
        if has_favorite_for_winner: N_CLEAR_WINNER_BETS_TOTAL += 1
        if closely_contested: N_CLOSELY_CONTESTED_BETS_TOTAL += 1
        
        # compute `pred` multiplier:
        # if betting on away (pred = [0.5, 1.0]), mulitplier = (pred - 0.5) / 0.5
        # if betting on home (pred = [0.0, 0.5)), multiplier = -1 * (pred - 0.5) / 0.5
        if pred_prob >= 0.5:
            pred_multiplier = (pred_prob - 0.5) / 0.5
        else: 
            pred_multiplier = -1 * (pred_prob - 0.5) / 0.5
        
        # if strategy bet on underdog: 
        if strategy_pick == relative_underdog: 
            N_UNDERDOG_BETS_TOTAL += 1
        
        # if pick was correct: 
        if winner == strategy_pick:
            N_PROFITABLE_BETS += 1
            
            if final_winner_odds >= 0: 
                NET_VALUE_OF_BETS_WON.append(final_winner_odds * pred_multiplier)
            if final_winner_odds < 0:
                NET_VALUE_OF_BETS_WON.append(100 * pred_multiplier)
            
            # if strategy correctly bet on underdog:
            if strategy_pick == relative_underdog:
                if final_winner_odds >= 0: 
                    UNDERDOG_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    UNDERDOG_BETS_HITS.append(100 * pred_multiplier)
                    
            # if the strategy correctly bet in a 'clear winner' match:
            if has_favorite_for_winner:
                if final_winner_odds >= 0: 
                    CLEAR_WINNER_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    CLEAR_WINNER_BETS_HITS.append(100 * pred_multiplier)
                    
            # if the strategy correctly bet in a 'closely contested' match:
            if closely_contested: 
                if final_winner_odds >= 0: 
                    CLOSELY_CONTESTED_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    CLOSELY_CONTESTED_BETS_HITS.append(100 * pred_multiplier) 
                    
            # update balance: 
            if final_winner_odds >= 0: 
                balance += final_winner_odds * pred_multiplier
            if final_winner_odds < 0:
                balance += 100  * pred_multiplier
                                
        # if the pick was incorrect:
        elif winner != strategy_pick:
            if final_winner_odds >= 0: 
                NET_VALUE_OF_BETS_LOST.append(100 * pred_multiplier)
            if final_winner_odds < 0:
                NET_VALUE_OF_BETS_LOST.append(abs(final_winner_odds) * pred_multiplier)
                
            # update balance: 
            if final_winner_odds >= 0: 
                balance -= 100 * pred_multiplier
            if final_winner_odds < 0:
                balance -= abs(final_winner_odds) * pred_multiplier
                
        # update total bet count:
        N_TOTAL_BETS += 1

In [9]:
profitable_bet_rate       = N_PROFITABLE_BETS / N_TOTAL_BETS
overall_profit            = sum(NET_VALUE_OF_BETS_WON) - sum(NET_VALUE_OF_BETS_LOST)
largest_amt_won           = max(NET_VALUE_OF_BETS_WON)
smallest_amt_won          = min(NET_VALUE_OF_BETS_WON)
avg_amt_won               = sum(NET_VALUE_OF_BETS_WON) / len(NET_VALUE_OF_BETS_WON)
avg_amt_loss              = sum(NET_VALUE_OF_BETS_LOST) / len(NET_VALUE_OF_BETS_LOST)

underdog_bet_hr           = len(UNDERDOG_BETS_HITS) / N_UNDERDOG_BETS_TOTAL  if (N_UNDERDOG_BETS_TOTAL > 0) else 'no underdog bets made.'
avg_underdog_hit_winnings = sum(UNDERDOG_BETS_HITS) / len(UNDERDOG_BETS_HITS) if len(UNDERDOG_BETS_HITS) > 0 else 'no winning underdog bets made.'

clearwinner_bet_hr           = len(CLEAR_WINNER_BETS_HITS) / N_CLEAR_WINNER_BETS_TOTAL  if (N_CLEAR_WINNER_BETS_TOTAL > 0) else 'no clear favorite bets made.'
avg_clearwinner_hit_winnings = sum(CLEAR_WINNER_BETS_HITS) / len(CLEAR_WINNER_BETS_HITS) if len(CLEAR_WINNER_BETS_HITS) > 0 else 'no winning clear favorite bets made.'

closelycontested_bet_hr           = len(CLOSELY_CONTESTED_BETS_HITS) / N_CLOSELY_CONTESTED_BETS_TOTAL  if (N_CLOSELY_CONTESTED_BETS_TOTAL > 0) else 'no closely contested bets made.'
avg_closelycontested_hit_winnings = sum(CLOSELY_CONTESTED_BETS_HITS) / len(CLOSELY_CONTESTED_BETS_HITS) if len(CLOSELY_CONTESTED_BETS_HITS) > 0 else 'no winning closely contested bets made.'


metrics = {
    'profitable_bet_rate' : f'{profitable_bet_rate:.2f} ({N_PROFITABLE_BETS} / {N_TOTAL_BETS})',
    'overall_profit' : overall_profit,
    'largest_amt_won' : largest_amt_won,
    'smallest_amt_won' : smallest_amt_won,
    'avg_amt_won' : avg_amt_won,
    'avg_amt_loss' : avg_amt_loss,
    'underdog_bet_hr' : underdog_bet_hr, 
    'avg_underdog_hit_winnings' : avg_underdog_hit_winnings, 
    'clearwinner_bet_hr' : clearwinner_bet_hr,
    'avg_clearwinner_hit_winnings' : avg_clearwinner_hit_winnings,
    'closelycontested_bet_hr' : closelycontested_bet_hr,
    'avg_closelycontested_hit_winnings' : avg_closelycontested_hit_winnings
}

# report metrics
for metric in metrics.keys():    
    if type(metrics[metric]) is str: 
        print(f'{metric} :: {metrics[metric]}')
    else: 
        print(f'{metric} :: {metrics[metric]:.2f}')


profitable_bet_rate :: 0.65 (654 / 1000)
overall_profit :: 17652.90
largest_amt_won :: 108.24
smallest_amt_won :: 0.03
avg_amt_won :: 40.44
avg_amt_loss :: 25.41
underdog_bet_hr :: 0.36
avg_underdog_hit_winnings :: 14.44
clearwinner_bet_hr :: 0.76
avg_clearwinner_hit_winnings :: 59.57
closelycontested_bet_hr :: 0.56
avg_closelycontested_hit_winnings :: 17.81
