# IMPORTS

In [249]:
import time 

import random as rd
import pandas as pd 
import numpy  as np
import util 

import torch
import torch.nn as nn 
import torch.nn.functional as F 

# CONSTANTS

In [250]:
NUM_INPUTS       = 144 + 1  # num. network inputs.
NUM_OUTPUTS      = 2        # num. network outputs (1 per state, i.e. bet home/away).
N_ODDS_PER_TEAM  = 72       # num. odds figures per team. 
eta              = 1e-3     # optimizer learning rate.
discount_factor  = 0.95     # discount factor for rl update.
max_train_eps    = 1000     # max. number of training episodes. 
num_exps_to_copy = 20000    # num. exps. per train/target network sync.

# NEURAL NETWORK CLASS

In [251]:
# class to represent the deep, fully-connected neural network: 
class DeepQNN(nn.Module): 
    def __init__(self):
        super().__init__()
        # instantiate the model, which consists of a three layer 
        # (input-hidden-output) structure. 
        self.ll1 = nn.Linear(NUM_INPUTS, NUM_INPUTS)
        self.ll2 = nn.Linear(NUM_INPUTS, NUM_OUTPUTS)
        
        # initialize layers: 
        nn.init.xavier_uniform_(self.ll1.weight)
        nn.init.xavier_uniform_(self.ll2.weight)
    
    def forward(self, state):
        x1  = F.relu(self.ll1(state))
        out = self.ll2(x1)
        return out

# DATA

In [252]:
# load data:
x_train, y_train = util.load_dataset('../data/logreg_train_data.csv')

# state generator function: 
def get_curr_and_next_state():
    # track idx: 
    idx = 0
    while idx < len(x_train):
        # get current state and final winner odds: 
        curr_state, curr_lbl = x_train[idx], y_train[idx]
        final_winner_odds = curr_state[-1] if curr_lbl == 1 else curr_state[N_ODDS_PER_TEAM - 1]
        # get some random next state, it doesn't matter: 
        rd.seed(229)
        next_state_idx = rd.randint(0, len(x_train) - 1)
        next_state, next_lbl = x_train[next_state_idx], y_train[next_state_idx]
        # yield result: 
        yield (curr_state, next_state, curr_lbl, next_lbl, final_winner_odds)
        # increment the index: 
        idx += 1  
    
# ex. of how to use the generator: 
# state_gen = get_curr_and_next_state()
# curr_state, next_state, curr_lbl, next_lbl = next(state_gen)

# TRAINING LOOP SET-UP

In [253]:
# take over any available gpus: 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# init. the networks. convert to float. 
train_net, target_net = DeepQNN().to(device).float(), DeepQNN().to(device).float()

# init. optimizer: 
optimizer = torch.optim.SGD(train_net.parameters(), lr=eta)

# init. loss fn: 
loss_fn = nn.MSELoss()

# track total experiences: 
num_exps = 0 

# MAIN TRAINING LOOP

In [254]:
# set epsilon var: 
e = 0.3

# log training time: 
time_start = time.time()

# main training loop: 
for ep_idx in range(max_train_eps):
    
    # refresh loss log, balance, state generator:  
    loss_log = []
    balance = 6000     
    state_gen = get_curr_and_next_state()
    
    # episode concludes when balance <= 0 or all training states visited: 
    while (balance > 0):
        
        # acquire next state: 
        curr_state, next_state, curr_lbl, next_lbl, final_winner_odds = next(state_gen, (None, None, None, None, None))
        
        # only proceed with training if next training state is
        # non-visited (i.e., != None): 
        if curr_state is not None: 
            
            # STATE: concatenate the balance to `curr_state` to make the 
            # complete state vector and convert to tensor:
            curr_state = np.append(curr_state, [balance])
            curr_state = torch.from_numpy(curr_state).to(device)
            
            # ACTION: generate action (i.e., team to bet on and assoc. 
            # pred. prob from 0 to 1) for this state using `target_net`:
            
            # first compute q-val of each action (i.e., logits for betting
            # home or away, output from sigmoid): 
            q_vals = target_net(curr_state.float()).cpu().data.numpy()
            
            # normalize q-vals to produce prediction probabilities: 
            tot = sum(q_vals)
            pred_probs = [q_val / tot for q_val in q_vals]
            
            # identify the bet-upon team: 
            pred_team = np.argmax(pred_probs)
            
            # apply epsilon-greedy: 
            epsilon_prob = np.random.uniform()
            if epsilon_prob < e: 
                pred_team = rd.randint(0, 1)
            
            # extract associated `pred_prob``:
            pred_prob = pred_probs[pred_team]
        
            # REWARD: compute reward (i.e., winnings/losings) depending on 
            # `pred_prob` and whether `pred_team` == `curr_lbl`:   
            reward = 0
            pred_multiplier = (pred_prob - 0.5) / 0.5 if pred_prob >= 0.5 else -1 * (pred_prob - 0.5) / 0.5
            
            if pred_team == curr_lbl: 
                winnings = final_winner_odds * pred_multiplier if final_winner_odds >= 0 else 100 * pred_multiplier
                reward   = winnings
                balance += winnings  
            else: 
                losings  = 100 * pred_multiplier if final_winner_odds >= 0 else abs(final_winner_odds) * pred_multiplier
                reward   = -1 * losings
                balance -= winnings
            
            # NEXT STATE: concatenate the updated balance to the next
            # state array and convert to tensor: 
            next_state = np.append(next_state, [balance])
            next_state = torch.from_numpy(next_state).to(device)
            
            # STATE TERMINAL: state is terminal if balance <= 0:
            terminal = 1 if balance <= 0 else 0 
            
            # UPDATE NETWORK WEIGHTS: determine max q-value assoc. with
            # next state and use as target value for q-update. 
            
            # re-compute q-vals for current episode: 
            q_vals = train_net(curr_state.float())
            
            # compute the maximum q-vals for next state: 
            max_sp_q_val = target_net(next_state.float()).max(-1).values 
            
            # compute the q-update target value:
            targets = reward + (1 - terminal) * discount_factor * max_sp_q_val
            
            # update the q-values to only retain those corresponding to 
            # the executed action (i.e., which team was bet upon): 
            action_mask = F.one_hot(torch.tensor(np.array([pred_team], dtype='int64'), device=device), num_classes=2)
            # apply mask and collapse tensor for scalar comps: 
            q_vals = (action_mask * q_vals).sum(-1) 
             
            # compute loss and backprop! 
            loss = loss_fn(q_vals, targets.detach().view(1))
            optimizer.zero_grad()
            loss.backward()
            # clip gradients to prevent exploding loss:
            torch.nn.utils.clip_grad_norm_(train_net.parameters(), 1.)
            optimizer.step()
            
            # record loss: 
            loss_log.append(loss)
            
            # update experience count: 
            num_exps += 1 
            
            # copy training network weights to target network if 
            # `num_exps_to_copy` reached: 
            if num_exps % num_exps_to_copy == 0: 
                print('syncing network weights...')
                target_net.load_state_dict(train_net.state_dict())
        
        # if all training states visited, break current loop:  
        else:     
            break 
    
    # report episode count, final balance, avg. loss:
    print(f'episode {ep_idx}: ending balance :: {balance:.2f}, average exp. loss :: {sum(loss_log) / len(loss_log):.2e}')
    
    # decay epsilon before next training episode if it is still above 
    # some threshold: 
    if e > 0.05:
        e -= -0.001
            
# log training end time: 
time_end = time.time() 
print(f'training complete! took approx. {round((time_end - time_start) / 60, 2):.3f} minutes.')

# save model parameters: 
print('saving model parameters...')
torch.save(target_net.state_dict(), './trained_dqnn.params')

episode 0: ending balance :: -20776.80, average exp. loss :: 1.24e+06
episode 1: ending balance :: -1059.25, average exp. loss :: 2.27e+05
episode 2: ending balance :: -273.47, average exp. loss :: 5.38e+03
episode 3: ending balance :: -273.47, average exp. loss :: 4.82e+03
episode 4: ending balance :: -589.06, average exp. loss :: 1.81e+05
episode 5: ending balance :: -11.28, average exp. loss :: 5.55e+05
episode 6: ending balance :: -10.95, average exp. loss :: 3.70e+04
episode 7: ending balance :: -1054.24, average exp. loss :: 1.58e+05
episode 8: ending balance :: -13886.14, average exp. loss :: 5.72e+06
episode 9: ending balance :: -1108.82, average exp. loss :: 6.06e+07
episode 10: ending balance :: -17.28, average exp. loss :: 2.89e+05
episode 11: ending balance :: -1492.88, average exp. loss :: 6.05e+04
episode 12: ending balance :: -11.05, average exp. loss :: 8.09e+03
episode 13: ending balance :: 9555.77, average exp. loss :: 8.97e+05
episode 14: ending balance :: 110273.30,

# TEST SET EVALUATION

In [258]:
trained_model = DeepQNN().to(device=device)
trained_model.load_state_dict(torch.load('./trained_dqnn.params'))

<All keys matched successfully>

In [259]:
x_test, y_test = util.load_dataset('../data/logreg_test_data.csv', True)

balance = 1000

preds = []
picks = []

for entry_idx in range(len(x_test)):
    # extract entry:
    curr_state, curr_lbl = x_test[entry_idx], y_test[entry_idx]
    
    # compute q-vals & normalize for probs:
    curr_state = np.append(curr_state, [balance])
    curr_state = torch.from_numpy(curr_state).to(device)
    
    q_vals = trained_model(curr_state.float()).cpu().data.numpy()
    
    tot = sum(q_vals)
    pred_probs = [q_val / tot for q_val in q_vals]
    
    # identify the bet-upon team: 
    pred_team = np.argmax(pred_probs)
        
    # extract associated `pred_prob``:
    pred_prob = pred_probs[pred_team]

    # REWARD: compute reward (i.e., winnings/losings) depending on 
    # `pred_prob` and whether `pred_team` == `curr_lbl`:   
    reward = 0
    pred_multiplier = (pred_prob - 0.5) / 0.5 if pred_prob >= 0.5 else -1 * (pred_prob - 0.5) / 0.5
    
    if pred_team == curr_lbl: 
        winnings = final_winner_odds * pred_multiplier if final_winner_odds >= 0 else 100 * pred_multiplier
        reward   = winnings
        balance += winnings  
    else: 
        losings  = 100 * pred_multiplier if final_winner_odds >= 0 else abs(final_winner_odds) * pred_multiplier
        reward   = -1 * losings
        balance -= winnings
    
    preds.append(pred_prob)
    picks.append(pred_team)

In [265]:
# convert to 'home'/'away':
picks = ['away' if pick == 1. else 'home' for pick in picks]

In [270]:
# re-map probs. 
for idx, (pred, pick) in enumerate(zip(preds, picks)): 
    if pick == 'home': 
        preds[idx] = (preds[idx] - 1) / -2

In [272]:
test = pd.read_csv('../data/test_data.csv')

# add `probs` column to dataframe:
test['preds'] = preds

# add the picks column to the dataframe:
test['strategy_picks'] = picks

# data preview:
test.head(5)

Unnamed: 0,home_avg_odds_hr_0,home_avg_odds_hr_1,home_avg_odds_hr_2,home_avg_odds_hr_3,home_avg_odds_hr_4,home_avg_odds_hr_5,home_avg_odds_hr_6,home_avg_odds_hr_7,home_avg_odds_hr_8,home_avg_odds_hr_9,...,away_avg_odds_hr_68,away_avg_odds_hr_69,away_avg_odds_hr_70,away_avg_odds_hr_71,winner,favorite,has_favorite_for_winner,closely_contested,preds,strategy_picks
0,110.0,-110.0,100.0,110.0,110.0,110.0,110.0,110.0,110.0,110.0,...,190.0,190.0,190.0,190.0,away,home,False,True,0.125684,home
1,-200.0,-200.0,-200.0,-200.0,-190.0,-160.0,-150.0,-170.0,-200.0,-200.0,...,280.0,270.0,270.0,280.0,home,home,True,False,0.014892,home
2,160.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,140.0,...,150.0,150.0,140.0,140.0,away,away,False,True,0.012421,home
3,-190.0,-190.0,-180.0,-180.0,-170.0,-170.0,-170.0,-170.0,-180.0,-190.0,...,420.0,420.0,420.0,400.0,home,home,True,False,0.015121,home
4,-140.0,-140.0,-140.0,-140.0,-140.0,-140.0,-150.0,-140.0,-140.0,-140.0,...,400.0,400.0,400.0,400.0,home,home,True,False,0.014643,home


In [273]:
# drop any unnecessary columns: 
test = test[['home_avg_odds_hr_71', 'away_avg_odds_hr_71', 'winner', 'favorite', 'has_favorite_for_winner', 'closely_contested', 'strategy_picks', 'preds']]

# create a winner_avg_odds column: 
test['winner_avg_odds_hr_71'] = np.where(
    test['winner'] == 'home', test['home_avg_odds_hr_71'], test['away_avg_odds_hr_71']
)

# create 'relative_underdog' column: 
test['relative_underdog'] = np.where(
    test['favorite'] == 'home', 'away', 'home'
)

In [274]:
N_PROFITABLE_BETS = 0
N_TOTAL_BETS      = 0

NET_VALUE_OF_BETS_WON  = []
NET_VALUE_OF_BETS_LOST = []

UNDERDOG_BETS_HITS  = []
N_UNDERDOG_BETS_TOTAL = 0

CLEAR_WINNER_BETS_HITS = []
N_CLEAR_WINNER_BETS_TOTAL = 0

CLOSELY_CONTESTED_BETS_HITS = []
N_CLOSELY_CONTESTED_BETS_TOTAL = 0

balance = 1000

# iterate through dataframe: 
for row in test.itertuples(index=False):
    
    # if there is balance remaining: 
    if balance >= 0:
    
        # extract elements: 
        final_home_odds, final_away_odds, winner, favorite, has_favorite_for_winner, \
            closely_contested, strategy_pick, pred_prob, final_winner_odds, relative_underdog = row
            
         # update counts:
        if has_favorite_for_winner: N_CLEAR_WINNER_BETS_TOTAL += 1
        if closely_contested: N_CLOSELY_CONTESTED_BETS_TOTAL += 1
        
        # compute `pred` multiplier:
        # if betting on away (pred = [0.5, 1.0]), mulitplier = (pred - 0.5) / 0.5
        # if betting on home (pred = [0.0, 0.5)), multiplier = -1 * (pred - 0.5) / 0.5
        if pred_prob >= 0.5:
            pred_multiplier = (pred_prob - 0.5) / 0.5
        else: 
            pred_multiplier = -1 * (pred_prob - 0.5) / 0.5
        
        # if strategy bet on underdog: 
        if strategy_pick == relative_underdog: 
            N_UNDERDOG_BETS_TOTAL += 1
        
        # if pick was correct: 
        if winner == strategy_pick:
            N_PROFITABLE_BETS += 1
            
            if final_winner_odds >= 0: 
                NET_VALUE_OF_BETS_WON.append(final_winner_odds * pred_multiplier)
            if final_winner_odds < 0:
                NET_VALUE_OF_BETS_WON.append(100 * pred_multiplier)
            
            # if strategy correctly bet on underdog:
            if strategy_pick == relative_underdog:
                if final_winner_odds >= 0: 
                    UNDERDOG_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    UNDERDOG_BETS_HITS.append(100 * pred_multiplier)
                    
            # if the strategy correctly bet in a 'clear winner' match:
            if has_favorite_for_winner:
                if final_winner_odds >= 0: 
                    CLEAR_WINNER_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    CLEAR_WINNER_BETS_HITS.append(100 * pred_multiplier)
                    
            # if the strategy correctly bet in a 'closely contested' match:
            if closely_contested: 
                if final_winner_odds >= 0: 
                    CLOSELY_CONTESTED_BETS_HITS.append(final_winner_odds * pred_multiplier)
                if final_winner_odds < 0:
                    CLOSELY_CONTESTED_BETS_HITS.append(100 * pred_multiplier) 
                    
            # update balance: 
            if final_winner_odds >= 0: 
                balance += final_winner_odds * pred_multiplier
            if final_winner_odds < 0:
                balance += 100  * pred_multiplier
                                
        # if the pick was incorrect:
        elif winner != strategy_pick:
            if final_winner_odds >= 0: 
                NET_VALUE_OF_BETS_LOST.append(100 * pred_multiplier)
            if final_winner_odds < 0:
                NET_VALUE_OF_BETS_LOST.append(abs(final_winner_odds) * pred_multiplier)
                
            # update balance: 
            if final_winner_odds >= 0: 
                balance -= 100 * pred_multiplier
            if final_winner_odds < 0:
                balance -= abs(final_winner_odds) * pred_multiplier
                
        # update total bet count:
        N_TOTAL_BETS += 1

In [275]:
profitable_bet_rate       = N_PROFITABLE_BETS / N_TOTAL_BETS
overall_profit            = sum(NET_VALUE_OF_BETS_WON) - sum(NET_VALUE_OF_BETS_LOST)
largest_amt_won           = max(NET_VALUE_OF_BETS_WON)
smallest_amt_won          = min(NET_VALUE_OF_BETS_WON)
avg_amt_won               = sum(NET_VALUE_OF_BETS_WON) / len(NET_VALUE_OF_BETS_WON)
avg_amt_loss              = sum(NET_VALUE_OF_BETS_LOST) / len(NET_VALUE_OF_BETS_LOST)

underdog_bet_hr           = len(UNDERDOG_BETS_HITS) / N_UNDERDOG_BETS_TOTAL  if (N_UNDERDOG_BETS_TOTAL > 0) else 'no underdog bets made.'
avg_underdog_hit_winnings = sum(UNDERDOG_BETS_HITS) / len(UNDERDOG_BETS_HITS) if len(UNDERDOG_BETS_HITS) > 0 else 'no winning underdog bets made.'

clearwinner_bet_hr           = len(CLEAR_WINNER_BETS_HITS) / N_CLEAR_WINNER_BETS_TOTAL  if (N_CLEAR_WINNER_BETS_TOTAL > 0) else 'no clear favorite bets made.'
avg_clearwinner_hit_winnings = sum(CLEAR_WINNER_BETS_HITS) / len(CLEAR_WINNER_BETS_HITS) if len(CLEAR_WINNER_BETS_HITS) > 0 else 'no winning clear favorite bets made.'

closelycontested_bet_hr           = len(CLOSELY_CONTESTED_BETS_HITS) / N_CLOSELY_CONTESTED_BETS_TOTAL  if (N_CLOSELY_CONTESTED_BETS_TOTAL > 0) else 'no closely contested bets made.'
avg_closelycontested_hit_winnings = sum(CLOSELY_CONTESTED_BETS_HITS) / len(CLOSELY_CONTESTED_BETS_HITS) if len(CLOSELY_CONTESTED_BETS_HITS) > 0 else 'no winning closely contested bets made.'


metrics = {
    'profitable_bet_rate' : f'{profitable_bet_rate:.2f} ({N_PROFITABLE_BETS} / {N_TOTAL_BETS})',
    'overall_profit' : overall_profit,
    'largest_amt_won' : largest_amt_won,
    'smallest_amt_won' : smallest_amt_won,
    'avg_amt_won' : avg_amt_won,
    'avg_amt_loss' : avg_amt_loss,
    'underdog_bet_hr' : underdog_bet_hr, 
    'avg_underdog_hit_winnings' : avg_underdog_hit_winnings, 
    'clearwinner_bet_hr' : clearwinner_bet_hr,
    'avg_clearwinner_hit_winnings' : avg_clearwinner_hit_winnings,
    'closelycontested_bet_hr' : closelycontested_bet_hr,
    'avg_closelycontested_hit_winnings' : avg_closelycontested_hit_winnings
}

# report metrics
for metric in metrics.keys():    
    if type(metrics[metric]) is str: 
        print(f'{metric} :: {metrics[metric]}')
    else: 
        print(f'{metric} :: {metrics[metric]:.2f}')


profitable_bet_rate :: 0.59 (586 / 1000)
overall_profit :: 24592.08
largest_amt_won :: 660.70
smallest_amt_won :: 95.16
avg_amt_won :: 126.91
avg_amt_loss :: 120.24
underdog_bet_hr :: 0.34
avg_underdog_hit_winnings :: 230.27
clearwinner_bet_hr :: 0.68
avg_clearwinner_hit_winnings :: 119.55
closelycontested_bet_hr :: 0.51
avg_closelycontested_hit_winnings :: 134.42
