In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

# Importing Dataset

In [None]:
# Load the season's odds/results spreadsheet (path is relative to the working directory)
df = pd.read_excel("nhl odds 2021-22.xlsx")

In [None]:
#https://towardsdatascience.com/can-we-beat-the-bookmaker-with-machine-learning-45e3b30fc921
# Build the team vocabulary from BOTH columns and sort it.
# Iterating a bare set of strings yields a different order in every kernel session
# (string hash randomisation), which would silently change every team id between runs;
# sorting makes the ids stable. Including the visitors guards against a team that never
# appears in the 'Home' column.
teams = sorted(set(df['Home']) | set(df['Visitors'])) # every team present in the data
n_teams = len(teams) # number of teams
teamToIdx = {t: i for i, t in enumerate(teams)} # our team vocabulary

# assigning the games' home teams their corresponding team id
homeId = [teamToIdx[team] for team in df['Home']]
df['Home_Id'] = homeId # creating a new column for the home team id

# assigning the games' visiting teams their corresponding team id
visitorsId = [teamToIdx[team] for team in df['Visitors']]
df['Visitors_Id'] = visitorsId # creating a new column for the away team id

In [None]:
# Summarise column names, non-null counts and dtypes of the loaded frame
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1238 entries, 0 to 1237
Data columns (total 17 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   Date                    1238 non-null   int64  
 1   Home                    1238 non-null   object 
 2   Home Moneyline          1238 non-null   int64  
 3   Home Puckline           1238 non-null   float64
 4   Home Puckline Odds      1238 non-null   int64  
 5   Over Under Total        1238 non-null   float64
 6   Over                    1238 non-null   int64  
 7   Under                   1238 non-null   int64  
 8   Home Goals              1238 non-null   int64  
 9   Visitors                1238 non-null   object 
 10  Visitors Moneyline      1238 non-null   int64  
 11  Visitors Puckline       1238 non-null   float64
 12  Visitors Puckline Odds  1238 non-null   int64  
 13  Visitors Goals          1238 non-null   int64  
 14  Winner                  1238 non-null   

## Assigning winners to puckline bet

In [None]:
# A side "covers" the puckline when its goals plus its handicap exceed the opponent's goals.
home_covers = (df['Home Puckline'] + df['Home Goals']) > df['Visitors Goals']
visitors_cover = (df['Visitors Puckline'] + df['Visitors Goals']) > df['Home Goals']

# Rows where neither side covers (a push, or missing/inconsistent data) have no valid
# 0/1 label. Fail here with the offending rows instead of mixing a string marker into the
# label column, which would only surface later as a confusing error when fitting a model.
unresolved = ~(home_covers | visitors_cover)
if unresolved.any():
  raise ValueError(
    "Cannot assign a puckline winner for rows: {}".format(list(df.index[unresolved]))
  )

# 1 = home covers, 0 = visitors cover (home is checked first, as in an if/elif chain)
puckline_winner = home_covers.astype(int).tolist()


In [None]:
# Attach the per-game puckline label as a new column of the frame
df['puckline_winner'] = puckline_winner


## Test/train split

In [None]:
# Everything that is a label, a raw team name, or a game result is excluded from the features
non_feature_columns = ['Winner','Home','Visitors',"Home Goals", "Visitors Goals","puckline_winner"]
X = df.drop(columns=non_feature_columns)

# Target for the moneyline model
Y = df['Winner']

In [None]:
# shuffle=False keeps the rows in file order, so the final 20% of rows become the test set
X_train, X_test, y_train, y_test = train_test_split(
  X,
  Y,
  test_size=0.2,
  shuffle=False,
)


# Random Forest Function

In [None]:
# Pin the seed: a random forest bootstraps rows and samples features at random, so without
# random_state every Restart & Run All gives a different model, accuracy and profit figure.
rf = RandomForestClassifier(n_estimators = 40, random_state = 42)
rf.fit(X_train, y_train)

RandomForestClassifier(n_estimators=40)

In [None]:
# Score the moneyline model on the held-out games
prediction = rf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=prediction)

accuracy_message = "Model is {:.2%} accurate on test data".format(accuracy)
print(accuracy_message)

Model is 64.11% accurate on test data


## Determining Total Profit on Test case

In [None]:
def decimal_odds(amount):
  """Convert American (moneyline) odds to decimal odds.

  Parameters
  ----------
  amount : int or float
      American odds, e.g. +150 or -200. Must be non-zero.

  Returns
  -------
  float
      Decimal odds: the total returned per 1 unit staked, stake included
      (+150 -> 2.5, -200 -> 1.5).

  Raises
  ------
  ValueError
      If ``amount`` is 0. Zero is not a valid American price; with numpy integers the
      division below would otherwise return -inf silently instead of failing.
  """
  if amount == 0:
    raise ValueError("American odds cannot be 0")
  if amount > 0:  # positive price: profit on a 100 stake is `amount`
    decimal = amount/100 + 1
  else:           # negative price: must stake |amount| to profit 100
    decimal = 1 - 100/amount
  return decimal

In [None]:
units = 100

# Moneyline prices and actual results for the held-out games, as plain arrays
home_odds = np.array(X_test['Home Moneyline'])
visitors_odds = np.array(X_test['Visitors Moneyline'])
game_outcome = np.array(y_test)

total_bets_placed = 0
possible_winnings = np.zeros(len(home_odds))

# Stake the same amount on the predicted side of every test game
for i in range(len(home_odds)):
  total_bets_placed += units

  outcome_plus_pick = game_outcome[i] + prediction[i]
  if outcome_plus_pick == 2:    # home team wins and prediction correct
    payout = units * decimal_odds(home_odds[i])
  elif outcome_plus_pick == 0:  # visiting team wins and prediction correct
    payout = units * decimal_odds(visitors_odds[i])
  else:                         # wrong pick: the stake is lost
    payout = 0
  possible_winnings[i] = payout

# Net result = everything paid back (stakes included) minus everything staked
winnings = sum(possible_winnings) - total_bets_placed

print("If we were to place ${:.2f} on every test bet and win {:.2%} of the time, we would win ${:.2f}. This means that ${:.2f} would have been staked".format(units,accuracy,winnings, total_bets_placed))

If we were to place $100.00 on every test bet and win 64.11% of the time, we would win $1434.77. This means that $24800.00 would have been staked


### Determining if changing the units bet on underdogs/favourites has a beneficial outcome on profits

In [None]:
def units_to_bet(odds,units,underdog_multiplier=1,favourite_multiplier=1,threshold=150):
  """Return the stake for a bet, optionally scaled for heavy underdogs/favourites.

  Parameters
  ----------
  odds : int or float
      American odds of the side being backed.
  units : int or float
      Base stake.
  underdog_multiplier : int or float, optional
      Factor applied to ``units`` when ``odds > threshold`` (heavy underdog).
      Defaults to 1, i.e. no change.
  favourite_multiplier : int or float, optional
      Factor applied to ``units`` when ``odds < -threshold`` (heavy favourite).
      Defaults to 1, i.e. no change.
  threshold : int or float, optional
      Absolute American price beyond which a side counts as "heavy". Defaults to 150.

  Returns
  -------
  int or float
      The amount to stake. With the default arguments this is always ``units``.
  """
  if odds > threshold:  # heavy underdog
    bet = units*underdog_multiplier
  elif odds < -threshold: # heavy favourite
    bet = units*favourite_multiplier
  else:
    bet = units
  return bet

In [None]:
units = 10

# Moneyline prices and actual results for the held-out games, as plain arrays
home_odds = np.array(X_test['Home Moneyline'])
visitors_odds = np.array(X_test['Visitors Moneyline'])
game_outcome = np.array(y_test)

total_bets_placed = 0
possible_winnings = np.zeros(len(home_odds))

for i in range(len(possible_winnings)):
  outcome, pick = game_outcome[i], prediction[i]

  # Work out which side's price was backed and whether the bet won
  if outcome + pick == 2:               # home team wins and prediction correct
    backed_odds, bet_won = home_odds[i], True
  elif outcome + pick == 0:             # visiting team wins and prediction correct
    backed_odds, bet_won = visitors_odds[i], True
  elif (outcome == 1) and (pick == 0):  # backed the visitors, home won
    backed_odds, bet_won = visitors_odds[i], False
  else:                                 # backed the home team, visitors won
    backed_odds, bet_won = home_odds[i], False

  # The stake is sized from the price of the side that was backed
  stake = units_to_bet(backed_odds, units)
  total_bets_placed = total_bets_placed + stake
  if bet_won:
    possible_winnings[i] = stake * decimal_odds(backed_odds)

# Net result = everything paid back (stakes included) minus everything staked
winnings = sum(possible_winnings) - total_bets_placed

print("If we were to place ${:.2f} on every test bet and win {:.2%} of the time, we would win ${:.2f}".format(units,accuracy,winnings))

If we were to place $10.00 on every test bet and win 58.06% of the time, we would win $34.38


## Predicting Future Games

In [None]:
# Spreadsheet of the games to predict (path is relative to the working directory)
today = pd.read_excel('20220424.xlsx')

# Team-id encoding follows:
#https://towardsdatascience.com/can-we-beat-the-bookmaker-with-machine-learning-45e3b30fc921

# Encode the home teams with the existing team vocabulary;
# a team that is not a key of teamToIdx raises KeyError here
homeId = [teamToIdx[team] for team in today['Home'].values]
today['Home_Id'] = homeId # new column holding the home team id

# Encode the visiting teams the same way
visitorsId = [teamToIdx[team] for team in today['Visitors'].values]
today['Visitors_Id'] = visitorsId # new column holding the away team id

In [None]:
# Use a dedicated name rather than rebinding the global `X`: the train/test split cell reads
# `X`, so overwriting it here would make re-running that cell split today's games instead
# of the season data.
X_today = today.drop(columns=['Winner','Home','Visitors',"Home Goals", "Visitors Goals"])

# Present exactly the training features, in training order; a feature missing from the
# spreadsheet fails here with a KeyError naming the column.
X_today = X_today[X_train.columns]

prediction_today = rf.predict(X_today)

In [None]:
# Store the model's pick next to each game and write the table out as CSV
today["Prediction"] = prediction_today
today.to_csv("prediction_today.csv")

# Puckline testing

In [None]:
# Same feature set as the moneyline model; the target is the puckline label instead
X_puck = df.drop(columns=['Winner','Home','Visitors',"Home Goals", "Visitors Goals","puckline_winner"])
Y_puck = df['puckline_winner']

# shuffle=False keeps the rows in file order, so the final 20% of rows become the test set
X_train_puck, X_test_puck, y_train_puck, y_test_puck = train_test_split(X_puck, Y_puck, test_size=0.2, shuffle=False)

# Pin the seed so the puckline accuracy and the profit figure derived from it are
# reproducible on Restart & Run All.
rf_puck = RandomForestClassifier(n_estimators = 30, random_state = 42)
rf_puck.fit(X_train_puck, y_train_puck)

# NOTE: this rebinds `prediction` and `accuracy`, which the moneyline cells also use.
# After this cell has run they refer to the puckline model.
prediction = rf_puck.predict(X_test_puck)
accuracy = accuracy_score(y_test_puck, prediction)
print("Model is {:.2%} accurate on test data".format(accuracy))

Model is 53.63% accurate on test data


In [None]:
units = 10

# Puckline prices and puckline results for the held-out games.
# The outcomes must come from y_test_puck (1 = home covers, 0 = visitors cover): scoring the
# puckline predictions against y_test, the moneyline winner, reports a meaningless profit.
home_odds = np.array(X_test_puck.loc[:,'Home Puckline Odds'])
visitors_odds = np.array(X_test_puck.loc[:,'Visitors Puckline Odds'])
game_outcome = np.array(y_test_puck)

total_bets_placed = 0

possible_winnings = np.zeros(len(home_odds))

# `prediction` and `accuracy` are expected to be the puckline model's values here
for i in range(len(possible_winnings)):

  if  (game_outcome[i]+prediction[i]) == 2: # home covers and prediction correct
    possible_winnings[i] = units_to_bet(home_odds[i],units) * decimal_odds(home_odds[i])
    total_bets_placed = total_bets_placed + units_to_bet(home_odds[i],units)
  elif (game_outcome[i]+prediction[i]) == 0: # visitors cover and prediction correct
    possible_winnings[i] = units_to_bet(visitors_odds[i],units) * decimal_odds(visitors_odds[i])
    total_bets_placed = total_bets_placed + units_to_bet(visitors_odds[i],units)
  elif (game_outcome[i] == 1) and (prediction[i] == 0): # backed the visitors, home covered
    possible_winnings[i] = 0
    total_bets_placed = total_bets_placed + units_to_bet(visitors_odds[i],units)
  else: # backed the home side, visitors covered
    total_bets_placed = total_bets_placed + units_to_bet(home_odds[i],units)


# Net result = everything paid back (stakes included) minus everything staked
winnings = sum(possible_winnings) - total_bets_placed

print("If we were to place ${:.2f} on every test bet and win {:.2%} of the time, we would win ${:.2f}, betting ${:.2f}".format(units,accuracy,winnings, total_bets_placed))

If we were to place $10.00 on every test bet and win 53.63% of the time, we would win $-490.13, betting $2480.00
