# NFL GAME PREDICTOR

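Data sources:

- Weekly schedule, scores and team summary (scraped by the code below): https://nflgamedata.com/schedule.php?season=2021&week=1

- Per-game box scores (not used yet, see the TODO notes): https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22DAL%22&home=%22TB%22&year=%222021%22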

In [146]:
import pandas as pd
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup
import time

# models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [147]:
# GLOBAL CONSTANTS

# Team abbreviations; the scrapers use this list to recognise team tokens
# in the text pulled from the schedule page.
ALL_TEAMS = [
    "BUF", "NE", "MIA", "NYJ",
    "BAL", "CIN", "CLE", "PIT",
    "TEN", "HOU", "IND", "JAX",
    "LAC", "DEN", "LV", "KC",
    "DAL", "WAS", "PHI", "NYG",
    "GB", "CHI", "MIN", "DET",
    "TB", "CAR", "NO", "ATL",
    "ARI", "LA", "SEA", "SF",
]

# Week whose team-summary page populateTeams() scrapes.
CURRENT_WEEK = 5

# Point spreads keyed by week. predictWeek() assigns SPREADS[week] positionally
# to that week's games and compares each value with the predicted
# away-minus-home margin.
SPREADS = {
    5: [1.5, -2.5, -3, 3, 8.5, 4.5, -10, 1.5, -9.5, 2.5, -2.5, -5.5, -5.5, -7, -3, -7],
    6: [6.5, 3.5, -3, 1, 4.5, 3.5, -9.5, 10.5, 7, -2.5, -3.5, 4, -4.5, 5.5],
}

week = CURRENT_WEEK

# Mutable module-level state filled in by the populate* functions.
GAMES_LIST = []    # Game objects, appended by populateGames()
WEEK_6_GAMES = []  # not referenced by any code visible in this file
TEAMS = {}         # team abbreviation -> Team, filled by populateTeams()
GAMES_DF = pd.DataFrame()  # placeholder; getDataUntilWeek() returns its own frame

In [148]:
class Team:
    """Season summary for one team plus the games recorded for it.

    Attributes:
        name: team abbreviation.
        wins, losses: win/loss counts.
        diff: point differential (printed as "PD").
        oRank, dRank: rank values supplied by the caller (printed as "OR"/"DR").
        elo, ranking: optional values; nothing in this class computes them, so
            they start as None and may be assigned by the caller.
        games: dict mapping week -> game object, filled through addGame().
    """

    def __init__(self, name, wins, losses, diff, oRank, dRank):
        self.name = name
        self.wins = wins
        self.losses = losses
        self.diff = diff
        self.oRank = oRank
        self.dRank = dRank

        # __str__ and getRanking() read these attributes; define them up front
        # so both work on a freshly constructed Team instead of raising
        # AttributeError.
        self.elo = None
        self.ranking = None

        self.games = {} # indexed by week

    def addGame(self, week, game):
        """Record `game` as this team's game for `week` (replaces any previous entry)."""
        self.games[week] = game

    def getRanking(self):
        """Return the team's ranking, or None if none has been assigned."""
        return self.ranking

    def __str__(self):
        return "{}, ELO: {} | W-L: {}-{} | PD: {} | OR: {} | DR: {}".format(self.name, self.elo, self.wins, self.losses, self.diff, self.oRank, self.dRank)
        

In [149]:
class Game:
    """One game: the matchup, the final score and both teams' summary stats.

    Attributes:
        week: week number of the game.
        away, home: team abbreviations.
        awayScore, homeScore: points scored by each side.
        awayDR, awayOR, awayPD: the away team's dRank, oRank and point
            differential as passed in by the caller.
        homeDR, homeOR, homePD: the same three values for the home team.
        result: margin supplied by the caller; populateGames() passes
            awayScore - homeScore, i.e. positive when the away team wins.
    """

    def __init__(self, week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, result):
        self.week = week
        self.away = away
        self.awayScore = awayScore
        self.home = home
        self.homeScore = homeScore
        self.awayDR = awayDR
        self.awayOR = awayOR
        self.awayPD = awayPD
        self.homeDR = homeDR
        self.homeOR = homeOR
        self.homePD = homePD

        self.result = result

    def __str__(self):
        # The stored margin is away minus home, so it is labelled as the away
        # team's margin (the label used to say "HOME BY", which had the sign
        # backwards).
        return "AWAY: {}: {} | HOME: {}: {} | RESULT IS AWAY BY: {}".format(self.away, self.awayScore, self.home, self.homeScore, self.result)

In [150]:
def pullMatchups(week):
    """Scrape the 2021 schedule page for `week` and return its matchups.

    Every space-separated token of column 1 of the third table that is a
    known team abbreviation is collected in page order, then paired up two
    at a time.

    Returns:
        A list of 2-tuples of team abbreviations (presumably (away, home),
        based on how the commented-out code below consumes them -- confirm).

    Raises:
        IndexError: if an odd number of team tokens is found.
    """
    time.sleep(1)  # FOR SAKE OF NFLGAMEDATA
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    tables = pd.read_html(page_url)
    tokens = " ".join(tables[2][1]).split(" ")

    # Keep only the tokens that are team abbreviations, preserving page order.
    teams_in_order = [token for token in tokens if token in ALL_TEAMS]

    # Consecutive pairs form one matchup each.
    return [
        (teams_in_order[k], teams_in_order[k + 1])
        for k in range(0, len(teams_in_order), 2)
    ]

# m = pullMatchups(week = 2)

# for matchup in m:
#     url = "https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22{}%22&home=%22{}%22&year=%222021%22"
#     url = url.format(matchup[0], matchup[1])

#     r = requests.get(url)
#     bs = BeautifulSoup(r.content)
#     bs 

# TODO, pull stats from rbsdm

In [151]:
def populateTeams():
    """Scrape the CURRENT_WEEK summary column and (re)build the TEAMS dict.

    Column 0 of the third table on the schedule page is flattened into
    double-space separated fields. From field 10 onward the fields are read
    as repeating groups of six: rank (ignored), name, "W-L" record, point
    differential, oRank, dRank. A Team is stored in TEAMS under its name each
    time a dRank field is reached.
    """
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(CURRENT_WEEK)
    tables = pd.read_html(page_url)
    fields = " ".join(tables[2][0]).split("  ")

    for position, text in enumerate(fields[10:], start=10):
        slot = position % 6
        if slot == 4:
            # rank seems useless, i'll use w/l and pd
            continue
        if slot == 5:
            name = text
        elif slot == 0:
            record = text.split("-")
            wins = int(record[0])
            losses = int(record[1])
        elif slot == 1:
            diff = int(text)
        elif slot == 2:
            oRank = int(text)
        else:
            # slot == 3: last field of the group, so the team is complete.
            dRank = int(text)
            TEAMS[name] = Team(name, wins, losses, diff, oRank, dRank)

In [152]:
def populateGames(week):
    """Scrape one week's games and append them to GAMES_LIST as Game objects.

    Column 1 of the third table on the schedule page is flattened into
    double-space separated tokens. Team tokens alternate away, home: the away
    score is the token right after the away team and the home score is the
    token right before the home team. Scanning stops at the first
    "-- BYE --" marker. Team stats are looked up in TEAMS, so populateTeams()
    must have run first.

    NOTE(review): the margin passed to Game is awayScore - homeScore.
    """
    # (A one-second courtesy sleep before this request is currently disabled.)
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    tables = pd.read_html(page_url)
    game_data = " ".join(tables[2][1]).split("  ")

    if week == 1:  # corrects inconsistency in nflgamedata.com
        game_data.insert(14, "if it works it works")

    if week == 6:  # temporary for testing purposes (10/18/21)
        game_data.insert(166, 0)

    expecting_away = True
    for pos, token in enumerate(game_data):
        if "-- BYE --" in str(token):
            break
        if token not in ALL_TEAMS:
            continue
        if "-- BYE --" in str(game_data[pos + 1]):
            break

        if expecting_away:
            away = token
            awayScore = int(game_data[pos + 1])
            expecting_away = False
            continue

        # Second team of the pair: the game is complete, so record it.
        home = token
        homeScore = int(game_data[pos - 1])
        away_team = TEAMS[away]
        home_team = TEAMS[home]
        GAMES_LIST.append(
            Game(
                week, away, awayScore, home, homeScore,
                away_team.dRank, away_team.oRank, away_team.diff,
                home_team.dRank, home_team.oRank, home_team.diff,
                awayScore - homeScore,
            )
        )
        expecting_away = True

In [153]:
def getDataUntilWeek(i):
    """Scrape weeks 1..i and return every game in GAMES_LIST as a DataFrame.

    Refreshes TEAMS via populateTeams(), calls populateGames() for each week
    from 1 through `i`, builds one row per Game in GAMES_LIST, and registers
    each game with both of its teams through Team.addGame().

    NOTE(review): GAMES_LIST is module-level and populateGames() only appends
    to it, so calling this function more than once in a session leaves the
    earlier weeks in the list again (the recorded week-6 output, whose row
    labels start at 160, is consistent with that). Confirm whether the list
    should be reset between calls.

    Returns:
        DataFrame with columns Week, Away, Away Score, Home, Home Score,
        Away DR, Away OR, Away PD, Home DR, Home OR, Home PD, Result.
    """
    populateTeams()
    for wk in range(1, i + 1):
        populateGames(wk)

    # Column header -> Game attribute, in output column order.
    column_sources = (
        ('Week', 'week'),
        ('Away', 'away'),
        ('Away Score', 'awayScore'),
        ('Home', 'home'),
        ('Home Score', 'homeScore'),
        ('Away DR', 'awayDR'),
        ('Away OR', 'awayOR'),
        ('Away PD', 'awayPD'),
        ('Home DR', 'homeDR'),
        ('Home OR', 'homeOR'),
        ('Home PD', 'homePD'),
        ('Result', 'result'),
    )
    games_df = pd.DataFrame()
    for header, attribute in column_sources:
        games_df[header] = [getattr(game, attribute) for game in GAMES_LIST]

    # populating Team.games for team in TEAMS
    for game in GAMES_LIST:
        TEAMS[game.away].addGame(game.week, game)
        TEAMS[game.home].addGame(game.week, game)

    return games_df

In [154]:
def predictWeek(week):
    """Fit linear models on the weeks before `week` and predict that week's games.

    Three LinearRegression models are trained on games with Week < `week`
    (targets: away score, home score, and Result, the away-minus-home margin)
    and applied to the games with Week == `week`. The team features come from
    the summary scraped by populateTeams(), i.e. the CURRENT_WEEK page, not
    from week-by-week values.

    Returns:
        DataFrame indexed like the week's rows in the scraped games frame,
        with columns: away, true away score, pred away, pred home,
        true home score, home, true result, pred result, spread pred,
        bettable?. "bettable?" is True when the predicted margin lies beyond
        the spread on the spread's own side (above a positive spread, below a
        non-positive one).

    Raises:
        KeyError: if SPREADS has no entry for `week`.
        ValueError: (from pandas) if SPREADS[week] does not have one value per
            game of that week.
    """
    # Fail before any scraping when there is nothing to compare against.
    if week not in SPREADS:
        raise KeyError(week)

    games = getDataUntilWeek(week)

    # NOTE(review): "Away DR" is not among the features although "Home DR"
    # is -- confirm this is intentional.
    feature_columns = ["Away OR", "Away PD", "Home DR", "Home OR", "Home PD"]

    is_train = games["Week"] < week
    is_test = games["Week"] == week
    X_train = games.loc[is_train, feature_columns]
    X_test = games.loc[is_test, feature_columns]

    # Select the rows being predicted by week rather than by position, so the
    # output lines up with X_test even if the target week's rows are not the
    # tail of the frame.
    test_games = games[is_test]

    predictions = {}
    for label, target in (
        ("pred away", "Away Score"),
        ("pred home", "Home Score"),
        ("pred result", "Result"),
    ):
        model = LinearRegression().fit(X_train, games.loc[is_train, target])
        predictions[label] = [round(float(p), 2) for p in model.predict(X_test)]

    analysis = pd.DataFrame(index=test_games.index)
    analysis["away"] = test_games["Away"]
    analysis["true away score"] = test_games["Away Score"]
    analysis["pred away"] = predictions["pred away"]
    analysis["pred home"] = predictions["pred home"]
    analysis["true home score"] = test_games["Home Score"]
    analysis["home"] = test_games["Home"]
    analysis["true result"] = test_games["Result"]
    analysis["pred result"] = predictions["pred result"]
    analysis["spread pred"] = SPREADS[week]

    spread = analysis["spread pred"]
    predicted = analysis["pred result"]
    analysis["bettable?"] = ((spread > 0) & (predicted > spread)) | (
        (spread <= 0) & (predicted < spread)
    )
    return analysis


In [155]:
# Week 5: keep the games flagged "bettable?", rate each candidate bet, and
# check it against the actual result.
WEEK_5_BETS = predictWeek(5)
# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not write into a slice of the predictWeek() result
# (which triggers pandas' SettingWithCopyWarning).
WEEK_5_BETS = WEEK_5_BETS[WEEK_5_BETS["bettable?"]].copy()
# Confidence = distance between predicted margin and spread, relative to the
# size of the spread. NOTE(review): a spread of exactly 0 divides by zero here.
WEEK_5_BETS["bet confidence"] = (WEEK_5_BETS["pred result"] - WEEK_5_BETS["spread pred"]).abs() / WEEK_5_BETS["spread pred"].abs()
WEEK_5_BETS["bet recommended?"] = WEEK_5_BETS["bet confidence"] > 1
# WEEK_5_BETS = WEEK_5_BETS[WEEK_5_BETS["bet recommended?"]]
# A bet is won when the true margin lands beyond the spread on the spread's
# own side (same rule as "bettable?", applied to the real result).
WEEK_5_BETS["won the bet?"] = (
    ((WEEK_5_BETS["spread pred"] > 0) & (WEEK_5_BETS["true result"] > WEEK_5_BETS["spread pred"]))
    | ((WEEK_5_BETS["spread pred"] <= 0) & (WEEK_5_BETS["true result"] < WEEK_5_BETS["spread pred"]))
)
WEEK_5_BETS.to_csv("WEEK5BETS.csv")
WEEK_5_BETS


Unnamed: 0,away,true away score,pred away,pred home,true home score,home,true result,pred result,spread pred,bettable?,bet confidence,bet recommended?,won the bet?
64,LA,26,29.81,21.87,17,SEA,9,7.94,1.5,True,4.293333,True,True
66,PHI,21,17.84,23.28,18,CAR,3,-5.44,-3.0,True,0.813333,False,False
68,NE,25,20.6,11.48,22,HOU,3,9.12,8.5,True,0.072941,False,False
69,TEN,37,26.85,17.73,19,JAX,18,9.12,4.5,True,1.026667,True,True
70,DET,17,14.44,27.39,19,MIN,-2,-12.94,-10.0,True,0.294,False,False
71,DEN,19,23.67,19.44,27,PIT,-8,4.23,1.5,True,1.82,True,False
72,MIA,17,18.5,37.72,45,TB,-28,-19.22,-9.5,True,1.023158,True,True
73,NO,33,27.64,14.83,22,WAS,11,12.81,2.5,True,4.124,True,True
75,CHI,20,16.92,22.48,9,LV,11,-5.56,-5.5,True,0.010909,False,False
76,SF,10,21.74,34.29,17,ARI,-7,-12.55,-5.5,True,1.281818,True,True


In [156]:
# Week 5 summary: restrict to the recommended bets and report the hit rate.
WEEK_5_BETS = WEEK_5_BETS[WEEK_5_BETS["bet recommended?"]]
WEEK_5_NUM_BETS = len(WEEK_5_BETS)
WEEK_5_ACCURACY = np.mean(WEEK_5_BETS["won the bet?"])
print("numBets:", WEEK_5_NUM_BETS, "accuracy:", WEEK_5_ACCURACY)
# Select the column before aggregating: averaging the whole frame would also
# try to average the string columns ("away", "home"), which raises TypeError
# on pandas >= 2.0.
WEEK_5_BETS.groupby("won the bet?")["bet confidence"].mean()

numBets: 7 accuracy: 0.8571428571428571


won the bet?
False    1.820000
True     2.231972
Name: bet confidence, dtype: float64

In [157]:
# Week 6: keep the games flagged "bettable?", rate each candidate bet, and
# check it against the actual result.
WEEK_6_BETS = predictWeek(6)
# .copy() makes the filtered rows an independent frame, so the column
# assignments below do not write into a slice of the predictWeek() result
# (which triggers pandas' SettingWithCopyWarning).
WEEK_6_BETS = WEEK_6_BETS[WEEK_6_BETS["bettable?"]].copy()
# Confidence = distance between predicted margin and spread, relative to the
# size of the spread. NOTE(review): a spread of exactly 0 divides by zero here.
WEEK_6_BETS["bet confidence"] = (WEEK_6_BETS["pred result"] - WEEK_6_BETS["spread pred"]).abs() / WEEK_6_BETS["spread pred"].abs()
WEEK_6_BETS["bet recommended?"] = WEEK_6_BETS["bet confidence"] > 1
# WEEK_6_BETS = WEEK_6_BETS[WEEK_6_BETS["bet recommended?"]]
# A bet is won when the true margin lands beyond the spread on the spread's
# own side (same rule as "bettable?", applied to the real result).
WEEK_6_BETS["won the bet?"] = (
    ((WEEK_6_BETS["spread pred"] > 0) & (WEEK_6_BETS["true result"] > WEEK_6_BETS["spread pred"]))
    | ((WEEK_6_BETS["spread pred"] <= 0) & (WEEK_6_BETS["true result"] < WEEK_6_BETS["spread pred"]))
)
WEEK_6_BETS.to_csv("WEEK6BETS.csv")
WEEK_6_BETS


Unnamed: 0,away,true away score,pred away,pred home,true home score,home,true result,pred result,spread pred,bettable?,bet confidence,bet recommended?,won the bet?
160,TB,28,29.53,20.03,22,PHI,6,9.51,6.5,True,0.463077,False,False
162,LAC,6,24.52,31.04,34,BAL,-28,-6.52,-3.0,True,1.173333,True,True
165,CIN,34,27.61,12.6,11,DET,23,15.01,3.5,True,3.288571,True,True
166,HOU,3,19.07,33.32,31,IND,-28,-14.25,-9.5,True,0.5,False,True
167,LA,38,31.65,17.81,11,NYG,27,13.84,10.5,True,0.318095,False,True
168,KC,31,31.46,23.2,13,WAS,18,8.26,7.0,True,0.18,False,True
170,LV,34,20.29,24.87,24,DEN,10,-4.58,-3.5,True,0.308571,False,False
171,DAL,35,26.72,14.98,29,NE,6,11.74,4.0,True,1.935,True,True
173,BUF,-6,30.76,13.42,0,TEN,-6,17.34,5.5,True,2.152727,True,False


In [158]:
# Week 6 summary: restrict to the recommended bets and report the hit rate.
# temporary, bills haven't played yet: the BUF @ TEN game is excluded by
# matchup rather than by row label, because the label (173 in the recorded
# run) depends on how many games had been scraped before this cell ran.
WEEK_6_BETS = WEEK_6_BETS[
    WEEK_6_BETS["bet recommended?"]
    & ~((WEEK_6_BETS["away"] == "BUF") & (WEEK_6_BETS["home"] == "TEN"))
]
WEEK_6_NUM_BETS = len(WEEK_6_BETS)
WEEK_6_ACCURACY = np.mean(WEEK_6_BETS["won the bet?"])
print("numBets:", WEEK_6_NUM_BETS, "accuracy:", WEEK_6_ACCURACY)  # IT WORKS!
# Select the column before aggregating: averaging the whole frame would also
# try to average the string columns ("away", "home"), which raises TypeError
# on pandas >= 2.0.
WEEK_6_BETS.groupby("won the bet?")["bet confidence"].mean()

numBets: 3 accuracy: 1.0


won the bet?
True    2.132302
Name: bet confidence, dtype: float64

In [159]:
# Combined hit rate over both weeks, as a percentage: each week's accuracy is
# weighted by its number of recommended bets.
TOTAL_ACCURACY = (
    (WEEK_5_ACCURACY * WEEK_5_NUM_BETS + WEEK_6_ACCURACY * WEEK_6_NUM_BETS)
    / (WEEK_5_NUM_BETS + WEEK_6_NUM_BETS)
    * 100
)
TOTAL_ACCURACY # not too shabby

90.0

In [160]:
# TODO
# 1) organize code!
# 2) implement rbsdm to make a more complex model, consider power rankings and defensive stats