# NFL GAME PREDICTOR

https://nflgamedata.com/schedule.php?season=2021&week=1

https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22DAL%22&home=%22TB%22&year=%222021%22

In [1]:
import pandas as pd
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup
import time

# models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [2]:
# GLOBAL CONSTANTS
ALL_TEAMS = ["BUF", "NE", "MIA", "NYJ", "BAL", "CIN", "CLE", "PIT", "TEN",
            "HOU", "IND", "JAX", "LAC", "DEN", "LV", "KC", "DAL", "WAS",
            "PHI", "NYG", "GB", "CHI", "MIN", "DET", "TB", "CAR", "NO",
            "ATL", "ARI", "LA", "SEA", "SF"]

CURRENT_WEEK = 5
SPREADS = {}
SPREADS[5] = [1.5, -2.5, -3, 3, 8.5, 4.5, -10, 1.5, -9.5, 2.5, -2.5, -5.5, -5.5, -7, -3, -7]
SPREADS[6] = [6.5, 3.5, -3, 1, 4.5, 3.5, -9.5, 10.5, 7, -2.5, -3.5, 4, -4.5, 5.5]
SPREADS[7] = [-3.5, -6, -9.5, 2.5, -7.5, 3, 5.5, -15, -3, -18.5, -12.5, -4, 4.5]

week = CURRENT_WEEK
GAMES_LIST = []
WEEK_6_GAMES = []
TEAMS = {}
GAMES_DF = pd.DataFrame()

In [3]:
class Team:
    def __init__(self, name, wins, losses, diff, oRank, dRank):
        self.name = name
        self.wins = wins
        self.losses = losses
        self.diff = diff
        self.oRank = oRank
        self.dRank = dRank
        
        self.games = {} # indexed by week
        
    def addGame(self, week, game):
        self.games[week] = game
        
    def getRanking(self):
        return self.ranking
    
    def __str__(self):
        return "{}, ELO: {} | W-L: {}-{} | PD: {} | OR: {} | DR: {}".format(self.name, self.elo, self.wins, self.losses, self.diff, self.oRank, self.dRank)
        

In [4]:
class Game:
    def __init__(self, week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, result):
        self.week = week
        self.away = away
        self.awayScore = awayScore
        self.home = home
        self.homeScore = homeScore
        self.awayDR = awayDR
        self.awayOR = awayOR
        self.awayPD = awayPD
        self.homeDR = homeDR
        self.homeOR = homeOR
        self.homePD = homePD
        
        self.result = result
        
    def __str__(self):
        return "AWAY: {}: {} | HOME: {}: {} | RESULT IS HOME BY: {}".format(self.away, self.awayScore, self.home, self.homeScore, self.result)

In [5]:
# def pullMatchups(week):
#     time.sleep(1) # FOR SAKE OF NFLGAMEDATA
#     url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
#     table = pd.read_html(url)
#     matchups = table[2][1]
#     matchups = " ".join(matchups).split(" ")

#     this_week = []
#     for i in range(len(matchups)):
#         if matchups[i] in ALL_TEAMS:
#             this_week.append(matchups[i])

#     games = []
#     for i in range(0, len(this_week), 2):
#         games.append((this_week[i], this_week[i+1]))

#     return games

# # m = pullMatchups(week = 2)

# # for matchup in m:
# #     url = "https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22{}%22&home=%22{}%22&year=%222021%22"
# #     url = url.format(matchup[0], matchup[1])

# #     r = requests.get(url)
# #     bs = BeautifulSoup(r.content)
# #     bs 

# # TODO, pull stats from rbsdm

In [6]:
def populateTeams():
    url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(CURRENT_WEEK)
    table = pd.read_html(url)
    summary = table[2][0]
    summary = " ".join(summary)
    summary = summary.split("  ")

    for i in range(10, len(summary)):
        if i % 6 == 4:
            pass # rank seems useless, i'll use w/l and pd
        elif i % 6 == 5:
            name = summary[i]
        elif i % 6 == 0:
            wins = int(summary[i].split("-")[0])
            losses = int(summary[i].split("-")[1])
        elif i % 6 == 1:
            diff = int(summary[i])
        elif i % 6 == 2:
            oRank = int(summary[i])
        else: # i % 6 == 3
            dRank = int(summary[i])
            TEAMS[name] = Team(name, wins, losses, diff, oRank, dRank)

In [7]:
def populateGames(week):
#     time.sleep(1) # FOR SAKE OF NFLGAMEDATA
    url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    table = pd.read_html(url)
    game_data = table[2][1]
    game_data = " ".join(game_data)
    game_data = game_data.split("  ")

    if week == 1: # corrects inconsistency in nflgamedata.com
        game_data.insert(14, "if it works it works")

    awayNext = True
    for i in range(len(game_data)):
        if "-- BYE --" in str(game_data[i]):
            break
        if game_data[i] in ALL_TEAMS:
            if "-- BYE --" in str(game_data[i + 1]):
                break
            if awayNext:
                away = game_data[i]
                try:
                    awayScore = int(game_data[i+1])
                except ValueError:
                    awayScore = 0
                awayNext = False
            else:
                home = game_data[i]
                try:
                    homeScore = int(game_data[i-1])
                except ValueError:
                    homeScore = 0
                awayDR = TEAMS[away].dRank
                awayOR = TEAMS[away].oRank
                awayPD = TEAMS[away].diff
                homeDR = TEAMS[home].dRank
                homeOR = TEAMS[home].oRank
                homePD = TEAMS[home].diff
                GAMES_LIST.append(Game(week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, awayScore - homeScore))
                awayNext = True

In [8]:
def getDataUntilWeek(i):
    populateTeams()
    for i in range(1, i + 1):
        populateGames(i)
        
    GAMES_DF = pd.DataFrame()

    GAMES_DF['Week'] = [game.week for game in GAMES_LIST]
    GAMES_DF['Away'] = [game.away for game in GAMES_LIST]
    GAMES_DF['Away Score'] = [game.awayScore for game in GAMES_LIST]
    GAMES_DF['Home'] = [game.home for game in GAMES_LIST]
    GAMES_DF['Home Score'] = [game.homeScore for game in GAMES_LIST]
    GAMES_DF['Away DR'] = [game.awayDR for game in GAMES_LIST]
    GAMES_DF['Away OR'] = [game.awayOR for game in GAMES_LIST]
    GAMES_DF['Away PD'] = [game.awayPD for game in GAMES_LIST]
    GAMES_DF['Home DR'] = [game.homeDR for game in GAMES_LIST]
    GAMES_DF['Home OR'] = [game.homeOR for game in GAMES_LIST]
    GAMES_DF['Home PD'] = [game.homePD for game in GAMES_LIST]
    GAMES_DF['Result'] = [game.result for game in GAMES_LIST]

    # populating Team.games for team in TEAMS
    for index, row in GAMES_DF.iterrows():
        TEAMS[row["Away"]].addGame(row["Week"], GAMES_LIST[index])
        TEAMS[row["Home"]].addGame(row["Week"], GAMES_LIST[index])
        
    return GAMES_DF

In [9]:
def predictWeek(week):
    GAMES_DF = getDataUntilWeek(week)
    
    X = GAMES_DF[["Week", "Away OR", "Away PD", "Home DR", "Home OR", "Home PD"]]
    y = GAMES_DF[["Week", "Result", "Away Score", "Home Score"]]
    X_train = X[X["Week"] < week].drop("Week", axis=1)
    X_test = X[X["Week"] == week].drop("Week", axis=1)

    y_train_result = y[y["Week"] < week]["Result"]
    y_test_result = y[y["Week"] == week]["Result"]

    y_train_away = y[y["Week"] < week]["Away Score"]
    y_test_away = y[y["Week"] == week]["Away Score"]

    y_train_home = y[y["Week"] < week]["Home Score"]
    y_test_home = y[y["Week"] == week]["Home Score"]
    
    mdl_result = LinearRegression().fit(X_train, y_train_result)
    mdl_away = LinearRegression().fit(X_train, y_train_away)
    mdl_home = LinearRegression().fit(X_train, y_train_home)

    preds_result = mdl_result.predict(X_test)
    preds_away = mdl_away.predict(X_test)
    preds_home = mdl_home.predict(X_test)

    analysis = pd.DataFrame()
    analysis["away"] = GAMES_DF[X_train.shape[0]:]["Away"]
    analysis["true away score"] = GAMES_DF[X_train.shape[0]:]["Away Score"]
    analysis["pred away"] = [round(float(pred), 2) for pred in list(preds_away)]
    analysis["pred home"] = [round(float(pred), 2) for pred in list(preds_home)]
    analysis["true home score"] = GAMES_DF[X_train.shape[0]:]["Home Score"]
    analysis["home"] = GAMES_DF[X_train.shape[0]:]["Home"]
    analysis["true result"] = GAMES_DF[X_train.shape[0]:]["Result"]
    analysis["pred result"] = [round(float(pred), 2) for pred in list(preds_result)]
    analysis["spread pred"] = SPREADS[week]
    analysis["bettable?"] = analysis.apply(lambda row: True if (row['spread pred'] > 0 and row['pred result'] > row['spread pred']) or (row['spread pred'] <= 0 and row['pred result'] < row['spread pred']) else False, axis = 1)
    return analysis


# CURRENT WEEK (7):

In [10]:
WEEK_7_BETS = predictWeek(7)
# WEEK_7_BETS = WEEK_7_BETS[WEEK_7_BETS["bettable?"]]
WEEK_7_BETS["bet confidence"] = abs(WEEK_7_BETS["pred result"] - WEEK_7_BETS["spread pred"])
# WEEK_7_BETS["bet recommended?"] = WEEK_7_BETS["bet confidence"] > 1
# WEEK_7_BETS = WEEK_7_BETS[WEEK_7_BETS["bet recommended?"]]
# WEEK_7_BETS["won the bet?"] = WEEK_7_BETS.apply(lambda row: True if (row['spread pred'] > 0 and row['true result'] > row['spread pred']) or (row['spread pred'] <= 0 and row['true result'] < row['spread pred']) else False, axis = 1)
# WEEK_7_BETS.to_csv("WEEK7BETS.csv")
WEEK_7_BETS

Unnamed: 0,away,true away score,pred away,pred home,true home score,home,true result,pred result,spread pred,bettable?,bet confidence
94,DEN,0,27.78,25.1,0,CLE,0,2.68,-3.5,False,6.18
95,CIN,0,24.26,25.56,0,BAL,0,-1.3,-6.0,False,4.7
96,WAS,0,20.44,31.47,0,GB,0,-11.02,-9.5,True,1.52
97,ATL,0,23.31,19.32,0,MIA,0,3.99,2.5,True,1.49
98,NYJ,0,17.58,22.96,0,NE,0,-5.38,-7.5,False,2.12
99,CAR,0,26.95,16.23,0,NYG,0,10.72,3.0,True,7.72
100,KC,0,28.88,28.71,0,TEN,0,0.17,5.5,False,5.33
101,DET,0,15.85,34.34,-16,LA,16,-18.49,-15.0,True,3.49
102,PHI,0,20.38,23.46,-3,LV,3,-3.08,-3.0,True,0.08
103,HOU,0,12.88,37.64,0,ARI,0,-24.76,-18.5,True,6.26


In [151]:
# WEEK 7 BETS: PATRIOTS, CHIEFS, (PANTHERS & COLTS), (CARDINALS & RAMS)

# PAST WEEKS:

In [150]:
WEEK_6_BETS = predictWeek(6)
WEEK_6_BETS = WEEK_6_BETS[WEEK_6_BETS["bettable?"]]
WEEK_6_BETS["bet confidence"] = abs(WEEK_6_BETS["pred result"] - WEEK_6_BETS["spread pred"])
WEEK_6_BETS["bet recommended?"] = WEEK_6_BETS["bet confidence"] > 1
# WEEK_6_BETS = WEEK_6_BETS[WEEK_6_BETS["bet recommended?"]]
WEEK_6_BETS["won the bet?"] = WEEK_6_BETS.apply(lambda row: True if (row['spread pred'] > 0 and row['true result'] > row['spread pred']) or (row['spread pred'] <= 0 and row['true result'] < row['spread pred']) else False, axis = 1)
###### WEEK_6_BETS.to_csv("WEEK6BETS.csv") # DO NOT UNCOMMENT!
WEEK_6_BETS

ValueError: Length of values does not match length of index

In [None]:
WEEK_6_BETS = WEEK_6_BETS[WEEK_6_BETS["bet recommended?"]]
WEEK_6_NUM_BETS = len(WEEK_6_BETS)
WEEK_6_ACCURACY = np.mean(WEEK_6_BETS["won the bet?"])
print("numBets:", WEEK_6_NUM_BETS, "accuracy:", WEEK_6_ACCURACY)  # IT WORKS!
WEEK_6_BETS.groupby("won the bet?").mean()['bet confidence']

In [178]:
# TODO
# 1) fix analysis columns: bettable, bet confidence, etc.
# 2) fix 
# 3) implement rbsdm to make a more complex model, consider power rankings and defensive stats