# NFL GAME PREDICTOR

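Schedule, scores and team summary data are scraped from: https://nflgamedata.com/schedule.php?season=2021&week=1

Box-score stats source (not used yet; see the rbsdm TODO notes below): https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22DAL%22&home=%22TB%22&year=%222021%22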

In [2]:
import pandas as pd
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup
import time

# models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [3]:
# GLOBAL CONSTANTS AND SHARED STATE

# Team abbreviations recognised when filtering scraped schedule text.
# Laid out four per row; the order is unchanged.
ALL_TEAMS = [
    "BUF", "NE", "MIA", "NYJ",
    "BAL", "CIN", "CLE", "PIT",
    "TEN", "HOU", "IND", "JAX",
    "LAC", "DEN", "LV", "KC",
    "DAL", "WAS", "PHI", "NYG",
    "GB", "CHI", "MIN", "DET",
    "TB", "CAR", "NO", "ATL",
    "ARI", "LA", "SEA", "SF",
]

# Week whose page supplies the team summaries, and the last week of results loaded.
CURRENT_WEEK = 5

# Point spreads, one per game, assigned positionally to the "spread pred" column.
# NOTE(review): they are compared directly against the away-minus-home result,
# so presumably a positive value means the away side is favoured -- confirm.
WEEK_5_SPREADS = [
    1.5, -2.5, -3, 3, 8.5, 4.5, -10, 1.5,
    -9.5, 2.5, -2.5, -5.5, -5.5, -7, -3, -7,
]
WEEK_6_SPREADS = [
    6.5, 3.5, -3, 1, 4.5, 3.5, -9.5,
    10.5, 7, -2.5, -3.5, 4, -4.5, 5.5,
]

week = CURRENT_WEEK

# Containers filled in by the scraping cells further down.
GAMES_LIST = list()     # Game objects for weeks 1..CURRENT_WEEK
WEEK_6_GAMES = list()   # Game objects for the upcoming week
TEAMS = dict()          # team abbreviation -> Team
GAMES_DF = pd.DataFrame()

In [4]:
class Team:
    """Summary record for one team plus the games registered for it.

    Args:
        name: Team abbreviation.
        wins: Number of wins.
        losses: Number of losses.
        diff: Point differential.
        oRank: Offensive rank.
        dRank: Defensive rank.
        ranking: Optional overall ranking; defaults to ``None``.
        elo: Optional ELO rating; defaults to ``None``.

    ``ranking`` and ``elo`` were previously read by ``getRanking`` and
    ``__str__`` without ever being assigned, which raised ``AttributeError``.
    They are now always present, so both methods are safe to call.
    """

    def __init__(self, name, wins, losses, diff, oRank, dRank, ranking=None, elo=None):
        self.name = name
        self.wins = wins
        self.losses = losses
        self.diff = diff
        self.oRank = oRank
        self.dRank = dRank
        self.ranking = ranking
        self.elo = elo

        self.games = {}  # indexed by week

    def addGame(self, week, game):
        """Register ``game`` under ``week``, replacing any earlier entry."""
        self.games[week] = game

    def getRanking(self):
        """Return the overall ranking, or ``None`` when none was supplied."""
        return self.ranking

    def __str__(self):
        return "{}, ELO: {} | W-L: {}-{} | PD: {} | OR: {} | DR: {}".format(
            self.name, self.elo, self.wins, self.losses, self.diff, self.oRank, self.dRank
        )
        

In [5]:
class Game:
    """One game, with both teams' summary stats attached.

    Args:
        week: Week number of the game.
        away: Away team abbreviation.
        awayScore: Points scored by the away team.
        home: Home team abbreviation.
        homeScore: Points scored by the home team.
        awayDR, awayOR, awayPD: Away team's defensive rank, offensive rank
            and point differential.
        homeDR, homeOR, homePD: The same three values for the home team.
        result: Margin supplied by the caller. The code in this file always
            passes ``awayScore - homeScore``, so a positive value means the
            away team won by that many points.
    """

    def __init__(self, week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, result):
        self.week = week
        self.away = away
        self.awayScore = awayScore
        self.home = home
        self.homeScore = homeScore
        self.awayDR = awayDR
        self.awayOR = awayOR
        self.awayPD = awayPD
        self.homeDR = homeDR
        self.homeOR = homeOR
        self.homePD = homePD

        self.result = result

    def __str__(self):
        # The margin is away-minus-home, so it is labelled "AWAY BY"
        # (the previous "HOME BY" label had the sign backwards).
        return "AWAY: {}: {} | HOME: {}: {} | RESULT IS AWAY BY: {}".format(
            self.away, self.awayScore, self.home, self.homeScore, self.result
        )

In [6]:
def pullMatchups(week):
    """Scrape the 2021 schedule page for ``week`` and pair up the teams found.

    The second column of the page's third table is flattened into
    space-separated tokens; every token that is a known abbreviation in
    ``ALL_TEAMS`` is kept, in page order.

    Returns:
        A list of 2-tuples made from consecutive abbreviations
        (presumably ``(away, home)`` -- confirm against the page layout).

    Raises:
        IndexError: if an odd number of abbreviations is found.
    """
    time.sleep(1)  # pause before each request, for the sake of nflgamedata.com
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    tables = pd.read_html(page_url)
    tokens = " ".join(tables[2][1]).split(" ")

    found = [token for token in tokens if token in ALL_TEAMS]

    return [(found[k], found[k + 1]) for k in range(0, len(found), 2)]

# m = pullMatchups(week = 2)

# for matchup in m:
#     url = "https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22{}%22&home=%22{}%22&year=%222021%22"
#     url = url.format(matchup[0], matchup[1])

#     r = requests.get(url)
#     bs = BeautifulSoup(r.content)
#     bs 

# TODO, pull stats from rbsdm

In [7]:
def populateTeams():
    """Fill the global ``TEAMS`` dict from the ``CURRENT_WEEK`` schedule page.

    The first column of the page's third table is flattened into fields
    separated by double spaces. From index 10 onward the fields are read in
    a repeating cycle of six, keyed on ``index % 6``: rank (ignored), name,
    "W-L" record, point differential, offensive rank, defensive rank.
    A ``Team`` is stored under its name when the defensive rank is reached.
    """
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(CURRENT_WEEK)
    tables = pd.read_html(page_url)
    fields = " ".join(tables[2][0]).split("  ")

    for position in range(10, len(fields)):
        field = fields[position]
        slot = position % 6

        if slot == 4:
            continue  # rank seems useless, w/l and point differential are used instead
        if slot == 5:
            name = field
        elif slot == 0:
            record = field.split("-")
            wins = int(record[0])
            losses = int(record[1])
        elif slot == 1:
            diff = int(field)
        elif slot == 2:
            oRank = int(field)
        else:  # slot == 3: last field of the cycle, the team is complete
            dRank = int(field)
            TEAMS[name] = Team(name, wins, losses, diff, oRank, dRank)

In [8]:
def populateGames(week):
    """Append one ``Game`` per completed game of ``week`` to ``GAMES_LIST``.

    The second column of the page's third table is flattened into fields
    separated by double spaces. From index 7 onward the fields are read in a
    cycle of twelve, keyed on ``index % 12``: 7 is the away team, 8 the away
    score, 10 the home score and 11 the home team; other slots are skipped.
    Team stats are looked up in the global ``TEAMS``, so it must already be
    populated. The stored result is ``awayScore - homeScore``.
    """
    # NOTE: the one-second delay between requests is commented out in this function.
    page_url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    tables = pd.read_html(page_url)
    fields = " ".join(tables[2][1]).split("  ")

    if week == 1:
        # corrects inconsistency in nflgamedata.com: pad so the cycle stays aligned
        fields.insert(14, "if it works it works")

    for position in range(7, len(fields)):
        field = fields[position]
        slot = position % 12

        if slot == 7:
            away = field
        elif slot == 8:
            awayScore = int(field)
        elif slot == 10:
            homeScore = int(field)
        elif slot == 11:
            home = field
            visitor = TEAMS[away]
            host = TEAMS[home]
            GAMES_LIST.append(
                Game(
                    week=week,
                    away=away,
                    awayScore=awayScore,
                    home=home,
                    homeScore=homeScore,
                    awayDR=visitor.dRank,
                    awayOR=visitor.oRank,
                    awayPD=visitor.diff,
                    homeDR=host.dRank,
                    homeOR=host.oRank,
                    homePD=host.diff,
                    result=awayScore - homeScore,
                )
            )

In [9]:
# Teams must be loaded first: populateGames looks every team up in TEAMS.
populateTeams()
for wk in range(1, CURRENT_WEEK + 1):
    populateGames(wk)

In [10]:
# One row per scraped game, one column per Game attribute (column order matters
# to later cells that slice by name).
GAMES_DF = pd.DataFrame({
    'Week': [g.week for g in GAMES_LIST],
    'Away': [g.away for g in GAMES_LIST],
    'Away Score': [g.awayScore for g in GAMES_LIST],
    'Home': [g.home for g in GAMES_LIST],
    'Home Score': [g.homeScore for g in GAMES_LIST],
    'Away DR': [g.awayDR for g in GAMES_LIST],
    'Away OR': [g.awayOR for g in GAMES_LIST],
    'Away PD': [g.awayPD for g in GAMES_LIST],
    'Home DR': [g.homeDR for g in GAMES_LIST],
    'Home OR': [g.homeOR for g in GAMES_LIST],
    'Home PD': [g.homePD for g in GAMES_LIST],
    'Result': [g.result for g in GAMES_LIST],
})

# Register every game on both of its teams, keyed by week. Row labels of
# GAMES_DF line up with positions in GAMES_LIST.
for row_pos, record in GAMES_DF.iterrows():
    played = GAMES_LIST[row_pos]
    TEAMS[record["Away"]].addGame(record["Week"], played)
    TEAMS[record["Home"]].addGame(record["Week"], played)

In [11]:
# Simple model time! Train on weeks 1-4, then evaluate against week 5.
#
# NOTE(review): the team features are read from TEAMS, which populateTeams fills
# from the CURRENT_WEEK page, so every week's rows carry the same team values.
# Possible look-ahead leakage -- confirm this is intended.
test_week = 5  # WEEK_5_SPREADS below is specific to this week
feature_cols = ["Away DR", "Away OR", "Away PD", "Home DR", "Home OR", "Home PD"]

X = GAMES_DF[["Week"] + feature_cols]
y = GAMES_DF[["Week", "Result"]]
is_train = GAMES_DF["Week"] < test_week
is_test = GAMES_DF["Week"] == test_week

X_train = X.loc[is_train, feature_cols]
X_test = X.loc[is_test, feature_cols]
y_train = y.loc[is_train, ["Result"]]
y_test = y.loc[is_test, ["Result"]]

mdl = LinearRegression().fit(X_train, y_train)
preds = mdl.predict(X_test)

# Select the evaluation rows by week instead of a hard-coded positional slice
# ([64:]), which silently breaks if weeks 1-4 do not hold exactly 64 games.
week_games = GAMES_DF[is_test]

analysis = pd.DataFrame({
    "away": week_games["Away"],
    "away score": week_games["Away Score"],
    "home score": week_games["Home Score"],
    "home": week_games["Home"],
    "true": y_test["Result"],
})
# y_train is 2-D, so predict() returns shape (n, 1). Flatten before float():
# converting a one-element array to a scalar is deprecated in recent NumPy.
analysis["my pred"] = [round(float(pred), 2) for pred in preds.ravel()]
analysis["spread pred"] = WEEK_5_SPREADS  # must list one spread per test game, in row order

# Bet only when the model is more extreme than the spread, on the spread's side of zero.
spread = analysis["spread pred"]
mine = analysis["my pred"]
analysis["make the bet?"] = ((spread > 0) & (mine > spread)) | ((spread <= 0) & (mine < spread))

In [12]:
# Keep only the games flagged as bets, then grade them against the real results.
# Filter first and copy second: adding columns to a filtered slice is what
# triggers pandas' SettingWithCopyWarning.
bets = analysis[analysis["make the bet?"]].copy()
bets["bet confidence"] = (bets["my pred"] - bets["spread pred"]).abs()

# A bet is won when the actual margin beats the spread on the spread's side of
# zero. The vectorized form also works when no rows were selected.
bet_spread = bets["spread pred"]
bet_actual = bets["true"]
bets["won the bet?"] = ((bet_spread > 0) & (bet_actual > bet_spread)) | (
    (bet_spread <= 0) & (bet_actual < bet_spread)
)
bets


Unnamed: 0,away,away score,home score,home,true,my pred,spread pred,make the bet?,bet confidence,won the bet?
64,LA,26,17,SEA,9,4.28,1.5,True,2.78,True
66,PHI,21,18,CAR,3,-6.2,-3.0,True,3.2,False
69,TEN,37,19,JAX,18,9.94,4.5,True,5.44,True
71,DEN,19,27,PIT,-8,8.67,1.5,True,7.17,False
72,MIA,17,45,TB,-28,-22.44,-9.5,True,12.94,True
73,NO,33,22,WAS,11,10.91,2.5,True,8.41,True
76,SF,10,17,ARI,-7,-11.88,-5.5,True,6.38,True
77,NYG,20,44,DAL,-24,-15.61,-7.0,True,8.61,True


In [13]:
# Share of placed bets that were won, and mean confidence for lost vs. won bets.
numBets = len(bets)
accuracy = bets["won the bet?"].mean()
print("numBets:", numBets, "accuracy:", accuracy)  # IT WORKS!
# Select the column before aggregating: taking the mean of every column first
# fails on the string columns (away/home) in pandas >= 2.0.
bets.groupby("won the bet?")["bet confidence"].mean()

numBets: 8 accuracy: 0.75


won the bet?
False    5.185000
True     7.426667
Name: bet confidence, dtype: float64

In [14]:
# Scrape the week-6 schedule and append one placeholder Game per matchup to
# WEEK_6_GAMES (scores are set to 0 and the result is therefore 0).
week = 6
url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
table = pd.read_html(url)
# Flatten the second column of the page's third table into fields separated
# by double spaces.
game_data = table[2][1]
game_data = " ".join(game_data)
game_data = game_data.split("  ")

# Fields are read in a cycle of ten from index 7: slot 7 is the away team and
# slot 0 the home team. (populateGames uses a cycle of twelve; presumably the
# cycle is shorter here because these games have no scores yet -- TODO confirm.)
# NOTE: the range bound is computed once, before any insert below, so fields
# pushed past the original length by an insert are never visited.
for i in range(7, len(game_data)):
    if game_data[i] == "DOME": # corrects for website inconsistency
        # Pad with a dummy field after "DOME" so later fields stay on the cycle.
        game_data.insert(i + 1, "if it works it works")
    if game_data[i] == "-- BYE --":
        # Stop at the bye marker; nothing after it is parsed.
        break
    if i % 10 == 7:
        away = game_data[i]
    elif i % 10 == 0:
        home = game_data[i]
        # Placeholder scores: these games have not been played.
        awayScore = 0
        homeScore = 0
        # Team stats come from the global TEAMS dict, which must already be filled.
        awayDR = TEAMS[away].dRank
        awayOR = TEAMS[away].oRank
        awayPD = TEAMS[away].diff
        homeDR = TEAMS[home].dRank
        homeOR = TEAMS[home].oRank
        homePD = TEAMS[home].diff
        WEEK_6_GAMES.append(Game(week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, awayScore - homeScore))
    else:
        pass

# One row per upcoming week-6 game, with the same columns as GAMES_DF.
# Scores and Result are the placeholder zeros stored on each Game.
WEEK_6_DF = pd.DataFrame({
    'Week': [g.week for g in WEEK_6_GAMES],
    'Away': [g.away for g in WEEK_6_GAMES],
    'Away Score': [g.awayScore for g in WEEK_6_GAMES],
    'Home': [g.home for g in WEEK_6_GAMES],
    'Home Score': [g.homeScore for g in WEEK_6_GAMES],
    'Away DR': [g.awayDR for g in WEEK_6_GAMES],
    'Away OR': [g.awayOR for g in WEEK_6_GAMES],
    'Away PD': [g.awayPD for g in WEEK_6_GAMES],
    'Home DR': [g.homeDR for g in WEEK_6_GAMES],
    'Home OR': [g.homeOR for g in WEEK_6_GAMES],
    'Home PD': [g.homePD for g in WEEK_6_GAMES],
    'Result': [g.result for g in WEEK_6_GAMES],
})

# Register each week-6 game on both of its teams, keyed by week.
# The previous loop walked GAMES_DF / GAMES_LIST again, which only re-added
# the already registered games and never touched the week-6 ones.
for upcoming in WEEK_6_GAMES:
    TEAMS[upcoming.away].addGame(upcoming.week, upcoming)
    TEAMS[upcoming.home].addGame(upcoming.week, upcoming)

In [15]:
# Predict week 6! Train on every completed game, then score the upcoming ones.
feature_cols = ["Away DR", "Away OR", "Away PD", "Home DR", "Home OR", "Home PD"]
X_train = GAMES_DF[feature_cols]
y_train = GAMES_DF[["Result"]]
X_test = WEEK_6_DF[feature_cols]
# there is no y test because the games havent happened yet :)

mdl = LinearRegression().fit(X_train, y_train)
# mdl = DecisionTreeClassifier().fit(X_train, y_train)

preds = mdl.predict(X_test)

analysis = pd.DataFrame({
    "away": WEEK_6_DF["Away"],
    "home": WEEK_6_DF["Home"],
})
# y_train is 2-D, so predict() returns shape (n, 1). Flatten before float():
# converting a one-element array to a scalar is deprecated in recent NumPy.
analysis["my pred"] = [round(float(pred), 2) for pred in preds.ravel()]
analysis["spread pred"] = WEEK_6_SPREADS  # must list one spread per week-6 game, in row order

# Bet only when the model is more extreme than the spread, on the spread's side of zero.
spread = analysis["spread pred"]
mine = analysis["my pred"]
analysis["make the bet?"] = ((spread > 0) & (mine > spread)) | ((spread <= 0) & (mine < spread))

# Filter first and copy second so adding a column does not trigger
# pandas' SettingWithCopyWarning.
bets = analysis[analysis["make the bet?"]].copy()
bets["bet confidence"] = (bets["my pred"] - bets["spread pred"]).abs()
# "won the bet?" cannot be computed until the games are played and a "true"
# column with the actual away-minus-home margins exists.
bets

Unnamed: 0,away,home,my pred,spread pred,make the bet?,bet confidence
0,TB,PHI,9.65,6.5,True,3.15
5,CIN,DET,10.37,3.5,True,6.87
10,LV,DEN,-7.87,-3.5,True,4.37
11,DAL,NE,12.09,4.0,True,8.09
13,BUF,TEN,17.83,5.5,True,12.33


In [319]:
# TODO
# 1) organize code!
# 2) implement rbsdm to make a more complex model, consider power rankings and defensive stats