# NFL GAME PREDICTOR

https://nflgamedata.com/schedule.php?season=2021&week=1

https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22DAL%22&home=%22TB%22&year=%222021%22

In [82]:
# ignore warnings :)
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
import requests
import bs4
from bs4 import BeautifulSoup
import time

# models
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [83]:
# GLOBAL CONSTANTS

# Team abbreviations recognised when scanning the scraped schedule text.
ALL_TEAMS = [
    "BUF", "NE", "MIA", "NYJ",
    "BAL", "CIN", "CLE", "PIT",
    "TEN", "HOU", "IND", "JAX",
    "LAC", "DEN", "LV", "KC",
    "DAL", "WAS", "PHI", "NYG",
    "GB", "CHI", "MIN", "DET",
    "TB", "CAR", "NO", "ATL",
    "ARI", "LA", "SEA", "SF",
]

# Week whose summary page is scraped by populateTeams and whose games are
# predicted at the bottom of the notebook.
CURRENT_WEEK = 13

# Hand-entered spreads keyed by week number; predictWeek assigns each list
# positionally to that week's games and compares it with the predicted
# (away - home) margin.
# NOTE(review): sign convention and game ordering are presumably chosen to
# match that -- confirm when adding a week.
SPREADS = {
    5: [1.5, -2.5, -3, 3, 8.5, 4.5, -10, 1.5, -9.5, 2.5, -2.5, -5.5, -5.5, -7, -3, -7],
    6: [6.5, 3.5, -3, 1, 4.5, 3.5, -9.5, 10.5, 7, -2.5, -3.5, 4, -4.5, 5.5],
    7: [-3.5, -6, -9.5, 2.5, -7.5, 3, 5.5, -15, -3, -18.5, -12.5, -4, 4.5],
    8: [-6, -3, -13.5, 3, -3.5, 3, 14.5, 1.5, 9, 5.5, 0, -3, 0, 2.5, 0],
    9: [],  # no spreads were recorded for week 9
    10: [7.5, -10, -10.5, -1, 13, -9, -3, 10, -10, -2.5, -2.5, -3.5, 2.5, 4],
    13: [0, 11, 7.5, -3, 7.5, 10, -6, 7, -1, -13, 4, 3.5, -10, -2.5],
}

week = CURRENT_WEEK

# Mutable module-level state filled in by the scraping functions.
GAMES_LIST = []       # Game objects, appended by populateGames
WEEK_6_GAMES = []     # not referenced anywhere else in the visible code
TEAMS = {}            # team abbreviation -> Team, filled by populateTeams
GAMES_DF = pd.DataFrame()

In [84]:
class Team:
    """Season summary for one team plus the games it has played.

    Args:
        name: Team abbreviation (used as the key in TEAMS).
        wins: Number of wins.
        losses: Number of losses.
        diff: Point differential.
        oRank: Offensive rank.
        dRank: Defensive rank.
    """

    def __init__(self, name, wins, losses, diff, oRank, dRank):
        self.name = name
        self.wins = wins
        self.losses = losses
        self.diff = diff
        self.oRank = oRank
        self.dRank = dRank

        # getRanking() and __str__ read these, but nothing in this file ever
        # assigns them; default to None so those methods do not raise
        # AttributeError. Callers may still set them after construction.
        self.elo = None
        self.ranking = None

        self.games = {}  # indexed by week

    def addGame(self, week, game):
        """Record `game` as this team's game for `week`, replacing any existing entry."""
        self.games[week] = game

    def getRanking(self):
        """Return the team's ranking, or None if it was never set."""
        return self.ranking

    def __str__(self):
        return "{}, ELO: {} | W-L: {}-{} | PD: {} | OR: {} | DR: {}".format(
            self.name, self.elo, self.wins, self.losses, self.diff, self.oRank, self.dRank
        )
        

In [85]:
class Game:
    """One scheduled or played game together with both teams' season-level stats.

    All constructor arguments are stored unchanged on attributes of the same
    name. `result` is whatever margin the caller supplies; populateGames in
    this file passes awayScore - homeScore.
    """

    def __init__(self, week, away, awayScore, home, homeScore, awayDR, awayOR, awayPD, homeDR, homeOR, homePD, result):
        self.week = week

        # Who played and what they scored.
        self.away, self.awayScore = away, awayScore
        self.home, self.homeScore = home, homeScore

        # Away team's defensive rank, offensive rank and point differential.
        self.awayDR, self.awayOR, self.awayPD = awayDR, awayOR, awayPD
        # Same three stats for the home team.
        self.homeDR, self.homeOR, self.homePD = homeDR, homeOR, homePD

        self.result = result

    def __str__(self):
        template = "AWAY: {}: {} | HOME: {}: {} | RESULT IS HOME BY: {}"
        fields = (self.away, self.awayScore, self.home, self.homeScore, self.result)
        return template.format(*fields)

In [86]:
# def pullMatchups(week):
#     time.sleep(1) # FOR SAKE OF NFLGAMEDATA
#     url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
#     table = pd.read_html(url)
#     matchups = table[2][1]
#     matchups = " ".join(matchups).split(" ")

#     this_week = []
#     for i in range(len(matchups)):
#         if matchups[i] in ALL_TEAMS:
#             this_week.append(matchups[i])

#     games = []
#     for i in range(0, len(this_week), 2):
#         games.append((this_week[i], this_week[i+1]))

#     return games

# # m = pullMatchups(week = 2)

# # for matchup in m:
# #     url = "https://rbsdm.com/stats/box_scores/?_inputs_&type=%22reg%22&away=%22{}%22&home=%22{}%22&year=%222021%22"
# #     url = url.format(matchup[0], matchup[1])

# #     r = requests.get(url)
# #     bs = BeautifulSoup(r.content)
# #     bs 

# # TODO, pull stats from rbsdm

In [87]:
def populateTeams():
    """Scrape the CURRENT_WEEK schedule page and fill TEAMS with Team objects.

    The first column of the third table on the page is flattened into one
    string and split on double spaces. From cell 10 onwards the cells are
    read as repeating groups of six, selected by ``index % 6``:
    4 = rank (ignored), 5 = name, 0 = "W-L" record, 1 = point differential,
    2 = offensive rank, 3 = defensive rank. A Team is stored in TEAMS[name]
    each time the defensive-rank cell is reached.

    NOTE(review): the table position and the 10-cell offset depend on the
    page layout of nflgamedata.com and cannot be verified from this file.
    """
    page = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(CURRENT_WEEK)
    tables = pd.read_html(page)
    cells = " ".join(tables[2][0]).split("  ")

    for idx in range(10, len(cells)):
        slot = idx % 6
        cell = cells[idx]

        if slot == 4:
            continue  # rank seems useless, i'll use w/l and pd

        if slot == 5:
            name = cell
        elif slot == 0:
            record = cell.split("-")
            wins = int(record[0])
            losses = int(record[1])
        elif slot == 1:
            diff = int(cell)
        elif slot == 2:
            oRank = int(cell)
        else:  # slot == 3: last field of the group, so the team is complete
            dRank = int(cell)
            TEAMS[name] = Team(name, wins, losses, diff, oRank, dRank)

In [88]:
def populateGames(week):
    """Scrape one week's schedule page and append a Game per matchup to GAMES_LIST.

    The second column of the third table on the page is flattened into one
    string and split on double spaces. Tokens that are team abbreviations
    alternate away, home, away, home...; the away score is read from the
    token after the away team and the home score from the token before the
    home team. A score token that is not an integer is recorded as 0.
    Scanning stops at the first "-- BYE --" token.

    Requires TEAMS to be populated already (see populateTeams); each Game
    copies both teams' dRank / oRank / diff and stores
    awayScore - homeScore as its result.

    Args:
        week: Week number substituted into the schedule URL and stored on each Game.
    """
    # A one-second time.sleep was previously used here to go easy on nflgamedata.com.
    url = "https://nflgamedata.com/schedule.php?season=2021&week={}".format(week)
    tables = pd.read_html(url)
    tokens = " ".join(tables[2][1]).split("  ")

    if week == 1:  # corrects inconsistency in nflgamedata.com
        tokens.insert(14, "if it works it works")

    expecting_away = True
    for pos, token in enumerate(tokens):
        if "-- BYE --" in str(token):
            break
        if token not in ALL_TEAMS:
            continue
        if "-- BYE --" in str(tokens[pos + 1]):
            break

        if expecting_away:
            away = token
            try:
                awayScore = int(tokens[pos + 1])
            except ValueError:
                awayScore = 0
            expecting_away = False
            continue

        # Second team of the pair: the matchup is complete.
        home = token
        try:
            homeScore = int(tokens[pos - 1])
        except ValueError:
            homeScore = 0

        visitor = TEAMS[away]
        host = TEAMS[home]
        GAMES_LIST.append(
            Game(
                week,
                away,
                awayScore,
                home,
                homeScore,
                visitor.dRank,
                visitor.oRank,
                visitor.diff,
                host.dRank,
                host.oRank,
                host.diff,
                awayScore - homeScore,
            )
        )
        expecting_away = True

In [89]:
def getDataUntilWeek(i):
    """Scrape team summaries and every game for weeks 1..i, and return the games as a DataFrame.

    Side effects: repopulates TEAMS (via populateTeams), rebuilds GAMES_LIST
    from scratch, and registers each Game on both participating Team objects.
    The module-level GAMES_DF is NOT updated; use the return value.

    Args:
        i: Last week (inclusive) to scrape.

    Returns:
        DataFrame with one row per game, in GAMES_LIST order, with columns
        Week, Away, Away Score, Home, Home Score, Away DR, Away OR, Away PD,
        Home DR, Home OR, Home PD, Result.
    """
    # populateGames only ever appends to the module-level GAMES_LIST, so a
    # second call (e.g. re-running the notebook cell) would duplicate every
    # game row. Clear in place so other references to the list stay valid.
    GAMES_LIST.clear()

    populateTeams()
    for week_no in range(1, i + 1):
        populateGames(week_no)

    # DataFrame column name -> Game attribute, in output column order.
    column_attrs = {
        'Week': 'week',
        'Away': 'away',
        'Away Score': 'awayScore',
        'Home': 'home',
        'Home Score': 'homeScore',
        'Away DR': 'awayDR',
        'Away OR': 'awayOR',
        'Away PD': 'awayPD',
        'Home DR': 'homeDR',
        'Home OR': 'homeOR',
        'Home PD': 'homePD',
        'Result': 'result',
    }
    games_df = pd.DataFrame(
        {
            column: [getattr(game, attr) for game in GAMES_LIST]
            for column, attr in column_attrs.items()
        }
    )

    # populating Team.games for team in TEAMS
    for game in GAMES_LIST:
        TEAMS[game.away].addGame(game.week, game)
        TEAMS[game.home].addGame(game.week, game)

    return games_df

In [90]:
def predictWeek(week):
    """Fit linear models on all games before `week` and predict that week's games.

    Three LinearRegression models are trained on the six rank / point-
    differential features: one for the result margin (away - home, as stored
    by populateGames), one for the away score and one for the home score.
    The fitted coefficients of the margin model are printed.

    Args:
        week: Week to predict. Games with Week < week are the training set;
            games with Week == week are predicted.

    Returns:
        DataFrame indexed like the predicted week's rows of the games table,
        with columns: away, true away score, pred away, pred home,
        true home score, home, true result, pred result, spread pred,
        spread pred error. The two spread columns are NaN when SPREADS has
        no entry for `week` or its length does not match the number of games.

    NOTE(review): with no earlier games (week == 1) or no games in `week`,
    the underlying fit/predict calls are expected to raise -- confirm.
    """
    GAMES_DF = getDataUntilWeek(week)

    features = ["Away DR", "Away OR", "Away PD", "Home DR", "Home OR", "Home PD"]

    # Select rows by week value rather than by position, so the test rows are
    # right even if the frame is not strictly ordered by week.
    past = GAMES_DF[GAMES_DF["Week"] < week]
    current = GAMES_DF[GAMES_DF["Week"] == week]

    X_train = past[features]
    X_test = current[features]

    mdl_result = LinearRegression().fit(X_train, past["Result"])
    # temporary start
    for feature, coef in zip(features, mdl_result.coef_):
        print(feature, coef)
    # temporary end
    mdl_away = LinearRegression().fit(X_train, past["Away Score"])
    mdl_home = LinearRegression().fit(X_train, past["Home Score"])

    preds_result = mdl_result.predict(X_test)
    preds_away = mdl_away.predict(X_test)
    preds_home = mdl_home.predict(X_test)

    analysis = pd.DataFrame(index=current.index)
    analysis["away"] = current["Away"]
    analysis["true away score"] = current["Away Score"]
    analysis["pred away"] = [round(float(pred), 2) for pred in preds_away]
    analysis["pred home"] = [round(float(pred), 2) for pred in preds_home]
    analysis["true home score"] = current["Home Score"]
    analysis["home"] = current["Home"]
    analysis["true result"] = current["Result"]
    analysis["pred result"] = [round(float(pred), 2) for pred in preds_result]

    # Spreads are entered by hand and are missing for some weeks (and empty
    # for week 9). The predictions do not depend on them, so fall back to NaN
    # instead of failing with KeyError / a length-mismatch ValueError.
    spreads = SPREADS.get(week)
    if spreads is None or len(spreads) != len(current):
        spreads = [float("nan")] * len(current)
    analysis["spread pred"] = spreads
    analysis["spread pred error"] = (analysis["pred result"] - analysis["spread pred"]).abs()
    return analysis


# CURRENT WEEK:

In [91]:
# Train on every week before CURRENT_WEEK and predict CURRENT_WEEK's games.
# predictWeek scrapes nflgamedata.com (via getDataUntilWeek), so this needs
# network access; it also prints the margin model's coefficients.
WEEK_13_BETS = predictWeek(CURRENT_WEEK)

Away DR 0.08204922760402508
Away OR 0.022505553979906043
Away PD 0.10248319184949178
Home DR 0.11129677815644491
Home OR 0.23005881084885585
Home PD -0.02855913962017686


In [92]:
# Bare expression: shows the prediction table as the notebook cell's output.
WEEK_13_BETS

Unnamed: 0,away,true away score,pred away,pred home,true home score,home,true result,pred result,spread pred,spread pred error
180,DAL,27,25.93,18.51,17,NO,10,7.42,0.0,7.42
181,TB,-11,30.76,15.86,0,ATL,-11,14.89,11.0,3.89
182,ARI,0,30.16,15.89,0,CHI,0,14.27,7.5,6.77
183,LAC,0,22.85,28.91,-3,CIN,3,-6.06,-3.0,3.06
184,MIN,0,25.8,17.88,0,DET,0,7.92,7.5,0.42
185,IND,-10,28.72,15.53,0,HOU,-10,13.19,10.0,3.19
186,NYG,0,19.22,22.31,-6,MIA,6,-3.09,-6.0,2.91
187,PHI,-7,26.25,16.21,0,NYJ,-7,10.04,7.0,3.04
188,WAS,0,21.62,23.91,-1,LV,1,-2.29,-1.0,1.29
189,JAX,0,16.56,30.34,-13,LA,13,-13.77,-13.0,0.77


In [93]:
# WEEK 13 BETS: patriots, ravens