# Predicting NFL Win Totals

## BASELINE

## Constraints
- Years: 2009-present
- Teams: all 32


## Model
- Random Forest

## Dependent Variable
- Current Year's Win Total

## Independent Variables
- Previous Year's Strength of Schedule
- Current Year's Market-Priced Strength of Schedule
- Previous Year's Relative Performance (EPA standardized w.r.t. the year)

In [1]:
# import python packages & libraries
import numpy as np
import pandas as pd
import nfl_data_py as nfl

In [2]:
# Full list of seasons, kept commented out for reference:
# YEARS = [2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021]
# Single-season list; this is what the schedule import below is called with.
YEARS_debug = [2020]

# Get Data
- Previous Year's Strength of Schedule
    - Each opposing team's previous year's Win PCT
        - Obtain the schedule and keep each game's `result` for the regular season (`game_type` = REG) (ref: data_dict https://github.com/nflverse/nflreadr/blob/main/data-raw/dictionary_schedules.csv)
        - Transform each game's `result` into a win, loss, or tie for each team
        - Group by year and team, then divide the sum of wins by the count of games played
- Current Year's Market-Priced Strength of Schedule
    - Each opposing team's expected Win PCT
- Previous Year's Relative Performance
    - Team's EPA standardized w.r.t. that year's mean team EPA
    

## Each opposing Team's previous year's Win PCT

In [18]:
# Import the NFL schedule for the seasons listed in YEARS_debug.
# The result has one row per game and mixes regular-season and playoff game types.
nfl_schedule = nfl.import_schedules(years=YEARS_debug)

In [22]:
# Inspect the raw schedule: one row per game, with REG and playoff (DIV, CON, ...) rows together.
nfl_schedule

Unnamed: 0,game_id,season,game_type,week,gameday,weekday,gametime,away_team,away_score,home_team,...,wind,away_qb_id,home_qb_id,away_qb_name,home_qb_name,away_coach,home_coach,referee,stadium_id,stadium
5583,2020_01_HOU_KC,2020,REG,1,2020-09-10,Thursday,20:20,HOU,20.0,KC,...,7.0,00-0033537,00-0033873,Deshaun Watson,Patrick Mahomes,Bill O'Brien,Andy Reid,Clete Blakeman,KAN00,Arrowhead Stadium
5584,2020_01_SEA_ATL,2020,REG,1,2020-09-13,Sunday,13:00,SEA,38.0,ATL,...,,00-0029263,00-0026143,Russell Wilson,Matt Ryan,Pete Carroll,Dan Quinn,Shawn Hochuli,ATL97,Mercedes-Benz Stadium
5585,2020_01_CLE_BAL,2020,REG,1,2020-09-13,Sunday,13:00,CLE,6.0,BAL,...,5.0,00-0034855,00-0034796,Baker Mayfield,Lamar Jackson,Kevin Stefanski,John Harbaugh,Ronald Torbert,BAL00,M&T Bank Stadium
5586,2020_01_NYJ_BUF,2020,REG,1,2020-09-13,Sunday,13:00,NYJ,17.0,BUF,...,15.0,00-0034869,00-0034857,Sam Darnold,Josh Allen,Adam Gase,Sean McDermott,Shawn Smith,BUF00,New Era Field
5587,2020_01_LV_CAR,2020,REG,1,2020-09-13,Sunday,13:00,LV,34.0,CAR,...,5.0,00-0031280,00-0031237,Derek Carr,Teddy Bridgewater,Jon Gruden,Matt Rhule,Brad Allen,CAR00,Bank of America Stadium
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5847,2020_19_CLE_KC,2020,DIV,19,2021-01-17,Sunday,15:05,CLE,17.0,KC,...,13.0,00-0034855,00-0033873,Baker Mayfield,Patrick Mahomes,Kevin Stefanski,Andy Reid,Clay Martin,KAN00,Arrowhead Stadium
5848,2020_19_TB_NO,2020,DIV,19,2021-01-17,Sunday,18:40,TB,30.0,NO,...,,00-0019596,00-0020531,Tom Brady,Drew Brees,Bruce Arians,Sean Payton,Shawn Hochuli,NOR00,Mercedes-Benz Superdome
5849,2020_20_TB_GB,2020,CON,20,2021-01-24,Sunday,15:05,TB,31.0,GB,...,10.0,00-0019596,00-0023459,Tom Brady,Aaron Rodgers,Bruce Arians,Matt LaFleur,Clete Blakeman,GNB00,Lambeau Field
5850,2020_20_BUF_KC,2020,CON,20,2021-01-24,Sunday,18:40,BUF,24.0,KC,...,7.0,00-0034857,00-0033873,Josh Allen,Patrick Mahomes,Sean McDermott,Andy Reid,Bill Vinovich,KAN00,Arrowhead Stadium


In [26]:
# Keep regular-season games only, and just the columns needed to score wins.
regular_season_only = nfl_schedule['game_type'] == 'REG'
win_columns = ['game_id', 'season', 'game_type', 'home_team', 'away_team', 'result']
nfl_wins = nfl_schedule.loc[regular_season_only, win_columns]

In [27]:
# Reshape from one row per game to one row per team per game: the home_team and
# away_team columns are stacked into `team_name`, and `home_away` records which
# of the two columns each row came from.
# `var_name` is given as a scalar, which is what pandas.melt documents; a
# one-element list can be rejected by recent pandas versions when the columns
# are not a MultiIndex.
nfl_wins = pd.melt(
    nfl_wins,
    id_vars=['game_id', 'season', 'game_type', 'result'],
    value_vars=['home_team', 'away_team'],
    var_name='home_away',
    value_name='team_name',
)

In [29]:
# Score every team-row from `result`. In the rows displayed in this notebook a
# positive `result` goes with a home win, so it is read as home score minus away score.
# A win is worth 1, a tie 0.5 to each team, and a loss 0.
home_team_score_more = (nfl_wins.result > 0) & (nfl_wins.home_away == 'home_team')
away_team_score_more = (nfl_wins.result < 0) & (nfl_wins.home_away == 'away_team')
team_score_tie = (nfl_wins.result == 0)
# A missing `result` means there is no final score to grade (e.g. a game that has
# not been played yet). Those rows get NaN instead of falling through to the loss
# default, so the later sum/count aggregation skips them rather than counting
# them as losses and as games played.
game_not_scored = nfl_wins.result.isna()
home_win = 1
away_win = 1
tie = 0.5
not_scored = np.nan
conditions = [home_team_score_more, away_team_score_more, team_score_tie, game_not_scored]
results = [home_win, away_win, tie, not_scored]
nfl_wins['win'] = np.select(conditions, results, default=0)

In [7]:
# Season record per team: sum the win credits and count the game rows,
# producing one output row per (team_name, season).
nfl_records = (
    nfl_wins
    .groupby(['team_name', 'season'])['win']
    .agg(win_total='sum', games_played='count')
    .reset_index()
)

In [13]:

# Win percentage = win credits (ties count as 0.5) divided by games played.
nfl_records['win_pct'] = nfl_records['win_total'].div(nfl_records['games_played'])

In [16]:
# Inspect the per-team, per-season record table (win_total, games_played, win_pct).
nfl_records

Unnamed: 0,team_name,season,win_total,games_played,win_pct
0,ARI,2020,8.0,16,0.5
1,ATL,2020,4.0,16,0.25
2,BAL,2020,11.0,16,0.6875
3,BUF,2020,13.0,16,0.8125
4,CAR,2020,5.0,16,0.3125
5,CHI,2020,8.0,16,0.5
6,CIN,2020,4.5,16,0.28125
7,CLE,2020,11.0,16,0.6875
8,DAL,2020,6.0,16,0.375
9,DEN,2020,5.0,16,0.3125


In [66]:
# Work on a copy so that the columns added while deriving each row's opponent
# do not modify nfl_wins.
wins2 = nfl_wins.copy()

In [67]:
# For every game_id, gather the team_name values of its rows into a list.
temp = wins2.groupby('game_id')['team_name'].agg(list)

In [68]:
# Attach each game's team list to both of its rows. `team_name` exists on both
# sides of the join, so it is suffixed: team_name_x is the row's own team and
# team_name_y is the per-game list.
teams_by_game = temp.reset_index()
wins2 = wins2.merge(teams_by_game, how='inner', on='game_id', suffixes=('_x', '_y'))

In [69]:
# Check the merge: team_name_x is the row's own team, team_name_y lists both teams in the game.
wins2

Unnamed: 0,game_id,season,game_type,result,home_away,team_name_x,win,team_name_y
0,2020_01_HOU_KC,2020,REG,14.0,home_team,KC,1.0,"[KC, HOU]"
1,2020_01_HOU_KC,2020,REG,14.0,away_team,HOU,0.0,"[KC, HOU]"
2,2020_01_SEA_ATL,2020,REG,-13.0,home_team,ATL,0.0,"[ATL, SEA]"
3,2020_01_SEA_ATL,2020,REG,-13.0,away_team,SEA,1.0,"[ATL, SEA]"
4,2020_01_CLE_BAL,2020,REG,32.0,home_team,BAL,1.0,"[BAL, CLE]"
...,...,...,...,...,...,...,...,...
507,2020_17_ARI_LA,2020,REG,11.0,away_team,ARI,0.0,"[LA, ARI]"
508,2020_17_SEA_SF,2020,REG,-3.0,home_team,SF,0.0,"[SF, SEA]"
509,2020_17_SEA_SF,2020,REG,-3.0,away_team,SEA,1.0,"[SF, SEA]"
510,2020_17_WAS_PHI,2020,REG,-6.0,home_team,PHI,0.0,"[PHI, WAS]"


In [74]:
# team_name_y holds the two teams of the game; the opponent is whichever entry
# is not this row's own team (team_name_x).
first_listed = wins2.team_name_y.str[0]
second_listed = wins2.team_name_y.str[1]
own_team_is_first = wins2.team_name_x == first_listed
wins2['opposing_team'] = np.where(own_team_is_first, second_listed, first_listed)

In [75]:
# Confirm that opposing_team is the entry of team_name_y that differs from team_name_x.
wins2

Unnamed: 0,game_id,season,game_type,result,home_away,team_name_x,win,team_name_y,opposing_team
0,2020_01_HOU_KC,2020,REG,14.0,home_team,KC,1.0,"[KC, HOU]",HOU
1,2020_01_HOU_KC,2020,REG,14.0,away_team,HOU,0.0,"[KC, HOU]",KC
2,2020_01_SEA_ATL,2020,REG,-13.0,home_team,ATL,0.0,"[ATL, SEA]",SEA
3,2020_01_SEA_ATL,2020,REG,-13.0,away_team,SEA,1.0,"[ATL, SEA]",ATL
4,2020_01_CLE_BAL,2020,REG,32.0,home_team,BAL,1.0,"[BAL, CLE]",CLE
...,...,...,...,...,...,...,...,...,...
507,2020_17_ARI_LA,2020,REG,11.0,away_team,ARI,0.0,"[LA, ARI]",LA
508,2020_17_SEA_SF,2020,REG,-3.0,home_team,SF,0.0,"[SF, SEA]",SEA
509,2020_17_SEA_SF,2020,REG,-3.0,away_team,SEA,1.0,"[SF, SEA]",SF
510,2020_17_WAS_PHI,2020,REG,-6.0,home_team,PHI,0.0,"[PHI, WAS]",WAS
