# Predicting NFL Win Totals

## BASELINE

## Constraints
- years: 2009-present
- all 32 teams


## Model
- Random Forest

## Dependent Variable
- Current Year's Win Total

## Independent Variables
- [x] Previous Year's Strength of Schedule
- [ ] Current Year's Market-Priced Strength of Schedule
- Previous Year's Relative Performance (EPA standardized w.r.t. the year's mean team EPA)

In [1]:
# import python packages & libraries
import numpy as np
import pandas as pd
import nfl_data_py as nfl

In [35]:
# Full modelling window (2009-2021); left disabled while the pipeline is being debugged.
# YEARS = list(range(2009, 2022))
# Two-season subset used by the cells below.
YEARS_debug = list(range(2019, 2021))

# Get Data
- Previous Year's Strength of Schedule
    - Each opposing team's previous year's Win PCT
        - Obtain the `result` of every regular-season game (`game_type` = `REG`) from the schedule (ref: data dictionary https://github.com/nflverse/nflreadr/blob/main/data-raw/dictionary_schedules.csv)
        - Transform each schedule `result` into a win, loss, or tie
        - Group by year and team to obtain the sum of wins / count of games played
- Current Year's Market-Priced Strength of Schedule
    - Each opposing team's expected Win PCT
- Previous Year's Relative Performance
    - Team's EPA standardized w.r.t. the year's mean team EPA
    

## Previous Year's Strength of Schedule

In [36]:
# Load the NFL schedule for the seasons selected in YEARS_debug.
nfl_schedule = nfl.import_schedules(YEARS_debug)

In [37]:
# Keep regular-season games only, restricted to the columns used to derive
# each team's win/loss/tie record.
regular_season_mask = nfl_schedule['game_type'] == 'REG'
record_source_columns = ['game_id', 'season', 'game_type', 'home_team', 'away_team', 'result']
nfl_wins = nfl_schedule.loc[regular_season_mask, record_source_columns]

In [38]:
# Reshape from one row per game to one row per (game, team): the
# `home_team` / `away_team` columns are stacked into `team_name`, and
# `home_away` records which of the two columns each row came from.
# `var_name` is passed as a scalar string: pandas documents it as a scalar,
# and newer releases raise ValueError for a list when the columns are not a
# MultiIndex.
nfl_wins = pd.melt(
    nfl_wins,
    id_vars=['game_id', 'season', 'game_type', 'result'],
    value_vars=['home_team', 'away_team'],
    var_name='home_away',
    value_name='team_name',
)

In [39]:
# Flag the rows where this row's team won. A positive `result` is treated as
# the home side scoring more, a negative one as the away side scoring more.
# Ties (result == 0) and rows matching neither case get 0.
row_is_home = nfl_wins['home_away'] == 'home_team'
row_is_away = nfl_wins['home_away'] == 'away_team'
home_side_won = (nfl_wins['result'] > 0) & row_is_home
away_side_won = (nfl_wins['result'] < 0) & row_is_away
nfl_wins['win'] = (home_side_won | away_side_won).astype(int)

In [40]:
# Flag the rows where this row's team lost: the home side with a negative
# `result`, or the away side with a positive one. Ties (result == 0) and
# rows matching neither case get 0.
row_is_home = nfl_wins['home_away'] == 'home_team'
row_is_away = nfl_wins['home_away'] == 'away_team'
home_side_lost = (nfl_wins['result'] < 0) & row_is_home
away_side_lost = (nfl_wins['result'] > 0) & row_is_away
nfl_wins['loss'] = (home_side_lost | away_side_lost).astype(int)

In [41]:
# Flag tied games: 1 when `result` is exactly zero, 0 otherwise. Both rows of
# a game (home and away) receive the same flag.
game_was_tied = nfl_wins['result'] == 0
nfl_wins['tie'] = game_was_tied.astype(int)

In [42]:
# Season record per team: total wins, losses and ties.
nfl_records = (
    nfl_wins
    .groupby(['team_name', 'season'])[['win', 'loss', 'tie']]
    .sum()
    .reset_index()
)
nfl_records

Unnamed: 0,team_name,season,win,loss,tie
0,ARI,2019,5,10,1
1,ARI,2020,8,8,0
2,ATL,2019,7,9,0
3,ATL,2020,4,12,0
4,BAL,2019,14,2,0
...,...,...,...,...,...
59,TB,2020,11,5,0
60,TEN,2019,9,7,0
61,TEN,2020,11,5,0
62,WAS,2019,3,13,0


In [43]:
# Independent (deep) copy of the per-team game rows; the opponent-matching
# cells below build on `wins2`.
wins2 = nfl_wins.copy(deep=True)

In [11]:
# For every game, collect the teams appearing in its rows into a list
# (column `game_teams`), keyed by `game_id`.
temp = (
    wins2
    .groupby('game_id')['team_name']
    .agg(pd.Series.tolist)
    .rename('game_teams')
    .reset_index()
)
temp

Unnamed: 0,game_id,game_teams
0,2014_01_BUF_CHI,"[CHI, BUF]"
1,2014_01_CAR_TB,"[TB, CAR]"
2,2014_01_CIN_BAL,"[BAL, CIN]"
3,2014_01_CLE_PIT,"[PIT, CLE]"
4,2014_01_GB_SEA,"[SEA, GB]"
...,...,...
507,2015_17_SEA_ARI,"[ARI, SEA]"
508,2015_17_STL_SF,"[SF, STL]"
509,2015_17_TB_CAR,"[CAR, TB]"
510,2015_17_TEN_IND,"[IND, TEN]"


In [12]:
# Attach each game's team list to every row of that game.
wins2 = wins2.merge(temp, on='game_id')

In [13]:
# The opponent is whichever entry of `game_teams` is not this row's own team:
# if the row's team is the first listed, take the second, otherwise the first.
first_listed = wins2['game_teams'].str[0]
second_listed = wins2['game_teams'].str[1]
row_team_is_first = wins2['team_name'] == first_listed
wins2['opposing_team'] = np.where(row_team_is_first, second_listed, first_listed)

In [14]:
# Display the per-team game rows, including the `game_teams` and
# `opposing_team` columns added above.
wins2

Unnamed: 0,game_id,season,game_type,result,home_away,team_name,win,loss,tie,game_teams,opposing_team
0,2014_01_GB_SEA,2014,REG,20.0,home_team,SEA,1,0,0,"[SEA, GB]",GB
1,2014_01_GB_SEA,2014,REG,20.0,away_team,GB,0,1,0,"[SEA, GB]",SEA
2,2014_01_NO_ATL,2014,REG,3.0,home_team,ATL,1,0,0,"[ATL, NO]",NO
3,2014_01_NO_ATL,2014,REG,3.0,away_team,NO,0,1,0,"[ATL, NO]",ATL
4,2014_01_CIN_BAL,2014,REG,-7.0,home_team,BAL,0,1,0,"[BAL, CIN]",CIN
...,...,...,...,...,...,...,...,...,...,...,...
1019,2015_17_OAK_KC,2015,REG,6.0,away_team,OAK,0,1,0,"[KC, OAK]",KC
1020,2015_17_STL_SF,2015,REG,3.0,home_team,SF,1,0,0,"[SF, STL]",STL
1021,2015_17_STL_SF,2015,REG,3.0,away_team,STL,0,1,0,"[SF, STL]",SF
1022,2015_17_MIN_GB,2015,REG,-7.0,home_team,GB,0,1,0,"[GB, MIN]",MIN


In [15]:
# Re-key each team's season record so it can be joined as "the opponent's
# previous season": `season_n` is the season after the record's own season,
# and the record columns are renamed with a `season_n-1` prefix.
nfl_records = nfl_records.assign(season_n=nfl_records['season'] + 1).rename(
    columns={
        'season': 'season_n-1',
        'team_name': 'matching_opposing_team_name',
        'win': 'season_n-1_win_total',
        'loss': 'season_n-1_loss_total',
        'tie': 'season_n-1_tie_total',
    }
)
# Games played = wins + losses + ties.
outcome_columns = ['season_n-1_win_total', 'season_n-1_loss_total', 'season_n-1_tie_total']
nfl_records['season_n-1_games_played'] = nfl_records[outcome_columns].sum(axis=1)
nfl_records

Unnamed: 0,matching_opposing_team_name,season_n-1,season_n-1_win_total,season_n-1_loss_total,season_n-1_tie_total,season_n,season_n-1_games_played
0,ARI,2014,11,5,0,2015,16
1,ARI,2015,13,3,0,2016,16
2,ATL,2014,6,10,0,2015,16
3,ATL,2015,8,8,0,2016,16
4,BAL,2014,10,6,0,2015,16
...,...,...,...,...,...,...,...
59,TB,2015,6,10,0,2016,16
60,TEN,2014,2,14,0,2015,16
61,TEN,2015,3,13,0,2016,16
62,WAS,2014,4,12,0,2015,16


In [16]:
# Adjust for team relocations: for the listed season, the record stored under
# the old abbreviation is relabelled with the new one (presumably so it
# matches the `opposing_team` code in the following season's schedule).
relocations = (
    ('OAK', 2019, 'LV'),   # OAK -> LV
    ('SD', 2016, 'LAC'),   # SD -> LAC
    ('STL', 2015, 'LA'),   # STL -> LA
)
for old_code, last_season, new_code in relocations:
    is_old_code = nfl_records['matching_opposing_team_name'] == old_code
    is_last_season = nfl_records['season_n-1'] == last_season
    nfl_records.loc[is_old_code & is_last_season, 'matching_opposing_team_name'] = new_code


In [19]:
# Sanity check on the season records: every team-season before 2021 must
# total 16 games (wins + losses + ties), and every one from 2021 on must
# total 17.
games_played = nfl_records['season_n-1_games_played']
is_pre_2021 = nfl_records['season_n-1'] < 2021
is_2021_onward = nfl_records['season_n-1'] >= 2021
assert (games_played[is_pre_2021] == 16).all(), "Not all opposing_team season_n-1_games_played are accounted"
assert (games_played[is_2021_onward] == 17).all(), "Not all opposing_team season_n-1_games_played are accounted"

In [20]:
# Join every game row to its opponent's record from the previous season:
# the game's `season` is matched against the record's `season_n`, and the
# game's `opposing_team` against the record's team name. This is the default
# (inner) merge, so rows without a matching record are dropped.
win_pct = wins2.merge(
    nfl_records,
    left_on=['season', 'opposing_team'],
    right_on=['season_n', 'matching_opposing_team_name'],
)

In [21]:
# Per team and season, total up the previous-season wins, losses, ties and
# games played across all of that team's opponents.
opponent_total_columns = [
    'season_n-1_win_total',
    'season_n-1_loss_total',
    'season_n-1_tie_total',
    'season_n-1_games_played',
]
wins3 = (
    win_pct
    .groupby(['season_n', 'team_name'])[opponent_total_columns]
    .sum()
    .reset_index()
)

In [22]:
# Label the summed columns as opponent totals, then compute the opponents'
# combined previous-season win percentage, counting each tie as half a win.
wins3 = wins3.rename(
    columns={
        'season_n-1_win_total': 'season_n-1_opposing_win_total',
        'season_n-1_loss_total': 'season_n-1_opposing_loss_total',
        'season_n-1_tie_total': 'season_n-1_opposing_tie_total',
    }
)
opposing_wins = wins3['season_n-1_opposing_win_total']
opposing_ties = wins3['season_n-1_opposing_tie_total']
wins3['season_n-1_opposing_win_pct'] = (opposing_wins + 0.5 * opposing_ties) / wins3['season_n-1_games_played']

In [24]:
# Sanity check on the season records: 16 games per team-season before 2021,
# 17 from 2021 on.
# NOTE(review): this inspects `nfl_records`, not the `wins3` frame computed
# just before it; confirm whether the aggregated opponent totals were meant
# to be validated here instead.
games_played = nfl_records['season_n-1_games_played']
is_pre_2021 = nfl_records['season_n-1'] < 2021
is_2021_onward = nfl_records['season_n-1'] >= 2021
assert (games_played[is_pre_2021] == 16).all(), "Not all opposing_team season_n-1_games_played are accounted"
assert (games_played[is_2021_onward] == 17).all(), "Not all opposing_team season_n-1_games_played are accounted"

In [34]:
# Team abbreviations ordered from the lowest to the highest previous-season
# opposing win percentage; each team is listed once, at its first appearance.
ranked_by_opposing_pct = wins3.sort_values('season_n-1_opposing_win_pct')
ranked_by_opposing_pct['team_name'].unique().tolist()

['ATL',
 'IND',
 'HOU',
 'TB',
 'NO',
 'CAR',
 'TEN',
 'JAX',
 'DAL',
 'PHI',
 'NE',
 'NYG',
 'WAS',
 'BUF',
 'NYJ',
 'MIA',
 'SD',
 'DET',
 'GB',
 'CHI',
 'MIN',
 'BAL',
 'DEN',
 'CLE',
 'KC',
 'OAK',
 'STL',
 'ARI',
 'SEA',
 'SF',
 'CIN',
 'PIT']