In [4]:
import pandas as pd
import datetime as dt
import sys
sys.path.append('../')
import pipeline as p

### Print out model cols so we can copy to 'forecast.py'

In [5]:
# Load the saved list of model feature column names via the project's pipeline helper
# (presumably a thin pickle loader — confirm in pipeline.py).
model_cols = p.open_pkl('data/model_cols.pkl')

In [6]:
model_cols

['PenY_SA1',
 'elo1',
 'PtsOpp_SA1',
 'RushAtt_SA1',
 'PtsTm_SA2',
 'elo2',
 'Penalies_SA2',
 'TimePossMins1',
 'FirstD_SA2',
 'playoff',
 'PtsOpp_SA2',
 'WinPct1',
 'TimePossMins2',
 'FourthDAtt_SA1',
 'WinPct2']

### Archive files

In [None]:
# Keep the previous games file under an `_old` name, since data/nfl_games.csv is rewritten later in this notebook.
# NOTE(review): plain `mv` silently replaces an existing data/nfl_games_old.csv.
!mv data/nfl_games.csv data/nfl_games_old.csv

### Reset initial Elos:

In [2]:
# Read the current initial-Elo table (columns `team` and `elo`) so it can be inspected before it is replaced.
init_elos = pd.read_csv('data/initial_elos.csv')

In [3]:
init_elos.head()

Unnamed: 0,team,elo
0,RII,1503.947
1,STP,1300.0
2,BFF,1478.004
3,WBU,1300.0
4,RCH,1503.42


In [6]:
# Load the per-game model frame via the project's pipeline helper. Unlike the frame used
# elsewhere, this one still contains the week 1 games (its first rows are the 2007 opener).
df2 = p.open_pkl('data/model_df2.pkl')

In [12]:
df2.head(17)

Unnamed: 0,date,season,neutral,playoff,team1,team2,elo1,elo2,elo_prob1,score1,...,INT_SA2,SacksO_SA2,Fumbles_SA2,Penalies_SA2,PenY_SA2,ThirdDConv_SA2,ThirdDAtt_SA2,FourthDAtt_SA2,PassCompPct_SA2,PassRating_SA2
0,2007-09-06,2007,0,0,IND,NO,1653.923,1515.973,0.762833,41,...,,,,,,,,,,
1,2007-09-09,2007,0,0,WSH,MIA,1448.841,1489.941,0.534341,16,...,,,,,,,,,,
2,2007-09-09,2007,0,0,BUF,DEN,1516.684,1558.574,0.533209,14,...,,,,,,,,,,
3,2007-09-09,2007,0,0,SEA,TB,1511.936,1417.385,0.714726,20,...,,,,,,,,,,
4,2007-09-09,2007,0,0,CLE,PIT,1396.563,1568.569,0.350697,7,...,,,,,,,,,,
5,2007-09-09,2007,0,0,GB,PHI,1497.051,1542.727,0.527781,16,...,,,,,,,,,,
6,2007-09-09,2007,0,0,HOU,KC,1433.947,1539.589,0.441777,20,...,,,,,,,,,,
7,2007-09-09,2007,0,0,JAX,TEN,1544.671,1494.678,0.659699,10,...,,,,,,,,,,
8,2007-09-09,2007,0,0,MIN,ATL,1436.774,1448.286,0.576373,24,...,,,,,,,,,,
9,2007-09-09,2007,0,0,NYJ,NE,1514.095,1633.017,0.423017,14,...,,,,,,,,,,


For purposes of comparing my model to FiveThirtyEight's predictions, I need to define 'initial Elo' ratings as each team's Elo going into the 1st week of 2007.

In [10]:
# The first week comprises 16 games, i.e. rows 0-15 of df2.
N_WEEK1_GAMES = 16

# Maps team abbreviation -> Elo rating taken from that team's week-1 row.
init_elo_dict = {}

# Stop after the 16th row: reading one row further (as range(17) did) would pick up
# a game outside week 1 and overwrite two teams' initial ratings with later values.
for i in range(N_WEEK1_GAMES):
    tm1 = df2.loc[i, 'team1']
    tm2 = df2.loc[i, 'team2']
    init_elo_dict[tm1] = df2.loc[i, 'elo1']
    init_elo_dict[tm2] = df2.loc[i, 'elo2']

In [16]:
len(init_elo_dict)

32

In [21]:
# Turn the {team: elo} mapping into a two-column table: the dict keys start out
# as the index, get moved into a regular column, and both columns are then named.
elo_by_team = pd.DataFrame.from_dict(init_elo_dict, orient='index')
init_df2 = elo_by_team.reset_index()
init_df2.columns = ['team', 'elo']

In [25]:
init_df2

Unnamed: 0,team,elo
0,MIA,1489.941
1,NO,1515.973
2,LAC,1646.204
3,NE,1633.017
4,WSH,1448.841
5,BUF,1516.684
6,DAL,1490.892
7,BAL,1605.477
8,IND,1653.923
9,CLE,1396.563


In [26]:
!ls

LICENSE         README.md       [34mdata[m[m            forecast.py     util.py
MyPred-01.ipynb [34m__pycache__[m[m     eval.py         pipeline.py


In [27]:
# Archive the existing initial-Elo file before the rebuilt table is written to the same path.
!mv data/initial_elos.csv data/initial_elos_old.csv

In [28]:
# Write the rebuilt initial-Elo table.
# NOTE(review): with the default index=True the row index is written as a leading unnamed
# column; the previous file appears to have had one too (it showed up as 'Unnamed: 0' when
# read back earlier) — confirm downstream readers expect or ignore it.
init_df2.to_csv('data/initial_elos.csv')

In [51]:
# Write the games frame (including the week 1 rows) to data/nfl_games.csv without the row
# index, so the file contains only the named columns that read_games looks up by header.
df2.to_csv('data/nfl_games.csv',index=False)

Test FiveThirtyEight's code for reading in "games" from 'nfl_games.csv'

In [44]:
import csv

In [42]:
def read_games(file):
    """Initialize game records from a CSV file.

    Each row becomes a dict keyed by the CSV header. A fixed set of fields is
    converted from text in place; every other field is left as the raw string.

    Args:
        file: Path of the games CSV. It must have the columns ``season``,
            ``neutral``, ``playoff``, ``score1``, ``score2``, ``elo_prob1``
            and ``result1``.

    Returns:
        A list with one dict per CSV row, where
        ``season``/``neutral``/``playoff`` are ints,
        ``score1``/``score2`` are ints or ``None`` when the cell is empty, and
        ``elo_prob1``/``result1`` are floats or ``None`` when the cell is empty.

    Raises:
        KeyError: if one of the required columns is missing.
        ValueError: if a required field holds text that is not a number.
    """
    # Open inside a context manager so the handle is closed even if parsing
    # fails; newline='' is what the csv module expects for files it reads.
    with open(file, newline='') as fh:
        games = list(csv.DictReader(fh))

    # Uncommenting these three lines will grab the latest game results for 2018, update team ratings accordingly, and make forecasts for upcoming games
    #file_2018 = file.replace(".", "_2018.")
    #urlretrieve("https://projects.fivethirtyeight.com/nfl-api/2018/nfl_games_2018.csv", file_2018)
    #games += [item for item in csv.DictReader(open(file_2018))]

    for game in games:
        # Always-present integer flags.
        for key in ('season', 'neutral', 'playoff'):
            game[key] = int(game[key])
        # Scores are blank for games that have not been played yet.
        for key in ('score1', 'score2'):
            game[key] = int(game[key]) if game[key] != '' else None
        # Probability / result may likewise be blank.
        for key in ('elo_prob1', 'result1'):
            game[key] = float(game[key]) if game[key] != '' else None

    return games

In [52]:
games = read_games('data/nfl_games.csv')

In [76]:
game = games[0]

In [78]:
# Cast this game's model feature values to floats, mapping blank CSV cells to None.
# Values that are already None are passed through, so re-running the cell does not
# hit float(None) and raise TypeError.
for col in model_cols:
    raw = game[col]
    game[col] = None if raw is None or raw == '' else float(raw)

In [80]:
game

{'DefTO_SA1': '',
 'DefTO_SA2': '',
 'FirstD_SA1': '',
 'FirstD_SA2': None,
 'FourthDAtt_SA1': None,
 'FourthDAtt_SA2': '',
 'Fumbles_SA1': '',
 'Fumbles_SA2': '',
 'INT_SA1': '',
 'INT_SA2': '',
 'OT': '0',
 'PassAtt_SA1': '',
 'PassAtt_SA2': '',
 'PassCmp_SA1': '',
 'PassCmp_SA2': '',
 'PassCompPct_SA1': '',
 'PassCompPct_SA2': '',
 'PassRating_SA1': '',
 'PassRating_SA2': '',
 'PassTDs_SA1': '',
 'PassTDs_SA2': '',
 'PassY_SA1': '',
 'PassY_SA2': '',
 'PenY_SA1': None,
 'PenY_SA2': '',
 'Penalies_SA1': '',
 'Penalies_SA2': None,
 'PtsOpp_SA1': None,
 'PtsOpp_SA2': None,
 'PtsTm_SA1': '',
 'PtsTm_SA2': None,
 'RushAtt_SA1': None,
 'RushAtt_SA2': '',
 'RushTDs_SA1': '',
 'RushTDs_SA2': '',
 'RushY_SA1': '',
 'RushY_SA2': '',
 'SacksO_SA1': '',
 'SacksO_SA2': '',
 'TO_SA1': '',
 'TO_SA2': '',
 'ThirdDAtt_SA1': '',
 'ThirdDAtt_SA2': '',
 'ThirdDConv_SA1': '',
 'ThirdDConv_SA2': '',
 'TimePossMins1': 28.25,
 'TimePossMins2': 31.75,
 'TotY_SA1': '',
 'TotY_SA2': '',
 'WinPct1': 100.0,
 'W

In [82]:
# Build a one-row frame from the single game dict: the keys first become the index,
# then the transpose turns them into columns.
gdf = pd.DataFrame.from_dict(game, orient='index').transpose()

In [84]:
gdf[model_cols]

Unnamed: 0,PenY_SA1,elo1,PtsOpp_SA1,RushAtt_SA1,PtsTm_SA2,elo2,Penalies_SA2,TimePossMins1,FirstD_SA2,playoff,PtsOpp_SA2,WinPct1,TimePossMins2,FourthDAtt_SA1,WinPct2
0,,1653.92,,,,1515.97,,28.25,,0,,100,31.75,,0


In [68]:
# Probe for a 'week' column — gdf has none, so this lookup raises KeyError.
gdf[['week']]

KeyError: "['week'] not in index"

In [None]:
dum = gdf[model_cols]

In [95]:
# Print each model feature value of the game as a float, or None when it is missing.
# Iterating a DataFrame directly yields its column labels rather than its values, so
# take the single row first; a Series is already iterated by value and is used as-is.
row_values = dum.iloc[0] if isinstance(dum, pd.DataFrame) else dum
for x in row_values:
    try:
        print(float(x))
    except (TypeError, ValueError):
        # float(None) raises TypeError; a non-numeric string raises ValueError.
        print(None)

nan
1448.1190807611802
28.0
nan
12.0
1638.13804913745
8.0
nan
21.0
0.0
10.0
50.0
30.05
nan
50.0
