In [66]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import time
import pickle

In [67]:
# Lift pandas' display limits so DataFrames are shown with every column and every row
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [68]:
# Load the cleaned play data, then keep a parsed-datetime copy of the
# 'Estimated_Time' text column under a new column name.
nfl_small2 = pd.read_csv('nfl_small_cleaned.csv')
estimated_time_parsed = pd.to_datetime(nfl_small2['Estimated_Time'])
nfl_small2['Estimated Time (EST)'] = estimated_time_parsed
nfl_small2.shape

(84371, 55)

<h2>Combining the weather data with the play data</h2>

In [69]:
import os
import re
def load_weather(weather_dir=None):
    """Load the weather CSV files into one combined, sorted DataFrame.

    Parameters
    ----------
    weather_dir : str, optional
        Directory holding the CSV files. Defaults to the
        ``historicalweatherdata`` folder inside the current working directory.

    Returns
    -------
    pandas.DataFrame
        All selected files concatenated (columns in sorted order), without the
        ``time_local`` column, with ``Time (GMT)`` and ``Time (EST)`` parsed
        to datetimes, and rows sorted by ``Team Abbreviation`` then
        ``Time (EST)``.

    Raises
    ------
    FileNotFoundError
        If the directory holds no file selected by the name filter.
    """
    if weather_dir is None:
        # os.path.join rather than a hard-coded '\\' so the path also
        # resolves on non-Windows systems.
        weather_dir = os.path.join(os.getcwd(), 'historicalweatherdata')

    # Only CSV files whose full name is at most 7 characters are loaded
    # (presumably the per-team files named by abbreviation, e.g. 'NE.csv' or
    # 'ARI.csv' — TODO confirm); longer names are skipped.
    csv_names = sorted(
        name for name in os.listdir(weather_dir)
        if name.endswith('.csv') and len(name) <= 7
    )
    if not csv_names:
        raise FileNotFoundError(
            'no weather CSV files found in ' + weather_dir
        )

    # Read every file first and concatenate once: concatenating inside the
    # loop re-copies the accumulated frame on every iteration.
    frames = [pd.read_csv(os.path.join(weather_dir, name)) for name in csv_names]
    df = pd.concat(frames, sort=True)

    df = df.drop(['time_local'], axis=1)
    df['Time (GMT)'] = pd.to_datetime(df['Time (GMT)'])
    df['Time (EST)'] = pd.to_datetime(df['Time (EST)'])

    df = df.sort_values(by=['Team Abbreviation', 'Time (EST)'])
    return df

In [70]:
# Prerequisite: run historical_weather.ipynb first (presumably it produces the
# CSV files that load_weather() reads — TODO confirm).
weather_df = load_weather()

# Cache the combined frame on disk, leaving the row index out of the file
combined_weather_csv = "all_historical_weather.csv"
weather_df.to_csv(combined_weather_csv, index=False)

In [71]:
# Reload the combined weather table; both time columns are parsed to
# datetimes again after the CSV round trip.
weather_df = pd.read_csv("all_historical_weather.csv")
for time_column in ('Time (GMT)', 'Time (EST)'):
    weather_df[time_column] = pd.to_datetime(weather_df[time_column])

# Order rows by team, then by time within each team
sort_keys = ['Team Abbreviation', 'Time (EST)']
weather_df = weather_df.sort_values(by=sort_keys)

In [72]:
# Preview the first 10 rows of the sorted weather table
weather_df.head(10)

Unnamed: 0,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h),added_time
0,1013.6,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 20:00:00,2015-01-01 00:00:00,290.0,11.2,0
1,1013.5,Glendale,2.7,Bermuda grass,79.0,,Retractable,ARI,6.1,2014-12-31 21:00:00,2015-01-01 01:00:00,240.0,5.4,0
2,1013.9,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 22:00:00,2015-01-01 02:00:00,290.0,14.8,0
3,1014.2,Glendale,1.6,Bermuda grass,73.0,,Retractable,ARI,6.1,2014-12-31 23:00:00,2015-01-01 03:00:00,270.0,22.3,0
4,1014.9,Glendale,0.0,Bermuda grass,70.0,,Retractable,ARI,5.0,2015-01-01 00:00:00,2015-01-01 04:00:00,270.0,14.8,0
5,1015.4,Glendale,-0.6,Bermuda grass,70.0,,Retractable,ARI,4.4,2015-01-01 01:00:00,2015-01-01 05:00:00,250.0,20.5,0
6,1015.8,Glendale,0.5,Bermuda grass,85.0,,Retractable,ARI,2.8,2015-01-01 02:00:00,2015-01-01 06:00:00,280.0,16.6,0
7,1015.7,Glendale,1.2,Bermuda grass,89.0,,Retractable,ARI,2.8,2015-01-01 03:00:00,2015-01-01 07:00:00,270.0,13.0,0
8,1016.0,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 04:00:00,2015-01-01 08:00:00,300.0,9.4,0
9,1016.9,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 05:00:00,2015-01-01 09:00:00,300.0,7.6,0


In [73]:
# Rows containing at least one null, total rows, and the fraction of rows with a null.
# The row-wise mask is computed once and reused instead of rebuilding it
# (and the filtered frame) for each printed value.
rows_with_null = weather_df.isnull().any(axis=1)
n_null_rows = int(rows_with_null.sum())
n_rows = weather_df.shape[0]
print(n_null_rows, n_rows, n_null_rows / n_rows)

43857 1402545 0.03126958493310375


In [74]:
# Print the number of missing values in each column
null_counts = weather_df.isnull().sum()
for column_name, n_missing in null_counts.items():
    print(column_name, n_missing)

Air Pressure (hPa) 0
City 0
Dewpoint (°C) 0
Field 0
Humidity (%) 0
Precipitation (mm) 85
Roof 0
Team Abbreviation 0
Temperature (°C) 0
Time (EST) 0
Time (GMT) 0
Wind Direction (deg) 43823
Wind Speed (km/h) 43819
added_time 0


In [75]:
# Fill gaps in these readings by linear interpolation between neighbouring rows.
# The keyword is `method`: the former `type='linear'` is not an interpolate()
# option and only gave linear behaviour because linear is the default.
# NOTE(review): weather_df is ordered by team then time, so a gap at a team
# boundary would be filled using another team's rows — consider interpolating
# per 'Team Abbreviation' group.
interpolated_columns = ['Temperature (°C)', 'Humidity (%)', 'Dewpoint (°C)']
weather_df[interpolated_columns] = weather_df[interpolated_columns].interpolate(method='linear')

In [76]:
# Column dtypes of nfl_small2 before the weather merge
nfl_small2.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [77]:
# Preview the first rows of nfl_small2
nfl_small2.head()

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST)
0,0,362094,44,2017090700,2017-09-07 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,NE,KC,NE,home,KC,KC,35.0,1,0,,0.0,KC 35,0,73,C.Santos kicks 64 yards from KC 35 to NE 1. D....,kickoff,0.0,0,0.0,0.0,,64.0,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:00.000000000,2017-09-07 20:30:00,2017-09-07 20:30:00
1,1,362095,68,2017090700,2017-09-07 00:00:00,14:55,895.0,1795.0,3595.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,1.0,0.0,NE 27,10,0,(14:55) NE 12-Brady 18th season as Patriots QB...,pass,0.0,deep,27.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:15.000000000,2017-09-07 20:30:15,2017-09-07 20:30:15
2,2,362096,94,2017090700,2017-09-07 00:00:00,14:49,889.0,1789.0,3589.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,2.0,0.0,NE 27,10,8,(14:49) T.Brady pass short right to R.Burkhead...,pass,8.0,short,1.0,7.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-09-07 20:30:00,0 days 00:00:33.000000000,2017-09-07 20:30:33,2017-09-07 20:30:33
3,3,362097,118,2017090700,2017-09-07 00:00:00,14:14,854.0,1754.0,3554.0,Half1,0,1,NE,KC,NE,home,KC,NE,65.0,1,0,3.0,0.0,NE 35,2,73,(14:14) (Shotgun) J.White left guard to NE 43 ...,run,8.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:02:18.000000000,2017-09-07 20:32:18,2017-09-07 20:32:18
4,4,362098,139,2017090700,2017-09-07 00:00:00,13:52,832.0,1732.0,3532.0,Half1,0,1,NE,KC,NE,home,KC,NE,57.0,1,0,1.0,0.0,NE 43,10,19,"(13:52) (No Huddle, Shotgun) J.White up the mi...",run,3.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:03:24.000000000,2017-09-07 20:33:24,2017-09-07 20:33:24


In [78]:
# Round each play's estimated time to the nearest hour so it can be matched
# against the 'Time (EST)' column of the weather table.
nfl_small2['Estimated_Hour'] = nfl_small2['Estimated Time (EST)'].dt.round('H')

# Left join keeps every play; weather is looked up by home team and rounded hour.
nfl_weather = pd.merge(
    nfl_small2,
    weather_df,
    how='left',
    left_on=['home_team', 'Estimated_Hour'],
    right_on=['Team Abbreviation', 'Time (EST)'],
)

# A left merge silently duplicates plays when weather_df holds more than one
# row for a (team, hour) key, so verify the play count is unchanged.
assert len(nfl_weather) == len(nfl_small2), "weather merge changed the number of plays"
nfl_weather.sample(10)

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST),Estimated_Hour,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h),added_time
84066,84066,449055,1085,2018121611,2018-12-16 00:00:00,12:10:00,730.0,730.0,2530.0,Half1,0,2,LA,PHI,PHI,away,LA,LA,16.0,4,0,3.0,0.0,LA 16,5,59,(12:10) (Shotgun) N.Foles pass incomplete deep...,pass,0.0,deep,16.0,0.0,,,,,,7,3,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-12-16 16:25:00,0 days 00:53:30.000000000,2018-12-16 17:18:30,2018-12-16 17:18:30,2018-12-16 17:00:00,1019.3,Los Angeles,11.7,Grass,63.0,0.0,Open,LA,18.9,2018-12-16 17:00:00,2018-12-16 21:00:00,270.0,7.6,0
29158,29158,392427,620,2017112607,2017-11-26 00:00:00,04:36,276.0,1176.0,2976.0,Half1,0,1,LA,NO,LA,home,NO,NO,29.0,3,0,1.0,0.0,NO 29,10,47,(4:36) J.Goff sacked at NO 36 for -7 yards (C....,pass,-7.0,0,0.0,0.0,,,,,,7,0,7.0,7.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-26 16:25:00,0 days 00:31:12.000000000,2017-11-26 16:56:12,2017-11-26 16:56:12,2017-11-26 17:00:00,1017.5,Los Angeles,13.9,Grass,81.0,0.0,Open,LA,17.2,2017-11-26 17:00:00,2017-11-26 21:00:00,270.0,18.4,0
11363,11363,373658,1680,2017100802,2017-10-08 00:00:00,03:02,182.0,182.0,1982.0,Half1,0,2,DET,CAR,CAR,away,DET,DET,38.0,10,0,2.0,0.0,DET 38,2,30,(3:02) (No Huddle) J.Stewart left tackle to DE...,run,-5.0,0,0.0,0.0,,,,,,10,10,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-10-08 13:00:00,0 days 01:20:54.000000000,2017-10-08 14:20:54,2017-10-08 14:20:54,2017-10-08 14:00:00,1026.792937,Detroit,14.0,FieldTurf Classic HD,57.0,0.0,Fixed,DET,23.0,2017-10-08 14:00:00,2017-10-08 17:53:00,190.0,15.0,0
29567,29567,392661,1997,2017112609,2017-11-26 00:00:00,02:12,132.0,132.0,1932.0,Half1,0,2,ARI,JAX,ARI,home,JAX,ARI,89.0,10,0,,0.0,ARI 11,0,-5,Timeout #3 by JAX at 02:12.,no_play,0.0,0,0.0,0.0,,,,,,13,0,13.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-26 16:25:00,0 days 01:23:24.000000000,2017-11-26 17:48:24,2017-11-26 17:48:24,2017-11-26 18:00:00,1012.9,Glendale,-3.2,Bermuda grass,11.0,0.0,Retractable,ARI,30.6,2017-11-26 18:00:00,2017-11-26 22:00:00,213.333333,0.0,0
6640,6640,369325,625,2017092407,2017-09-24 00:00:00,06:52,412.0,1312.0,3112.0,Half1,0,1,NE,HOU,NE,home,HOU,NE,69.0,4,0,3.0,0.0,NE 31,4,9,(6:52) (Shotgun) T.Brady pass short left to C....,pass,3.0,short,3.0,0.0,,,,,,7,3,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-09-24 13:00:00,0 days 00:24:24.000000000,2017-09-24 13:24:24,2017-09-24 13:24:24,2017-09-24 13:00:00,1017.4,Foxborough,17.2,FieldTurf CORE,62.0,0.0,Open,NE,25.0,2017-09-24 13:00:00,2017-09-24 17:00:00,120.0,9.4,0
23372,23372,386087,1010,2017111203,2017-11-12 00:00:00,14:10,850.0,850.0,2650.0,Half1,0,2,IND,PIT,PIT,away,IND,IND,42.0,7,0,1.0,0.0,IND 42,10,15,"(14:10) (No Huddle, Shotgun) B.Roethlisberger ...",pass,0.0,deep,32.0,0.0,,,,,,7,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-12 13:00:00,0 days 00:47:30.000000000,2017-11-12 13:47:30,2017-11-12 13:47:30,2017-11-12 14:00:00,1024.6,Indianapolis,2.2,Shaw Sports Momentum Pro,82.0,0.8,Retractable,IND,5.0,2017-11-12 14:00:00,2017-11-12 18:00:00,60.0,5.4,0
51747,51747,415422,3473,2018092312,2018-09-23 00:00:00,08:43:00,523.0,523.0,523.0,Half2,0,4,SEA,DAL,DAL,away,SEA,DAL,57.0,20,0,1.0,0.0,DAL 43,10,57,(8:43) (Shotgun) E.Elliott right guard to SEA ...,run,19.0,0,0.0,0.0,,,,,,24,6,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-09-23 16:25:00,0 days 02:33:51.000000000,2018-09-23 18:58:51,2018-09-23 18:58:51,2018-09-23 19:00:00,1022.0,Seattle,6.0,FieldTurf Revolution 360,46.0,0.0,Open,SEA,17.8,2018-09-23 19:00:00,2018-09-23 23:00:00,250.0,9.4,0
55778,55778,419740,2167,2018100704,2018-10-07 00:00:00,00:42:00,42.0,42.0,1842.0,Half1,0,2,DET,GB,DET,home,GB,GB,7.0,12,0,1.0,1.0,GB 7,7,29,(:42) (Shotgun) M.Stafford pass incomplete sho...,pass,0.0,short,7.0,0.0,,,,,,17,0,17.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-10-07 16:25:00,0 days 01:27:54.000000000,2018-10-07 17:52:54,2018-10-07 17:52:54,2018-10-07 18:00:00,1024.1,Detroit,12.3,FieldTurf Classic HD,78.0,0.0,Fixed,DET,16.1,2018-10-07 18:00:00,2018-10-07 22:00:00,50.0,14.8,0
43483,43483,405792,1419,2017123114,2017-12-31 00:00:00,06:25,385.0,385.0,2185.0,Half1,0,2,LA,SF,SF,away,LA,LA,36.0,9,0,1.0,0.0,LA 36,10,63,(6:25) C.Hyde up the middle to LA 33 for 3 yar...,run,3.0,0,0.0,0.0,,,,,,3,10,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-12-31 16:25:00,0 days 01:10:45.000000000,2017-12-31 17:35:45,2017-12-31 17:35:45,2017-12-31 18:00:00,1018.1,Los Angeles,11.6,Grass,72.0,0.0,Open,LA,16.7,2017-12-31 18:00:00,2017-12-31 22:00:00,260.0,16.6,0
16021,16021,378653,4620,2017102200,2017-10-22 00:00:00,00:08,8.0,8.0,8.0,Half2,0,4,BUF,TB,TB,away,BUF,TB,69.0,22,0,2.0,0.0,TB 31,4,4,(:08) (Shotgun) J.Winston pass short left to A...,pass,-1.0,short,7.0,-2.0,,,,,,30,27,27.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2017-10-22 13:00:00,0 days 02:59:36.000000000,2017-10-22 15:59:36,2017-10-22 15:59:36,2017-10-22 16:00:00,1021.6,Orchard Park,9.4,A-Turf Titan 50,36.0,0.0,Open,BUF,25.6,2017-10-22 16:00:00,2017-10-22 20:00:00,210.0,14.8,0


In [79]:
# Column dtypes of the merged plays + weather frame
nfl_weather.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [80]:
# Row count and per-column non-null counts of nfl_small2, for comparison with the merged frame
nfl_small2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84371 entries, 0 to 84370
Data columns (total 56 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [81]:
# Row count and per-column non-null counts of the merged frame
nfl_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 84371 entries, 0 to 84370
Data columns (total 70 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [82]:
# Rows whose temperature is null after the merge (e.g. plays for which no
# weather row matched), shown with the keys used for the lookup.
missing_weather = nfl_weather['Temperature (°C)'].isnull()
nfl_weather.loc[missing_weather, ['home_team', 'Estimated_Hour']]

Unnamed: 0,home_team,Estimated_Hour


In [83]:
# Save the merged plays + weather frame to disk.
# NOTE(review): the row index is written too (no index=False), which adds an
# unnamed leading column when the file is read back — confirm that downstream
# readers expect it before changing.
merged_output_csv = 'nfl_small_cleaned_plus_weather.csv'
nfl_weather.to_csv(merged_output_csv)