In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import time
import pickle

In [2]:
# Remove pandas' display limits (None = unlimited) so every column and row is rendered
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
# Load the cleaned play data and add a datetime version of the 'Estimated_Time' column
nfl_small2 = pd.read_csv('nfl_small_cleaned.csv').assign(
    **{'Estimated Time (EST)': lambda plays: pd.to_datetime(plays['Estimated_Time'])}
)
nfl_small2.shape

(84371, 55)

<h2>Combining weather data with the NFL play-by-play data</h2>

In [4]:
import os
import re
def load_weather(weather_dir=None):
    """Load the per-stadium weather CSV files into one combined DataFrame.

    Every file in ``weather_dir`` whose name contains ``.csv`` and is at most
    seven characters long is read and stacked into a single frame; files with
    longer names are skipped (presumably the short names are the per-team
    files -- confirm against the folder contents).

    Parameters
    ----------
    weather_dir : str, optional
        Folder holding the weather CSV files. Defaults to the
        ``historicalweatherdata`` folder inside the current working directory.

    Returns
    -------
    pandas.DataFrame
        All selected files concatenated (columns in sorted order, original
        per-file row labels kept), with the ``time_local`` column dropped,
        ``Time (GMT)`` and ``Time (EST)`` parsed as datetimes, and rows sorted
        by ``Team Abbreviation`` and then ``Time (EST)``.

    Raises
    ------
    FileNotFoundError
        If ``weather_dir`` contains no matching CSV file.
    """
    if weather_dir is None:
        # os.path.join keeps the path valid on every operating system.
        weather_dir = os.path.join(os.getcwd(), 'historicalweatherdata')

    # os.listdir returns names in arbitrary order; sort for a deterministic load order.
    csv_names = [
        name for name in sorted(os.listdir(weather_dir))
        if '.csv' in name and len(name) <= 7
    ]
    if not csv_names:
        raise FileNotFoundError(
            'No weather CSV files found in ' + repr(weather_dir)
        )

    # Read everything first and concatenate once; growing a DataFrame inside
    # the loop re-copies all previously loaded rows on every iteration.
    frames = [pd.read_csv(os.path.join(weather_dir, name)) for name in csv_names]
    df = pd.concat(frames, sort=True)

    df = df.drop(['time_local'], axis=1)
    df['Time (GMT)'] = pd.to_datetime(df['Time (GMT)'])
    df['Time (EST)'] = pd.to_datetime(df['Time (EST)'])

    df = df.sort_values(by=['Team Abbreviation', 'Time (EST)'])
    return df

In [5]:
# Prerequisite: run historical_weather.ipynb first
# (presumably it produces the CSV files that load_weather() reads -- confirm).
weather_df = load_weather()

# Cache the combined weather table as a single CSV, without writing the row index.
weather_df.to_csv("all_historical_weather.csv", index=False)

In [6]:
# Reload the cached weather table. CSV stores the timestamps as text, so both time
# columns are parsed back into datetimes before sorting by team and local time.
weather_df = (
    pd.read_csv("all_historical_weather.csv")
    .assign(**{
        'Time (GMT)': lambda cached: pd.to_datetime(cached['Time (GMT)']),
        'Time (EST)': lambda cached: pd.to_datetime(cached['Time (EST)']),
    })
    .sort_values(by=['Team Abbreviation', 'Time (EST)'])
)

In [7]:
# Preview the first 10 rows of the sorted weather table
weather_df.head(10)

Unnamed: 0,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h)
0,1013.6,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 20:00:00,2015-01-01 00:00:00,290.0,11.2
1,1013.5,Glendale,2.7,Bermuda grass,79.0,,Retractable,ARI,6.1,2014-12-31 21:00:00,2015-01-01 01:00:00,240.0,5.4
2,1013.9,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 22:00:00,2015-01-01 02:00:00,290.0,14.8
3,1014.2,Glendale,1.6,Bermuda grass,73.0,,Retractable,ARI,6.1,2014-12-31 23:00:00,2015-01-01 03:00:00,270.0,22.3
4,1014.9,Glendale,0.0,Bermuda grass,70.0,,Retractable,ARI,5.0,2015-01-01 00:00:00,2015-01-01 04:00:00,270.0,14.8
5,1015.4,Glendale,-0.6,Bermuda grass,70.0,,Retractable,ARI,4.4,2015-01-01 01:00:00,2015-01-01 05:00:00,250.0,20.5
6,1015.8,Glendale,0.5,Bermuda grass,85.0,,Retractable,ARI,2.8,2015-01-01 02:00:00,2015-01-01 06:00:00,280.0,16.6
7,1015.7,Glendale,1.2,Bermuda grass,89.0,,Retractable,ARI,2.8,2015-01-01 03:00:00,2015-01-01 07:00:00,270.0,13.0
8,1016.0,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 04:00:00,2015-01-01 08:00:00,300.0,9.4
9,1016.9,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 05:00:00,2015-01-01 09:00:00,300.0,7.6


In [8]:
# Shape of the subset of weather rows that contain at least one missing value
# (the first element is the number of such rows)
weather_df[weather_df.isnull().any(axis=1)].shape

(222249, 13)

In [9]:
# Print how many values are missing in each weather column
for col, n_missing in weather_df.isnull().sum().items():
    print(col, n_missing)

Air Pressure (hPa) 21121
City 0
Dewpoint (°C) 1954
Field 0
Humidity (%) 1954
Precipitation (mm) 76962
Roof 0
Team Abbreviation 0
Temperature (°C) 1651
Time (EST) 0
Time (GMT) 0
Wind Direction (deg) 142244
Wind Speed (km/h) 46189


In [10]:
# Fill gaps in the core weather readings by linear interpolation, one team at a time.
# weather_df is ordered by team and then time, so interpolating a whole column in one
# pass would blend the last reading of one team into the leading gap of the next team.
# Gaps at the very start of a team's series have no earlier reading and stay NaN.
# Note: the interpolation method is selected with `method=`; `type=` is not an
# interpolate() parameter.
interp_cols = ['Temperature (°C)', 'Humidity (%)', 'Dewpoint (°C)']
weather_df[interp_cols] = (
    weather_df
    .groupby('Team Abbreviation')[interp_cols]
    .transform(lambda readings: readings.interpolate(method='linear'))
)

In [11]:
# Inspect the column dtypes of the play data before merging
nfl_small2.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [12]:
# Preview the first rows of the play data
nfl_small2.head()

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST)
0,0,362094,44,2017090700,2017-09-07 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,NE,KC,NE,home,KC,KC,35.0,1,0,,0.0,KC 35,0,73,C.Santos kicks 64 yards from KC 35 to NE 1. D....,kickoff,0.0,0,0.0,0.0,,64.0,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:00.000000000,2017-09-07 20:30:00,2017-09-07 20:30:00
1,1,362095,68,2017090700,2017-09-07 00:00:00,14:55,895.0,1795.0,3595.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,1.0,0.0,NE 27,10,0,(14:55) NE 12-Brady 18th season as Patriots QB...,pass,0.0,deep,27.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:15.000000000,2017-09-07 20:30:15,2017-09-07 20:30:15
2,2,362096,94,2017090700,2017-09-07 00:00:00,14:49,889.0,1789.0,3589.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,2.0,0.0,NE 27,10,8,(14:49) T.Brady pass short right to R.Burkhead...,pass,8.0,short,1.0,7.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-09-07 20:30:00,0 days 00:00:33.000000000,2017-09-07 20:30:33,2017-09-07 20:30:33
3,3,362097,118,2017090700,2017-09-07 00:00:00,14:14,854.0,1754.0,3554.0,Half1,0,1,NE,KC,NE,home,KC,NE,65.0,1,0,3.0,0.0,NE 35,2,73,(14:14) (Shotgun) J.White left guard to NE 43 ...,run,8.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:02:18.000000000,2017-09-07 20:32:18,2017-09-07 20:32:18
4,4,362098,139,2017090700,2017-09-07 00:00:00,13:52,832.0,1732.0,3532.0,Half1,0,1,NE,KC,NE,home,KC,NE,57.0,1,0,1.0,0.0,NE 43,10,19,"(13:52) (No Huddle, Shotgun) J.White up the mi...",run,3.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:03:24.000000000,2017-09-07 20:33:24,2017-09-07 20:33:24


In [13]:
# Round each play's estimated time to the nearest whole hour so it can be matched
# against the weather table's 'Time (EST)' values.
# ('h' is the hourly alias; the upper-case 'H' spelling is deprecated in recent pandas.)
nfl_small2['Estimated_Hour'] = nfl_small2['Estimated Time (EST)'].dt.round('h')

# Left join: keep every play and attach the weather row of the home team for that hour.
# Weather columns are NaN for plays without a matching weather row.
nfl_weather = pd.merge(
    nfl_small2,
    weather_df,
    how='left',
    left_on=['home_team', 'Estimated_Hour'],
    right_on=['Team Abbreviation', 'Time (EST)'],
)

# A left join silently duplicates plays when the weather table holds more than one row
# for the same (team, hour) key, so fail loudly instead of carrying duplicates forward.
assert len(nfl_weather) == len(nfl_small2), 'weather merge changed the number of plays'

# Fixed seed so the preview shows the same rows on every run.
nfl_weather.sample(10, random_state=0)

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST),Estimated_Hour,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h)
29110,29110,391996,3501,2017112606,2017-11-26 00:00:00,09:38,578.0,578.0,578.0,Half2,0,4,PHI,CHI,PHI,home,CHI,CHI,46.0,21,0,1.0,0.0,CHI 46,10,6,(9:38) (Shotgun) L.Blount up the middle to CHI...,run,6.0,0,0.0,0.0,,,,,,31,3,31.0,31.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-26 13:00:00,0 days 02:31:06.000000000,2017-11-26 15:31:06,2017-11-26 15:31:06,2017-11-26 16:00:00,1015.4,Philadelphia,-4.4,Desso GrassMaster,36.0,0.0,Open,PHI,10.0,2017-11-26 16:00:00,2017-11-26 20:00:00,340.0,20.5
44943,44943,408139,1397,2018090905,2018-09-09 00:00:00,09:01:00,541.0,541.0,2341.0,Half1,0,2,NE,HOU,NE,home,HOU,HOU,35.0,11,0,,0.0,HOU 35,0,12,K.Fairbairn kicks 66 yards from HOU 35 to NE -...,kickoff,0.0,0,0.0,0.0,,,,,,14,6,14.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-09-09 16:25:00,0 days 01:02:57.000000000,2018-09-09 17:27:57,2018-09-09 17:27:57,2018-09-09 17:00:00,1025.8,Foxborough,6.2,FieldTurf CORE,52.0,0.0,Open,NE,16.1,2018-09-09 17:00:00,2018-09-09 21:00:00,40.0,20.5
81811,81811,446711,3264,2018121300,2018-12-13 00:00:00,12:09:00,729.0,729.0,729.0,Half2,0,4,KC,LAC,KC,home,LAC,LAC,27.0,14,0,1.0,0.0,LAC 27,10,73,(12:09) (Shotgun) Dam. Williams left end to LA...,no_play,0.0,0,0.0,0.0,,,,,,21,14,21.0,21.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-12-13 16:25:00,0 days 02:23:33.000000000,2018-12-13 18:48:33,2018-12-13 18:48:33,2018-12-13 19:00:00,1016.2,Kansas City,0.0,Bermuda grass,82.0,0.0,Open,KC,2.8,2018-12-13 19:00:00,2018-12-13 23:00:00,340.0,24.1
66284,66284,430631,1739,2018110408,2018-11-04 00:00:00,02:00:00,120.0,120.0,1920.0,Half1,0,2,SEA,LAC,LAC,away,SEA,LAC,53.0,10,0,1.0,0.0,LAC 47,10,65,(2:00) (Shotgun) P.Rivers pass incomplete shor...,pass,0.0,short,-1.0,0.0,,,,,,7,12,12.0,12.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-11-04 16:25:00,0 days 01:24:00.000000000,2018-11-04 17:49:00,2018-11-04 17:49:00,2018-11-04 18:00:00,1016.0,Seattle,7.8,FieldTurf Revolution 360,62.0,0.0,Open,SEA,15.0,2018-11-04 18:00:00,2018-11-04 22:00:00,200.0,20.5
75044,75044,439719,506,2018112505,2018-11-25 00:00:00,06:19:00,379.0,1279.0,3079.0,Half1,0,1,NYJ,NE,NYJ,home,NE,NE,44.0,3,0,1.0,0.0,NE 44,10,80,(6:19) (Shotgun) J.McCown scrambles up the mid...,run,3.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-11-25 16:25:00,0 days 00:26:03.000000000,2018-11-25 16:51:03,2018-11-25 16:51:03,2018-11-25 17:00:00,1012.0,East Rutherford,2.9,UBU Sports Speed Series S5-M Synthetic Turf,51.0,0.0,Open,NYJ,12.8,2018-11-25 17:00:00,2018-11-25 21:00:00,320.0,16.6
78881,78881,443674,2733,2018120300,2018-12-03 00:00:00,04:09:00,249.0,1149.0,1149.0,Half2,0,3,PHI,WAS,PHI,home,WAS,PHI,67.0,15,0,2.0,0.0,PHI 33,10,85,(4:09) C.Wentz pass short left to C.Clement to...,pass,23.0,short,-5.0,28.0,,,,,,14,13,14.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2018-12-03 16:25:00,0 days 02:02:33.000000000,2018-12-03 18:27:33,2018-12-03 18:27:33,2018-12-03 18:00:00,1007.7,Philadelphia,-0.1,Desso GrassMaster,46.0,0.0,Open,PHI,11.1,2018-12-03 18:00:00,2018-12-03 22:00:00,300.0,20.5
51396,51396,415229,2760,2018092310,2018-09-23 00:00:00,06:58:00,418.0,1318.0,1318.0,Half2,0,3,LA,LAC,LA,home,LAC,LA,60.0,14,0,1.0,0.0,LA 40,10,77,(6:58) J.Goff pass short left to T.Gurley push...,pass,25.0,short,-6.0,31.0,,,,,,27,20,27.0,27.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2018-09-23 16:25:00,0 days 01:54:06.000000000,2018-09-23 18:19:06,2018-09-23 18:19:06,2018-09-23 18:00:00,1010.8,Los Angeles,15.5,Grass,68.0,0.0,Open,LA,21.7,2018-09-23 18:00:00,2018-09-23 22:00:00,250.0,22.3
50268,50268,414773,222,2018092304,2018-09-23 00:00:00,11:35:00,695.0,1595.0,3395.0,Half1,0,1,JAX,TEN,JAX,home,TEN,JAX,52.0,1,0,4.0,0.0,JAX 48,14,23,"(11:35) L.Cooke punts 52 yards to end zone, Ce...",punt,0.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-09-23 16:25:00,0 days 00:10:15.000000000,2018-09-23 16:35:15,2018-09-23 16:35:15,2018-09-23 17:00:00,1015.3,Jacksonville,21.7,Bermuda grass,61.0,0.0,Open,JAX,30.0,2018-09-23 17:00:00,2018-09-23 21:00:00,80.0,16.6
13073,13073,375605,991,2017100900,2017-10-09 00:00:00,14:20,860.0,860.0,2660.0,Half1,0,2,CHI,MIN,CHI,home,MIN,MIN,49.0,7,0,1.0,0.0,MIN 49,17,-2,(14:20) (Shotgun) M.Trubisky pass incomplete d...,no_play,0.0,0,0.0,0.0,,,,,,2,0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-10-09 20:30:00,0 days 00:47:00.000000000,2017-10-09 21:17:00,2017-10-09 21:17:00,2017-10-09 21:00:00,1016.0,Chicago,15.1,Kentucky bluegrass,84.0,0.0,Open,CHI,17.8,2017-10-09 21:00:00,2017-10-10 01:00:00,20.0,20.5
58490,58490,422370,674,2018101405,2018-10-14 00:00:00,05:33:00,333.0,1233.0,3033.0,Half1,0,1,MIN,ARI,ARI,away,MIN,MIN,35.0,5,0,,0.0,MIN 35,0,6,D.Bailey kicks 65 yards from MIN 35 to end zon...,kickoff,0.0,0,0.0,0.0,,,,,,7,3,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-10-14 16:25:00,0 days 00:28:21.000000000,2018-10-14 16:53:21,2018-10-14 16:53:21,2018-10-14 17:00:00,1020.6,Minneapolis,-1.1,UBU Speed Series S5-M Synthetic Turf,73.0,0.0,Fixed,MIN,3.3,2018-10-14 17:00:00,2018-10-14 21:00:00,300.0,16.6


In [14]:
# Inspect the column dtypes of the merged plays + weather frame
nfl_weather.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [15]:
# Column summary (non-null counts and dtypes) of the play data, for comparison with the merged frame
nfl_small2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84371 entries, 0 to 84370
Data columns (total 56 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [16]:
# Column summary (non-null counts and dtypes) of the merged plays + weather frame
nfl_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 84371 entries, 0 to 84370
Data columns (total 69 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [25]:
# Plays whose merged weather is missing a temperature or a team abbreviation:
# list the home team and rounded hour so the gaps in the weather data can be located
weather_missing = nfl_weather[['Temperature (°C)', 'Team Abbreviation']].isnull().any(axis=1)
nfl_weather.loc[weather_missing, ['home_team', 'Estimated_Hour']]

Unnamed: 0,home_team,Estimated_Hour
35752,DET,2017-12-16 16:00:00
35753,DET,2017-12-16 17:00:00
35754,DET,2017-12-16 17:00:00
35755,DET,2017-12-16 17:00:00
35756,DET,2017-12-16 17:00:00
35757,DET,2017-12-16 17:00:00
35758,DET,2017-12-16 17:00:00
35759,DET,2017-12-16 17:00:00
35760,DET,2017-12-16 17:00:00
35761,DET,2017-12-16 17:00:00


In [18]:
# Save the merged plays + weather table. index=False keeps the row index out of the
# file; writing it adds an extra "Unnamed: 0"-style column whenever the CSV is read back.
nfl_weather.to_csv('nfl_small_cleaned_plus_weather.csv', index=False)

In [26]:
# TODO: add the missing 2017-12-16 17:00:00 row to the Detroit weather file, and fix any
# other weather file that is missing timestamps or temperatures.

# Approach:
# Walk through each file and check that consecutive timestamps are exactly 1 hour apart.
# Where the gap is larger, insert the missing timestamp(s) and keep going.
# Finally, sort by time and linearly interpolate the temperature.