In [1]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import time
import pickle

In [2]:
# Lift pandas' display limits so neither columns nor rows are truncated in cell output.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [3]:
# Read the cleaned play-by-play data and append a datetime-typed copy of the
# 'Estimated_Time' column under the name 'Estimated Time (EST)'.
nfl_small2 = pd.read_csv('nfl_small_cleaned.csv').assign(
    **{'Estimated Time (EST)': lambda plays: pd.to_datetime(plays['Estimated_Time'])}
)
nfl_small2.shape

(84371, 55)

<h2>Combining weather data with the NFL play-by-play data</h2>

In [4]:
import os
import re
def load_weather():
    """Load the per-stadium weather CSVs into one combined DataFrame.

    Reads CSV files from the ``historicalweatherdata`` folder located in the
    current working directory. Only files whose name contains ``.csv`` and is
    at most 7 characters long (e.g. ``ARI.csv``) are read; every other entry
    in the folder is ignored.

    Returns:
        pandas.DataFrame: all matching files concatenated, with columns in
        alphabetical order, the ``time_local`` column removed,
        ``Time (GMT)`` / ``Time (EST)`` parsed as datetimes, and rows sorted
        by ``Team Abbreviation`` then ``Time (EST)``.

    Raises:
        FileNotFoundError: if the folder contains no matching CSV file.
    """
    # os.path.join keeps the path valid on every OS; a hard-coded backslash
    # separator only resolves on Windows.
    weather_path = os.path.join(os.getcwd(), 'historicalweatherdata')

    # sorted() makes the read order deterministic (os.listdir order is arbitrary).
    csv_names = [
        name for name in sorted(os.listdir(weather_path))
        if '.csv' in name and len(name) <= 7
    ]
    if not csv_names:
        raise FileNotFoundError(
            'No weather CSV files found in ' + weather_path
        )

    # Read everything first and concatenate once: growing a DataFrame with
    # concat inside the loop copies all accumulated rows on every iteration.
    frames = [pd.read_csv(os.path.join(weather_path, name)) for name in csv_names]
    df = pd.concat(frames, sort=True)
    # Make the alphabetical column order explicit so it does not depend on
    # whether concat decided the inputs' columns were already aligned.
    df = df.sort_index(axis=1)

    df = df.drop(columns=['time_local'])
    for time_col in ('Time (GMT)', 'Time (EST)'):
        df[time_col] = pd.to_datetime(df[time_col])

    df = df.sort_values(by=['Team Abbreviation', 'Time (EST)'])
    return df

In [5]:
# Prerequisite: run historical_weather.ipynb first
# (presumably it produces the per-stadium CSVs that load_weather() reads — TODO confirm).
weather_df = load_weather()

# Persist the combined weather data, without the index, as a single CSV.
weather_df.to_csv("all_historical_weather.csv", index=False)

In [8]:
# Reload the combined weather CSV, parse both timestamp columns as datetimes
# and order the rows by team, then by local (EST) time.
weather_df = (
    pd.read_csv("all_historical_weather.csv")
    .assign(**{
        'Time (GMT)': lambda readings: pd.to_datetime(readings['Time (GMT)']),
        'Time (EST)': lambda readings: pd.to_datetime(readings['Time (EST)']),
    })
    .sort_values(by=['Team Abbreviation', 'Time (EST)'])
)

In [9]:
# Preview the first 10 rows of the weather frame.
weather_df.head(10)

Unnamed: 0,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h),added_time
0,1013.6,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 20:00:00,2015-01-01 00:00:00,290.0,11.2,0
1,1013.5,Glendale,2.7,Bermuda grass,79.0,,Retractable,ARI,6.1,2014-12-31 21:00:00,2015-01-01 01:00:00,240.0,5.4,0
2,1013.9,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 22:00:00,2015-01-01 02:00:00,290.0,14.8,0
3,1014.2,Glendale,1.6,Bermuda grass,73.0,,Retractable,ARI,6.1,2014-12-31 23:00:00,2015-01-01 03:00:00,270.0,22.3,0
4,1014.9,Glendale,0.0,Bermuda grass,70.0,,Retractable,ARI,5.0,2015-01-01 00:00:00,2015-01-01 04:00:00,270.0,14.8,0
5,1015.4,Glendale,-0.6,Bermuda grass,70.0,,Retractable,ARI,4.4,2015-01-01 01:00:00,2015-01-01 05:00:00,250.0,20.5,0
6,1015.8,Glendale,0.5,Bermuda grass,85.0,,Retractable,ARI,2.8,2015-01-01 02:00:00,2015-01-01 06:00:00,280.0,16.6,0
7,1015.7,Glendale,1.2,Bermuda grass,89.0,,Retractable,ARI,2.8,2015-01-01 03:00:00,2015-01-01 07:00:00,270.0,13.0,0
8,1016.0,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 04:00:00,2015-01-01 08:00:00,300.0,9.4,0
9,1016.9,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 05:00:00,2015-01-01 09:00:00,300.0,7.6,0


In [10]:
# Rows with at least one null value, total rows, and the fraction of rows affected.
rows_with_null = int(weather_df.isnull().any(axis=1).sum())
total_rows = len(weather_df)
print(rows_with_null, total_rows, rows_with_null / total_rows)

43857 1402545 0.03126958493310375


In [11]:
# Print the number of missing values in each weather column.
for column, missing in weather_df.isnull().sum().items():
    print(column, int(missing))

Air Pressure (hPa) 0
City 0
Dewpoint (°C) 0
Field 0
Humidity (%) 0
Precipitation (mm) 85
Roof 0
Team Abbreviation 0
Temperature (°C) 0
Time (EST) 0
Time (GMT) 0
Wind Direction (deg) 43823
Wind Speed (km/h) 43819
added_time 0


In [12]:
# Fill gaps in the continuous readings by linear interpolation between neighbouring rows.
# `method=` is the keyword DataFrame.interpolate uses to select the technique;
# `type=` is not an interpolate parameter, so it never selected anything.
# NOTE(review): weather_df is sorted by team and then time, so a gap at the edge of one
# team's rows would be filled using the adjacent team's readings — confirm whether a
# per-team interpolation is wanted. The null-count output above reports 0 missing values
# for these three columns, so this step currently changes nothing.
continuous_cols = ['Temperature (°C)', 'Humidity (%)', 'Dewpoint (°C)']
weather_df[continuous_cols] = weather_df[continuous_cols].interpolate(method='linear')

In [13]:
# Inspect the column dtypes of the play-by-play frame before merging.
nfl_small2.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [14]:
# Preview the first rows of the play-by-play frame.
nfl_small2.head()

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST)
0,0,362094,44,2017090700,2017-09-07 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,NE,KC,NE,home,KC,KC,35.0,1,0,,0.0,KC 35,0,73,C.Santos kicks 64 yards from KC 35 to NE 1. D....,kickoff,0.0,0,0.0,0.0,,64.0,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:00.000000000,2017-09-07 20:30:00,2017-09-07 20:30:00
1,1,362095,68,2017090700,2017-09-07 00:00:00,14:55,895.0,1795.0,3595.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,1.0,0.0,NE 27,10,0,(14:55) NE 12-Brady 18th season as Patriots QB...,pass,0.0,deep,27.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:00:15.000000000,2017-09-07 20:30:15,2017-09-07 20:30:15
2,2,362096,94,2017090700,2017-09-07 00:00:00,14:49,889.0,1789.0,3589.0,Half1,0,1,NE,KC,NE,home,KC,NE,73.0,1,0,2.0,0.0,NE 27,10,8,(14:49) T.Brady pass short right to R.Burkhead...,pass,8.0,short,1.0,7.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-09-07 20:30:00,0 days 00:00:33.000000000,2017-09-07 20:30:33,2017-09-07 20:30:33
3,3,362097,118,2017090700,2017-09-07 00:00:00,14:14,854.0,1754.0,3554.0,Half1,0,1,NE,KC,NE,home,KC,NE,65.0,1,0,3.0,0.0,NE 35,2,73,(14:14) (Shotgun) J.White left guard to NE 43 ...,run,8.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:02:18.000000000,2017-09-07 20:32:18,2017-09-07 20:32:18
4,4,362098,139,2017090700,2017-09-07 00:00:00,13:52,832.0,1732.0,3532.0,Half1,0,1,NE,KC,NE,home,KC,NE,57.0,1,0,1.0,0.0,NE 43,10,19,"(13:52) (No Huddle, Shotgun) J.White up the mi...",run,3.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-07 20:30:00,0 days 00:03:24.000000000,2017-09-07 20:33:24,2017-09-07 20:33:24


In [15]:
# Build the join key: snap each play's estimated time to the nearest hour
# (the weather preview above shows one reading per hour in 'Time (EST)').
# '60min' rounds exactly like the 'H' alias, which recent pandas releases deprecate.
nfl_small2['Estimated_Hour'] = nfl_small2['Estimated Time (EST)'].dt.round('60min')

# Left join keeps every play and attaches the home stadium's reading for that hour.
# validate='many_to_one' makes pandas raise a MergeError if the weather data holds
# duplicate (team, hour) rows, which would otherwise silently duplicate plays.
nfl_weather = pd.merge(
    nfl_small2,
    weather_df,
    how='left',
    left_on=['home_team', 'Estimated_Hour'],
    right_on=['Team Abbreviation', 'Time (EST)'],
    validate='many_to_one',
)

# Fixed seed so the spot-check shows the same rows on every run.
nfl_weather.sample(10, random_state=0)

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,diff,Estimated_Time,Estimated Time (EST),Estimated_Hour,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Wind Direction (deg),Wind Speed (km/h),added_time
25797,25797,388769,1838,2017111903,2017-11-19 00:00:00,00:55,55.0,55.0,1855.0,Half1,0,2,HOU,ARI,HOU,home,ARI,HOU,58.0,12,0,1.0,0.0,HOU 42,10,25,(:55) (Shotgun) T.Savage pass short right to C...,pass,8.0,short,6.0,2.0,,,,,,7,14,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-11-19 13:00:00,0 days 01:27:15.000000000,2017-11-19 14:27:15,2017-11-19 14:27:15,2017-11-19 14:00:00,1026.2,Houston,0.8,Hellas Matrix Turf,33.0,0.0,Retractable,HOU,17.2,2017-11-19 14:00:00,2017-11-19 18:00:00,30.0,22.3,0
10104,10104,372318,53,2017100111,2017-10-01 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,TB,NYG,NYG,away,TB,NYG,75.0,1,0,1.0,0.0,NYG 25,10,-1,(15:00) P.Perkins up the middle to NYG 24 for ...,run,-1.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-10-01 16:05:00,0 days 00:00:00.000000000,2017-10-01 16:05:00,2017-10-01 16:05:00,2017-10-01 16:00:00,1012.7,Tampa,21.0,Bermuda grass,50.0,7.85,Open,TB,32.8,2017-10-01 16:00:00,2017-10-01 20:00:00,70.0,7.6,0
6116,6116,368982,984,2017092404,2017-09-24 00:00:00,12:51,771.0,771.0,2571.0,Half1,0,2,DET,ATL,DET,home,ATL,ATL,35.0,4,0,,0.0,ATL 35,0,0,M.Bosher kicks 65 yards from ATL 35 to end zon...,kickoff,0.0,0,0.0,0.0,,,,,,0,10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-24 13:00:00,0 days 00:51:27.000000000,2017-09-24 13:51:27,2017-09-24 13:51:27,2017-09-24 14:00:00,1025.687556,Detroit,17.0,FieldTurf Classic HD,43.0,0.0,Fixed,DET,31.0,2017-09-24 14:00:00,2017-09-24 17:53:00,120.0,13.0,0
21082,21082,383724,3058,2017110502,2017-11-05 00:00:00,11:01,661.0,661.0,661.0,Half2,0,4,JAX,CIN,JAX,home,CIN,JAX,55.0,15,0,1.0,0.0,JAX 45,10,13,(11:01) B.Bortles pass incomplete deep right t...,pass,0.0,deep,30.0,0.0,,,,,,22,7,22.0,22.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-05 13:00:00,0 days 02:26:57.000000000,2017-11-05 15:26:57,2017-11-05 15:26:57,2017-11-05 15:00:00,1017.9,Jacksonville,21.1,Bermuda grass,74.0,0.0,Open,JAX,26.1,2017-11-05 15:00:00,2017-11-05 19:00:00,50.0,24.1,0
21403,21403,384055,3156,2017110504,2017-11-05 00:00:00,00:41,41.0,941.0,941.0,Half2,0,3,NYG,LA,LA,away,NYG,LA,78.0,20,0,2.0,0.0,LA 22,8,2,(:41) (No Huddle) M.Brown right guard to LA 22...,run,0.0,0,0.0,0.0,,,,,,10,48,48.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-05 13:00:00,0 days 02:12:57.000000000,2017-11-05 15:12:57,2017-11-05 15:12:57,2017-11-05 15:00:00,1021.7,East Rutherford,12.8,UBU Sports Speed Series S5-M Synthetic Turf,93.0,0.4,Open,NYG,13.9,2017-11-05 15:00:00,2017-11-05 19:00:00,20.0,13.0,0
19957,19957,382729,1052,2017102909,2017-10-29 00:00:00,12:26,746.0,746.0,2546.0,Half1,0,2,WAS,DAL,WAS,home,DAL,DAL,47.0,6,0,1.0,0.0,DAL 47,10,38,(12:26) K.Cousins pass short right to J.Reed t...,pass,5.0,short,-1.0,6.0,,,,,,10,7,10.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-10-29 16:25:00,0 days 00:52:42.000000000,2017-10-29 17:17:42,2017-10-29 17:17:42,2017-10-29 17:00:00,993.0,Landover,14.5,Bermuda grass,93.0,1.3,Open,WAS,15.6,2017-10-29 17:00:00,2017-10-29 21:00:00,360.0,20.5,0
55159,55159,420072,242,2018100701,2018-10-07 00:00:00,11:44:00,704.0,1604.0,3404.0,Half1,0,1,CAR,NYG,CAR,home,NYG,CAR,95.0,2,0,2.0,0.0,CAR 5,11,0,"(11:44) (Shotgun) PENALTY on CAR, Delay of Gam...",no_play,0.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-10-07 16:25:00,0 days 00:09:48.000000000,2018-10-07 16:34:48,2018-10-07 16:34:48,2018-10-07 17:00:00,1021.2,Charlotte,21.1,Bermuda grass,59.0,0.0,Open,CAR,30.0,2018-10-07 17:00:00,2018-10-07 21:00:00,30.0,13.0,0
76380,76380,441087,1257,2018120200,2018-12-02 00:00:00,11:33:00,693.0,693.0,2493.0,Half1,0,2,ATL,BAL,BAL,away,ATL,ATL,20.0,7,0,,0.0,ATL 20,0,15,M.Bosher kicks 67 yards from ATL 20 to BAL 13....,kickoff,0.0,0,0.0,0.0,,67.0,,,,9,7,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-12-02 16:25:00,0 days 00:55:21.000000000,2018-12-02 17:20:21,2018-12-02 17:20:21,2018-12-02 17:00:00,1007.4,Atlanta,6.8,FieldTurf Revolution 360,37.0,0.0,Retractable,ATL,22.2,2018-12-02 17:00:00,2018-12-02 21:00:00,220.0,20.5,0
3640,3640,365515,2675,2017091705,2017-09-17 00:00:00,09:36,576.0,1476.0,1476.0,Half2,0,3,NO,NE,NE,away,NO,NE,73.0,13,0,3.0,0.0,NE 27,5,33,(9:36) T.Brady pass short left intended for C....,no_play,0.0,0,0.0,0.0,,,,,,13,30,30.0,30.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-09-17 13:00:00,0 days 01:46:12.000000000,2017-09-17 14:46:12,2017-09-17 14:46:12,2017-09-17 15:00:00,1017.6,New Orleans,23.3,FieldTurf Revolution 360,61.0,0.0,Fixed,NO,31.7,2017-09-17 15:00:00,2017-09-17 19:00:00,70.0,13.0,0
76522,76522,442127,558,2018120201,2018-12-02 00:00:00,05:03:00,303.0,1203.0,3003.0,Half1,0,1,CIN,DEN,CIN,home,DEN,CIN,52.0,2,0,3.0,0.0,CIN 48,16,45,(5:03) (Shotgun) J.Driskel pass short middle t...,pass,7.0,short,2.0,5.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2018-12-02 16:25:00,0 days 00:29:51.000000000,2018-12-02 16:54:51,2018-12-02 16:54:51,2018-12-02 17:00:00,997.4,Cincinnati,2.7,UBU Speed Series S5-M Synthetic Turf,39.0,0.0,Open,CIN,16.7,2018-12-02 17:00:00,2018-12-02 21:00:00,220.0,44.6,0


In [16]:
# Inspect the column dtypes of the merged play + weather frame.
nfl_weather.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [17]:
# Summary (row count, per-column non-null counts, dtypes) of the play-by-play frame.
nfl_small2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84371 entries, 0 to 84370
Data columns (total 56 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [18]:
# Summary (row count, per-column non-null counts, dtypes) of the merged frame.
nfl_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 84371 entries, 0 to 84370
Data columns (total 70 columns):
Unnamed: 0                   84371 non-null int64
index                        84371 non-null int64
play_id                      84371 non-null int64
game_id                      84371 non-null int64
game_date                    84371 non-null object
time                         84371 non-null object
quarter_seconds_remaining    84371 non-null float64
half_seconds_remaining       84371 non-null float64
game_seconds_remaining       84371 non-null float64
game_half                    84371 non-null object
quarter_end                  84371 non-null int64
qtr                          84371 non-null int64
home_team                    84371 non-null object
away_team                    84371 non-null object
posteam                      84371 non-null object
posteam_type                 84371 non-null object
defteam                      84371 non-null object
side_of_field              

In [19]:
# List the home team and hour of every play that has no temperature after the merge.
nfl_weather.loc[nfl_weather['Temperature (°C)'].isnull(), ['home_team', 'Estimated_Hour']]

Unnamed: 0,home_team,Estimated_Hour


In [20]:
# Write the merged plays + weather data to disk.
# index=False matches how all_historical_weather.csv is written and stops the row index
# from being saved as an extra unnamed column; the play-by-play input already carries
# 'Unnamed: 0' and 'Unnamed: 0.1' columns. The merged frame's index is just 0..n-1
# (see the .info() output above).
nfl_weather.to_csv('nfl_small_cleaned_plus_weather.csv', index=False)