In [93]:
# importing packages
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from datetime import datetime
from datetime import time
import pickle
import os
import re

In [94]:
# Setting pandas to display max columns and rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [95]:
# importing Tyler's 'nfl_small_cleaned.csv' file
nfl_small2 = pd.read_csv('nfl_small_cleaned.csv')
nfl_small2['Estimated Time (EST)'] = pd.to_datetime(nfl_small2['Estimated_Time'])
nfl_small2.shape

(173055, 56)

In [96]:
nfl_small2.replace({'LAR':'LA'}, inplace=True)

<h2>Combining weather data with dataframe data</h2>

In [98]:
# loads weather data and appends dataframe together
def load_weather():

    # mapping out each stadium with their own weather dataframe in a dictionary
       
    weather_path = os.getcwd() + '\\historicalweatherdata'
    files = os.listdir(weather_path)
    df = pd.DataFrame()
    for name in files:
        if '.csv' in name and len(name) <= 7:
            
            df2 = pd.read_csv(weather_path + '\\' + name)
            df = pd.concat([df, df2], sort=True)
    
    df = df.drop(['time_local'], axis=1)
    df['Time (GMT)'] = pd.to_datetime(df['Time (GMT)'])
    df['Time (EST)'] = pd.to_datetime(df['Time (EST)'])
    
    df = df.sort_values(by=['Team Abbreviation', 'Time (EST)'])
    return df

In [99]:
# Run load_weather function
weather_df = load_weather()

weather_df.to_csv("all_historical_weather.csv", index=False)

In [100]:
weather_df = pd.read_csv("all_historical_weather.csv")
weather_df['Time (GMT)'] = pd.to_datetime(weather_df['Time (GMT)'])
weather_df['Time (EST)'] = pd.to_datetime(weather_df['Time (EST)'])

weather_df = weather_df.sort_values(by=['Team Abbreviation', 'Time (EST)'])

In [101]:
weather_df.head(10)

Unnamed: 0.1,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Unnamed: 0,Wind Direction (deg),Wind Speed (km/h),added_time
0,1013.6,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 20:00:00,2015-01-01 00:00:00,,290.0,11.2,0
1,1013.5,Glendale,2.7,Bermuda grass,79.0,,Retractable,ARI,6.1,2014-12-31 21:00:00,2015-01-01 01:00:00,,240.0,5.4,0
2,1013.9,Glendale,2.2,Bermuda grass,73.0,,Retractable,ARI,6.7,2014-12-31 22:00:00,2015-01-01 02:00:00,,290.0,14.8,0
3,1014.2,Glendale,1.6,Bermuda grass,73.0,,Retractable,ARI,6.1,2014-12-31 23:00:00,2015-01-01 03:00:00,,270.0,22.3,0
4,1014.9,Glendale,0.0,Bermuda grass,70.0,,Retractable,ARI,5.0,2015-01-01 00:00:00,2015-01-01 04:00:00,,270.0,14.8,0
5,1015.4,Glendale,-0.6,Bermuda grass,70.0,,Retractable,ARI,4.4,2015-01-01 01:00:00,2015-01-01 05:00:00,,250.0,20.5,0
6,1015.8,Glendale,0.5,Bermuda grass,85.0,,Retractable,ARI,2.8,2015-01-01 02:00:00,2015-01-01 06:00:00,,280.0,16.6,0
7,1015.7,Glendale,1.2,Bermuda grass,89.0,,Retractable,ARI,2.8,2015-01-01 03:00:00,2015-01-01 07:00:00,,270.0,13.0,0
8,1016.0,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 04:00:00,2015-01-01 08:00:00,,300.0,9.4,0
9,1016.9,Glendale,-0.7,Bermuda grass,78.0,0.0,Retractable,ARI,2.8,2015-01-01 05:00:00,2015-01-01 09:00:00,,300.0,7.6,0


In [102]:
# Number of null values and percentage of nulls
print(weather_df[weather_df.isnull().any(axis=1)].shape[0], weather_df.shape[0], weather_df[weather_df.isnull().any(axis=1)].shape[0] / weather_df.shape[0])

1314899 1402596 0.9374752245122615


In [103]:
for col in weather_df.columns:
    print(col, (len(weather_df) - weather_df[col].count()))

Air Pressure (hPa) 0
City 0
Dewpoint (°C) 0
Field 0
Humidity (%) 0
Precipitation (mm) 86
Roof 0
Team Abbreviation 0
Temperature (°C) 0
Time (EST) 0
Time (GMT) 0
Unnamed: 0 1314897
Wind Direction (deg) 43822
Wind Speed (km/h) 43819
added_time 0


In [1]:
# interpolates temperature, humidity, and dewpoint to make sure there aren't any null values
weather_df[['Temperature (°C)', 'Humidity (%)', 'Dewpoint (°C)']] = weather_df[['Temperature (°C)', 'Humidity (%)', 'Dewpoint (°C)']].interpolate(type='linear')

NameError: name 'weather_df' is not defined

In [105]:
nfl_small2.dtypes

Unnamed: 0                            int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [106]:
nfl_small2.head()

Unnamed: 0.1,Unnamed: 0,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,year,diff,Estimated_Time,Estimated Time (EST)
0,0,270409,36,2015091000,2015-09-10 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,NE,PIT,PIT,away,NE,NE,35.0,1,0,,0.0,NE 35,0,0,S.Gostkowski kicks 65 yards from NE 35 to end ...,kickoff,0.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-10 20:40:00,2015.0,0 days 00:00:00.000000000,2015-09-10 20:40:00,2015-09-10 20:40:00
1,1,270410,51,2015091000,2015-09-10 00:00:00,15:00,900.0,1800.0,3600.0,Half1,0,1,NE,PIT,PIT,away,NE,PIT,80.0,1,0,1.0,0.0,PIT 20,10,18,(15:00) De.Williams right tackle to PIT 38 for...,run,18.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-10 20:40:00,,0 days 00:00:00.000000000,2015-09-10 20:40:00,2015-09-10 20:40:00
2,2,270411,72,2015091000,2015-09-10 00:00:00,14:21,861.0,1761.0,3561.0,Half1,0,1,NE,PIT,PIT,away,NE,PIT,62.0,1,0,1.0,0.0,PIT 38,10,31,(14:21) B.Roethlisberger pass short right to A...,pass,9.0,short,-4.0,13.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2015-09-10 20:40:00,,0 days 00:01:57.000000000,2015-09-10 20:41:57,2015-09-10 20:41:57
3,3,270412,101,2015091000,2015-09-10 00:00:00,14:04,844.0,1744.0,3544.0,Half1,0,1,NE,PIT,PIT,away,NE,PIT,53.0,1,0,2.0,0.0,PIT 47,1,31,(14:04) De.Williams right guard to NE 49 for 4...,run,4.0,0,0.0,0.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-10 20:40:00,,0 days 00:02:48.000000000,2015-09-10 20:42:48,2015-09-10 20:42:48
4,4,270413,122,2015091000,2015-09-10 00:00:00,13:26,806.0,1706.0,3506.0,Half1,0,1,NE,PIT,PIT,away,NE,NE,49.0,1,0,1.0,0.0,NE 49,10,45,(13:26) B.Roethlisberger pass short right to H...,pass,14.0,short,9.0,5.0,,,,,,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2015-09-10 20:40:00,,0 days 00:04:42.000000000,2015-09-10 20:44:42,2015-09-10 20:44:42


In [107]:
# merging weather data and nfl data
nfl_small2['Estimated_Hour'] = nfl_small2['Estimated Time (EST)'].dt.round('H')
nfl_weather = pd.merge(nfl_small2, weather_df, how = 'left', left_on = ['home_team', 'Estimated_Hour'], right_on = ['Team Abbreviation', 'Time (EST)'])
nfl_weather.sample(10)

Unnamed: 0,Unnamed: 0_x,index,play_id,game_id,game_date,time,quarter_seconds_remaining,half_seconds_remaining,game_seconds_remaining,game_half,quarter_end,qtr,home_team,away_team,posteam,posteam_type,defteam,side_of_field,yardline_100,drive,sp,down,goal_to_go,yrdln,ydstogo,ydsnet,desc,play_type,yards_gained,pass_length,air_yards,yards_after_catch,field_goal_result,kick_distance,extra_point_result,two_point_conv_result,td_team,total_home_score,total_away_score,posteam_score,defteam_score,sack,touchdown,pass_touchdown,rush_touchdown,return_touchdown,extra_point_attempt,two_point_attempt,field_goal_attempt,fumble,complete_pass,Start_Time,year,diff,Estimated_Time,Estimated Time (EST),Estimated_Hour,Air Pressure (hPa),City,Dewpoint (°C),Field,Humidity (%),Precipitation (mm),Roof,Team Abbreviation,Temperature (°C),Time (EST),Time (GMT),Unnamed: 0_y,Wind Direction (deg),Wind Speed (km/h),added_time
95642,95597,368377,2994,2017092408,2017-09-24 00:00:00,00:04,4.0,904.0,904.0,Half2,0,3,NYJ,MIA,MIA,away,NYJ,NYJ,49.0,18,0,2.0,0.0,NYJ 49,6,54,(:04) (Shotgun) J.Cutler pass short right to J...,pass,8.0,short,8.0,0.0,,,,,,20,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2017-09-24 13:00:00,,0 days 02:14:48.000000000,2017-09-24 15:14:48,2017-09-24 15:14:48,2017-09-24 15:00:00,1016.8,East Rutherford,16.6,UBU Sports Speed Series S5-M Synthetic Turf,38.0,0.0,Open,NYJ,32.8,2017-09-24 15:00:00,2017-09-24 19:00:00,,80.0,13.0,0
84229,84184,357275,3417,2016122404,2016-12-24 00:00:00,06:52,412.0,412.0,412.0,Half2,0,4,GB,MIN,MIN,away,GB,GB,49.0,23,0,2.0,0.0,GB 49,9,31,(6:52) (Shotgun) S.Bradford pass short middle ...,pass,5.0,short,4.0,1.0,,,,,,38,13,13.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2016-12-24 13:00:00,,0 days 02:39:24.000000000,2016-12-24 15:39:24,2016-12-24 15:39:24,2016-12-24 16:00:00,1024.2,Green Bay,-1.0,Desso GrassMaster,82.0,0.0,Open,GB,1.7,2016-12-24 16:00:00,2016-12-24 20:00:00,,304.444444,0.0,0
107397,107352,382273,3477,2017102901,2017-10-29 00:00:00,14:57,897.0,897.0,897.0,Half2,0,4,BUF,OAK,OAK,away,BUF,OAK,75.0,17,0,1.0,0.0,OAK 25,10,14,(14:57) (Shotgun) J.Richard left tackle to OAK...,run,14.0,0,0.0,0.0,,,,,,26,7,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-10-29 13:00:00,,0 days 02:15:09.000000000,2017-10-29 15:15:09,2017-10-29 15:15:09,2017-10-29 15:00:00,1005.3,Orchard Park,5.7,A-Turf Titan 50,97.0,1.0,Open,BUF,6.1,2017-10-29 15:00:00,2017-10-29 19:00:00,,340.0,14.8,0
29804,29804,301357,3730,2015112906,2015-11-29 00:00:00,08:31,511.0,511.0,511.0,Half2,0,4,JAX,LAC,LAC,away,JAX,JAX,30.0,20,0,1.0,0.0,JAC 30,10,52,(8:31) (Shotgun) D.Woodhead up the middle to J...,run,2.0,0,0.0,0.0,,,,,,19,24,24.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-11-29 13:03:00,,0 days 02:34:27.000000000,2015-11-29 15:37:27,2015-11-29 15:37:27,2015-11-29 16:00:00,1019.1,Jacksonville,12.2,Bermuda grass,48.0,0.0,Open,JAX,23.9,2015-11-29 16:00:00,2015-11-29 20:00:00,,60.0,11.2,0
103012,102967,376697,4917,2017101504,2017-10-15 00:00:00,04:15,255.0,255.0,255.0,Half2,0,4,NO,DET,DET,away,NO,DET,82.0,31,0,1.0,0.0,DET 18,10,0,(4:15) (Shotgun) M.Stafford pass incomplete sh...,pass,0.0,short,4.0,0.0,,,,,,49,36,36.0,36.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-10-15 13:00:00,,0 days 02:47:15.000000000,2017-10-15 15:47:15,2017-10-15 15:47:15,2017-10-15 16:00:00,1016.3,New Orleans,23.9,FieldTurf Revolution 360,77.0,0.0,Fixed,NO,28.3,2017-10-15 16:00:00,2017-10-15 20:00:00,,110.0,7.6,0
151176,151131,426653,3136,2018102500,2018-10-25 00:00:00,00:36:00,36.0,936.0,936.0,Half2,0,3,HOU,MIA,HOU,home,MIA,MIA,35.0,19,0,,0.0,MIA 35,0,49,J.Sanders kicks onside 14 yards from MIA 35 to...,kickoff,0.0,0,0.0,0.0,,14.0,,,,28,20,28.0,28.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2018-10-25 16:25:00,,0 days 02:13:12.000000000,2018-10-25 18:38:12,2018-10-25 18:38:12,2018-10-25 19:00:00,1011.3,Houston,14.4,Hellas Matrix Turf,75.0,0.0,Retractable,HOU,18.9,2018-10-25 19:00:00,2018-10-25 23:00:00,,330.0,16.6,0
3577,3577,274849,2984,2015092003,2015-09-20 00:00:00,01:36,96.0,996.0,996.0,Half2,0,3,CLE,TEN,CLE,home,TEN,TEN,35.0,19,0,,0.0,TEN 35,0,0,R.Succop kicks 71 yards from TEN 35 to CLE -6....,kickoff,0.0,0,0.0,0.0,,,,,,20,7,20.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2015-09-20 13:02:00,,0 days 02:10:12.000000000,2015-09-20 15:12:12,2015-09-20 15:12:12,2015-09-20 15:00:00,1021.8,Cleveland,10.7,Kentucky bluegrass,55.0,0.0,Open,CLE,20.0,2015-09-20 15:00:00,2015-09-20 19:00:00,,30.0,18.4,0
118383,118338,392750,4124,2017112609,2017-11-26 00:00:00,03:52,232.0,232.0,232.0,Half2,0,4,ARI,JAX,ARI,home,JAX,ARI,59.0,24,0,1.0,0.0,ARI 41,10,25,(3:52) B.Gabbert pass incomplete deep right to...,pass,0.0,deep,34.0,0.0,,,,,,24,23,24.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-11-26 16:25:00,,0 days 02:48:24.000000000,2017-11-26 19:13:24,2017-11-26 19:13:24,2017-11-26 19:00:00,1012.2,Glendale,-2.8,Bermuda grass,11.0,0.0,Retractable,ARI,31.1,2017-11-26 19:00:00,2017-11-26 23:00:00,,250.0,0.0,0
124958,124913,400102,3844,2017121700,2017-12-17 00:00:00,06:46,406.0,406.0,406.0,Half2,0,4,BUF,MIA,BUF,home,MIA,BUF,54.0,20,0,3.0,0.0,BUF 46,13,28,(6:46) (Shotgun) T.Taylor scrambles left end t...,run,10.0,0,0.0,0.0,,,,,,24,13,24.0,24.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2017-12-17 13:00:00,,0 days 02:39:42.000000000,2017-12-17 15:39:42,2017-12-17 15:39:42,2017-12-17 16:00:00,1023.0,Orchard Park,-9.4,A-Turf Titan 50,71.0,0.0,Open,BUF,-5.0,2017-12-17 16:00:00,2017-12-17 20:00:00,,40.0,11.2,0
1897,1897,272371,488,2015091310,2015-09-13 00:00:00,07:08,428.0,1328.0,3128.0,Half1,0,1,OAK,CIN,OAK,home,CIN,OAK,80.0,3,0,3.0,0.0,OAK 20,10,10,(7:08) (Shotgun) D.Carr pass short right to M....,pass,10.0,short,10.0,0.0,,,,,,0,7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2015-09-13 16:25:00,,0 days 00:23:36.000000000,2015-09-13 16:48:36,2015-09-13 16:48:36,2015-09-13 17:00:00,1008.9,Oakland,14.0,Grass,54.0,0.0,Open,OAK,23.9,2015-09-13 17:00:00,2015-09-13 21:00:00,,290.0,14.8,0


In [108]:
nfl_weather.dtypes

Unnamed: 0_x                          int64
index                                 int64
play_id                               int64
game_id                               int64
game_date                            object
time                                 object
quarter_seconds_remaining           float64
half_seconds_remaining              float64
game_seconds_remaining              float64
game_half                            object
quarter_end                           int64
qtr                                   int64
home_team                            object
away_team                            object
posteam                              object
posteam_type                         object
defteam                              object
side_of_field                        object
yardline_100                        float64
drive                                 int64
sp                                    int64
down                                float64
goal_to_go                      

In [109]:
nfl_small2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173055 entries, 0 to 173054
Data columns (total 57 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Unnamed: 0                 173055 non-null  int64         
 1   index                      173055 non-null  int64         
 2   play_id                    173055 non-null  int64         
 3   game_id                    173055 non-null  int64         
 4   game_date                  173055 non-null  object        
 5   time                       173055 non-null  object        
 6   quarter_seconds_remaining  173055 non-null  float64       
 7   half_seconds_remaining     173055 non-null  float64       
 8   game_seconds_remaining     173055 non-null  float64       
 9   game_half                  173055 non-null  object        
 10  quarter_end                173055 non-null  int64         
 11  qtr                        173055 non-null  int64   

In [110]:
nfl_weather.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 173100 entries, 0 to 173099
Data columns (total 72 columns):
 #   Column                     Non-Null Count   Dtype         
---  ------                     --------------   -----         
 0   Unnamed: 0_x               173100 non-null  int64         
 1   index                      173100 non-null  int64         
 2   play_id                    173100 non-null  int64         
 3   game_id                    173100 non-null  int64         
 4   game_date                  173100 non-null  object        
 5   time                       173100 non-null  object        
 6   quarter_seconds_remaining  173100 non-null  float64       
 7   half_seconds_remaining     173100 non-null  float64       
 8   game_seconds_remaining     173100 non-null  float64       
 9   game_half                  173100 non-null  object        
 10  quarter_end                173100 non-null  int64         
 11  qtr                        173100 non-null  int64   

In [111]:
# checking for nulls
nfl_weather[nfl_weather[['Temperature (°C)']].isnull().any(axis=1)][['home_team', 'Estimated_Hour']]

Unnamed: 0,home_team,Estimated_Hour


In [113]:
# saving weather plus nfl data
nfl_weather.to_csv('nfl_small_cleaned_plus_weather.csv')