In [None]:
# imports
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.formula.api as smf
import pylab

# this allows plots to appear directly in the notebook
%matplotlib inline
# increase size of plots
pylab.rcParams['figure.figsize'] = (15, 10)
# allow reading of all of the columns in a dataset
pd.options.display.max_columns = 50
pd.options.display.max_rows = 120

In [61]:
weatherDF = pd.read_csv("../data/fanduel_data/nflsavant_data_sets/weather_20131231.csv")
weatherDF.head()

Unnamed: 0,id,home_team,home_score,away_team,away_score,temperature,wind_chill,humidity,wind_mph,weather,date
0,196009230ram,Los Angeles Rams,21,St. Louis Cardinals,43,66,,78%,8,66 degrees- relative humidity 78%- wind 8 mph,9/23/1960
1,196009240dal,Dallas Cowboys,28,Pittsburgh Steelers,35,72,,80%,16,72 degrees- relative humidity 80%- wind 16 mph,9/24/1960
2,196009250gnb,Green Bay Packers,14,Chicago Bears,17,60,,76%,13,60 degrees- relative humidity 76%- wind 13 mph,9/25/1960
3,196009250sfo,San Francisco 49ers,19,New York Giants,21,72,,44%,10,72 degrees- relative humidity 44%- wind 10 mph,9/25/1960
4,196009250clt,Baltimore Colts,20,Washington Redskins,0,62,,80%,9,62 degrees- relative humidity 80%- wind 9 mph,9/25/1960


##Functions that create new columns

In [None]:
'''
iterates through every row to check who the winner is 
and inputs that in a new column named 'winner'
'''
def create_winner(row):
    if (row[['home_score']]  > row[['away_score']]).all():
        return row['home_team']
    else:
        return row['away_team']

    
'''
iterates through a df and checks is the home team won or 
not
'''
def is_home_team_winner(row):
    if (row[['home_team']] == row[['winner']]).all():
        return True
    else:
        return False
    

from datetime import datetime
'''
creates a month column
returns the month to the dataframe
'''
def create_month_column(row):
    return month_generator(row['date'])

    
'''converts data of format mm/dd/yyyy into 
english words. returns the name of the month
'''
def month_generator(dateStr):
    date_object = datetime.strptime(dateStr, '%m/%d/%Y')
    dateInEnglish = date_object.strftime('%B %d, %Y')
    month = dateInEnglish.split(' ', 1)[0]
    return month


'''
searches the weather column value at respected row and searches for keyword 'rain'
gets row with info
returns T/F depending if raining or not
'''
def create_raining_column(row):
    return weatherDF['weather'].str.contains('rain', na = False)
    

#In [39]: s4 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat'])

#In [40]: s4.str.contains('A', na=False)

##Manipulate data and create new columns
add columns 'winner and 'winner_is_home_team'

In [None]:
# weatherDF['winner'] = weatherDF.apply(create_winner, axis=1)
weatherDF['winner_is_home_team'] = weatherDF.apply(is_home_team_winner, axis=1)

# create month column1
weatherDF['month'] = weatherDF.apply(create_month_column, axis=1)

# create isRaining column
weatherDF['did_rain'] = weatherDF['weather'].str.contains('rain', na = False)

In [None]:
weatherDF

In [80]:
weatherDF = weatherDF[weatherDF['month'] != 'August']

##Now check to see if the weather affects home team win percentage. Also check if weather affects points scored in a game.

Start by checking all unique values in home team columns to research normal temperatures in that home team city.

Notes on this section:

http://www.sportingcharts.com/articles/nfl/a-historical-analysis-of-nfl-results-during-cold-weather-months.aspx
results from this page only go until 1978

Checking to see which values appear the most to choose a dataset with most records

In [81]:
weatherDF['home_team'].value_counts().idxmax()

'Pittsburgh Steelers'

In [82]:
#All home team unique cities
teams = pd.unique(weatherDF.home_team.ravel())
teams

array(['Los Angeles Rams', 'Dallas Cowboys', 'Green Bay Packers',
       'San Francisco 49ers', 'Baltimore Colts', 'Philadelphia Eagles',
       'Cleveland Browns', 'St. Louis Cardinals', 'Detroit Lions',
       'Pittsburgh Steelers', 'Washington Redskins', 'Chicago Bears',
       'New York Giants', 'Minnesota Vikings', 'Atlanta Falcons',
       'New Orleans Saints', 'Kansas City Chiefs', 'Buffalo Bills',
       'San Diego Chargers', 'Cincinnati Bengals', 'Boston Patriots',
       'Denver Broncos', 'Houston Oilers', 'Miami Dolphins',
       'New York Jets', 'Oakland Raiders', 'New England Patriots',
       'Seattle Seahawks', 'Tampa Bay Buccaneers', 'Los Angeles Raiders',
       'Indianapolis Colts', 'Phoenix Cardinals', 'Arizona Cardinals',
       'Jacksonville Jaguars', 'St. Louis Rams', 'Carolina Panthers',
       'Baltimore Ravens', 'Tennessee Oilers', 'Tennessee Titans',
       'Houston Texans'], dtype=object)

In [83]:
steelers = weatherDF[weatherDF['home_team'] == 'Pittsburgh Steelers']

##Check average temperatures  

In [84]:
weatherDF[weatherDF['month'] == 'January']

Unnamed: 0,id,home_team,home_score,away_team,away_score,temperature,wind_chill,humidity,wind_mph,weather,date,winner_is_home_team,month
574,196601020gnb,Green Bay Packers,23,Cleveland Browns,12,26,16,92%,11,26 degrees- relative humidity 92%- wind 11 mph...,1/2/1966,False,January
680,196701010dal,Dallas Cowboys,27,Green Bay Packers,34,40,,75%,8,40 degrees- relative humidity 75%- wind 8 mph,1/1/1967,False,January
681,196701150gnb,Green Bay Packers,35,Kansas City Chiefs,10,54,,90%,7,54 degrees- relative humidity 90%- wind 7 mph,1/15/1967,False,January
797,196801140gnb,Green Bay Packers,33,Oakland Raiders,14,60,,74%,12,60 degrees- relative humidity 74%- wind 12 mph,1/14/1968,False,January
913,196901120clt,Baltimore Colts,7,New York Jets,16,66,,80%,12,66 degrees- relative humidity 80%- wind 12 mph,1/12/1969,False,January
1028,197001040min,Minnesota Vikings,27,Cleveland Browns,7,8,-6,75%,9,8 degrees- relative humidity 75%- wind 9 mph- ...,1/4/1970,False,January
1029,197001110kan,Kansas City Chiefs,23,Minnesota Vikings,7,55,,84%,14,55 degrees- relative humidity 84%- wind 14 mph,1/11/1970,False,January
1216,197101030clt,Baltimore Colts,27,Oakland Raiders,17,34,28,68%,6,34 degrees- relative humidity 68%- wind 6 mph-...,1/3/1971,False,January
1217,197101030sfo,San Francisco 49ers,10,Dallas Cowboys,17,46,,29%,22,46 degrees- relative humidity 29%- wind 22 mph,1/3/1971,False,January
1218,197101170clt,Baltimore Colts,16,Dallas Cowboys,13,59,,60%,11,59 degrees- relative humidity 60%- wind 11 mph,1/17/1971,False,January


In [94]:
rained = 'fjdkslarainfjdklsa'
rained.contains('rain')

AttributeError: 'str' object has no attribute 'contains'