In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from math import sin, cos, radians

In [None]:
# Load the raw CSV into `nfl_dat`, report its (rows, columns) shape and
# preview the first rows.
raw_csv_path = 'runs_nfl_data.csv'
nfl_dat = pd.read_csv(raw_csv_path)
print(nfl_dat.shape)
nfl_dat.head()

In [None]:
# List every column so we can work out which ones are recorded once per play
# (play granularity) and which ones vary per row within a play.
nfl_dat.columns

In [None]:
# Classify each column using only the first 22 rows (treated as the first play):
# a column with a single distinct value there goes to `play_cols`, anything
# else goes to `indiv_cols`. NaN counts as a distinct value.
first_play_rows = nfl_dat.head(22)
play_cols, indiv_cols = [], []

for col in nfl_dat.columns:
    n_distinct = first_play_rows[col].nunique(dropna=False)
    print('For ',col,'there are ',n_distinct,'unique values in the first play')
    target_cols = play_cols if n_distinct == 1 else indiv_cols
    target_cols.append(col)

In [None]:
# Keep only the play-level columns and collapse identical rows, which should
# leave one row per play; then renumber the index from 0.
plays_dat = (
    nfl_dat[play_cols]
    .drop_duplicates()
    .reset_index(drop=True)
)
print(plays_dat.shape)
plays_dat.head()

In [None]:
# Compare the number of distinct PlayIds with the number of rows; they are
# equal only if the table really is one row per play.
n_play_ids = plays_dat['PlayId'].nunique(dropna=False)
n_play_rows = len(plays_dat)
print(n_play_ids,'play IDs')
print(n_play_rows,'rows in data-set')

In [None]:
# Close, but why the discrepancy?
# Collect every PlayId that still occurs more than once, then print each column
# whose value differs between rows of the same play -- those columns are what
# prevented drop_duplicates from collapsing the play to a single row.
#
# The PlayIds are derived from the counts instead of a hard-coded "top 13", and
# the comparison is done per PlayId instead of on the first two rows of the
# filtered frame, so the check stays correct when the number of duplicated
# plays changes and prints nothing when there are no duplicates.
play_id_counts = plays_dat['PlayId'].value_counts()
dupes_list = play_id_counts[play_id_counts > 1].index.tolist()

# Filter once; the original re-filtered the frame for every column.
dupe_rows = plays_dat[plays_dat['PlayId'].isin(dupes_list)]
for col in plays_dat:
    if col == 'PlayId':
        continue  # the grouping key cannot vary within its own group
    # dropna=False so that NaN vs. a real value counts as a difference.
    distinct_per_play = dupe_rows.groupby('PlayId')[col].nunique(dropna=False)
    if (distinct_per_play > 1).any():
        print(col)

In [None]:
# Look at the distinct WindSpeed values in the first four rows belonging to
# the duplicated plays.
in_dupes = plays_dat['PlayId'].isin(dupes_list)
plays_dat.loc[in_dupes, 'WindSpeed'].head(4).unique()

In [None]:
# WindSpeed needs converting to a numeric column: anything that cannot be
# parsed as a number becomes NaN, and NaN is then replaced by 0.0.
wind_numeric = pd.to_numeric(plays_dat['WindSpeed'], errors='coerce')
plays_dat['WindSpeed'] = wind_numeric.fillna(0.0)

In [None]:
# De-duplicate again now that WindSpeed is numeric, and renumber the index.
plays_dat2 = plays_dat.drop_duplicates().reset_index(drop=True)
print(plays_dat2.shape)
plays_dat2.head()

In [None]:
# Sanity check: distinct PlayIds vs. row count after the second de-dupe.
n_play_ids2 = plays_dat2['PlayId'].nunique(dropna=False)
n_play_rows2 = len(plays_dat2)
print(n_play_ids2,'play IDs')
print(n_play_rows2,'rows in data-set')

In [None]:
# Build the player-level table: the columns that vary within a play plus the
# GameId / PlayId keys needed to join back to the play-level table.
#
# The appends are guarded so that re-running this cell does not add the keys a
# second time; duplicated labels in `indiv_cols` would produce duplicated
# columns in `players_dat` and break the merge that follows.
for key_col in ('GameId', 'PlayId'):
    if key_col not in indiv_cols:
        indiv_cols.append(key_col)
players_dat = nfl_dat[indiv_cols]
players_dat.head()

In [None]:
# Rusher data: attach to every play the player row whose NflId equals the
# play's NflIdRusher (same GameId and PlayId). Left join, so every play row is
# kept even when no player row matches.
rushers_dat = plays_dat2.merge(
    players_dat,
    how='left',
    left_on=['GameId', 'PlayId', 'NflIdRusher'],
    right_on=['GameId', 'PlayId', 'NflId'],
)

In [None]:
# Report the (rows, columns) shape of the merged rusher table and preview it.
print(rushers_dat.shape)
rushers_dat.head()

In [None]:
# Preview the position / movement columns for the first ten rusher rows.
rushers_dat[['GameId','X','Y','Orientation','Dir','S','A','Dis','Team','PlayDirection']].head(10)

In [None]:
# Write the merged rusher table to 'rushers_data.csv' without the index column.
rushers_dat.to_csv('rushers_data.csv',index=False)

In [None]:
# Scratch check: offset 30.53 by 0.38 * sin(245.7 degrees).
# NOTE(review): the literals look like Dir, Dis and a coordinate copied from a
# rusher row previewed earlier -- confirm which row and which axis.
(sin(radians(245.7))*0.38)+30.53

In [None]:
# Scratch check: offset 78.75 by 0.38 * cos(245.7 degrees).
# NOTE(review): the literals look like Dir, Dis and a coordinate copied from a
# rusher row previewed earlier -- confirm which row and which axis.
(cos(radians(245.7))*0.38)+78.75

In [None]:
# Scratch check: sum of the two coordinate offsets, (30.53 - 30.18) + (78.75 - 78.59).
# NOTE(review): 30.18 and 78.59 appear to be the sin/cos projections of the
# 0.38 displacement rounded to two decimals; this is a plain sum of the axis
# offsets, not a Euclidean distance -- confirm that is what was intended.
(30.53-30.18)+(78.75-78.59)

In [None]:
# Histogram of Dir for rows of game 2017090700 where Team is 'away'.
is_target_game = rushers_dat['GameId'] == 2017090700
is_away_team = rushers_dat['Team'] == 'away'
rushers_dat.loc[is_target_game & is_away_team, 'Dir'].hist()

In [None]:
# Kernel density estimate of Dir for rows with Orientation above 180 whose
# Position is anything other than 'RB'.
orientation_over_180 = rushers_dat['Orientation'] > 180
not_running_back = rushers_dat['Position'] != 'RB'
rushers_dat.loc[orientation_over_180 & not_running_back, 'Dir'].plot.kde()

In [None]:
# Work on an independent copy so the coordinate adjustments that follow do not
# modify `rushers_dat`.
rushers_dat_new = rushers_dat.copy()

In [None]:
# For every row whose Orientation exceeds 180, rotate Dir by 180 degrees and
# mirror the position through the centre of the field (X -> 120 - X,
# Y -> 53.3 - Y). 120 and 53.3 are presumably the field length and width in
# yards -- TODO confirm against the data dictionary.
#
# Changes from the original row-by-row loop:
#   * vectorised with a boolean mask, so it no longer depends on the index
#     being 0..n-1 (the loop mixed positional .iloc with label-based .at);
#   * source values are read from `rushers_dat`, the untouched frame that
#     `rushers_dat_new` was copied from, so re-running this cell gives the same
#     result instead of flipping the same rows back again;
#   * the rotated Dir is wrapped modulo 360 so it stays a non-negative angle
#     (the original produced negative values whenever Dir < 180).
# Orientation itself is intentionally left unchanged, as in the original.
flip_mask = rushers_dat['Orientation'] > 180  # NaN compares False -> row untouched
rushers_dat_new.loc[flip_mask, 'Dir'] = (rushers_dat.loc[flip_mask, 'Dir'] - 180) % 360
rushers_dat_new.loc[flip_mask, 'X'] = 120 - rushers_dat.loc[flip_mask, 'X']
rushers_dat_new.loc[flip_mask, 'Y'] = 53.3 - rushers_dat.loc[flip_mask, 'Y']

In [None]:
# Preview the position / movement columns plus Yards for the first ten rows of
# the adjusted table.
rushers_dat_new[['GameId','X','Y','Orientation','Dir','S','A','Dis','Team','PlayDirection','Yards']].head(10)

In [None]:
# Derive an end point (X1, Y1) for each run from its start point and Yards:
#   X1 = Yards + X
#   Y1 = sin(Dir in degrees) * Yards + Y
# NOTE(review): X1 adds the full Yards value without a cos(Dir) factor while Y1
# scales Yards by sin(Dir) -- confirm this asymmetry is intended.
yards_gained = rushers_dat_new['Yards']
dir_sine = np.sin(np.radians(rushers_dat_new['Dir']))
rushers_dat_new['X1'] = yards_gained + rushers_dat_new['X']
rushers_dat_new['Y1'] = (dir_sine * yards_gained) + rushers_dat_new['Y']

In [None]:
# Columns written to the export files, in output order.
export_cols = [
    # game / team identifiers
    'GameId',
    'Season',
    'HomeTeamAbbr',
    'VisitorTeamAbbr',
    'PossessionTeam',
    'Team',
    'Week',
    'Quarter',
    # play situation
    'Down',
    'Distance',
    'FieldPosition',
    'HomeScoreBeforePlay',
    'VisitorScoreBeforePlay',
    'OffenseFormation',
    'DefendersInTheBox',
    'Yards',
    # rusher position and movement
    'X',
    'Y',
    'S',
    'A',
    'Dis',
    'Orientation',
    'Dir',
    # rusher details
    'DisplayName',
    'JerseyNumber',
    'PlayerHeight',
    'PlayerWeight',
    'PlayerCollegeName',
    'Position',
    # derived end point
    'X1',
    'Y1',
]
# Write the selected columns to 'rushers_lines.csv' without the index column.
rushers_export = rushers_dat_new.loc[:, export_cols]
rushers_export.to_csv('rushers_lines.csv', index=False)

In [None]:
# Write the first 500 rows of the export columns to 'rushers_500.csv'
# (no index column).
first_500_rows = rushers_dat_new[export_cols].iloc[:500]
first_500_rows.to_csv('rushers_500.csv', index=False)

In [None]:
# Histogram of the Yards column.
rushers_dat_new['Yards'].hist()