## Data Output Supporting Submission
### To view the submission, please visit [NFL Punt Safety (McGovern-Steussie)](https://www.kaggle.com/mcgovey/nfl-punt-safety-mcgovern-steussie)

In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import math

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory

import os
# Show what is available under ../input before any file is read
input_listing = os.listdir("../input")
print(input_listing)

# Any results you write to the current directory are saved as output.

In [None]:
# Load the injury video-footage file
injury_footage_path = '../input/NFL-Punt-Analytics-Competition/video_footage-injury.csv'
injDF = pd.read_csv(injury_footage_path)

In [None]:
# Tidy the injury footage table: discard the columns that are not carried
# forward, tag every row as 'Injury', and rename columns to the names used by
# the rest of the notebook.
injury_column_names = {
    "Week": "InjuryWeek",
    "Qtr": "InjuryQtr",
    "PlayDescription": "InjuryPlayDesc",
    "gamekey": "GameKey",
    "playid": "PlayID",
    "PREVIEW LINK (5000K)": "InjuryVideoLink",
}
injDF = (
    injDF
    .drop(columns=['season', 'Type', 'Home_team', 'Visit_Team'])
    .assign(InjCtrlFlag='Injury')
    # index=str additionally converts the row labels to strings
    .rename(index=str, columns=injury_column_names)
)

In [None]:
# Load the control video-footage file and give it the same column layout as
# the injury footage (its link column is called "Preview Link" and its season
# type column "Season_Type", hence the separate drop/rename lists).
ctrlDF = pd.read_csv('../input/NFL-Punt-Analytics-Competition/video_footage-control.csv')
ctrlDF = ctrlDF.drop(columns=['season','Season_Type','Home_team','Visit_Team'])
ctrlDF['InjCtrlFlag'] = 'Control'
ctrlDF = ctrlDF.rename(index=str, columns={"Week": "InjuryWeek", "Qtr": "InjuryQtr", "PlayDescription": "InjuryPlayDesc", "gamekey": "GameKey", "playid": "PlayID", "Preview Link": "InjuryVideoLink"})

# Stack injury and control plays into one table; InjCtrlFlag records which
# file each row came from. (A mid-cell `ctrlDF.head()` used to sit here; its
# result was never displayed because it was not the cell's last expression.)
puntDF = pd.concat([injDF, ctrlDF])

In [None]:
# Attach the video-review annotations. The outer join keeps (GameKey, PlayID)
# pairs that appear on only one side.
tempDF = pd.read_csv('../input/NFL-Punt-Analytics-Competition/video_review.csv')
puntDF = puntDF.merge(tempDF, how='outer', on=['GameKey', 'PlayID'])

In [None]:
# Let pandas pick better dtypes for object columns before joining
puntDF = puntDF.infer_objects()

# Load player punt role data; the first join attaches it as the role of the
# player identified by GSISID.
tempDF = pd.read_csv('../input/NFL-Punt-Analytics-Competition/play_player_role_data.csv')
tempDF = tempDF.infer_objects()
tempDF = tempDF.rename(columns={'Role': 'InjuredRole'})

# "Unclear" partner ids become 0 so the whole column can be parsed as numbers
puntDF['Primary_Partner_GSISID'] = pd.to_numeric(["0" if ele  == "Unclear" else ele for ele in puntDF['Primary_Partner_GSISID']])

# First role join. When both tables carry Season_Year, the copy from the role
# table is left out here: otherwise this merge creates Season_Year_x/_y and the
# second merge below creates another Season_Year_y, leaving two columns with
# the same label.
shared_non_keys = [col for col in ['Season_Year'] if col in puntDF.columns and col in tempDF.columns]
puntDF = pd.merge(puntDF, tempDF.drop(columns=shared_non_keys), how='left', on=['GameKey','PlayID', 'GSISID'])

# Second role join: same lookup, this time matched on the partner's id.
# Overlapping non-key columns (Season_Year, GSISID) come out suffixed _x/_y,
# which is what the following cleanup cell expects.
tempDF = tempDF.rename(columns={'InjuredRole': 'PrimaryActorRole'})
puntDF = pd.merge(puntDF, tempDF, how='left', left_on=['GameKey','PlayID', 'Primary_Partner_GSISID'], right_on=['GameKey','PlayID', 'GSISID'])

In [None]:
# Drop the right-hand copies produced by the partner-role merge and restore
# the unsuffixed names for the left-hand ones.
puntDF = (
    puntDF
    .drop(columns=['Season_Year_y', 'GSISID_y'])
    .rename(columns={'Season_Year_x': 'Season_Year', 'GSISID_x': 'GSISID'})
)

In [None]:
# Load play information without the columns that are dropped before the join,
# then show the remaining column names.
play_info_path = '../input/NFL-Punt-Analytics-Competition/play_information.csv'
tempDF = pd.read_csv(play_info_path).drop(columns=['Season_Year', 'Week', 'Home_Team_Visit_Team'])
tempDF.columns.tolist()

In [None]:
# Attach play information to every punt row, then release the lookup frame
puntDF = puntDF.merge(tempDF, how='left', on=['GameKey', 'PlayID'])

tempDF = None

In [None]:
# Build a player/game lookup from the playstatisticsoutcomes dataset
df = pd.read_csv('../input/playstatisticsoutcomes/players.csv', encoding="ISO-8859-1")
# gsisId: drop the first three characters and parse the remainder as a number
df['gsisId'] = pd.to_numeric(df['gsisId'].str.slice(3))
# nflId: values whose first character is "M" are replaced by "0" before parsing
numeric_ready_ids = ["0" if raw_id[0] == "M" else raw_id for raw_id in df['nflId']]
df['nflId'] = pd.to_numeric(numeric_ready_ids)

# Players -> game participation (by nflId)
df2 = pd.read_csv('../input/playstatisticsoutcomes/gameParticipation.csv', encoding="ISO-8859-1")
df = df.merge(df2, how='inner', on=['nflId'])

# ... -> teams (by teamId); df2 is left holding the teams table
df2 = pd.read_csv('../input/playstatisticsoutcomes/teams.csv', encoding="ISO-8859-1")
df = df.merge(df2, how='inner', on=['teamId'])

# Keep only special-teams rows and the columns used further down
lookup_columns = ['gsisId', 'gameId', 'nameLast', 'nameFull', 'position1', 'team']
df = df.loc[df['unit'] == 'special teams', lookup_columns]


In [None]:
# Games lookup: game id, game date and home-team abbreviation, so rows can
# later be matched on date + home team.
df3 = pd.read_csv('../input/playstatisticsoutcomes/games.csv', encoding="ISO-8859-1")
df3 = df3.merge(df2, how='inner', left_on=['homeTeamId'], right_on=['teamId'])
df3 = df3[['gameId', 'gameDate', 'teamAbrv']].rename(columns={'teamAbrv': 'homeTeamAbrv'})
# gameDate arrives as month/day/year text
df3['gameDate'] = pd.to_datetime(df3['gameDate'], format='%m/%d/%Y')

df = df.merge(df3, how='inner', on=['gameId'])

In [None]:
# Attach game date and home-team code to every punt row (joined on GameKey)
tempDF = pd.read_csv('../input/NFL-Punt-Analytics-Competition/game_data.csv')
tempDF = tempDF[['GameKey', 'Game_Date', 'HomeTeamCode']].assign(
    # parsed copy of Game_Date, named to match the gameDate column of the games lookup
    gameDate=lambda games: pd.to_datetime(games['Game_Date'], format='%Y-%m-%d %H:%M:%S.%f')
)
puntDF = puntDF.merge(tempDF, how='left', on=['GameKey'])
tempDF = None

In [None]:
# Keep lookup rows dated after 2016-01-01 that have a gsisId
played_after_cutoff = df['gameDate'] > '2016-01-01'
has_gsis_id = df['gsisId'].notna()
df = df[played_after_cutoff & has_gsis_id]

In [None]:
# Match each punt row to the player/game lookup on game date, player id and
# home team, then release the lookup frames.
puntDF = puntDF.merge(
    df,
    how='left',
    left_on=['gameDate', 'GSISID', 'HomeTeamCode'],
    right_on=['gameDate', 'gsisId', 'homeTeamAbrv'],
)
df = df2 = df3 = None

In [None]:
# Attach the injury detail columns per (GameKey, PlayID, GSISID)
injury_columns = ['GameKey', 'PlayID', 'GSISID', 'injuryDescription', 'injuryClass', 'blindsideBlock']
tempDF = pd.read_csv('../input/playstatisticsoutcomes/Injuries_play-level-data.csv')[injury_columns]

puntDF = puntDF.merge(tempDF, how='left', on=['GameKey', 'PlayID', 'GSISID'])

In [None]:
# Preview the first five rows of the play-level table
puntDF.head(5)

In [None]:
# Write the play-level table to the working directory (no index column)
puntDF.to_csv('play-level-data.csv', index=False)
playLvlData = None

In [None]:
# Load NGS Data
# Every NGS file is read and immediately inner-joined to the play-level table
# on (GameKey, PlayID), so only rows for plays present in puntDF are kept.
# The files are processed in the same order as before.
ngs_dir = '../input/NFL-Punt-Analytics-Competition/'
ngs_files = [
    'NGS-2016-post.csv',
    'NGS-2016-pre.csv',
    'NGS-2016-reg-wk1-6.csv',
    'NGS-2016-reg-wk7-12.csv',
    'NGS-2016-reg-wk13-17.csv',
    'NGS-2017-post.csv',
    'NGS-2017-pre.csv',
    'NGS-2017-reg-wk1-6.csv',
    'NGS-2017-reg-wk7-12.csv',
    'NGS-2017-reg-wk13-17.csv',
]

ngs_frames = []
for ngs_file in ngs_files:
    ngs_raw = pd.read_csv(ngs_dir + ngs_file)
    ngs_frames.append(pd.merge(puntDF, ngs_raw, how='inner', on=['GameKey', 'PlayID']))

# ignore_index gives the stacked table unique row labels instead of repeating
# each per-file 0..n range.
puntDF = pd.concat(ngs_frames, ignore_index=True)
puntDF.head()

In [None]:
# calculate time difference for a relative time metric
puntDF['TimeAlt'] = pd.to_datetime(puntDF['Time'], format='%Y-%m-%d %H:%M:%S.%f')

# Earliest timestamp per play. A play is identified by GameKey + PlayID (the
# key every other join in this notebook uses); grouping by PlayID alone would
# share one baseline between plays of different games that reuse a PlayID.
minTimes = puntDF.groupby(['GameKey', 'PlayID'], as_index=False)['TimeAlt'].min()
minTimes = minTimes.rename(columns={'TimeAlt': 'TimeMin'})

# merge min times back into df
puntDF = pd.merge(puntDF, minTimes, how='left', on=['GameKey', 'PlayID'])

# TimeNum = whole tenths of a second since the play's first sample. Floor
# division by a Timedelta states the unit explicitly and does not depend on
# how the timedelta is stored internally.
puntDF = puntDF.assign(TimeNum=(puntDF['TimeAlt'] - puntDF['TimeMin']) // pd.Timedelta(milliseconds=100))
puntDF = puntDF.sort_values(by=['GameKey', 'PlayID', 'TimeNum', 'GSISID_y'])

puntDF.head()

In [None]:
# Reduce the tracking table to keys, ids and x/y, then pick out
#   - the rows whose tracked player (GSISID_y) is the play's GSISID_x player
#   - the rows whose tracked player is the play's Primary_Partner_GSISID
tracking_columns = ['GameKey', 'PlayID', 'TimeNum', 'GSISID_x', 'Primary_Partner_GSISID', 'GSISID_y', 'x', 'y']
mvmtFields = puntDF[tracking_columns]

is_injured_row = mvmtFields['GSISID_x'] == mvmtFields['GSISID_y']
is_partner_row = mvmtFields['Primary_Partner_GSISID'] == mvmtFields['GSISID_y']

injuredPlayer = mvmtFields[is_injured_row]
primaryActor = mvmtFields[is_partner_row]

In [None]:
# Pair the two subsets sample by sample (same GameKey, PlayID, TimeNum).
# Overlapping columns get _x (injured side) and _y (partner side) appended.
distanceBtwn = injuredPlayer.merge(primaryActor, how='inner', on=['GameKey', 'PlayID', 'TimeNum'])

# The subsets are no longer needed
injuredPlayer = None
primaryActor = None

# Keep the join keys plus the columns below, under clearer names
key_columns = ['GameKey', 'PlayID', 'TimeNum']
paired_names = {
    'GSISID_x_x': 'injuredGSISID',
    'Primary_Partner_GSISID_x': 'Primary_Partner_GSISID',
    'x_x': 'injuredX',
    'y_x': 'injuredY',
    'x_y': 'actorX',
    'y_y': 'actorY',
}
distanceBtwn = distanceBtwn[key_columns + list(paired_names)].rename(columns=paired_names)

In [None]:
# Distance between the injured player's position and the primary actor's position
def distanceCalc(row):
    """Return the straight-line distance between two positions stored in ``row``.

    ``row`` is any mapping-like object (for example a DataFrame row) providing
    'injuredX', 'injuredY', 'actorX' and 'actorY'. The result is the Euclidean
    norm of the difference between the two (x, y) points.
    """
    offset = np.array([
        row['injuredX'] - row['actorX'],
        row['injuredY'] - row['actorY'],
    ])
    return np.linalg.norm(offset)

# Distance between the paired positions, computed on whole columns at once
# rather than one Python call per row. Unlike a row-wise apply, this also
# yields a proper empty column when there are no paired rows.
distanceBtwn['Distance'] = np.hypot(
    distanceBtwn['injuredX'] - distanceBtwn['actorX'],
    distanceBtwn['injuredY'] - distanceBtwn['actorY'],
)

In [None]:
# Bring Distance back onto every tracking row of the same play and time step,
# then release the helper frame.
puntDF = puntDF.merge(
    distanceBtwn[['GameKey', 'PlayID', 'TimeNum', 'Distance']],
    how='left',
    on=['GameKey', 'PlayID', 'TimeNum'],
)
distanceBtwn = None

In [None]:
# Calculate Player Speed
# Sort df by game, play, player, time so each player's samples are in time order
puntDF = puntDF.sort_values(by=['GameKey', 'PlayID', 'GSISID_y', 'TimeNum'])

# Previous x/y of the same player within the same play. Shifting inside each
# (game, play, player) group leaves a player's first sample of a play as NaN
# and can never pick up a position from a neighbouring play, which comparing
# only the player id of adjacent rows could do.
player_track = puntDF.groupby(['GameKey', 'PlayID', 'GSISID_y'])
puntDF['prevX'] = player_track['x'].shift()
puntDF['prevY'] = player_track['y'].shift()

In [None]:
# Distance traveled between the previous sample and the current one (used as speed)
def distanceTrvledCalc(row):
    """Return the straight-line distance from (prevX, prevY) to (x, y) in ``row``.

    ``row`` is any mapping-like object (for example a DataFrame row) providing
    'x', 'y', 'prevX' and 'prevY'. A missing previous position (NaN) gives NaN.
    """
    step = np.array([
        row['x'] - row['prevX'],
        row['y'] - row['prevY'],
    ])
    return np.linalg.norm(step)

# Speed = distance between the current and previous position, computed on
# whole columns at once; a row-wise apply over the full tracking table makes
# one Python call per sample. Rows without a previous position stay NaN.
puntDF['Speed'] = np.hypot(puntDF['x'] - puntDF['prevX'], puntDF['y'] - puntDF['prevY'])

In [None]:
# Acceleration = change in Speed from the same player's previous sample in the
# same play. Relies on the rows already being in time order per player.
# Shifting inside each (game, play, player) group keeps the first sample of a
# track as NaN and cannot borrow a speed from a neighbouring play.
puntDF['prevSpeed'] = puntDF.groupby(['GameKey', 'PlayID', 'GSISID_y'])['Speed'].shift()
puntDF['Acceleration'] = puntDF['Speed'] - puntDF['prevSpeed']

In [None]:
# Remove the helper columns that were only needed to derive Speed and Acceleration
helper_columns = ['prevX', 'prevY', 'prevSpeed']
puntDF = puntDF.drop(helper_columns, axis=1)
puntDF.head()

In [None]:
# Write the player-movement table to the working directory (no index column)
puntDF.to_csv(path_or_buf='playerMvmt-level-data.csv', index=False)