## Objective:
- Join the NGS tracking data to specific play subsets (punt returns, fair catches, concussions) and save each condensed result, reducing the memory needed to run the analysis notebooks.

In [None]:
import pandas as pd
import numpy as np

In [None]:
%%time

# Every NGS tracking file, in the order the rows are stacked:
# the 2016 season first, then the 2017 season.
ngs_paths = [
    '../input/NFL-Punt-Analytics-Competition/NGS-2016-pre.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2016-reg-wk1-6.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2016-reg-wk7-12.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2016-reg-wk13-17.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2016-post.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2017-pre.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2017-reg-wk1-6.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2017-reg-wk7-12.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2017-reg-wk13-17.csv',
    '../input/NFL-Punt-Analytics-Competition/NGS-2017-post.csv',
]

# Read each file, then stack all of them row-wise into a single frame.
# Each file's own row index is kept as-is (no re-indexing), so index
# labels repeat across files.
ngs_parts = [pd.read_csv(path) for path in ngs_paths]
ngs_all = pd.concat(ngs_parts, axis=0)

# Clear up memory: the per-file frames are no longer needed once the
# combined copy exists.
del ngs_parts
del ngs_paths

# Drop unneeded columns
droppers = ['Season_Year', 'o', 'dir']
ngs_all = ngs_all.drop(columns=droppers)

- Build the condensed NGS subset for each play type.
- **play-fair_catch.csv** and **play-punt_return.csv** can be obtained from: https://www.kaggle.com/jdemeo/preprocessing-punt-play

In [None]:
# Fair Catch
# Load the fair-catch plays and collapse them to one row per
# (GameKey, PlayID); 'count' is the number of input rows sharing that key.
fair_catch_df = pd.read_csv('../input/ngsconcussion/play-fair_catch.csv')
remainder_df = (
    fair_catch_df
    .groupby(['GameKey', 'PlayID'])
    .size()
    .reset_index(name='count')
)

# Create condensed set of NGS data: the inner join keeps only the rows
# whose (GameKey, PlayID) is present in both frames.
condensed_ngs = remainder_df.merge(ngs_all, how='inner', on=['GameKey', 'PlayID'])

condensed_ngs.to_csv('NGS-fair_catch.csv', index=False)

In [None]:
# Punt Return
# Load the punt-return plays and collapse them to one row per
# (GameKey, PlayID); 'count' is the number of input rows sharing that key.
# The frame gets its own name so it does not overwrite (or pose as) the
# fair-catch play list.
punt_return_df = pd.read_csv('../input/ngsconcussion/play-punt_return.csv')
remainder_df = (
    punt_return_df
    .groupby(['GameKey', 'PlayID'])
    .size()
    .reset_index(name='count')
)

# Create condensed set of NGS data: the inner join keeps only the rows
# whose (GameKey, PlayID) is present in both frames.
condensed_ngs = pd.merge(remainder_df, ngs_all,
                         how='inner',
                         on=['GameKey', 'PlayID'])

condensed_ngs.to_csv('NGS-punt_return.csv', index=False)

In [None]:
# Concussion
# Load the video-review records and collapse them to one row per
# (GameKey, PlayID); 'count' is the number of input rows sharing that key.
concussion_df = pd.read_csv('../input/NFL-Punt-Analytics-Competition/video_review.csv')
remainder_df = (
    concussion_df
    .groupby(['GameKey', 'PlayID'])
    .size()
    .reset_index(name='count')
)

# Create condensed set of NGS data: the inner join keeps only the rows
# whose (GameKey, PlayID) is present in both frames.
condensed_ngs = remainder_df.merge(ngs_all, how='inner', on=['GameKey', 'PlayID'])

condensed_ngs.to_csv('NGS-concussion.csv', index=False)

# Links to other notebooks:
- Concussion play analysis with proposed rule changes: https://www.kaggle.com/jdemeo/analysis-concussions
- Analysis of uncalled penalties: https://www.kaggle.com/jdemeo/analysis-uncalled-penalties
- Analysis of punt returns: https://www.kaggle.com/jdemeo/analysis-punt-returns
- Analysis of fair catches: https://www.kaggle.com/jdemeo/analysis-fair-catches
- Preprocessing of Play Information: https://www.kaggle.com/jdemeo/preprocessing-punt-play
- Preprocessing of NGS data for the above notebooks: https://www.kaggle.com/jdemeo/preprocessing-ngs