In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

In [2]:
# Subset of dataset columns to load (passed to read_csv via usecols).
# Grouped loosely by column name for readability; order is unchanged.
columns_to_use = [
    # Situation
    'game_date', 'yardline_100', 'yrdln', 'ydstogo', 'ydsnet',
    'desc', 'play_type', 'yards_gained',
    # Formation / QB action
    'shotgun', 'no_huddle', 'qb_dropback', 'qb_scramble',
    # Pass and run details
    'pass_length', 'pass_location', 'air_yards', 'yards_after_catch',
    'run_location', 'run_gap',
    # Kick and conversion results
    'field_goal_result', 'kick_distance',
    'extra_point_result', 'two_point_conv_result',
    # Per-play event columns
    'incomplete_pass', 'interception', 'qb_hit',
    'pass_attempt', 'rush_attempt', 'sack',
    'touchdown', 'pass_touchdown', 'rush_touchdown',
    'extra_point_attempt', 'two_point_attempt', 'field_goal_attempt',
    # Players involved
    'passer_player_id', 'passer_player_name',
    'receiver_player_id', 'receiver_player_name',
    'rusher_player_id', 'rusher_player_name',
]

### Fetched the following dataset from Kaggle:

https://www.kaggle.com/maxhorowitz/nflplaybyplay2009to2016?select=NFL+Play+by+Play+2009-2018+%28v5%29.csv

In [3]:
# Load only the columns listed in columns_to_use.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type inference (and the
# DtypeWarning pandas emits for it) at the cost of higher peak memory.
df = pd.read_csv("nfl_play_by_play.csv", usecols=columns_to_use, low_memory=False)

  interactivity=interactivity, compiler=compiler, result=result)


In [4]:
# Restrict the frame to the four play types of interest; every other
# play_type value is dropped.
df = df.loc[
    df['play_type'].isin(['pass', 'run', 'field_goal', 'extra_point'])
]

In [5]:
def convertGameDateToSeason(game_date):
    """Return the season year that a game date belongs to.

    Games played in January or February are counted as part of the season
    that started in the previous calendar year.

    Args:
        game_date: Any object exposing integer ``year`` and ``month``
            attributes (e.g. ``datetime.date`` or ``pandas.Timestamp``).

    Returns:
        ``game_date.year - 1`` when the month is January or February,
        otherwise ``game_date.year``.
    """
    if game_date.month <= 2:
        # Early-year game: it belongs to the prior year's season.
        return game_date.year - 1
    return game_date.year

In [6]:
# Inspect the dtype of game_date: it must be a datetime before we can read
# .year / .month from its values.
df['game_date'].dtype

dtype('O')

In [7]:
# game_date was loaded as a generic object column, so parse it into datetimes.
df['game_date'] = df['game_date'].pipe(pd.to_datetime)

In [8]:
# Confirm the conversion: the dtype should now be a datetime type.
df['game_date'].dtype

dtype('<M8[ns]')

In [9]:
# Derive the season for every play by applying convertGameDateToSeason
# to each game date, and store it in a new 'season' column.
df['season'] = df['game_date'].apply(convertGameDateToSeason)

In [10]:
# Save the plays of each season to their own CSV for easy consumption later.
output_dir = Path("season_datasets")
# to_csv does not create missing directories, so make sure the target exists
# (otherwise a fresh checkout fails on the first write).
output_dir.mkdir(parents=True, exist_ok=True)

# groupby splits the frame in a single pass instead of re-filtering every row
# once per season. Rows whose season is missing are skipped rather than
# producing an empty "nan_plays.csv".
for year, season_plays in df.groupby('season'):
    season_plays.to_csv(output_dir / f"{year}_plays.csv", index=False)