In [1]:
import pandas as pd
from data.drives import DriveFetcher
from data.plays import PlayFetcher

from matplotlib import pyplot as plt

In [2]:
# CONFIG

# Seasons to fetch: 2017 through 2023, with 2020 left out of the list.
# For a quick single-season run, temporarily narrow this to e.g. [2023].
YEARS = [season for season in range(2017, 2024) if season != 2020]

# Game ids whose tracking data is obviously wrong; drives belonging to these
# games are removed when the drive data is loaded.
IGNORE_GAMES = [
    401520433, 401531412, 401520147, 401524003, 401524023,
    401525546, 401525831, 401525868, 401525896, 401525900,
    401532436, 401532449, 401532572, 401532630,
]

# Drive outcomes that are kept; drives with any other `result_str` are dropped.
# Each outcome maps to the number stored in the `derived_result` column.
# NOTE(review): the values look like points credited to the offense, with
# negative numbers for scores by the defense -- confirm with the author.
DRIVE_RESULT_FILTERS = {
    # drive ended without a score
    "PUNT": 0,
    "DOWNS": 0,
    # offense scored
    "FG": 3,
    "TD": 7,
    # turnovers / misses without a score
    "MISSED FG": 0,
    "FUMBLE": 0,
    "INT": 0,
    # outcomes carrying a negative value
    "INT TD": -7,
    "FUMBLE TD": -7,
    "MISSED FG TD": -7,
    "FUMBLE RETURN TD": -7,
    "PUNT RETURN TD": -7,
    "SF": -2,
}

# Play types that are kept; plays whose `type` is not listed here are dropped
# when the play data is loaded.
PLAY_FILTERS = [
    "Pass Reception",
    "Rush",
    "Sack",
    "Pass Incompletion",
    "Field Goal Good",
    "Rushing Touchdown",
    "Field Goal Missed",
    "Interception",
    "Passing Touchdown",
    "Fumble Recovery (Own)",
    "Fumble Recovery (Opponent)",
    "Interception Return Touchdown",
    "Blocked Punt",
    "Blocked Field Goal",
    "Blocked Field Goal Touchdown",
    "Blocked Punt Touchdown",
    "Pass Interception Return",
    "Punt Return Touchdown",
    "Fumble Return Touchdown",
    "Safety",
    "Missed Field Goal Return",
]

In [3]:
# Load plays for every configured season and keep only the play types
# listed in PLAY_FILTERS.
play_fetcher = PlayFetcher()

play_data = (
    play_fetcher
    .get_data(years=YEARS)
    .loc[lambda plays: plays["type"].isin(PLAY_FILTERS)]
)


In [4]:
# Load drives for every configured season, then:
#   1. keep only drives whose `result_str` is a key of DRIVE_RESULT_FILTERS,
#   2. drop drives that belong to a game listed in IGNORE_GAMES,
#   3. add `derived_result`, the DRIVE_RESULT_FILTERS value for each `result_str`.
drive_fetcher = DriveFetcher()

# The new column is created with .assign() inside the chain rather than by
# item assignment on a filtered frame: assigning into the result of a
# boolean-mask selection can raise pandas' SettingWithCopyWarning, while
# .assign() always returns a fresh DataFrame.
drive_data = (
    drive_fetcher
    .get_data(years=YEARS)
    .loc[lambda drives: drives["result_str"].isin(DRIVE_RESULT_FILTERS.keys())]
    .loc[lambda drives: ~drives["game"].isin(IGNORE_GAMES)]
    .assign(
        derived_result=lambda drives: drives["result_str"].map(DRIVE_RESULT_FILTERS)
    )
)


In [74]:
# Label every play with the outcome of the drive it belongs to.
#
# DataFrame.join(on="drive") matches play_data["drive"] against the *index* of
# drive_data, so this presumes drive_data is indexed by drive id -- TODO confirm.
# Columns present in both frames get the "_drive" suffix on the drive side.
# Plays left without a `result` after the join are discarded, and so are plays
# whose own `offense` disagrees with the offense recorded on the drive.
#
# NOTE(review): the join pulls in `result`, not the `derived_result` column
# computed on drive_data -- confirm that `result` is the intended label.
joined_data = (
    play_data
    .join(drive_data[["offense", "result"]], on="drive", rsuffix="_drive")
    .dropna(subset=["result"])
    .loc[lambda plays: plays["offense"] == plays["offense_drive"]]
)