In [8]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import motion.detection as md
import man_vs_zone.clean as clean
import scipy.stats as stats
project_dir = "../data/"

In [2]:
games = pd.read_csv(f'{project_dir}games.csv')
play_df = pd.read_csv(f'{project_dir}plays.csv')
player_plays = pd.read_csv(f'{project_dir}player_play.csv')
players = pd.read_csv(f'{project_dir}players.csv')
tracking_df = pd.read_csv(f'{project_dir}tracking_week_1.csv')

Adding all weeks to tracking

In [6]:
for i in range(2,10):
    temp = pd.read_csv(f'{project_dir}tracking_week_{i}.csv')
    tracking_df = pd.concat([tracking_df, temp])

In [7]:
gid = 2022102302
pid = 2655

import animate
animate.animate_play(games, tracking_df, play_df, players, gid, pid)

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x107a9f760>>
Traceback (most recent call last):
  File "/Users/nolanbirkeland/Desktop/big_data_bowl/python/env/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


In [115]:
players = clean.get_postion_groups(players)

cleaned_tracking = clean.flip_coords(tracking_df)

games = tracking_df['gameId'].unique()
play_df = play_df[play_df['gameId'].isin(games)]

play_df = play_df[play_df['pff_manZone'] != 'other']
tracking_df = tracking_df[(tracking_df['gameId'].isin(play_df['gameId'])) & (tracking_df['playId'].isin(play_df['playId']))]

tracking_df = pd.merge(tracking_df, players[['nflId', 'position_group']], on='nflId', how='left')

cleaned_tracking = clean.zero_coords(cleaned_tracking, play_df)



Now that we have have all plays from week 1 cleaned so that all plays are zeroed, we can proceed. 

First we select only pass plays:

In [116]:
play_df['playType'] = play_df['isDropback'].map({True: 'pass', False: 'run'})
plays = play_df[play_df['playType'] == 'pass']

Next, we want to only select plays where the defense is in man coverage

In [117]:
plays = plays[plays['pff_manZone'] == 'Man']

Only include plays in this new subset

In [118]:
cleaned_tracking = cleaned_tracking[(cleaned_tracking['gameId'].isin(plays['gameId'])) & (cleaned_tracking['playId'].isin(plays['playId']))]

player_plays = player_plays[(player_plays['gameId'].isin(plays['gameId'])) & (player_plays['playId'].isin(plays['playId']))]

Next, we need to identify the defender covering the targeted receiver for every play

In [119]:
# Filter target players and keep only the necessary columns
targets = player_plays[player_plays['wasTargettedReceiver'] == 1][['gameId', 'playId', 'nflId']]
targets.rename(columns={'nflId': 'target'}, inplace=True)

# Merge the target information into the plays DataFrame
plays = plays.merge(targets, on=['gameId', 'playId'], how='left')

plays.dropna(subset=['target'], inplace=True)

Need to only include plays with WR target

In [120]:
plays = pd.merge(plays, players[['nflId', 'position_group']], left_on='target', right_on='nflId', how='left')

plays = plays[plays['position_group'] == 'receiver']

To identify the covering receiver, we can use our graph function; we can make a graph of the play and identify coverage at snap (the closest DB is the covering DB in man coverage)

In [121]:
covering_players = []

for index, play in plays.iterrows():
    play_graphs = clean.graph_one_play(tracking_df, play['gameId'], play['playId'], 'post')

    # Get graph at snap
    play_graph = play_graphs[0]

    # Get target player
    cover = None
    target = play['target']
    for edge in play_graph.edges(data=True):
        if edge[1] == target:
            cover = edge[0]

    covering_players.append(cover)

# Add covering player to plays DataFrame
plays['covering'] = covering_players

# Drop all plays where the covering player is None
plays = plays.dropna(subset=['covering'])

Now, we can measure the distance at snap for these players and the average distance after snap (avg. separation). Because these players will naturally be most separated in the frames right after snap, it makes more sense to use average or minimum separation than max.

In [122]:
avg_separations = []

for index, play in plays.iterrows():
    frames = cleaned_tracking[(cleaned_tracking['gameId'] == play['gameId']) & (cleaned_tracking['playId'] == play['playId'])]

    snap = frames.loc[frames["frameType"] == "SNAP", "frameId"].iloc[0]
    frame_count = int(frames["frameId"].max())

    target = play['target']
    cover = play['covering']
    dist = 0

    for frame in range(snap+1, frame_count+1):
        target_coords = frames[(frames['frameId'] == frame) & (frames['nflId'] == target)][['x', 'y']].values
        cover_coords = frames[(frames['frameId'] == frame) & (frames['nflId'] == cover)][['x', 'y']].values

        dist += np.linalg.norm(target_coords - cover_coords)
    
    avg_separations.append(dist / (frame_count - snap))

plays['avg_separation'] = avg_separations

In [123]:
plays.head()

Unnamed: 0,gameId,playId,playDescription,quarter,down,yardsToGo,possessionTeam,defensiveTeam,yardlineSide,yardlineNumber,...,pff_runConceptSecondary,pff_runPassOption,pff_passCoverage,pff_manZone,playType,target,nflId,position_group,covering,avg_separation
0,2022091104,3662,(12:51) (Shotgun) J.Hurts pass incomplete shor...,4,3,12,PHI,DET,PHI,35,...,,0,Cover-0,Man,pass,53439.0,53439,receiver,52411.0,10.931226
9,2022101600,1215,(7:13) (Shotgun) J.Garoppolo pass short right ...,2,2,9,SF,ATL,ATL,14,...,,0,Cover-1,Man,pass,52433.0,52433,receiver,52424.0,2.224102
15,2022092505,2669,(2:43) (Shotgun) L.Jackson pass incomplete dee...,3,3,5,BAL,NE,NE,38,...,,0,Cover-1,Man,pass,53456.0,53456,receiver,52991.0,14.34631
21,2022103001,2042,(11:47) (Shotgun) P.Walker pass incomplete dee...,3,2,10,CAR,ATL,ATL,24,...,,0,Cover-1,Man,pass,53488.0,53488,receiver,46278.0,1.265543
22,2022091103,4199,(8:37) (Shotgun) M.Trubisky pass incomplete de...,5,3,3,PIT,CIN,PIT,32,...,,0,Cover-1,Man,pass,52457.0,52457,receiver,43641.0,1.745893


In [124]:
player_plays['motionSinceLineset'].value_counts()

motionSinceLineset
False    47491
True      3253
Name: count, dtype: int64

Now, we can add a bool for WR motion so that we can hypothesis test

In [125]:
motioner = []

for index, play in plays.iterrows():
    # Filter player_plays for the current play and target
    player_play = player_plays[
        (player_plays['gameId'] == play['gameId']) & 
        (player_plays['playId'] == play['playId']) & 
        (player_plays['nflId'] == play['target'])
    ]

    # Check if a matching row exists and evaluate the 'motionSinceLineset' column
    if not player_play.empty and player_play.iloc[0]['motionSinceLineset'] == True:
        motioner.append(True)
    else:
        motioner.append(False)


plays['motion'] = motioner

Hypothesis test (independent samples t-test)

In [126]:
no_motion = plays.loc[plays['motion'] == True, 'avg_separation']
motion = plays.loc[plays['motion'] == False, 'avg_separation']

result = stats.ttest_ind(no_motion, motion, equal_var=True)

# Print all output details similar to R's t.test
print("Independent t-test results:")
print(f"t-statistic: {result.statistic}")
print(f"p-value: {result.pvalue}")
print(f"Degrees of freedom: {len(no_motion) + len(motion) - 2}")

Independent t-test results:
t-statistic: 0.7488133242467927
p-value: 0.454272243366282
Degrees of freedom: 582


Not statistically significant; however, this is a very small sample size, so expanding to include all weeks could improve this