In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
from pathlib import Path
from tqdm.auto import tqdm
import os
from typing import List

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.cluster import KMeans
from multiprocessing import Pool as MultiprocessingPool, cpu_count

from src.kinematics import calculate_speed_and_direction

pd.set_option("display.max_columns", None)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ============================================================================
# CONFIG
# ============================================================================

class Config:
    DATA_DIR = Path("./data")
    OUTPUT_DIR = Path("./outputs")
    OUTPUT_DIR.mkdir(exist_ok=True)
    
    SEED = 42
    N_FOLDS = 5
    BATCH_SIZE = 256
    EPOCHS = 60
    PATIENCE = 30
    LEARNING_RATE = 1e-4
    
    WINDOW_SIZE = 10
    HIDDEN_DIM = 128
    MAX_FUTURE_HORIZON = 94
    
    FIELD_X_MIN, FIELD_X_MAX = 0.0, 120.0
    FIELD_Y_MIN, FIELD_Y_MAX = 0.0, 53.3
    
    K_NEIGH = 6
    RADIUS = 30.0
    TAU = 8.0
    N_ROUTE_CLUSTERS = 7
    
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def set_seed(seed=42):
    import random
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

set_seed(Config.SEED)



In [3]:
config = Config()
config

print("\n[1/4] Loading data...")
train_input_files = [config.DATA_DIR / f"train/input_2023_w{w:02d}.csv" for w in range(1, 19)]
train_output_files = [config.DATA_DIR / f"train/output_2023_w{w:02d}.csv" for w in range(1, 19)]
train_input = pd.concat([pd.read_csv(f) for f in train_input_files if f.exists()])
train_output = pd.concat([pd.read_csv(f) for f in train_output_files if f.exists()])
supplementary_data = pd.read_csv(config.DATA_DIR / "supplementary_data.csv")

print(f"✓ Train input: {train_input.shape}, Train output: {train_output.shape}")
print(f"✓ Train output: {train_output.shape}, unique plays: {train_output[['game_id','play_id']].drop_duplicates().shape[0]}")
print(f"✓ Supplementary data: {supplementary_data.shape}")

traj_output = pd.read_csv('local_submission.csv')
traj_output = traj_output[['game_id', 'play_id', 'nfl_id', 'frame_id', 'pred_x', 'pred_y']]
traj_output.rename(columns={'pred_x': 'x', 'pred_y': 'y'}, inplace=True)

train_output.sort_values(by=['game_id', 'play_id', 'nfl_id', 'frame_id'], inplace=True)
traj_output.sort_values(by=['game_id', 'play_id', 'nfl_id', 'frame_id'], inplace=True)

print(f"✓ Projected trajectory output: {traj_output.shape}, unique plays: {traj_output[['game_id','play_id']].drop_duplicates().shape[0]}")


[1/4] Loading data...


  supplementary_data = pd.read_csv(config.DATA_DIR / "supplementary_data.csv")


✓ Train input: (4880579, 23), Train output: (562936, 6)
✓ Train output: (562936, 6), unique plays: 14108
✓ Supplementary data: (18009, 41)
✓ Projected trajectory output: (562936, 6), unique plays: 14108


In [40]:
supplementary_data[supplementary_data['play_id'] == 3824]

Unnamed: 0,game_id,season,week,game_date,game_time_eastern,home_team_abbr,visitor_team_abbr,play_id,play_description,quarter,game_clock,down,yards_to_go,possession_team,defensive_team,yardline_side,yardline_number,pre_snap_home_score,pre_snap_visitor_score,play_nullified_by_penalty,pass_result,pass_length,offense_formation,receiver_alignment,route_of_targeted_receiver,play_action,dropback_type,dropback_distance,pass_location_type,defenders_in_the_box,team_coverage_man_zone,team_coverage_type,penalty_yards,pre_penalty_yards_gained,yards_gained,expected_points,expected_points_added,pre_snap_home_team_win_probability,pre_snap_visitor_team_win_probability,home_team_win_probability_added,visitor_team_win_probility_added,down_and_distance,route_type_of_targeted_receiver,pass_length_bin
1945,2023092403,2023,3,09/24/2023,13:00:00,GB,NO,3824,(6:55) (Shotgun) J.Winston pass short right to...,4,06:55,1,10,NO,GB,NO,17,11,17,N,C,4,SHOTGUN,3x1,HITCH,False,TRADITIONAL,1.44,INSIDE_BOX,6,MAN_COVERAGE,COVER_1_MAN,,4,4,0.831423,-0.281057,0.133122,0.866878,-0.003042,0.003042,1st_long,STOP_HITCH,SHORT
5082,2023102202,2023,7,10/22/2023,13:00:00,IND,CLE,3824,(11:18) (Shotgun) P.Walker pass incomplete sho...,4,11:18,2,16,CLE,IND,IND,47,31,30,N,I,15,SHOTGUN,2x2,HITCH,False,TRADITIONAL,5.54,INSIDE_BOX,5,ZONE_COVERAGE,COVER_3_ZONE,,0,0,2.045362,-1.17,0.420814,0.579186,0.049949,-0.049949,2nd_long,STOP_HITCH,INTERMEDIATE
7655,2023111209,2023,10,11/12/2023,16:25:00,DAL,NYG,3824,(2:00) (Shotgun) T.DeVito pass short left to J...,4,02:00,3,9,NYG,DAL,NYG,26,49,10,N,C,-3,SHOTGUN,3x1,SCREEN,False,TRADITIONAL,4.12,INSIDE_BOX,6,ZONE_COVERAGE,COVER_2_ZONE,,7,7,0.301786,-0.805112,0.999024,0.000976,-7.1e-05,7.1e-05,3rd_long,SCREEN,SHORT
11071,2023121602,2023,15,12/16/2023,20:15:00,DET,DEN,3824,(5:42) J.Goff pass short left to A.St. Brown p...,4,05:42,2,8,DET,DEN,DEN,42,35,17,N,C,1,SINGLEBACK,2x2,CROSS,True,DESIGNED_ROLLOUT_LEFT,7.69,INSIDE_BOX,8,MAN_COVERAGE,COVER_1_MAN,,10,10,2.950058,0.932552,0.998344,0.001656,-3.9e-05,3.9e-05,2nd_long,IN_BREAK,SHORT
11342,2023121703,2023,15,12/17/2023,13:00:00,TEN,HOU,3824,(1:01) (Shotgun) C.Keenum pass incomplete deep...,4,01:01,3,15,HOU,TEN,HOU,35,16,16,N,I,45,EMPTY,3x2,GO,False,TRADITIONAL,4.47,INSIDE_BOX,4,ZONE_COVERAGE,COVER_3_ZONE,,0,0,0.702797,-0.776747,0.448848,0.551152,0.080695,-0.080695,3rd_long,VERTICAL,DEEP
13257,2023123113,2023,17,12/31/2023,16:25:00,KC,CIN,3824,(1:51) (Shotgun) J.Browning pass deep middle t...,4,01:51,4,18,CIN,KC,CIN,32,25,17,N,C,23,SHOTGUN,3x1,POST,False,TRADITIONAL,4.08,INSIDE_BOX,5,ZONE_COVERAGE,COVER_4_ZONE,,23,23,-0.910978,3.564902,0.97786,0.02214,-0.099968,0.099968,4th_down,VERTICAL,DEEP
16925,2024122908,2024,17,12/29/2024,13:00:00,NO,LV,3824,"(4:45) (No Huddle, Shotgun) S.Rattler pass sho...",4,04:45,2,8,NO,LV,NO,12,10,25,N,C,-1,SHOTGUN,2x2,SCREEN,False,TRADITIONAL,4.24,INSIDE_BOX,6,ZONE_COVERAGE,COVER_3_ZONE,,2,2,-0.029118,-0.507546,0.00608,0.99392,-0.001793,0.001793,2nd_long,SCREEN,SHORT


In [11]:
local_submission = pd.read_csv('local_submission.csv')
a = train_input[['game_id','play_id','nfl_id','player_role']].drop_duplicates()
labeled_output = train_output.merge(a, on=['game_id','play_id','nfl_id'], how='left')
print(labeled_output.shape)
labeled_output.head()

keys = ['game_id','play_id','nfl_id','frame_id']
loc_sub_cols = keys + ['pred_x','pred_y']
labeled_output = labeled_output.merge(local_submission[loc_sub_cols], on=['game_id','play_id','nfl_id','frame_id'], how='left', suffixes=('_true','_pred'))
labeled_output['error'] = np.sqrt((labeled_output['x'] - labeled_output['pred_x'])**2 + (labeled_output['y'] - labeled_output['pred_y'])**2)
labeled_output['error_diff'] = labeled_output.groupby(['game_id', 'play_id', 'nfl_id'])['error'].diff()
labeled_output.head(20)

receiver_positions = labeled_output.loc[
  labeled_output['player_role'] == 'Targeted Receiver',
  ['game_id','play_id','nfl_id', 'frame_id','x','y']].drop_duplicates()
receiver_positions.rename(columns = {'x':'target_x', 'y':'target_y'}, inplace = True)
print(receiver_positions.shape)
receiver_positions.head(3)

labeled_output = labeled_output.merge(
    receiver_positions[['game_id','play_id','frame_id','target_x','target_y']],
    on=['game_id','play_id','frame_id'], how='left'
)
print(labeled_output.shape)
labeled_output.head(3)

labeled_output['distance_from_receiver'] = np.sqrt(
    (labeled_output['x'] - labeled_output['target_x'])**2 +
    (labeled_output['y'] - labeled_output['target_y'])**2
)

# labeled_output['distance_from_receiver_l1'] = labeled_output.groupby(['game_id', 'play_id', 'nfl_id'])['distance_from_receiver'].shift(1)
labeled_output['distance_from_receiver_l3'] = labeled_output.groupby(['game_id', 'play_id', 'nfl_id'])['distance_from_receiver'].shift(3)
labeled_output['distance_from_receiver_l5'] = labeled_output.groupby(['game_id', 'play_id', 'nfl_id'])['distance_from_receiver'].shift(5)
labeled_output['distance_from_receiver_l7'] = labeled_output.groupby(['game_id', 'play_id', 'nfl_id'])['distance_from_receiver'].shift(7)

labeled_output['distance_from_receiver_min_l7'] = labeled_output[[
                                                                # 'distance_from_receiver',
                                                                # 'distance_from_receiver_l1',
                                                                'distance_from_receiver_l3',
                                                                'distance_from_receiver_l5',
                                                                'distance_from_receiver_l7']].min(axis=1) 
labeled_output[
  (labeled_output['game_id'] == 2023091013) &
  (labeled_output['play_id'] == 3686) &
  (labeled_output['nfl_id'] == 54632)
]

# test = labeled_output[labeled_output['player_role'].str.contains('Defensive Coverage')]

(562936, 7)
(160360, 6)
(562936, 13)


Unnamed: 0,game_id,play_id,nfl_id,frame_id,x,y,player_role,pred_x,pred_y,error,error_diff,target_x,target_y,distance_from_receiver,distance_from_receiver_l3,distance_from_receiver_l5,distance_from_receiver_l7,distance_from_receiver_min_l7
29621,2023091013,3686,54632,1,46.63,39.84,Defensive Coverage,46.631927,39.860932,0.02102,,43.87,40.61,2.865397,,,,
29622,2023091013,3686,54632,2,46.44,40.36,Defensive Coverage,46.512623,40.263906,0.12045,0.09943,43.89,41.11,2.658007,,,,
29623,2023091013,3686,54632,3,46.22,40.96,Defensive Coverage,46.370215,40.694341,0.305187,0.184738,43.88,41.61,2.4286,,,,
29624,2023091013,3686,54632,4,46.02,41.48,Defensive Coverage,46.206613,41.154565,0.375143,0.069956,43.84,42.13,2.274841,2.865397,,,2.865397
29625,2023091013,3686,54632,5,45.79,42.05,Defensive Coverage,46.025641,41.638732,0.473992,0.098849,43.77,42.64,2.1044,2.658007,,,2.658007
29626,2023091013,3686,54632,6,45.58,42.56,Defensive Coverage,45.828163,42.145513,0.483099,0.009107,43.68,43.15,1.989497,2.4286,2.865397,,2.4286
29627,2023091013,3686,54632,7,45.29,43.15,Defensive Coverage,45.614781,42.675,0.575419,0.092321,43.58,43.66,1.784433,2.274841,2.658007,,2.274841
29628,2023091013,3686,54632,8,45.08,43.63,Defensive Coverage,45.398282,43.220472,0.518668,-0.056751,43.46,44.18,1.710819,2.1044,2.4286,2.865397,2.1044
29629,2023091013,3686,54632,9,44.67,44.28,Defensive Coverage,45.166847,43.777821,0.706428,0.187759,43.3,44.73,1.442012,1.989497,2.274841,2.658007,1.989497
29630,2023091013,3686,54632,10,43.99,45.14,Defensive Coverage,44.927816,44.349732,1.226386,0.519958,43.16,45.23,0.834865,1.784433,2.1044,2.4286,1.784433


In [12]:
labeled_output['potential_contact'] = np.where(
  (
    (labeled_output['player_role'] == 'Defensive Coverage') & 
    (labeled_output['error_diff'] > 0.1) & 
    (labeled_output['distance_from_receiver_min_l7'] < 1.25)
  ),
  1, 0
)


potential_contact = labeled_output.groupby(['game_id','play_id','nfl_id']).max()['potential_contact'].reset_index()
potential_contact['potential_contact'].value_counts()

potential_contact
0    44540
1     1505
Name: count, dtype: int64

In [13]:
# potential_contact[
#   (potential_contact['game_id'] == 2023091013) &
#   (potential_contact['play_id'] == 3686) &
#   (potential_contact['nfl_id'] == 54632)
# ]
labeled_output[
  (labeled_output['game_id'] == 2023091013) &
  (labeled_output['play_id'] == 3686) &
  (labeled_output['nfl_id'] == 54632)
]


Unnamed: 0,game_id,play_id,nfl_id,frame_id,x,y,player_role,pred_x,pred_y,error,error_diff,target_x,target_y,distance_from_receiver,distance_from_receiver_l3,distance_from_receiver_l5,distance_from_receiver_l7,distance_from_receiver_min_l7,potential_contact
29621,2023091013,3686,54632,1,46.63,39.84,Defensive Coverage,46.631927,39.860932,0.02102,,43.87,40.61,2.865397,,,,,0
29622,2023091013,3686,54632,2,46.44,40.36,Defensive Coverage,46.512623,40.263906,0.12045,0.09943,43.89,41.11,2.658007,,,,,0
29623,2023091013,3686,54632,3,46.22,40.96,Defensive Coverage,46.370215,40.694341,0.305187,0.184738,43.88,41.61,2.4286,,,,,0
29624,2023091013,3686,54632,4,46.02,41.48,Defensive Coverage,46.206613,41.154565,0.375143,0.069956,43.84,42.13,2.274841,2.865397,,,2.865397,0
29625,2023091013,3686,54632,5,45.79,42.05,Defensive Coverage,46.025641,41.638732,0.473992,0.098849,43.77,42.64,2.1044,2.658007,,,2.658007,0
29626,2023091013,3686,54632,6,45.58,42.56,Defensive Coverage,45.828163,42.145513,0.483099,0.009107,43.68,43.15,1.989497,2.4286,2.865397,,2.4286,0
29627,2023091013,3686,54632,7,45.29,43.15,Defensive Coverage,45.614781,42.675,0.575419,0.092321,43.58,43.66,1.784433,2.274841,2.658007,,2.274841,0
29628,2023091013,3686,54632,8,45.08,43.63,Defensive Coverage,45.398282,43.220472,0.518668,-0.056751,43.46,44.18,1.710819,2.1044,2.4286,2.865397,2.1044,0
29629,2023091013,3686,54632,9,44.67,44.28,Defensive Coverage,45.166847,43.777821,0.706428,0.187759,43.3,44.73,1.442012,1.989497,2.274841,2.658007,1.989497,0
29630,2023091013,3686,54632,10,43.99,45.14,Defensive Coverage,44.927816,44.349732,1.226386,0.519958,43.16,45.23,0.834865,1.784433,2.1044,2.4286,1.784433,0


In [15]:
# Get max frame_id for each player in each play (no warnings)
final_positions = (
    train_input
    .sort_values('frame_id')
    .groupby(['game_id', 'play_id', 'nfl_id'], as_index=False)
    .last()
    [['game_id', 'play_id', 'nfl_id', 'frame_id', 'x', 'y', 'player_role', 'ball_land_x', 'ball_land_y']]
)

# Calculate angle to ball landing spot
final_positions['angle_to_ball'] = np.degrees(np.arctan2(
    final_positions['ball_land_y'] - final_positions['y'],
    final_positions['ball_land_x'] - final_positions['x']
))

# Distance to ball
final_positions['distance_to_ball'] = np.sqrt(
    (final_positions['ball_land_x'] - final_positions['x'])**2 + 
    (final_positions['ball_land_y'] - final_positions['y'])**2
)

# Get targeted receiver positions AND angle
targeted_receivers = final_positions[
    final_positions['player_role'] == 'Targeted Receiver'
][['game_id', 'play_id', 'x', 'y', 'angle_to_ball']].rename(
    columns={
        'x': 'target_x', 
        'y': 'target_y',
        'angle_to_ball': 'target_angle_to_ball'
    }
)

# Merge back to get targeted receiver position for each play
final_positions = final_positions.merge(
    targeted_receivers,
    on=['game_id', 'play_id'],
    how='left'
)

# Calculate distance to targeted receiver
final_positions['distance_to_target_receiver'] = np.sqrt(
    (final_positions['target_x'] - final_positions['x'])**2 + 
    (final_positions['target_y'] - final_positions['y'])**2
)

# Calculate angle difference from targeted receiver
# Handle angle wrapping (e.g., difference between 359° and 1° should be 2°, not 358°)
angle_diff = final_positions['angle_to_ball'] - final_positions['target_angle_to_ball']
final_positions['angle_diff_from_target'] = np.where(
    angle_diff > 180, 
    angle_diff - 360,
    np.where(angle_diff < -180, angle_diff + 360, angle_diff)
)

# Absolute angle difference (unsigned)
final_positions['abs_angle_diff_from_target'] = np.abs(final_positions['angle_diff_from_target'])

# Optional: drop intermediate columns
final_positions = final_positions.drop(columns=['target_x', 'target_y', 'target_angle_to_ball'])

final_positions['defender_type'] = np.where((final_positions['distance_to_target_receiver'] < 6.0) &
                                            (final_positions['player_role'].str.contains('Defensive Coverage')) &
                                            (final_positions['abs_angle_diff_from_target'] < 90.0)
                                            , 'Trailing', np.where((final_positions['player_role'].str.contains('Defensive Coverage'))
                                                                   , 'Converging', None))

final_positions.sort_values(['game_id','play_id','nfl_id'], inplace=True)
print(f"Shape: {final_positions.shape}")
final_positions.head(3)

Shape: (173150, 15)


Unnamed: 0,game_id,play_id,nfl_id,frame_id,x,y,player_role,ball_land_x,ball_land_y,angle_to_ball,distance_to_ball,distance_to_target_receiver,angle_diff_from_target,abs_angle_diff_from_target,defender_type
0,2023090700,101,43290,26,35.41,29.99,Passer,63.259998,-0.22,-47.327657,41.08852,23.257319,5.649543,5.649543,
1,2023090700,101,44930,26,52.43,14.14,Targeted Receiver,63.259998,-0.22,-52.977199,17.986063,0.0,0.0,0.0,
2,2023090700,101,46137,26,55.82,17.67,Defensive Coverage,63.259998,-0.22,-67.41881,19.375388,4.89418,-14.441611,14.441611,Trailing


In [16]:
final_positions.rename(columns={'x': 'throw_x', 'y': 'throw_y'}, inplace=True)
defender_type = train_output[['game_id','play_id','nfl_id']].drop_duplicates().merge(
    final_positions[['game_id','play_id','nfl_id','player_role','angle_to_ball','throw_x','throw_y','distance_to_ball', 'distance_to_target_receiver', 'abs_angle_diff_from_target', 'defender_type']],
    on=['game_id','play_id','nfl_id'], how='left'
)
print(defender_type.shape)
defender_type.tail(3)

(46045, 11)


Unnamed: 0,game_id,play_id,nfl_id,player_role,angle_to_ball,throw_x,throw_y,distance_to_ball,distance_to_target_receiver,abs_angle_diff_from_target,defender_type
46042,2024010713,4018,47844,Defensive Coverage,-80.352611,30.69,15.24,8.652364,8.013588,40.57913,Converging
46043,2024010713,4018,52457,Targeted Receiver,-120.931741,38.45,17.24,12.275871,0.0,0.0,
46044,2024010713,4018,52647,Defensive Coverage,-152.507915,44.4,13.09,13.82071,7.254309,31.576174,Converging


In [17]:
def map_route_family(route: str) -> str:
    """
    For field named: route_type_of_targeted_receiver
    """
    if pd.isna(route):
        return "UNKNOWN"
    route = str(route).upper()
    
    vertical = {"GO", "POST", "CORNER", "WHEEL"}
    out_break = {"OUT", "FLAT"}
    in_break = {"IN", "SLANT", "CROSS", "ANGLE"}
    stop = {"HITCH"}
    screen = {"SCREEN"}
    
    if route in vertical:
        return "VERTICAL"
    if route in out_break:
        return "OUT_BREAK"
    if route in in_break:
        return "IN_BREAK"
    if route in stop:
        return "STOP_HITCH"
    if route in screen:
        return "SCREEN"
    return "OTHER"

def classify_down_distance(down, distance):
    """
    Classify down/distance into meaningful football situations.

    Returns compact labels like '3rd_long', '1st_short', etc.
    """
    if pd.isna(down) or pd.isna(distance):
        return "unknown"

    # Distance buckets
    if distance <= 3:
        dist_label = "short"
    elif distance <= 7:
        dist_label = "medium"
    else:
        dist_label = "long"

    # Special cases
    if down == 3 and distance >= 10:
        return "3rd_long"  # Most predictive situation
    elif down == 4:
        return "4th_down"  # All 4th downs are high-leverage
    elif down == 1 and distance <= 3:
        return "1st_short"  # Run-heavy
    elif down == 2 and distance >= 10:
        return "2nd_long"  # Pass-heavy
    else:
        return (
            f'{int(down)}{"st" if down==1 else "nd" if down==2 else "rd"}_{dist_label}'
        )


def bin_pass_length(pl):
    """
    pl: numeric pass_length (air yards)
    Bins: SHORT (0-10), INTERMEDIATE (10-20), DEEP (20+)
    """
    if pd.isna(pl):
        return "UNKNOWN"
    try:
        v = float(pl)
    except Exception:
        return "UNKNOWN"
    
    if v <= 10:
        return "SHORT"
    elif v <= 20:
        return "INTERMEDIATE"
    else:
        return "DEEP"

def prep_supplementary_data_for_icc(supplementary_data:pd.DataFrame) -> pd.DataFrame:
    supplementary_data["down_and_distance"] = supplementary_data.apply(
        lambda row: classify_down_distance(row["down"], row["yards_to_go"]), axis=1
    )

    supplementary_data["route_type_of_targeted_receiver"] = supplementary_data[
        "route_of_targeted_receiver"
    ].apply(map_route_family)

    supplementary_data["pass_length_bin"] = supplementary_data["pass_length"].apply(
        bin_pass_length
    )


    
    suppl_data_pre = supplementary_data[
        [
            "game_id",
            "play_id",
            "possession_team",
            "defensive_team",
            "down_and_distance",
            "receiver_alignment",
            "route_of_targeted_receiver",
            "route_type_of_targeted_receiver",
            "pass_length",
            "pass_length_bin",
            "dropback_type",
            # 'dropback_distance',
            "team_coverage_man_zone",
            "team_coverage_type",
            # "pass_result"
        ]
    ]
    return suppl_data_pre
suppl_data_pre = prep_supplementary_data_for_icc(supplementary_data)
print(suppl_data_pre.shape)
suppl_data_pre.head(3)

(18009, 13)


Unnamed: 0,game_id,play_id,possession_team,defensive_team,down_and_distance,receiver_alignment,route_of_targeted_receiver,route_type_of_targeted_receiver,pass_length,pass_length_bin,dropback_type,team_coverage_man_zone,team_coverage_type
0,2023090700,3461,DET,KC,3rd_long,3x2,IN,IN_BREAK,18,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_2_ZONE
1,2023090700,461,DET,KC,1st_long,3x1,POST,VERTICAL,13,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE
2,2023090700,1940,DET,KC,2nd_long,3x1,OUT,OUT_BREAK,18,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_2_ZONE


In [18]:
suppl_data_pre['pass_length_bin'].value_counts()

pass_length_bin
SHORT           12089
INTERMEDIATE     3885
DEEP             2035
Name: count, dtype: int64

In [19]:
def bin_air_frames(n_frames):
    """
    n_frames: number of output frames while ball is in the air
    
    Bins by air time:
      QUICK  : <= 10 frames
      MEDIUM : 11-20 frames
      LONG   : > 20 frames
    """
    if pd.isna(n_frames):
        return "UNKNOWN"
    try:
        f = float(n_frames)
    except Exception:
        return "UNKNOWN"
    
    if f <= 10:
        return "QUICK"
    elif f <= 20:
        return "MEDIUM"
    else:
        return "LONG"

def prep_player_level_data_for_icc(
    train_input: pd.DataFrame, train_output: pd.DataFrame
) -> pd.DataFrame:
    qbs = train_input.loc[
        train_input["player_role"] == "Passer", ["game_id", "play_id", "nfl_id"]
    ].drop_duplicates()
    qbs = qbs.rename(columns={"nfl_id": "passer_nfl_id"})

    output_frame_count = (
        train_output.groupby(["game_id", "play_id"])["frame_id"].max().reset_index()
    )
    output_frame_count = output_frame_count.rename(
        columns={"frame_id": "num_output_frames"}
    )
    output_frame_count["num_output_frames_bin"] = output_frame_count[
        "num_output_frames"
    ].apply(bin_air_frames)

    train_input_pre = train_input[
        ["game_id", "play_id", "nfl_id", "player_position"]
    ].drop_duplicates()
    train_input_pre = train_input_pre.merge(qbs, on=["game_id", "play_id"], how="left")
    train_input_pre = train_input_pre.merge(
        output_frame_count, on=["game_id", "play_id"], how="left"
    )
    return train_input_pre

train_input_pre = prep_player_level_data_for_icc(train_input, train_output)
print(train_input_pre.shape)
train_input_pre.head(3)

(173150, 7)


Unnamed: 0,game_id,play_id,nfl_id,player_position,passer_nfl_id,num_output_frames,num_output_frames_bin
0,2023090700,101,54527,FS,43290.0,21,LONG
1,2023090700,101,46137,SS,43290.0,21,LONG
2,2023090700,101,52546,CB,43290.0,21,LONG


In [20]:
player_impacts = pd.read_csv('./outputs/defender_impact_log_wr.csv')
print(player_impacts.shape)
player_impacts = player_impacts.merge(supplementary_data[['game_id','play_id','pass_result', 'yards_gained', 'season','week','home_team_abbr','visitor_team_abbr','play_description', 'quarter','game_clock']],
        on=['game_id','play_id'], how='left')
print(player_impacts.shape)
player_impacts = player_impacts.merge(train_input_pre,
        on=['game_id','play_id','nfl_id'], how='left')
print(player_impacts.shape)
player_impacts = player_impacts.merge(suppl_data_pre,
        on=['game_id','play_id'], how='left')
print(player_impacts.shape)

player_impacts = player_impacts.merge(defender_type[['game_id','play_id','nfl_id','defender_type']],
                                      on = ['game_id','play_id','nfl_id'],
                                      how = 'left')
print(player_impacts.shape)

player_impacts = player_impacts.merge(potential_contact[['game_id','play_id','nfl_id', 'potential_contact']],
                                      on = ['game_id','play_id','nfl_id'],
                                      how = 'left')
print(player_impacts.shape)

recs = player_impacts.loc[
  player_impacts['player_role'] == 'Targeted Receiver',
  ['game_id','play_id','nfl_id']].drop_duplicates().rename(columns = {'nfl_id':'targeted_receiver_id'})

player_impacts = player_impacts.merge(recs,
                                      on= ['game_id','play_id'],
                                      how = 'left')
print(player_impacts.shape)

player_impacts.sort_values('delta', ascending = False).head(3)

(46045, 8)
(46045, 17)
(46045, 21)
(46045, 32)
(46045, 33)
(46045, 34)
(46045, 35)


Unnamed: 0,game_id,play_id,nfl_id,player_role,baseline_prob,real_prob,delta,fold,pass_result,yards_gained,season,week,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,player_position,passer_nfl_id,num_output_frames,num_output_frames_bin,possession_team,defensive_team,down_and_distance,receiver_alignment,route_of_targeted_receiver,route_type_of_targeted_receiver,pass_length,pass_length_bin,dropback_type,team_coverage_man_zone,team_coverage_type,defender_type,potential_contact,targeted_receiver_id
28066,2023091701,3025,41282,Targeted Receiver,0.061843,0.844657,0.782814,4,I,0,2023,2,BUF,LV,(12:57) (Shotgun) J.Garoppolo pass incomplete ...,4,12:57,WR,41291.0,17,MEDIUM,LV,BUF,3rd_short,3x1,GO,VERTICAL,17,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE,,0,41282
38324,2023091711,1082,55928,Targeted Receiver,0.123415,0.790457,0.667041,5,C,53,2023,2,DEN,WAS,(13:15) (Shotgun) R.Wilson pass deep left to M...,2,13:15,WR,38605.0,27,LONG,DEN,WAS,3rd_medium,2x2,GO,VERTICAL,45,DEEP,TRADITIONAL,MAN_COVERAGE,COVER_1_MAN,,0,55928
30835,2023101600,4288,47911,Targeted Receiver,0.179383,0.796526,0.617143,4,I,0,2023,6,LAC,DAL,(2:28) (Shotgun) D.Prescott pass incomplete de...,4,02:28,RB,43424.0,23,LONG,DAL,LAC,3rd_long,3x1,WHEEL,VERTICAL,21,DEEP,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE,,0,47911


In [21]:
catch_probabilities = pd.read_csv('./outputs/catch_probabilities_log_wr.csv')
catch_probabilities = catch_probabilities.rename(columns={
  'pred_catch_prob_by_proj_traj': 'baseline_prob',
  'pred_catch_prob_by_real_traj': 'real_prob',
})
catch_probabilities['delta'] = catch_probabilities['real_prob'] - catch_probabilities['baseline_prob']

catch_probabilities
print(catch_probabilities.shape)
catch_probabilities = catch_probabilities.merge(supplementary_data[['game_id','play_id','pass_result', 'yards_gained', 'season','week','home_team_abbr','visitor_team_abbr','play_description', 'quarter','game_clock']],
        on=['game_id','play_id'], how='left')
print(catch_probabilities.shape)
catch_probabilities = catch_probabilities.merge(train_input_pre[['game_id','play_id','passer_nfl_id','num_output_frames', 'num_output_frames_bin']].drop_duplicates(),
        on=['game_id','play_id'], how='left')
print(catch_probabilities.shape)
catch_probabilities = catch_probabilities.merge(suppl_data_pre,
        on=['game_id','play_id'], how='left')
print(catch_probabilities.shape)
catch_probabilities.sort_values('delta', ascending = False).head(3)

(14108, 5)
(14108, 14)
(14108, 17)
(14108, 28)


Unnamed: 0,game_id,play_id,baseline_prob,real_prob,delta,pass_result,yards_gained,season,week,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,passer_nfl_id,num_output_frames,num_output_frames_bin,possession_team,defensive_team,down_and_distance,receiver_alignment,route_of_targeted_receiver,route_type_of_targeted_receiver,pass_length,pass_length_bin,dropback_type,team_coverage_man_zone,team_coverage_type
3471,2023100805,2735,0.093919,0.741877,0.647958,C,3,2023,5,NE,NO,(9:12) (Shotgun) D.Carr pass short right to R....,3,09:12,41265.0,14,MEDIUM,NO,NE,2nd_medium,3x1,HITCH,STOP_HITCH,3,SHORT,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE
11755,2023091711,1082,0.123415,0.744086,0.620671,C,53,2023,2,DEN,WAS,(13:15) (Shotgun) R.Wilson pass deep left to M...,2,13:15,38605.0,27,LONG,DEN,WAS,3rd_medium,2x2,GO,VERTICAL,45,DEEP,TRADITIONAL,MAN_COVERAGE,COVER_1_MAN
12691,2023110200,2677,0.215321,0.819346,0.604025,I,0,2023,9,PIT,TEN,(5:49) (Shotgun) K.Pickett pass incomplete sho...,3,05:49,54485.0,19,MEDIUM,PIT,TEN,3rd_short,3x1,GO,VERTICAL,12,INTERMEDIATE,TRADITIONAL,MAN_COVERAGE,COVER_1_MAN


In [22]:
min_plays = 30
player_stats = (player_impacts[player_impacts['player_role'] == 'Targeted Receiver']
    .groupby('nfl_id').agg(
      mean_delta=('delta', 'mean'),
      std_delta=('delta', 'std'),
      n_plays=('delta', 'count')
).query(f'n_plays >= {min_plays}')
)

In [23]:
# Variance between players (signal)
var_between = player_stats['mean_delta'].var()

# Average variance within players (noise)
var_within = (player_stats['std_delta'] ** 2).mean()

# ICC
icc = var_between / (var_between + var_within)
print(f"ICC for WR delta: {icc:.3f}")

ICC for WR delta: 0.024


In [24]:
from typing import Union, List, Dict, Callable

def apply_filter_dict(
    df: pd.DataFrame, 
    filter_dict: Dict[str, Union[str, List, Callable]]
) -> pd.DataFrame:
    filtered_df = df.copy()
    
    for col, condition in filter_dict.items():
        if callable(condition):
            # Lambda/function filter
            filtered_df = filtered_df[filtered_df[col].apply(condition)]
        elif isinstance(condition, list):
            # List of values (OR condition)
            filtered_df = filtered_df[filtered_df[col].isin(condition)]
        else:
            # Single value equality
            filtered_df = filtered_df[filtered_df[col] == condition]
    
    return filtered_df


def assess_icc_by_cut(
    df: pd.DataFrame, 
    min_plays: int,
    filter_dict: Dict[str, Union[str, List, Callable]] = None,
    groupby_cols: Union[str, List[str]] = 'nfl_id',
    label: str = 'default'
) -> Dict[str, float]:
    """
    Assess ICC with flexible filtering and grouping.
    
    Args:
        df: Input dataframe (player_impacts)
        min_plays: Minimum plays threshold
        filter_dict: Dictionary of {column: value/list/callable}
            Examples:
                {'player_role': 'Targeted Receiver'}
                {'player_role': ['Targeted Receiver', 'Defensive Coverage']}
                {'yards_gained': lambda x: x > 10}
        groupby_cols: Column(s) to group by. Can be string or list of strings
            Examples: 'nfl_id', ['nfl_id', 'quarter'], ['game_id', 'play_id']
    
    Returns:
        Dictionary with ICC metrics and metadata
    """
    filtered_df = df.copy()
    
    if filter_dict:
        filtered_df = apply_filter_dict(filtered_df, filter_dict)
    
    if isinstance(groupby_cols, str):
        groupby_cols = [groupby_cols]
    
    player_stats = (
        filtered_df
        .groupby(groupby_cols)
        .agg(
            mean_delta=('delta', 'mean'),
            std_delta=('delta', 'std'),
            n_plays=('delta', 'count')
        )
        .query(f'n_plays >= {min_plays}')
    )
    
    # Handle case where no groups survive
    if len(player_stats) == 0:
        return
    
    var_between = player_stats['mean_delta'].var()
    var_within = (player_stats['std_delta'] ** 2).mean()
    icc = var_between / (var_between + var_within) if (var_between + var_within) > 0 else np.nan
    
    return {
        'label': label,
        'icc': icc,
        'var_between': var_between,
        'var_within': var_within,
        'n_groups': len(player_stats),
        'groupby': groupby_cols,
        'filters': filter_dict,
    }

In [25]:
supplementary_data['team_coverage_man_zone'].value_counts()

team_coverage_man_zone
ZONE_COVERAGE    12783
MAN_COVERAGE      5221
Name: count, dtype: int64

In [26]:
# ======================
# Variation By Team - Unfiltered
# ======================

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      # filter_dict={'player_role': ['Targeted Receiver', 'Defensive Coverage']},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      # filter_dict={'player_role': 'Defensive Coverage'},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team'
  )
)

{'label': 'Variation by Defensive Team', 'icc': np.float64(0.0024441665827069545), 'var_between': np.float64(2.3870740334407182e-05), 'var_within': np.float64(0.00974254227885103), 'n_groups': 32, 'groupby': ['defensive_team'], 'filters': None}
{'label': 'Variation by Offensive Team', 'icc': np.float64(0.006384084965308083), 'var_between': np.float64(6.222867556966294e-05), 'var_within': np.float64(0.009685241151010238), 'n_groups': 32, 'groupby': ['possession_team'], 'filters': None}


In [27]:
# ======================
# Variation by Defensive Team
# ======================
print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      # filter_dict={'player_role': ['Targeted Receiver', 'Defensive Coverage']},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'down_and_distance': "3rd_long"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On 3rd and Long'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "SHORT"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "DEEP"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "QUICK"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "MEDIUM"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "LONG"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - On Long-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - Playing Zone'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='defensive_team',
      label = 'Variation by Defensive Team - Playing Man'
  )
)



{'label': 'Variation by Defensive Team', 'icc': np.float64(0.0024441665827069545), 'var_between': np.float64(2.3870740334407182e-05), 'var_within': np.float64(0.00974254227885103), 'n_groups': 32, 'groupby': ['defensive_team'], 'filters': None}
{'label': 'Variation by Defensive Team - On 3rd and Long', 'icc': np.float64(0.021007752004882686), 'var_between': np.float64(0.00024008474744227217), 'var_within': np.float64(0.011188303563047633), 'n_groups': 32, 'groupby': ['defensive_team'], 'filters': {'down_and_distance': '3rd_long'}}
{'label': 'Variation by Defensive Team - On Vertical Routes', 'icc': np.float64(0.014976087959534212), 'var_between': np.float64(0.0002896495692529152), 'var_within': np.float64(0.019051153585453177), 'n_groups': 32, 'groupby': ['defensive_team'], 'filters': {'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Defensive Team - On Out Breaking Routes', 'icc': np.float64(0.0060740782368218825), 'var_between': np.float64(4.014316867018345e-05

In [28]:
# ======================
# Variation by Offensive Team
# ======================
print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      # filter_dict={'player_role': ['Targeted Receiver', 'Defensive Coverage']},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'down_and_distance': "3rd_long"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On 3rd and Long'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "SHORT"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "DEEP"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "QUICK"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "MEDIUM"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "LONG"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='possession_team',
      label = 'Variation by Offensive Team - Playing Zone'
  )
)

{'label': 'Variation by Offensive Team', 'icc': np.float64(0.006384084965308083), 'var_between': np.float64(6.222867556966294e-05), 'var_within': np.float64(0.009685241151010238), 'n_groups': 32, 'groupby': ['possession_team'], 'filters': None}
{'label': 'Variation by Offensive Team - On 3rd and Long', 'icc': np.float64(0.024899291587198582), 'var_between': np.float64(0.00028549918125521725), 'var_within': np.float64(0.011180657606996554), 'n_groups': 32, 'groupby': ['possession_team'], 'filters': {'down_and_distance': '3rd_long'}}
{'label': 'Variation by Offensive Team - On Vertical Routes', 'icc': np.float64(0.018008561640585664), 'var_between': np.float64(0.0003506421957167968), 'var_within': np.float64(0.019120218537911092), 'n_groups': 32, 'groupby': ['possession_team'], 'filters': {'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Offensive Team - On Out Breaking Routes', 'icc': np.float64(0.011494683916281851), 'var_between': np.float64(7.669383025247356e-0

In [29]:
# ======================
# Variation by Passer
# ======================
print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      # filter_dict={'player_role': ['Targeted Receiver', 'Defensive Coverage']},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'down_and_distance': "3rd_long"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On 3rd and Long'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "SHORT"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'pass_length_bin': "DEEP"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "QUICK"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "MEDIUM"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'num_output_frames_bin': "LONG"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      catch_probabilities,
      min_plays=20,
      filter_dict={'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='passer_nfl_id',
      label = 'Variation by Passer - Playing Zone'
  )
)

{'label': 'Variation by Passer', 'icc': np.float64(0.019864380528396265), 'var_between': np.float64(0.00019785043245144806), 'var_within': np.float64(0.00976221009743115), 'n_groups': 66, 'groupby': ['passer_nfl_id'], 'filters': None}
{'label': 'Variation by Passer - On 3rd and Long', 'icc': np.float64(0.05249238594811048), 'var_between': np.float64(0.0006159101257676928), 'var_within': np.float64(0.011117412996113818), 'n_groups': 35, 'groupby': ['passer_nfl_id'], 'filters': {'down_and_distance': '3rd_long'}}
{'label': 'Variation by Passer - On Vertical Routes', 'icc': np.float64(0.025337759141313976), 'var_between': np.float64(0.0005026945003795428), 'var_within': np.float64(0.019337043401299554), 'n_groups': 42, 'groupby': ['passer_nfl_id'], 'filters': {'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Passer - On Out Breaking Routes', 'icc': np.float64(0.019809287357455584), 'var_between': np.float64(0.00013491319236826516), 'var_within': np.float64(0.00667568

In [30]:
# ======================
# Variation by Receiver (All Roles)
# ======================

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={'player_role': 'Targeted Receiver'},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - All'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'pass_length_bin': "SHORT"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'pass_length_bin': "DEEP"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'num_output_frames_bin': "QUICK"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'num_output_frames_bin': "MEDIUM"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'num_output_frames_bin': "LONG"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Targeted Receiver',
        'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Receiver ID - Playing Zone'
  )
)


{'label': 'Variation by Receiver ID - All', 'icc': np.float64(0.03809566675931437), 'var_between': np.float64(0.00032002050987322336), 'var_within': np.float64(0.008080423348875581), 'n_groups': 261, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Targeted Receiver'}}
{'label': 'Variation by Receiver ID - On Vertical Routes', 'icc': np.float64(0.02640560734654766), 'var_between': np.float64(0.0004996064438499327), 'var_within': np.float64(0.018420861367895083), 'n_groups': 41, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Targeted Receiver', 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Receiver ID - On Out Breaking Routes', 'icc': np.float64(0.04139825724709801), 'var_between': np.float64(0.0002556304196513031), 'var_within': np.float64(0.00591927733372333), 'n_groups': 65, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Targeted Receiver', 'route_type_of_targeted_receiver': 'OUT_BREAK'}}
{'label': 'Variation by Receiver ID - On In Breaking Routes'

In [31]:
# ======================
# Variation by Defender (All Roles)
# ======================

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={'player_role': 'Defensive Coverage'},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - All'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'pass_length_bin': "SHORT"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'pass_length_bin': "DEEP"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'num_output_frames_bin': "QUICK"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'num_output_frames_bin': "MEDIUM"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'num_output_frames_bin': "LONG"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - Playing Zone'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        'pass_length_bin': ["INTERMEDIATE", "DEEP"]},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging Deep'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        # 'pass_length_bin': ["INTERMEDIATE", "DEEP"],
        'route_type_of_targeted_receiver': "VERTICAL"
        },
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging Verts'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        'pass_length_bin': ["INTERMEDIATE", "DEEP"],
        'route_type_of_targeted_receiver': "VERTICAL"
        },
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging Deep Verts'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        # 'pass_length_bin': ["INTERMEDIATE", "DEEP"],
        # 'route_type_of_targeted_receiver': "VERTICAL",
        'team_coverage_man_zone': "MAN_COVERAGE"

        },
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging in Man'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Trailing"},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Trailing'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "potential_contact": 1},
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - Potential Contact'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # "defender_type": 'Trailing',
        # 'team_coverage_man_zone': "ZONE_COVERAGE"},
        "potential_contact": 1},
      groupby_cols='targeted_receiver_id',
      label = 'Variation by Targeted Receiver ID - Potential Contact'
  )
)

{'label': 'Variation by Defender ID - All', 'icc': np.float64(0.02034253424080712), 'var_between': np.float64(3.0732156137650575e-05), 'var_within': np.float64(0.0014800017462294338), 'n_groups': 379, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Defensive Coverage'}}
{'label': 'Variation by Defender ID - On Vertical Routes', 'icc': np.float64(0.034711370591300736), 'var_between': np.float64(8.42000238901908e-05), 'var_within': np.float64(0.002341518766692304), 'n_groups': 201, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Defensive Coverage', 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Defender ID - On Out Breaking Routes', 'icc': np.float64(0.036841587929610814), 'var_between': np.float64(4.9258475858341373e-05), 'var_within': np.float64(0.0012877760719590368), 'n_groups': 125, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Defensive Coverage', 'route_type_of_targeted_receiver': 'OUT_BREAK'}}
{'label': 'Variation by Defender ID - On In Breaki

In [32]:
# ======================
# Variation by Safety (All Roles)
# ======================

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={'player_position': ['SS', 'FS', 'S']},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - All'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'pass_length_bin': "SHORT"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'pass_length_bin': "DEEP"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'num_output_frames_bin': "QUICK"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'num_output_frames_bin': "MEDIUM"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'num_output_frames_bin': "LONG"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - Playing Zone'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - When Converging'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        'pass_length_bin': ["INTERMEDIATE", "DEEP"]
        },
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - When Converging Deep'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['SS', 'FS', 'S'],
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Trailing"},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - When Trailing'
  )
)

{'label': 'Variation by Safety ID - All', 'icc': np.float64(0.019683230882451268), 'var_between': np.float64(2.1158859613167255e-05), 'var_within': np.float64(0.0010538099673811651), 'n_groups': 129, 'groupby': ['nfl_id'], 'filters': {'player_position': ['SS', 'FS', 'S']}}
{'label': 'Variation by Safety ID - On Vertical Routes', 'icc': np.float64(0.028057073177298858), 'var_between': np.float64(3.712946437341381e-05), 'var_within': np.float64(0.0012862254037122381), 'n_groups': 84, 'groupby': ['nfl_id'], 'filters': {'player_position': ['SS', 'FS', 'S'], 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Safety ID - On Out Breaking Routes', 'icc': np.float64(0.06119059756554727), 'var_between': np.float64(7.661250232353255e-05), 'var_within': np.float64(0.0011754181260988383), 'n_groups': 20, 'groupby': ['nfl_id'], 'filters': {'player_position': ['SS', 'FS', 'S'], 'route_type_of_targeted_receiver': 'OUT_BREAK'}}
{'label': 'Variation by Safety ID - On In Breaking Rou

In [33]:
# ======================
# Variation by Cornerback (All Roles)
# ======================

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={'player_position': ['CB']},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - All'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'pass_length_bin': "SHORT"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'pass_length_bin': "DEEP"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'num_output_frames_bin': "QUICK"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'num_output_frames_bin': "MEDIUM"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'num_output_frames_bin': "LONG"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - Playing Zone'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - When Converging'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['CB'],
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Trailing"},
      groupby_cols='nfl_id',
      label = 'Variation by Cornerback ID - When Trailing'
  )
)

{'label': 'Variation by Cornerback ID - All', 'icc': np.float64(0.01941472375924402), 'var_between': np.float64(5.041677475493616e-05), 'var_within': np.float64(0.002546415164763698), 'n_groups': 156, 'groupby': ['nfl_id'], 'filters': {'player_position': ['CB']}}
{'label': 'Variation by Cornerback ID - On Vertical Routes', 'icc': np.float64(0.03464135564254506), 'var_between': np.float64(0.0001460784962773016), 'var_within': np.float64(0.00407080313458746), 'n_groups': 83, 'groupby': ['nfl_id'], 'filters': {'player_position': ['CB'], 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Cornerback ID - On Out Breaking Routes', 'icc': np.float64(0.030632824545953598), 'var_between': np.float64(4.645433438114636e-05), 'var_within': np.float64(0.0014700344344380086), 'n_groups': 83, 'groupby': ['nfl_id'], 'filters': {'player_position': ['CB'], 'route_type_of_targeted_receiver': 'OUT_BREAK'}}
{'label': 'Variation by Cornerback ID - On In Breaking Routes', 'icc': np.float6

In [34]:
# ======================
# Variation by Free Safety (All Roles)
# ======================

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={'player_position': ['FS']},
      groupby_cols='nfl_id',
      label = 'Variation by Safety ID - All'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'route_type_of_targeted_receiver': "VERTICAL"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Vertical Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'route_type_of_targeted_receiver': "OUT_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Out Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'route_type_of_targeted_receiver': "IN_BREAK"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On In Breaking Routes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'pass_length_bin': "SHORT"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Short Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'pass_length_bin': "INTERMEDIATE"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Intermediate Pass Lengths'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'pass_length_bin': "DEEP"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Deep Pass Lengths'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'num_output_frames_bin': "QUICK"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Quick Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'num_output_frames_bin': "MEDIUM"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Medium-Time Passes'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'num_output_frames_bin': "LONG"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - On Long-Time Passes'
  )
)


print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'team_coverage_man_zone': "MAN_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - Playing Man'
  )
)
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_position": ['FS'],
        'team_coverage_man_zone': "ZONE_COVERAGE"},
      groupby_cols='nfl_id',
      label = 'Variation by Free Safety ID - Playing Zone'
  )
)

{'label': 'Variation by Safety ID - All', 'icc': np.float64(0.014675310320779969), 'var_between': np.float64(1.641268236869738e-05), 'var_within': np.float64(0.001101974732271341), 'n_groups': 73, 'groupby': ['nfl_id'], 'filters': {'player_position': ['FS']}}
{'label': 'Variation by Free Safety ID - On Vertical Routes', 'icc': np.float64(0.0284152227397717), 'var_between': np.float64(4.1573189768481036e-05), 'var_within': np.float64(0.0014214873024617147), 'n_groups': 49, 'groupby': ['nfl_id'], 'filters': {'player_position': ['FS'], 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Free Safety ID - On Out Breaking Routes', 'icc': np.float64(0.07585488563102841), 'var_between': np.float64(7.558251228610021e-05), 'var_within': np.float64(0.0009208267718007144), 'n_groups': 9, 'groupby': ['nfl_id'], 'filters': {'player_position': ['FS'], 'route_type_of_targeted_receiver': 'OUT_BREAK'}}
{'label': 'Variation by Free Safety ID - On In Breaking Routes', 'icc': np.float64

In [65]:
print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=20,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        'pass_length_bin': ["INTERMEDIATE", "DEEP"],
        'route_type_of_targeted_receiver': "VERTICAL"
        },
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging Deep Verts'
  )
)

print(
  assess_icc_by_cut(
      player_impacts,
      min_plays=15,
      filter_dict={
        "player_role": 'Defensive Coverage',
        # 'team_coverage_man_zone': "ZONE_COVERAGE",
        "defender_type": "Converging",
        'pass_length_bin': ["INTERMEDIATE", "DEEP"],
        # 'route_type_of_targeted_receiver': "VERTICAL",
        # 'team_coverage_man_zone': "MAN_COVERAGE"
        },
      groupby_cols='nfl_id',
      label = 'Variation by Defender ID - When Converging in Man'
  )
)


{'label': 'Variation by Defender ID - When Converging Deep Verts', 'icc': np.float64(0.031977403505987614), 'var_between': np.float64(2.0974946855212186e-05), 'var_within': np.float64(0.0006349553212569167), 'n_groups': 86, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Defensive Coverage', 'defender_type': 'Converging', 'pass_length_bin': ['INTERMEDIATE', 'DEEP'], 'route_type_of_targeted_receiver': 'VERTICAL'}}
{'label': 'Variation by Defender ID - When Converging in Man', 'icc': np.float64(0.036540023513031705), 'var_between': np.float64(1.992876853372119e-05), 'var_within': np.float64(0.000525466844761759), 'n_groups': 252, 'groupby': ['nfl_id'], 'filters': {'player_role': 'Defensive Coverage', 'defender_type': 'Converging', 'pass_length_bin': ['INTERMEDIATE', 'DEEP']}}


In [None]:
rec_filtered = player_impacts[(player_impacts['player_role'] == 'Defensive Coverage')&
                          (player_impacts['potential_contact'] == 1)
                          # player_impacts['defender_type'].isin(['Trailing'])
                          # player_impacts['pass_length_bin'].isin(['INTERMEDIATE', 'DEEP'])
                          # (player_impacts['route_type_of_targeted_receiver'] == "VERTICAL")
                          # (player_impacts['team_coverage_man_zone'] == "MAN_COVERAGE")
                          # &(player_impacts['pass_length_bin'].isin(['DEEP']))
                          ].copy()

rec_filtered[rec_filtered['delta'] > 0.05].groupby('targeted_receiver_id').count()[['play_id']].reset_index().sort_values('play_id',ascending = False)
rec_filtered[rec_filtered['delta'] > 0.05].groupby('targeted_receiver_id').count()[['nfl_id']].reset_index().sort_values('play_id',ascending = False)
# rec_res = rec_filtered.groupby('targeted_receiver_id').agg(
#     mean_delta=('delta', 'mean'),
#     std_delta=('delta', 'std'),
#     n_plays=('delta', 'count')
# ).query('n_plays >= 15')

# rec_res.reset_index(inplace = True)

# rec_res.sort_values('mean_delta', ascending= False)

Unnamed: 0,targeted_receiver_id,play_id
129,54475,10
1,39973,9
11,42347,9
137,54518,8
76,47859,8
...,...,...
68,46992,1
67,46830,1
66,46705,1
65,46279,1


In [None]:
# KEEP THIS, this is good for help coverage analysis
filtered = player_impacts[(player_impacts['player_role'] == 'Defensive Coverage')&
                          player_impacts['defender_type'].isin(['Converging'])&
                          player_impacts['pass_length_bin'].isin(['INTERMEDIATE', 'DEEP'])
                          # (player_impacts['route_type_of_targeted_receiver'] == "VERTICAL")
                          # (player_impacts['team_coverage_man_zone'] == "MAN_COVERAGE")
                          # &(player_impacts['pass_length_bin'].isin(['DEEP']))
                          ]
res = filtered.groupby('nfl_id').agg(
    mean_delta=('delta', 'mean'),
    std_delta=('delta', 'std'),
    n_plays=('delta', 'count')
).query('n_plays >= 15')

res.reset_index(inplace= True)
res.sort_values('mean_delta', ascending= False)

filtered[filtered['delta']  -.01].groupby('nfl_id').count().reset_index().sort_values('play_id', ascending = False)

Unnamed: 0,nfl_id,game_id,play_id,player_role,baseline_prob,real_prob,delta,fold,pass_result,yards_gained,season,week,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,player_position,passer_nfl_id,num_output_frames,num_output_frames_bin,possession_team,defensive_team,down_and_distance,receiver_alignment,route_of_targeted_receiver,route_type_of_targeted_receiver,pass_length,pass_length_bin,dropback_type,team_coverage_man_zone,team_coverage_type,defender_type,potential_contact,targeted_receiver_id
62,45004,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8
61,44962,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
113,47974,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
91,46699,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6
117,49410,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,46657,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
92,46700,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
94,46775,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
95,46807,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1


In [97]:
filtered[filtered['nfl_id'] == 45004].sort_values('delta', ascending= False).head(20)

Unnamed: 0,game_id,play_id,nfl_id,player_role,baseline_prob,real_prob,delta,fold,pass_result,yards_gained,season,week,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,player_position,passer_nfl_id,num_output_frames,num_output_frames_bin,possession_team,defensive_team,down_and_distance,receiver_alignment,route_of_targeted_receiver,route_type_of_targeted_receiver,pass_length,pass_length_bin,dropback_type,team_coverage_man_zone,team_coverage_type,defender_type,potential_contact,targeted_receiver_id
5830,2023112605,1823,45004,Defensive Coverage,0.463136,0.664395,0.201259,1,I,0,2023,12,TEN,CAR,(1:05) (Shotgun) W.Levis pass incomplete short...,2,01:05,FS,55898.0,19,MEDIUM,TEN,CAR,3rd_medium,3x1,WHEEL,VERTICAL,15,INTERMEDIATE,TRADITIONAL,MAN_COVERAGE,COVER_1_MAN,Converging,0,55946
23626,2023120304,1700,45004,Defensive Coverage,0.104038,0.261975,0.157936,3,IN,0,2023,13,TB,CAR,(2:31) (Shotgun) B.Mayfield pass deep left int...,2,02:31,FS,46070.0,27,LONG,TB,CAR,3rd_short,3x1,GO,VERTICAL,28,DEEP,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE,Converging,0,41233
36030,2023121709,2202,45004,Defensive Coverage,0.207544,0.329517,0.121973,4,I,0,2023,15,CAR,ATL,(11:37) D.Ridder pass incomplete deep right to...,3,11:37,FS,54539.0,14,MEDIUM,ATL,CAR,2nd_long,2x1,CORNER,VERTICAL,19,INTERMEDIATE,DESIGNED_ROLLOUT_RIGHT,ZONE_COVERAGE,COVER_6_ZONE,Converging,0,54473
39166,2023092409,570,45004,Defensive Coverage,0.845544,0.890875,0.045331,5,C,20,2023,3,SEA,CAR,(7:30) G.Smith pass deep left to D.Metcalf to ...,1,07:30,FS,39987.0,13,MEDIUM,SEA,CAR,1st_long,2x2,HITCH,STOP_HITCH,18,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE,Converging,0,47847
28124,2023091800,1340,45004,Defensive Coverage,0.012735,0.051037,0.038302,4,I,0,2023,2,CAR,NO,"(11:03) (No Huddle, Shotgun) D.Carr pass incom...",2,11:03,FS,41265.0,23,LONG,NO,CAR,2nd_long,2x2,IN,IN_BREAK,23,DEEP,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE,Converging,0,43336
27054,2024010705,519,45004,Defensive Coverage,0.63315,0.645653,0.012503,3,I,0,2023,18,CAR,TB,(5:56) (Shotgun) B.Mayfield pass incomplete de...,1,05:56,FS,46070.0,27,LONG,TB,CAR,3rd_medium,3x1,GO,VERTICAL,38,DEEP,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE,Converging,0,41233
5867,2023112605,2967,45004,Defensive Coverage,0.565379,0.57683,0.01145,1,C,19,2023,12,TEN,CAR,(:24) W.Levis pass deep left to D.Hopkins push...,3,00:24,FS,55898.0,15,MEDIUM,TEN,CAR,1st_long,2x1,OUT,OUT_BREAK,18,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_6_ZONE,Converging,0,39973
22546,2023111900,2410,45004,Defensive Coverage,0.125161,0.135506,0.010345,3,C,12,2023,11,CAR,DAL,(12:57) D.Prescott pass short right to B.Cooks...,3,12:57,FS,43424.0,11,MEDIUM,DAL,CAR,2nd_medium,2x1,HITCH,STOP_HITCH,12,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE,Converging,0,41246
7379,2023122403,1323,45004,Defensive Coverage,0.589858,0.597526,0.007668,1,C,13,2023,16,CAR,GB,(10:28) (Shotgun) J.Love pass short middle to ...,2,10:28,FS,52434.0,11,MEDIUM,GB,CAR,1st_long,2x2,IN,IN_BREAK,13,INTERMEDIATE,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE,Converging,0,54597
7496,2023122403,4065,45004,Defensive Coverage,0.425641,0.428672,0.003031,1,C,36,2023,16,CAR,GB,(2:39) (Shotgun) J.Love pass deep left to R.Do...,4,02:39,FS,52434.0,27,LONG,GB,CAR,3rd_medium,2x2,GO,VERTICAL,33,DEEP,TRADITIONAL,ZONE_COVERAGE,COVER_3_ZONE,Converging,0,54597


In [93]:
denom = filtered.groupby('nfl_id').count().reset_index().sort_values('play_id', ascending = False)[['nfl_id','play_id']]
num = filtered[filtered['delta'] < -.01].groupby('nfl_id').count().reset_index().sort_values('play_id', ascending = False)[['nfl_id','play_id']]
f = num.merge(denom, on = 'nfl_id').rename(columns = {'play_id_x': 'num',
                                                      'play_id_y': 'denom'})
f['rate'] = f['num']/f['denom']
f.sort_values('num', ascending = False).head(1)


Unnamed: 0,nfl_id,num,denom,rate
0,53554,11,81,0.135802


In [94]:
test = pd.read_csv('./outputs/defender_impact_log_wr.csv')
print(test.shape)
test = test.merge(supplementary_data[['game_id','play_id','pass_result', 'yards_gained', 'season','week','home_team_abbr','visitor_team_abbr','play_description', 'quarter','game_clock']],
        on=['game_id','play_id'], how='left')
test = test.merge(train_input[['nfl_id','player_name']].drop_duplicates(), on='nfl_id', how='left')
test[test['nfl_id'] == 45004].sort_values('delta').head(2)

(46045, 8)


Unnamed: 0,game_id,play_id,nfl_id,player_role,baseline_prob,real_prob,delta,fold,pass_result,yards_gained,season,week,home_team_abbr,visitor_team_abbr,play_description,quarter,game_clock,player_name
8100,2023123106,1090,45004,Defensive Coverage,0.436152,0.186293,-0.24986,1,C,4,2023,17,JAX,CAR,(10:38) (Shotgun) C.Beathard pass short right ...,2,10:38,Xavier Woods
16227,2023121005,3029,45004,Defensive Coverage,0.472147,0.244031,-0.228116,2,I,0,2023,14,NO,CAR,(12:27) (Shotgun) D.Carr pass incomplete short...,4,12:27,Xavier Woods
