# NFL Big Data Bowl 2026 - Exploratory Data Analysis

This notebook explores the tracking data and identifies interesting plays for broadcast visualization.

**Goal**: Find compelling plays that showcase player movement while the ball is in the air.

In [None]:
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from data.loader import NFLDataLoader, extract_ball_in_air_frames
from visualization.field import NFLField

# Set plotting style
plt.style.use('dark_background')
sns.set_palette('husl')

%matplotlib inline

## 1. Load Data

In [None]:
# Initialize data loader against the raw data directory
loader = NFLDataLoader(data_dir='../data/raw')

# Load metadata tables exposed by the loader.
# The table from `loader.players` is bound to `players_meta` (not `players`)
# because the static-visualization cell further down binds `players` to the
# tracking rows of a single frame. Keeping the two names distinct means
# re-running cells out of order cannot silently swap one table for the other.
games = loader.games
plays = loader.plays
players_meta = loader.players

print(f"Games: {len(games)}")
print(f"Plays: {len(plays)}")
print(f"Players: {len(players_meta)}")

In [None]:
# Display sample data: the first rows of the games table, rendered as the
# cell's last-expression output
games.head()

In [None]:
# Display the first rows of the plays table, rendered as the cell's
# last-expression output
plays.head()

## 2. Filter for Pass Plays

Focus on pass plays where the ball is in the air.

In [None]:
# Keep only plays whose pass result is one of the listed codes.
# 'C' and 'IN' are used below for completions and interceptions;
# 'I' is presumably "incomplete" -- confirm against the data dictionary.
pass_plays = loader.get_pass_plays(pass_result=['C', 'I', 'IN'])

# Tally how many plays fall under each result code
result_counts = pass_plays['passResult'].value_counts()

print(f"Total pass plays: {len(pass_plays)}")
print("\nPass result distribution:")
print(result_counts)

In [None]:
# Distribution of pass lengths (air yards), shown as two side-by-side panels:
# an overall histogram (left) and a per-result box plot (right).
fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Left panel: histogram of non-missing pass lengths with the median marked
pass_lengths = pass_plays['passLength']
hist_ax.hist(pass_lengths.dropna(), bins=50, edgecolor='black')
hist_ax.set_xlabel('Pass Length (Air Yards)')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Distribution of Pass Lengths')
hist_ax.axvline(pass_lengths.median(), color='red', linestyle='--', label='Median')
hist_ax.legend()

# Right panel: pass length broken down by pass result
sns.boxplot(data=pass_plays, x='passResult', y='passLength', ax=box_ax)
box_ax.set_xlabel('Pass Result')
box_ax.set_ylabel('Pass Length (Air Yards)')
box_ax.set_title('Pass Length by Result')

fig.tight_layout()
plt.show()

## 3. Identify Interesting Plays for Visualization

Find plays that would make compelling broadcast visualizations:
- Deep shots (30+ air yards)
- Contested catches
- Interceptions
- Coverage breakdowns

In [None]:
# Deep completions
# Minimum air yards for a completion to count as "deep". Defined once so the
# filter below and the printed label cannot drift apart.
DEEP_PASS_MIN_AIR_YARDS = 30

deep_completions = loader.get_pass_plays(
    min_air_yards=DEEP_PASS_MIN_AIR_YARDS,
    pass_result=['C']
)

print(f"Deep completions ({DEEP_PASS_MIN_AIR_YARDS}+ yards): {len(deep_completions)}")
print("\nTop 5 longest completions:")
# nlargest orders the rows by passLength descending before the column subset
print(deep_completions.nlargest(5, 'passLength')[[
    'gameId', 'playId', 'possessionTeam', 'passLength', 'playDescription'
]])

In [None]:
# Interceptions: pass plays filtered to result code 'IN'
interceptions = loader.get_pass_plays(pass_result=['IN'])

# Columns shown in the preview table
summary_columns = ['gameId', 'playId', 'possessionTeam', 'passLength', 'playDescription']

print(f"Interceptions: {len(interceptions)}")
print("\nSample interceptions:")
print(interceptions[summary_columns].head())

## 4. Load and Visualize a Sample Play

Pick an interesting play and visualize it.

In [None]:
# Select first deep completion as the sample play.
# Guard against an empty selection so the failure is an explicit message
# rather than an opaque positional-indexing IndexError from `.iloc[0]`.
if deep_completions.empty:
    raise ValueError(
        "No deep completions available to sample; "
        "relax the filters used to build `deep_completions`."
    )

sample_play = deep_completions.iloc[0]

# Identifiers used by the following cells to look up metadata and tracking
game_id = sample_play['gameId']
play_id = sample_play['playId']

print(f"Game ID: {game_id}")
print(f"Play ID: {play_id}")
print(f"Description: {sample_play['playDescription']}")
print(f"Air Yards: {sample_play['passLength']}")

In [None]:
# Look up the metadata record for the selected play and list every entry
metadata = loader.get_play_metadata(game_id, play_id)

print("\nPlay Metadata:")
for name, entry in metadata.items():
    print(f"{name}: {entry}")

In [None]:
# Fetch the tracking rows for the selected play; the week comes from the
# play metadata
tracking = loader.get_play_tracking(game_id, play_id, week=metadata['week'])

# Distinct frame ids and distinct non-missing event labels in this play
frame_ids = tracking['frameId'].unique()
play_events = tracking['event'].dropna().unique()

print(f"Total frames: {len(frame_ids)}")
print(f"Players tracked: {tracking['nflId'].nunique()}")
print("\nEvents in play:")
print(play_events)

In [None]:
# Split out the ball-in-air portion of the play; `info` carries the summary
# values reported below
ball_in_air, info = extract_ball_in_air_frames(tracking)

# One report, one line per summary value
print(
    f"Frames while ball in air: {info['frames_in_air']}\n"
    f"Time in air: {info['time_in_air']:.2f} seconds\n"
    f"Pass forward frame: {info['pass_forward_frame']}\n"
    f"Outcome frame: {info['outcome_frame']}"
)

## 5. Static Visualization of Play

Create a static snapshot at the moment the ball arrives.

In [None]:
# Rows of the ball-in-air data at the frame recorded as the pass outcome
arrival_frame = info['outcome_frame']
arrival_data = ball_in_air[ball_in_air['frameId'] == arrival_frame]

# Rows whose `club` is 'football' are the ball; everything else is a player
is_ball = arrival_data['club'] == 'football'
players = arrival_data[~is_ball]
ball = arrival_data[is_ball]

# Blank field to draw on
field = NFLField(figsize=(14, 8))
fig, ax = field.create_field()

# Team labels come from the play metadata
offense_team = metadata['possession_team']
defense_team = metadata['defensive_team']

# Split the players by team
offense = players[players['club'] == offense_team]
defense = players[players['club'] == defense_team]

# Marker settings shared by both teams; only colour and label differ
player_marker = dict(s=200, alpha=0.9, edgecolors='white', linewidths=2, zorder=10)

ax.scatter(offense['x'], offense['y'], c='#FF6B6B', label=offense_team, **player_marker)
ax.scatter(defense['x'], defense['y'], c='#4ECDC4', label=defense_team, **player_marker)

# Draw the ball above the players, but only if this frame has a ball row
if not ball.empty:
    ax.scatter(ball['x'], ball['y'],
               c='#FFD93D', s=250, marker='*',
               edgecolors='white', linewidths=2,
               label='Ball', zorder=11)

# Title: matchup on the first line, truncated play description on the second
title = f"{offense_team} vs {defense_team}\n{sample_play['playDescription'][:80]}"
field.add_title(title, fontsize=14)
field.add_legend()

plt.show()

## 6. Ball Trajectory Visualization

Show the complete ball trajectory while in the air.

In [None]:
# Ball rows across the whole ball-in-air period
ball_trajectory = ball_in_air[ball_in_air['club'] == 'football']

# Create field
field = NFLField(figsize=(14, 8))
fig, ax = field.create_field()

# Derive everything this figure needs from the play-level objects
# (`metadata`, `info`, `tracking`, `ball_in_air`) instead of reusing
# intermediate variables left behind by an earlier plotting cell. This keeps
# the cell re-runnable on its own and avoids depending on the generic name
# `players`, which is also used for a different table elsewhere.
offense_team = metadata['possession_team']
defense_team = metadata['defensive_team']

# Release frame: player rows from the full tracking data
release_frame = info['pass_forward_frame']
release_data = tracking[tracking['frameId'] == release_frame]
release_players = release_data[release_data['club'] != 'football']

# Arrival frame: player rows from the ball-in-air subset at the outcome frame
arrival_frame = info['outcome_frame']
arrival_data = ball_in_air[ball_in_air['frameId'] == arrival_frame]
arrival_players = arrival_data[arrival_data['club'] != 'football']

# (moment label, player rows, marker settings) -- release is drawn fainter
# and underneath arrival so the movement between the two reads clearly
snapshots = [
    ('release', release_players, dict(s=150, alpha=0.5, linewidths=1, zorder=8)),
    ('arrival', arrival_players, dict(s=200, alpha=0.9, linewidths=2, zorder=10)),
]
team_colors = [(offense_team, '#FF6B6B'), (defense_team, '#4ECDC4')]

for moment, snapshot, marker_style in snapshots:
    for team, color in team_colors:
        team_rows = snapshot[snapshot['club'] == team]
        ax.scatter(team_rows['x'], team_rows['y'],
                   c=color, edgecolors='white',
                   label=f'{team} ({moment})', **marker_style)

# Plot ball trajectory (skipped when there are no ball rows)
if not ball_trajectory.empty:
    field.plot_ball_trajectory(
        ball_trajectory['x'].values,
        ball_trajectory['y'].values
    )

title = f"Ball Trajectory: {offense_team} vs {defense_team}\nAir Yards: {sample_play['passLength']:.1f}, Time in Air: {info['time_in_air']:.2f}s"
field.add_title(title, fontsize=12)
field.add_legend(loc='upper left')

plt.show()

## 7. Next Steps

1. Create animation of this play using `src/animation/animator.py`
2. Identify 5-10 most compelling plays for final submission
3. Add advanced visualizations:
   - Separation metrics over time
   - Coverage zones (Voronoi diagrams)
   - Speed heatmaps
4. Create compelling narrative around selected plays
5. Generate high-quality video output

In [None]:
# Collect identifiers of candidate plays for further analysis.
# NOTE: this only builds an in-memory dict; nothing is written to disk here.
interesting_plays = {
    label: frame.head(limit)[['gameId', 'playId', 'passLength']].to_dict('records')
    for label, frame, limit in (
        ('deep_completions', deep_completions, 10),
        ('interceptions', interceptions, 5),
    )
}

print("Interesting plays identified for further analysis")