In [44]:
import pandas as pd
import numpy as np

In [45]:
pd.set_option('display.max_columns', None)

In [46]:
data = pd.read_csv('2026_FAB_play_by_play.csv')
data = data.assign(success=lambda x: x['EPA'] > 0)
data.head()

Unnamed: 0,Season,Wk,HomeTeam,AwayTeam,QTR,TimeLeftQTR,OffTeam,DefTeam,OffLeadBefore,FieldSide,StartYard,Down,ToGo,EventType,RB,WR,TE,OL,DL,LB,DB,Safeties,CoverageType,ReceiverAlignment,Dropback,DropType,Scramble,RPO,Attempt,Completion,Sacked,LWR,LSWR,LTE,RWR,RSWR,RTE,L1,L2,L3,L4,R4,R3,R2,R1,TargetedPlayer,YardsOnPlay,EPA,success
0,2025,1,ATL,TB,1,894,ATL,TB,0,Own,35,1,10,pass,2,2,1,5,3.0,4.0,4.0,2,Cover 2,Spread,1.0,0/1 Step,0,0,1.0,1.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,Hook,Cross,Out,,,,Shallow,Cross,R1,11,0.529828,True
1,2025,1,ATL,TB,1,850,ATL,TB,0,Own,46,1,10,rush,1,2,2,5,3.0,4.0,4.0,1,,Slot Left,,,0,0,,,,1.0,1.0,0.0,0.0,0.0,2.0,,,,,,,,,,4,-0.23506,False
2,2025,1,ATL,TB,1,806,ATL,TB,0,Own,50,2,6,pass,1,2,2,5,3.0,4.0,4.0,1,Cover 6,Twin Right,1.0,5 Step,0,0,1.0,1.0,0.0,0.0,1.0,1.0,2.0,0.0,0.0,Cross,,,,,,Post,Corner,Back,50,3.79656,True
3,2025,1,ATL,TB,1,794,TB,ATL,-7,Own,40,1,10,pass,1,2,2,5,4.0,2.0,5.0,1,Cover 1,Balanced,1.0,3 Step,0,0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,1.0,Cross,,,,,,Flat,Corner,,0,-0.687909,False
4,2025,1,ATL,TB,1,789,TB,ATL,-7,Own,40,2,10,rush,2,2,1,5,3.0,4.0,4.0,1,,Trips Left,,,0,1,,,,1.0,2.0,0.0,0.0,0.0,1.0,,,Screen,,,,,,,2,-0.693373,False


In [74]:
data['FieldSide'].value_counts()

FieldSide
Own     17750
Oppo    15576
Name: count, dtype: int64

In [47]:
# Import visualization libraries
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle
import warnings
warnings.filterwarnings('ignore')

# Set style
sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

# Color scheme for NFL
NFL_COLORS = {
    'primary': '#013369',  # NFL blue
    'secondary': '#D50A0A',  # NFL red
    'accent': '#FFB612'  # NFL gold
}

In [None]:
# Load data with proper handling
data = pd.read_csv('2026_FAB_play_by_play.csv', low_memory=False)
data = data.assign(success=lambda x: x['EPA'] > 0)

# Create two-high coverage indicator
# According to data dictionary: Cover 2, Cover 4, and Man Cover 2 are two-high
two_high_coverages = ['Cover 2', 'Cover 4', 'Man Cover 2']
data['is_two_high'] = data['CoverageType'].isin(two_high_coverages)

# Filter to only pass plays for coverage analysis (coverage only applies to passes)
pass_plays = data[data['EventType'] == 'pass'].copy()

print(f"Total plays: {len(data):,}")
print(f"Pass plays: {len(pass_plays):,}")
print(f"Two-high coverage plays: {pass_plays['is_two_high'].sum():,}")
print(f"Two-high coverage rate: {pass_plays['is_two_high'].mean()*100:.1f}%")

# Coverage type breakdown
print("\n=== COVERAGE TYPE DISTRIBUTION ===")
coverage_breakdown = pass_plays['CoverageType'].value_counts().head(10)
print(coverage_breakdown)

Total plays: 33,326
Pass plays: 18,725
Two-high coverage plays: 5,914
Two-high coverage rate: 31.6%

=== COVERAGE TYPE DISTRIBUTION ===
CoverageType
Cover 3        4717
Cover 1        3739
Cover 4        2714
Cover 2        2417
Screen         1816
Cover 6        1183
Man Cover 2     783
Cover 0         436
Goal Line       332
Red 2           270
Name: count, dtype: int64


# Part 1: Understanding Two-High Safety Coverage Deployment and League Trends

In [49]:
# 1.1: Situational Factors Analysis - Down and Distance
down_distance_analysis = pass_plays.groupby(['Down', 'ToGo']).agg({
    'is_two_high': ['mean', 'count']
}).reset_index()
down_distance_analysis.columns = ['Down', 'ToGo', 'TwoHigh_Rate', 'Play_Count']
down_distance_analysis = down_distance_analysis[down_distance_analysis['Play_Count'] >= 50]  # Filter for sufficient sample

# Create heatmap
fig = px.density_heatmap(
    down_distance_analysis,
    x='ToGo',
    y='Down',
    z='TwoHigh_Rate',
    nbinsx=10,
    nbinsy=4,
    color_continuous_scale='Blues',
    title='Two-High Coverage Rate by Down and Distance',
    labels={'TwoHigh_Rate': 'Two-High Rate', 'ToGo': 'Yards to Go', 'Down': 'Down'}
)
fig.update_layout(
    height=500,
    font=dict(size=12),
    title_font_size=16
)
fig.show()

In [76]:
# 1.2: Field Position Analysis
# StartYard is 0-50, and FieldSide indicates "Own" or "Oppo"
# Combine them to create proper field zones
def create_field_zone(row):
    if row['FieldSide'] == 'Own':
        if row['StartYard'] <= 20:
            return 'Own 0-20'
        elif row['StartYard'] <= 40:
            return 'Own 20-40'
        else:  # 40-50
            return 'Own 40-50'
    else:  # FieldSide == 'Oppo'
        if row['StartYard'] <= 20:
            return 'Opp 20-0'  # Closer to opponent goal line
        elif row['StartYard'] <= 40:
            return 'Opp 40-20'
        else:  # 40-50
            return 'Opp 50-40'  # Closer to midfield

pass_plays['Field_Zone'] = pass_plays.apply(create_field_zone, axis=1)

field_pos_analysis = pass_plays.groupby('Field_Zone').agg({
    'is_two_high': ['mean', 'count', 'sum']
}).reset_index()
field_pos_analysis.columns = ['Field_Zone', 'TwoHigh_Rate', 'Total_Plays', 'TwoHigh_Plays']

# Define the correct order of field zones (from own goal line to opponent goal line)
field_zone_order = ['Own 0-20', 'Own 20-40', 'Own 40-50', 'Opp 50-40', 'Opp 40-20', 'Opp 20-0']
# Create a categorical type with the specified order
field_pos_analysis['Field_Zone'] = pd.Categorical(
    field_pos_analysis['Field_Zone'], 
    categories=field_zone_order, 
    ordered=True
)
field_pos_analysis = field_pos_analysis.sort_values('Field_Zone')

fig = go.Figure()
fig.add_trace(go.Bar(
    x=field_pos_analysis['Field_Zone'],
    y=field_pos_analysis['TwoHigh_Rate'] * 100,
    marker_color=NFL_COLORS['primary'],
    text=[f"{x:.1f}%" for x in field_pos_analysis['TwoHigh_Rate'] * 100],
    textposition='outside',
    name='Two-High Rate'
))

fig.update_layout(
    title='Two-High Coverage Rate by Field Position',
    xaxis_title='Field Position',
    yaxis_title='Two-High Coverage Rate (%)',
    height=500,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16
)
fig.show()

In [77]:
# 1.3: Score Differential Analysis
pass_plays['Score_Margin'] = pd.cut(
    pass_plays['OffLeadBefore'],
    bins=[-30, -14, -7, 0, 7, 14, 30],
    labels=['Down 15+', 'Down 8-14', 'Down 1-7', 'Tied/Up 1-7', 'Up 8-14', 'Up 15+']
)

score_analysis = pass_plays.groupby('Score_Margin').agg({
    'is_two_high': ['mean', 'count']
}).reset_index()
score_analysis.columns = ['Score_Margin', 'TwoHigh_Rate', 'Play_Count']

fig = go.Figure()
fig.add_trace(go.Bar(
    x=score_analysis['Score_Margin'],
    y=score_analysis['TwoHigh_Rate'] * 100,
    marker_color=NFL_COLORS['secondary'],
    text=[f"{x:.1f}%" for x in score_analysis['TwoHigh_Rate'] * 100],
    textposition='outside',
    name='Two-High Rate'
))

fig.update_layout(
    title='Two-High Coverage Rate by Score Differential',
    xaxis_title='Score Differential (Offensive Team Perspective)',
    yaxis_title='Two-High Coverage Rate (%)',
    height=500,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16
)
fig.show()

In [81]:
# 1.4: Time Remaining Analysis
pass_plays['Quarter'] = pass_plays['QTR']
pass_plays['Time_Left_Min'] = pass_plays['TimeLeftQTR'] / 60

# Create time buckets
pass_plays['Game_Time'] = pass_plays.apply(
    lambda x: f"Q{x['Quarter']} - {int(x['Time_Left_Min'])}:00" if x['Time_Left_Min'] >= 1 
    else f"Q{x['Quarter']} - <1:00", axis=1
)

# Simplify to quarter and time remaining
pass_plays['Time_Bucket'] = pass_plays.apply(
    lambda x: f"Q{x['Quarter']} - {15-int(x['Time_Left_Min'])}:00" if x['Time_Left_Min'] >= 1 
    else f"Q{x['Quarter']} - Final Min", axis=1
)

time_analysis = pass_plays.groupby(['Quarter', 'Time_Bucket']).agg({
    'is_two_high': ['mean', 'count']
}).reset_index()
time_analysis.columns = ['Quarter', 'Time_Bucket', 'TwoHigh_Rate', 'Play_Count']
time_analysis = time_analysis[time_analysis['Play_Count'] >= 30]

fig = px.line(
    time_analysis,
    x='Time_Bucket',
    y='TwoHigh_Rate',
    color='Quarter',
    markers=True,
    title='Two-High Coverage Rate by Quarter and Time Remaining',
    labels={'TwoHigh_Rate': 'Two-High Rate', 'Time_Bucket': 'Time in Quarter'}
)
fig.update_layout(
    height=500,
    font=dict(size=12),
    title_font_size=16
)
fig.show()

In [82]:
# 1.5: Offensive Personnel Impact on Two-High Usage
# Analyze how offensive formations affect defensive coverage choice
off_personnel = pass_plays.groupby(['WR', 'TE', 'RB']).agg({
    'is_two_high': ['mean', 'count']
}).reset_index()
off_personnel.columns = ['WR', 'TE', 'RB', 'TwoHigh_Rate', 'Play_Count']
off_personnel = off_personnel[off_personnel['Play_Count'] >= 50]
off_personnel['Personnel'] = off_personnel.apply(
    lambda x: f"{x['WR']}WR/{x['TE']}TE/{x['RB']}RB", axis=1
)

fig = go.Figure()
fig.add_trace(go.Bar(
    x=off_personnel['Personnel'],
    y=off_personnel['TwoHigh_Rate'] * 100,
    marker_color=NFL_COLORS['accent'],
    text=[f"{x:.1f}%" for x in off_personnel['TwoHigh_Rate'] * 100],
    textposition='outside',
    name='Two-High Rate'
))

fig.update_layout(
    title='Two-High Coverage Rate by Offensive Personnel Grouping',
    xaxis_title='Offensive Personnel (WR/TE/RB)',
    yaxis_title='Two-High Coverage Rate (%)',
    height=500,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

In [83]:
# 1.6: Team Frequency Analysis - Which teams use two-high most/least?
team_frequency = pass_plays.groupby('DefTeam').agg({
    'is_two_high': ['mean', 'sum', 'count']
}).reset_index()
team_frequency.columns = ['Team', 'TwoHigh_Rate', 'TwoHigh_Plays', 'Total_Plays']
team_frequency = team_frequency.sort_values('TwoHigh_Rate', ascending=False)

# Create visualization
fig = go.Figure()

# Most frequent
fig.add_trace(go.Bar(
    x=team_frequency.head(10)['Team'],
    y=team_frequency.head(10)['TwoHigh_Rate'] * 100,
    marker_color='#013369',
    name='Top 10',
    text=[f"{x:.1f}%" for x in team_frequency.head(10)['TwoHigh_Rate'] * 100],
    textposition='outside'
))

fig.update_layout(
    title='Top 10 Teams by Two-High Coverage Frequency',
    xaxis_title='Defensive Team',
    yaxis_title='Two-High Coverage Rate (%)',
    height=500,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16
)
fig.show()

# Display full rankings
print("\\n=== TWO-HIGH COVERAGE FREQUENCY RANKINGS ===")
print(team_frequency[['Team', 'TwoHigh_Rate', 'TwoHigh_Plays', 'Total_Plays']].to_string(index=False))

\n=== TWO-HIGH COVERAGE FREQUENCY RANKINGS ===
Team  TwoHigh_Rate  TwoHigh_Plays  Total_Plays
 MIN      0.429435            213          496
 TB       0.376860            228          605
 MIA      0.375451            208          554
 ARI      0.362126            218          602
 BUF      0.360000            180          500
 KC       0.352941            192          544
 LAC      0.352727            194          550
 CHI      0.348592            198          568
 SEA      0.344668            223          647
 NYJ      0.340111            184          541
 LAR      0.339623            216          636
 GB       0.338462            198          585
 DET      0.336093            203          604
 PIT      0.332829            220          661
 TEN      0.330935            184          556
 LV       0.326203            183          561
 WAS      0.324742            189          582
 SF       0.314570            190          604
 JAX      0.311844            208          667
 DAL      0.3

In [84]:
# 1.7: Team Effectiveness Analysis - Which teams are most/least effective at two-high?
# Effectiveness measured by EPA allowed when in two-high coverage
team_effectiveness = pass_plays[pass_plays['is_two_high']].groupby('DefTeam').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
team_effectiveness.columns = ['Team', 'EPA_Allowed', 'Plays', 'Success_Rate', 'Yards_Allowed']
team_effectiveness = team_effectiveness[team_effectiveness['Plays'] >= 100]  # Minimum sample size
team_effectiveness = team_effectiveness.sort_values('EPA_Allowed', ascending=True)  # Lower EPA = better defense

# Create visualization
fig = go.Figure()

fig.add_trace(go.Bar(
    x=team_effectiveness['Team'],
    y=team_effectiveness['EPA_Allowed'],
    marker_color=team_effectiveness['EPA_Allowed'].apply(
        lambda x: '#D50A0A' if x > 0 else '#013369'
    ),
    text=[f"{x:.2f}" for x in team_effectiveness['EPA_Allowed']],
    textposition='outside',
    name='EPA Allowed'
))

fig.update_layout(
    title='Defensive Effectiveness in Two-High Coverage (EPA Allowed)',
   #subtitle='Lower is Better - Teams sorted by EPA Allowed',
    xaxis_title='Defensive Team',
    yaxis_title='EPA Allowed per Play',
    height=600,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== TWO-HIGH COVERAGE EFFECTIVENESS RANKINGS ===")
print("(Lower EPA Allowed = Better Defense)")
print(team_effectiveness[['Team', 'EPA_Allowed', 'Success_Rate', 'Yards_Allowed', 'Plays']].to_string(index=False))

\n=== TWO-HIGH COVERAGE EFFECTIVENESS RANKINGS ===
(Lower EPA Allowed = Better Defense)
Team  EPA_Allowed  Success_Rate  Yards_Allowed  Plays
 HOU    -0.341836      0.403727       5.260870    161
 CLE    -0.286503      0.432692       4.240385    104
 JAX    -0.260802      0.413462       5.129808    208
 BUF    -0.177555      0.377778       5.000000    180
 DEN    -0.171173      0.378378       5.551351    185
 LAC    -0.157376      0.412371       5.505155    194
 NE     -0.124446      0.456140       5.350877    171
 LAR    -0.118168      0.439815       4.837963    216
 PHI    -0.076626      0.419540       5.850575    174
 MIN    -0.073976      0.417840       5.553991    213
 SEA    -0.052924      0.457399       5.623318    223
 NYG    -0.035831      0.494118       5.588235    170
 GB     -0.009587      0.469697       5.914141    198
 ATL     0.009581      0.435583       6.601227    163
 CAR     0.028694      0.448485       5.812121    165
 KC      0.053469      0.453125       6.109375  

In [85]:
# 1.8: Comprehensive Situational Analysis Dashboard
# Create a summary visualization showing all key factors

# Down analysis
down_analysis = pass_plays.groupby('Down').agg({
    'is_two_high': ['mean', 'count']
}).reset_index()
down_analysis.columns = ['Down', 'TwoHigh_Rate', 'Play_Count']

# Create subplot dashboard
fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('By Down', 'By Field Position', 'By Score Differential', 'By Quarter'),
    specs=[[{"type": "bar"}, {"type": "bar"}],
           [{"type": "bar"}, {"type": "bar"}]]
)

# Down
fig.add_trace(
    go.Bar(x=down_analysis['Down'], y=down_analysis['TwoHigh_Rate']*100, 
           marker_color=NFL_COLORS['primary'], showlegend=False),
    row=1, col=1
)

# Field Position
fig.add_trace(
    go.Bar(x=field_pos_analysis['Field_Zone'], y=field_pos_analysis['TwoHigh_Rate']*100,
           marker_color=NFL_COLORS['secondary'], showlegend=False),
    row=1, col=2
)

# Score Differential
fig.add_trace(
    go.Bar(x=score_analysis['Score_Margin'], y=score_analysis['TwoHigh_Rate']*100,
           marker_color=NFL_COLORS['accent'], showlegend=False),
    row=2, col=1
)

# Quarter
quarter_analysis = pass_plays.groupby('Quarter').agg({
    'is_two_high': 'mean'
}).reset_index()
fig.add_trace(
    go.Bar(x=quarter_analysis['Quarter'], y=quarter_analysis['is_two_high']*100,
           marker_color='#4A90E2', showlegend=False),
    row=2, col=2
)

fig.update_layout(
    title_text="Two-High Coverage Deployment: Situational Factors Dashboard",
    height=800,
    font=dict(size=10),
    title_font_size=16
)

fig.update_xaxes(title_text="Down", row=1, col=1)
fig.update_xaxes(title_text="Field Position", row=1, col=2)
fig.update_xaxes(title_text="Score Differential", row=2, col=1)
fig.update_xaxes(title_text="Quarter", row=2, col=2)

fig.update_yaxes(title_text="Two-High Rate (%)", row=1, col=1)
fig.update_yaxes(title_text="Two-High Rate (%)", row=1, col=2)
fig.update_yaxes(title_text="Two-High Rate (%)", row=2, col=1)
fig.update_yaxes(title_text="Two-High Rate (%)", row=2, col=2)

fig.show()

# Part 2: Offensive Attack Strategies Against Two-High Coverage

In [86]:
# 2.1: Formation Effectiveness Against Two-High
# Filter to only plays where defense was in two-high
two_high_plays = pass_plays[pass_plays['is_two_high']].copy()

# Analyze receiver alignments
formation_effectiveness = two_high_plays.groupby('ReceiverAlignment').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean',
    'Completion': 'mean'
}).reset_index()
formation_effectiveness.columns = ['Formation', 'EPA', 'Plays', 'Success_Rate', 'Yards', 'Completion_Rate']
formation_effectiveness = formation_effectiveness[formation_effectiveness['Plays'] >= 50]
formation_effectiveness = formation_effectiveness.sort_values('EPA', ascending=False)

# Create visualization
fig = go.Figure()

fig.add_trace(go.Bar(
    x=formation_effectiveness['Formation'],
    y=formation_effectiveness['EPA'],
    marker_color=formation_effectiveness['EPA'].apply(
        lambda x: '#013369' if x > 0 else '#D50A0A'
    ),
    text=[f"{x:.2f}" for x in formation_effectiveness['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Offensive Formation Effectiveness vs Two-High Coverage',
    xaxis_title='Receiver Alignment/Formation',
    yaxis_title='EPA per Play',
    height=600,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== FORMATION EFFECTIVENESS VS TWO-HIGH ===")
print(formation_effectiveness[['Formation', 'EPA', 'Success_Rate', 'Yards', 'Completion_Rate', 'Plays']].to_string(index=False))

\n=== FORMATION EFFECTIVENESS VS TWO-HIGH ===
   Formation       EPA  Success_Rate    Yards  Completion_Rate  Plays
  Bunch Left  0.252749      0.510490 6.405594         0.622378    143
Single Right  0.116537      0.409091 7.803030         0.590909     66
    Balanced  0.048836      0.483140 6.072708         0.614858   1898
   Twin Left  0.044262      0.501229 6.402948         0.641278    407
 Bunch Right  0.017856      0.429448 5.699387         0.527607    163
      Spread  0.012255      0.439086 5.251269         0.560914    394
  Twin Right  0.011608      0.478372 6.284987         0.587786    393
  Trips Left  0.009459      0.465490 6.367576         0.584270    623
 Single Left  0.008298      0.506667 7.040000         0.693333     75
   Slot Left -0.001901      0.483366 5.941292         0.626223    511
 Trips Right -0.002549      0.452349 6.167785         0.579866    745
  Slot Right -0.030854      0.448661 6.319196         0.595982    448


In [89]:
# 2.2: Personnel Grouping Effectiveness (avoiding heavy formations that signal run)
# Focus on lighter personnel that won't cause defense to audible
personnel_effectiveness = two_high_plays.groupby(['WR', 'TE', 'RB']).agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
personnel_effectiveness.columns = ['WR', 'TE', 'RB', 'EPA', 'Plays', 'Success_Rate', 'Yards']
personnel_effectiveness = personnel_effectiveness[personnel_effectiveness['Plays'] >= 30]
personnel_effectiveness['Personnel'] = personnel_effectiveness.apply(
    lambda x: f"{x['WR']}WR/{x['TE']}TE/{x['RB']}RB", axis=1
)
personnel_effectiveness = personnel_effectiveness.sort_values('EPA', ascending=False)

# Filter out heavy personnel (2+ RB, 2+ TE) that signal run
personnel_effectiveness_filtered = personnel_effectiveness[
    (personnel_effectiveness['RB'] <= 1) & (personnel_effectiveness['TE'] <= 2)
].copy()

fig = go.Figure()

fig.add_trace(go.Bar(
    x=personnel_effectiveness_filtered['Personnel'],
    y=personnel_effectiveness_filtered['EPA'],
    marker_color=personnel_effectiveness_filtered['EPA'].apply(
        lambda x: '#013369' if x > 0 else '#D50A0A'
    ),
    text=[f"{x:.2f}" for x in personnel_effectiveness_filtered['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Offensive Personnel Effectiveness vs Two-High (Light Personnel Only)',
    #subtitle='Filtered to avoid heavy formations that signal run',
    xaxis_title='Offensive Personnel (WR/TE/RB)',
    yaxis_title='EPA per Play',
    height=600,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== PERSONNEL EFFECTIVENESS VS TWO-HIGH (Light Personnel) ===")
print(personnel_effectiveness_filtered[['Personnel', 'EPA', 'Success_Rate', 'Yards', 'Plays']].to_string(index=False))

\n=== PERSONNEL EFFECTIVENESS VS TWO-HIGH (Light Personnel) ===
        Personnel       EPA  Success_Rate     Yards  Plays
2.0WR/1.0TE/1.0RB  0.571410      0.594595 12.081081     37
4.0WR/1.0TE/0.0RB  0.233810      0.516129  9.032258     31
3.0WR/1.0TE/1.0RB  0.031720      0.467513  6.026136   4094
2.0WR/2.0TE/1.0RB -0.016675      0.475737  5.977470   1154
4.0WR/0.0TE/1.0RB -0.137078      0.433333  5.300000     30


In [90]:
# 2.3: Route Concept Analysis - Which routes work best against two-high?
# Collect all routes from L1-L4 and R1-R4 columns
route_columns = ['L1', 'L2', 'L3', 'L4', 'R1', 'R2', 'R3', 'R4']

# Create a long format dataset of routes
route_data = []
for idx, row in two_high_plays.iterrows():
    for route_col in route_columns:
        route = row[route_col]
        if pd.notna(route) and route != '':
            route_data.append({
                'Route': route,
                'EPA': row['EPA'],
                'Success': row['success'],
                'Yards': row['YardsOnPlay'],
                'Targeted': row['TargetedPlayer'] == route_col,
                'Completion': row['Completion'] if row['TargetedPlayer'] == route_col else None
            })

routes_df = pd.DataFrame(route_data)

# Analyze route effectiveness
route_effectiveness = routes_df.groupby('Route').agg({
    'EPA': ['mean', 'count'],
    'Success': 'mean',
    'Yards': 'mean',
    'Targeted': 'sum'
}).reset_index()
route_effectiveness.columns = ['Route', 'EPA', 'Plays', 'Success_Rate', 'Yards', 'Targets']
route_effectiveness = route_effectiveness[route_effectiveness['Plays'] >= 100]  # Minimum sample
route_effectiveness = route_effectiveness.sort_values('EPA', ascending=False)

# Create visualization
fig = go.Figure()

fig.add_trace(go.Bar(
    x=route_effectiveness['Route'],
    y=route_effectiveness['EPA'],
    marker_color=route_effectiveness['EPA'].apply(
        lambda x: '#013369' if x > 0 else '#D50A0A'
    ),
    text=[f"{x:.2f}" for x in route_effectiveness['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Route Concept Effectiveness vs Two-High Coverage',
    xaxis_title='Route Type',
    yaxis_title='EPA per Play',
    height=700,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== ROUTE EFFECTIVENESS VS TWO-HIGH ===")
print(route_effectiveness[['Route', 'EPA', 'Success_Rate', 'Yards', 'Targets', 'Plays']].to_string(index=False))

\n=== ROUTE EFFECTIVENESS VS TWO-HIGH ===
   Route       EPA  Success_Rate    Yards  Targets  Plays
Vertical  0.062887      0.455224 6.724876      496   4020
   Other  0.060831      0.456790 7.672840       30    162
    Hook  0.035919      0.495281 5.811050     1164   5086
    Post  0.033309      0.441631 6.611489      196   1619
   Cross  0.029276      0.473738 6.257777      604   3922
     Out  0.009176      0.479039 5.434934      602   2290
    Flat  0.003248      0.463415 6.166463      389   1640
 Shallow -0.004484      0.469142 5.510441      611   2155
  Corner -0.031086      0.469523 5.579360      169   1657
Comeback -0.195437      0.419014 5.091549       58    284


In [91]:
# 2.4: Targeted Route Analysis - Which routes are most effective when targeted?
targeted_routes = routes_df[routes_df['Targeted'] == True].copy()

targeted_route_effectiveness = targeted_routes.groupby('Route').agg({
    'EPA': ['mean', 'count'],
    'Success': 'mean',
    'Yards': 'mean',
    'Completion': 'mean'
}).reset_index()
targeted_route_effectiveness.columns = ['Route', 'EPA', 'Targets', 'Success_Rate', 'Yards', 'Completion_Rate']
targeted_route_effectiveness = targeted_route_effectiveness[targeted_route_effectiveness['Targets'] >= 50]
targeted_route_effectiveness = targeted_route_effectiveness.sort_values('EPA', ascending=False)

# Create visualization
fig = go.Figure()

fig.add_trace(go.Bar(
    x=targeted_route_effectiveness['Route'],
    y=targeted_route_effectiveness['EPA'],
    marker_color=targeted_route_effectiveness['EPA'].apply(
        lambda x: '#013369' if x > 0 else '#D50A0A'
    ),
    text=[f"{x:.2f}" for x in targeted_route_effectiveness['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Targeted Route Effectiveness vs Two-High Coverage',
    #subtitle='Routes when they are the target of the pass',
    xaxis_title='Route Type',
    yaxis_title='EPA per Target',
    height=700,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== TARGETED ROUTE EFFECTIVENESS VS TWO-HIGH ===")
print(targeted_route_effectiveness[['Route', 'EPA', 'Success_Rate', 'Yards', 'Completion_Rate', 'Targets']].to_string(index=False))

\n=== TARGETED ROUTE EFFECTIVENESS VS TWO-HIGH ===
   Route       EPA  Success_Rate     Yards  Completion_Rate  Targets
    Post  0.611158      0.428571 12.933673         0.433673      196
Vertical  0.428768      0.415323 11.520161         0.423387      496
   Cross  0.393150      0.591060  9.811258         0.619205      604
    Hook  0.282571      0.614261  7.000859         0.768041     1164
  Corner  0.256766      0.408284  9.816568         0.402367      169
     Out  0.201213      0.566445  6.652824         0.674419      602
 Shallow  0.172816      0.561375  6.271686         0.710311      611
    Flat -0.030694      0.508997  5.017995         0.832905      389
Comeback -0.202328      0.413793  4.517241         0.448276       58


In [92]:
# 2.5: Dropback Type Analysis
dropback_effectiveness = two_high_plays.groupby('DropType').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean',
    'Completion': 'mean'
}).reset_index()
dropback_effectiveness.columns = ['DropType', 'EPA', 'Plays', 'Success_Rate', 'Yards', 'Completion_Rate']
dropback_effectiveness = dropback_effectiveness[dropback_effectiveness['Plays'] >= 50]
dropback_effectiveness = dropback_effectiveness.sort_values('EPA', ascending=False)

fig = go.Figure()

fig.add_trace(go.Bar(
    x=dropback_effectiveness['DropType'],
    y=dropback_effectiveness['EPA'],
    marker_color=NFL_COLORS['primary'],
    text=[f"{x:.2f}" for x in dropback_effectiveness['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Dropback Type Effectiveness vs Two-High Coverage',
    xaxis_title='Dropback Type',
    yaxis_title='EPA per Play',
    height=500,
    showlegend=False,
    font=dict(size=12),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== DROPBACK TYPE EFFECTIVENESS VS TWO-HIGH ===")
print(dropback_effectiveness[['DropType', 'EPA', 'Success_Rate', 'Yards', 'Completion_Rate', 'Plays']].to_string(index=False))

\n=== DROPBACK TYPE EFFECTIVENESS VS TWO-HIGH ===
              DropType       EPA  Success_Rate    Yards  Completion_Rate  Plays
                7 Step  0.103492      0.482692 8.446154         0.586538    520
              0/1 Step  0.073422      0.508502 5.248583         0.665587   1235
                3 Step  0.028032      0.470690 5.858446         0.595310   2303
Designed Rollout Right  0.011325      0.459716 6.142180         0.616114    211
                5 Step -0.030240      0.438289 6.514025         0.553296   1426
                   RPO -0.048345      0.523810 5.841270         0.761905     63
 Designed Rollout Left -0.161345      0.438017 6.123967         0.586777    121


In [93]:
# 2.6: Run Game Effectiveness vs Two-High
# Analyze rushing plays when defense shows two-high pre-snap
rush_plays_analysis = data[data['EventType'] == 'rush'].copy()
rush_plays_analysis['is_two_high'] = rush_plays_analysis['Safeties'] == 2

rush_effectiveness = rush_plays_analysis.groupby('is_two_high').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
rush_effectiveness.columns = ['TwoHigh', 'EPA', 'Plays', 'Success_Rate', 'Yards']

# Also analyze by personnel
rush_personnel = rush_plays_analysis[rush_plays_analysis['is_two_high']].groupby(['WR', 'TE', 'RB']).agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
rush_personnel.columns = ['WR', 'TE', 'RB', 'EPA', 'Plays', 'Success_Rate', 'Yards']
rush_personnel = rush_personnel[rush_personnel['Plays'] >= 30]
rush_personnel['Personnel'] = rush_personnel.apply(
    lambda x: f"{x['WR']}WR/{x['TE']}TE/{x['RB']}RB", axis=1
)
rush_personnel = rush_personnel.sort_values('EPA', ascending=False)

# Create comparison visualization
fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Run vs Two-High vs One-High', 'Run Personnel Effectiveness vs Two-High'),
    specs=[[{"type": "bar"}, {"type": "bar"}]]
)

# Comparison - handle case where we might not have both values
one_high_epa = rush_effectiveness[rush_effectiveness['TwoHigh']==False]['EPA'].values
two_high_epa = rush_effectiveness[rush_effectiveness['TwoHigh']==True]['EPA'].values

y_values = []
x_labels = []
if len(one_high_epa) > 0:
    y_values.append(one_high_epa[0])
    x_labels.append('One-High')
if len(two_high_epa) > 0:
    y_values.append(two_high_epa[0])
    x_labels.append('Two-High')

if len(y_values) > 0:
    fig.add_trace(
        go.Bar(x=x_labels, y=y_values,
               marker_color=NFL_COLORS['primary'], showlegend=False),
        row=1, col=1
    )

# Personnel
if len(rush_personnel) > 0:
    fig.add_trace(
        go.Bar(x=rush_personnel['Personnel'], y=rush_personnel['EPA'],
               marker_color=NFL_COLORS['secondary'], showlegend=False),
        row=1, col=2
    )

fig.update_layout(
    title_text="Run Game Effectiveness vs Two-High Coverage",
    height=600,
    font=dict(size=12),
    title_font_size=16
)

fig.update_xaxes(title_text="Coverage Type", row=1, col=1)
fig.update_xaxes(title_text="Offensive Personnel", row=1, col=2, tickangle=-45)
fig.update_yaxes(title_text="EPA per Play", row=1, col=1)
fig.update_yaxes(title_text="EPA per Play", row=1, col=2)

fig.show()

print("\\n=== RUN GAME EFFECTIVENESS COMPARISON ===")
print(rush_effectiveness.to_string(index=False))
print("\\n=== RUN PERSONNEL EFFECTIVENESS VS TWO-HIGH ===")
if len(rush_personnel) > 0:
    print(rush_personnel[['Personnel', 'EPA', 'Success_Rate', 'Yards', 'Plays']].to_string(index=False))
else:
    print("Insufficient data for personnel breakdown")

\n=== RUN GAME EFFECTIVENESS COMPARISON ===
 TwoHigh       EPA  Plays  Success_Rate    Yards
   False -0.034585   8409      0.448395 3.890369
    True  0.035947   6191      0.478113 4.963657
\n=== RUN PERSONNEL EFFECTIVENESS VS TWO-HIGH ===
        Personnel       EPA  Success_Rate    Yards  Plays
3.0WR/1.0TE/1.0RB  0.077483      0.505886 5.375736   3058
2.0WR/1.0TE/1.0RB  0.056712      0.504274 5.316239    117
1.0WR/3.0TE/1.0RB  0.020959      0.480720 4.313625    389
2.0WR/1.0TE/2.0RB  0.018733      0.482966 4.777555    499
2.0WR/2.0TE/1.0RB  0.010367      0.455657 4.718654   1635
1.0WR/1.0TE/2.0RB -0.086592      0.419355 4.161290     31
1.0WR/2.0TE/1.0RB -0.099333      0.421875 3.851562    128
1.0WR/2.0TE/2.0RB -0.170069      0.324607 3.455497    191
3.0WR/0.0TE/1.0RB -0.223749      0.333333 3.933333     30


In [94]:
# 2.7: Route Combination Analysis - Multi-route concepts
# Analyze plays with multiple routes to identify effective route combinations
def get_route_combination(row):
    routes = []
    for col in route_columns:
        if pd.notna(row[col]) and row[col] != '':
            routes.append(row[col])
    return ' + '.join(sorted(routes)) if routes else None

two_high_plays['Route_Combo'] = two_high_plays.apply(get_route_combination, axis=1)

route_combo_effectiveness = two_high_plays.groupby('Route_Combo').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
route_combo_effectiveness.columns = ['Route_Combo', 'EPA', 'Plays', 'Success_Rate', 'Yards']
route_combo_effectiveness = route_combo_effectiveness[route_combo_effectiveness['Plays'] >= 30]
route_combo_effectiveness = route_combo_effectiveness.sort_values('EPA', ascending=False)

# Show top 15 route combinations
top_combos = route_combo_effectiveness.head(15)

fig = go.Figure()

fig.add_trace(go.Bar(
    x=top_combos['Route_Combo'],
    y=top_combos['EPA'],
    marker_color=top_combos['EPA'].apply(
        lambda x: '#013369' if x > 0 else '#D50A0A'
    ),
    text=[f"{x:.2f}" for x in top_combos['EPA']],
    textposition='outside',
    name='EPA'
))

fig.update_layout(
    title='Top 15 Route Combination Effectiveness vs Two-High',
    xaxis_title='Route Combination',
    yaxis_title='EPA per Play',
    height=700,
    showlegend=False,
    font=dict(size=10),
    title_font_size=16,
    xaxis_tickangle=-45
)
fig.show()

print("\\n=== TOP ROUTE COMBINATIONS VS TWO-HIGH ===")
print(top_combos[['Route_Combo', 'EPA', 'Success_Rate', 'Yards', 'Plays']].to_string(index=False))

\n=== TOP ROUTE COMBINATIONS VS TWO-HIGH ===
                              Route_Combo      EPA  Success_Rate     Yards  Plays
               Cross + Flat + Hook + Post 0.679864      0.666667 10.633333     30
             Hook + Hook + Out + Vertical 0.582309      0.580645  8.806452     31
         Hook + Out + Vertical + Vertical 0.469600      0.781250  7.406250     32
                Cross + Hook + Hook + Out 0.356393      0.656250  6.890625     64
                  Cross + Hook + Vertical 0.334442      0.593750  8.687500     32
Vertical + Vertical + Vertical + Vertical 0.279038      0.380952  9.952381     42
         Cross + Out + Shallow + Vertical 0.234476      0.512821  6.692308     39
                 Hook + Hook + Hook + Out 0.218275      0.647059  6.088235     34
               Cross + Hook + Hook + Post 0.189817      0.552632  7.342105     38
         Corner + Cross + Hook + Vertical 0.187312      0.511628  6.837209     43
        Hook + Hook + Vertical + Vertical 0.174038   

In [95]:
# 2.8: Strategic Balance Analysis - Mixed Strategy Effectiveness
# Analyze how mixing run and pass affects success against two-high
two_high_plays['Play_Type'] = 'pass'
two_high_rush = rush_plays_analysis[rush_plays_analysis['is_two_high']].copy()
two_high_rush['Play_Type'] = 'rush'

# Combine for analysis - ensure same columns
pass_df = two_high_plays[['Play_Type', 'EPA', 'success', 'YardsOnPlay']].copy()
rush_df = two_high_rush[['Play_Type', 'EPA', 'success', 'YardsOnPlay']].copy()

all_two_high = pd.concat([pass_df, rush_df], ignore_index=True)

play_type_effectiveness = all_two_high.groupby('Play_Type').agg({
    'EPA': ['mean', 'count'],
    'success': 'mean',
    'YardsOnPlay': 'mean'
}).reset_index()
play_type_effectiveness.columns = ['Play_Type', 'EPA', 'Plays', 'Success_Rate', 'Yards']

fig = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Overall Play Type Effectiveness', 'Success Rate by Play Type'),
    specs=[[{"type": "bar"}, {"type": "bar"}]]
)

fig.add_trace(
    go.Bar(x=play_type_effectiveness['Play_Type'], y=play_type_effectiveness['EPA'],
           marker_color=NFL_COLORS['primary'], showlegend=False, text=[f"{x:.2f}" for x in play_type_effectiveness['EPA']],
           textposition='outside'),
    row=1, col=1
)

fig.add_trace(
    go.Bar(x=play_type_effectiveness['Play_Type'], y=play_type_effectiveness['Success_Rate']*100,
           marker_color=NFL_COLORS['secondary'], showlegend=False, text=[f"{x:.1f}%" for x in play_type_effectiveness['Success_Rate']*100],
           textposition='outside'),
    row=1, col=2
)

fig.update_layout(
    title_text="Play Type Effectiveness vs Two-High Coverage",
    height=500,
    font=dict(size=12),
    title_font_size=16
)

fig.update_xaxes(title_text="Play Type", row=1, col=1)
fig.update_xaxes(title_text="Play Type", row=1, col=2)
fig.update_yaxes(title_text="EPA per Play", row=1, col=1)
fig.update_yaxes(title_text="Success Rate (%)", row=1, col=2)

fig.show()

print("\\n=== PLAY TYPE EFFECTIVENESS VS TWO-HIGH ===")
print(play_type_effectiveness.to_string(index=False))

\n=== PLAY TYPE EFFECTIVENESS VS TWO-HIGH ===
Play_Type      EPA  Plays  Success_Rate    Yards
     pass 0.021115   5914      0.470240 6.109571
     rush 0.035947   6191      0.478113 4.963657


In [96]:
# 2.9: Comprehensive Offensive Strategy Dashboard
# Create a summary visualization of key offensive strategies

# Top formations
top_formations = formation_effectiveness.head(5)

# Top routes
top_routes = route_effectiveness.head(8)

# Top personnel
top_personnel = personnel_effectiveness_filtered.head(5)

fig = make_subplots(
    rows=2, cols=2,
    subplot_titles=('Top 5 Formations', 'Top 8 Routes', 'Top 5 Personnel Groupings', 'Play Type Comparison'),
    specs=[[{"type": "bar"}, {"type": "bar"}],
           [{"type": "bar"}, {"type": "bar"}]]
)

# Formations
fig.add_trace(
    go.Bar(x=top_formations['Formation'], y=top_formations['EPA'],
           marker_color=NFL_COLORS['primary'], showlegend=False),
    row=1, col=1
)

# Routes
fig.add_trace(
    go.Bar(x=top_routes['Route'], y=top_routes['EPA'],
           marker_color=NFL_COLORS['secondary'], showlegend=False),
    row=1, col=2
)

# Personnel
fig.add_trace(
    go.Bar(x=top_personnel['Personnel'], y=top_personnel['EPA'],
           marker_color=NFL_COLORS['accent'], showlegend=False),
    row=2, col=1
)

# Play Type
fig.add_trace(
    go.Bar(x=play_type_effectiveness['Play_Type'], y=play_type_effectiveness['EPA'],
           marker_color='#4A90E2', showlegend=False),
    row=2, col=2
)

fig.update_layout(
    title_text="Offensive Strategy Effectiveness vs Two-High: Comprehensive Dashboard",
    height=900,
    font=dict(size=10),
    title_font_size=16
)

fig.update_xaxes(title_text="Formation", row=1, col=1, tickangle=-45)
fig.update_xaxes(title_text="Route", row=1, col=2, tickangle=-45)
fig.update_xaxes(title_text="Personnel", row=2, col=1, tickangle=-45)
fig.update_xaxes(title_text="Play Type", row=2, col=2)

fig.update_yaxes(title_text="EPA", row=1, col=1)
fig.update_yaxes(title_text="EPA", row=1, col=2)
fig.update_yaxes(title_text="EPA", row=2, col=1)
fig.update_yaxes(title_text="EPA", row=2, col=2)

fig.show()

## Key Takeaways Summary

### Part 1: Two-High Coverage Deployment
1. **Situational Factors**: Two-high coverage usage varies significantly by down, distance, field position, score differential, and game situation
2. **Team Frequency**: Teams show wide variation in how frequently they deploy two-high coverage
3. **Team Effectiveness**: Some teams are significantly more effective at executing two-high coverage than others

### Part 2: Offensive Attack Strategies
1. **Formations**: Certain receiver alignments are more effective against two-high coverage
2. **Personnel**: Light personnel groupings (avoiding heavy formations) maintain disguise while maximizing effectiveness
3. **Routes**: Specific route concepts generate more success against two-high coverage
4. **Route Combinations**: Multi-route concepts can create effective passing attacks
5. **Run Game**: Running the ball can be effective against two-high, especially with proper personnel
6. **Strategic Balance**: Mixing run and pass maintains unpredictability while exploiting two-high weaknesses