In [1]:
import pandas as pd
import numpy as np
# Override the default column display width, presumably so the long 'Analysis'
# strings shown further down are not truncated.
# NOTE(review): pandas documents None as the "unlimited" value for this option;
# 0 is kept exactly as in the original - confirm it is intended.
pd.options.display.max_colwidth = 0

In [2]:
# Load plays.csv and games.csv from the NFL Big Data Bowl 2025 Kaggle input.
plays, games = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/nfl-big-data-bowl-2025/plays.csv",
        "/kaggle/input/nfl-big-data-bowl-2025/games.csv",
    )
)

In [3]:
# Read in analysis results from the notebook "BDB25 - Analysis" and create a new column
# "difference" holding the absolute difference between the test and control percent
# frequencies of various outcomes.

def load_analysis(csv_path):
    """Load one analysis results file and rank its rows by probability shift.

    Parameters
    ----------
    csv_path : str
        Path to a CSV containing the columns 'Unnamed: 0',
        'Test Probability' and 'Control Probability'.

    Returns
    -------
    pandas.DataFrame
        The rows of the file without the 'Unnamed: 0' column, with an added
        'difference' column equal to |Test Probability - Control Probability|,
        sorted by 'difference' in descending order and re-indexed from 0.

    Raises
    ------
    KeyError
        If any of the required columns is missing from the file.
    """
    # 'Unnamed: 0' is presumably the index saved by the upstream notebook;
    # it is dropped explicitly so a file without it fails loudly.
    frame = pd.read_csv(csv_path).drop(columns='Unnamed: 0')
    frame['difference'] = (frame['Test Probability'] - frame['Control Probability']).abs()
    return frame.sort_values(by='difference', ascending=False).reset_index(drop=True)


analysis1 = load_analysis("/kaggle/input/bdb25-analysis2/analysis1.csv")
analysis2 = load_analysis("/kaggle/input/bdb25-analysis2/analysis2.csv")

In [4]:
# Names of the 'P(feature)' values that are treated as play outcomes/results
# when the analysis tables are filtered.
results = [
    # binned pass length / yards gained and the pass result
    'passLen_bins', 'yardsGained_bins', 'passResult',
    # route indicators
    'OUT', 'SLANT', 'FLAT', 'IN', 'GO', 'CROSS',
    'HITCH', 'ANGLE', 'CORNER', 'POST', 'SCREEN',
    # pass flag
    'ispass',
]

In [5]:
# Keep only the rows whose computed probability refers to a play outcome/result,
# i.e. whose 'P(feature)' value is one of the names listed in `results`.

is_outcome1 = analysis1['P(feature)'].isin(results)
is_outcome2 = analysis2['P(feature)'].isin(results)

# Re-index from 0 so the filtered frames have a contiguous index.
results1 = analysis1.loc[is_outcome1].reset_index(drop=True)
results2 = analysis2.loc[is_outcome2].reset_index(drop=True)


In [6]:
# Create a feature to identify the number of routes looked at in an analysis.

# Tokens of an 'Analysis' string that count as a route condition.
ROUTE_TOKENS = frozenset([
    'OUT(1)', 'SLANT(1)', 'FLAT(1)', 'IN(1)', 'GO(1)', 'CROSS(1)',
    'HITCH(1)', 'ANGLE(1)', 'CORNER(1)', 'POST(1)', 'SCREEN(1)',
])


def count_routes(analysis):
    """Count the route conditions in one '|'-separated analysis string.

    Parameters
    ----------
    analysis : str
        An 'Analysis' value such as
        'offenseFormation(SINGLEBACK)|down(1)|passResult(C)|CROSS(1)'.

    Returns
    -------
    int
        Number of '|'-separated tokens that exactly match an entry of
        ROUTE_TOKENS (1 for the example above).

    Raises
    ------
    AttributeError
        If `analysis` is not a string (e.g. a missing value).
    """
    return sum(token in ROUTE_TOKENS for token in analysis.split("|"))


# Iterate over the column values directly instead of looking rows up by a
# 0..n-1 label, so the counts do not depend on the frame having a reset index.
cnts = [count_routes(analysis) for analysis in results1['Analysis']]
results1['# routes'] = cnts

cnts2 = [count_routes(analysis) for analysis in results2['Analysis']]
results2['# routes'] = cnts2

In [7]:
# To find results that have the best chance of being useful to coaches, keep only the rows
# of results1 whose change in probability is greater than or equal to 10 and whose analysis
# involves fewer than 2 routes.

m1 = results1['difference'].ge(10)  # shift between test and control of at least 10
m2 = results1['# routes'].lt(2)     # zero or one route condition in the analysis

# The original row labels are kept (no index reset) when showing the first rows.
results1[m1 & m2].head()

Unnamed: 0,Analysis,Significance,Result,P(feature),P(val),Control Probability,Test Probability,difference,# routes
7,offenseFormation(SINGLEBACK)|down(1)|passResult(C)|CROSS(1),1.4e-05,Less often than expected,CROSS,1,64.93,39.08,25.85,1
8,offenseFormation(SINGLEBACK)|passLen_bins(0-10 yards)|CROSS(1),0.000156,Less often than expected,CROSS,1,62.77,38.36,24.41,1
9,yardsGained_bins(15-20 yards)|ispass(0),1.9e-05,More often than expected,ispass,0,19.86,43.75,23.89,0
10,yardsGained_bins(15-20 yards)|ispass(1),1.9e-05,Less often than expected,ispass,1,80.14,56.25,23.89,0
11,offenseFormation(SHOTGUN)|pff_passCoverage(Cover-3)|down(1)|ispass(1)|GO(1),0.00096,Less often than expected,GO,1,59.03,35.59,23.44,1


In [8]:
# Keep the rows of results2 whose change in probability is greater than or equal to 13
# and whose analysis involves fewer than 2 routes.
m1 = results2['difference'].ge(13)
m2 = results2['# routes'].lt(2)

# The index is reset so the displayed rows are numbered from 0.
results2[m1 & m2].reset_index(drop=True).head()

Unnamed: 0,Analysis,Significance,Result,P(feature),P(val),Control Probability,Test Probability,difference,# routes
0,offenseFormation(SINGLEBACK)|down(1)|passResult(C)|CROSS(1),7.19903e-08,Less often than expected,CROSS,1,77.68,39.08,38.6,1
1,offenseFormation(SINGLEBACK)|passLen_bins(0-10 yards)|CROSS(1),1.685518e-06,Less often than expected,CROSS,1,75.49,38.36,37.13,1
2,offenseFormation(SINGLEBACK)|down(1)|ispass(1)|CROSS(1),1.806851e-09,Less often than expected,CROSS,1,74.25,39.57,34.68,1
3,offenseFormation(SINGLEBACK)|pff_passCoverage(Cover-3)|down(1)|ispass(1)|CROSS(1),0.0005763008,Less often than expected,CROSS,1,73.33,41.82,31.51,1
4,offenseFormation(SINGLEBACK)|passResult(C)|CROSS(1),1.075953e-07,Less often than expected,CROSS,1,73.03,41.94,31.09,1
