In [None]:
import io
import math
import re
import time
import urllib.request

import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import seaborn as sns
from numpy import mean
from numpy import std
from numpy.random import randn
from numpy.random import seed


# How does a defense react to certain types of offensive plays?

In [None]:
# Load the play-level table and preview its first rows.
play = pd.read_csv('../input/nfl-big-data-bowl-2021/plays.csv')
play.head()

# Data cleaning

In [None]:
# Print the number of missing values in every column.
for column_name in play.columns:
    missing_count = play[column_name].isna().sum()
    print(column_name, missing_count)

In [None]:
# Median number of defenders in the box; used in the next cell to fill gaps.
med_def=play['defendersInTheBox'].median()
print(med_def)

In [None]:
# Fill missing box counts with the median computed above.
box_filled = play['defendersInTheBox'].fillna(med_def)
play['defendersInTheBox'] = box_filled

In [None]:
# Where the pass-rusher count is missing, fall back to the number of defenders
# in the box for the same play. Series.fillna aligns on the index, so this is
# the vectorised equivalent of the former row-by-row apply.
play['numberOfPassRushers'] = play['numberOfPassRushers'].fillna(play['defendersInTheBox'])

# Number of values still missing after the fill.
play['numberOfPassRushers'].isnull().sum()

In [None]:
# Fill missing pre-snap home scores with the column median.
pre_home = play['preSnapHomeScore'].median()
print(pre_home)
play['preSnapHomeScore'] = play['preSnapHomeScore'].fillna(pre_home)

In [None]:
# Where the visitor score is missing, reuse the home score of the same play.
# Series.fillna aligns on the index, so this is the vectorised equivalent of
# the former row-by-row apply.
play['preSnapVisitorScore'] = play['preSnapVisitorScore'].fillna(play['preSnapHomeScore'])

# Number of values still missing after the fill.
play['preSnapVisitorScore'].isnull().sum()

In [None]:
# Row positions of the plays that have no pass result recorded.
nanP = play['passResult'].isnull()
lst = [position for position, is_missing in enumerate(nanP) if is_missing]
count = len(nanP)
print(lst)

In [None]:
# Remove the plays collected in `lst`.
# NOTE(review): `lst` holds row positions while drop() works on index labels;
# presumably this relies on the default integer index from read_csv - confirm.
play = play.drop(lst)

In [None]:
from sklearn.impute import SimpleImputer

# Imputer that replaces every remaining missing value with the constant 0.
# The `verbose` argument is no longer passed: it had no effect and recent
# scikit-learn releases reject it.
si = SimpleImputer(
    missing_values=np.nan,
    strategy='constant',
    fill_value=0,
    copy=True
)

In [None]:
# Fit the constant imputer on the whole frame.
si.fit(X=play)

# Rebuild a DataFrame from the imputed array, reusing the original column names.
# NOTE(review): the rebuilt frame gets a fresh default index, and the columns
# presumably come back with generic (object) dtype - confirm before relying on
# numeric operations downstream.
play = pd.DataFrame.from_records(
    data=si.transform(
        X=play
    ),  
    columns=play.columns  
)

In [None]:
# Re-check the number of missing values per column after imputation.
for column_name in play.columns:
    missing_count = play[column_name].isna().sum()
    print(column_name, missing_count)

# Data Transformation

In [None]:
# Store the box-defender count as integers.
play['defendersInTheBox'] = play['defendersInTheBox'].astype(int)

In [None]:
# Store the pass-rusher count as integers.
play['numberOfPassRushers'] = play['numberOfPassRushers'].astype(int)

In [None]:
# Collect every two-letter position code that occurs in the defensive
# personnel strings (format: "<digit> <CODE>, <digit> <CODE>, ...").
personnel_pattern = re.compile(r"([0-9])\s([A-Z][A-Z])")

s = set()
for ind in play.index:
    personnel = play['personnelD'][ind]
    # Integer entries are imputed placeholders, not personnel strings. Skip the
    # row instead of stopping, so that later rows are still scanned.
    if isinstance(personnel, int):
        continue
    for part in str(personnel).split(', '):
        x1 = personnel_pattern.search(part)
        # Ignore fragments that do not look like "<digit> <CODE>".
        if x1 is not None:
            s.add(x1[2])

print(s)

In [None]:
# Add one column per position code found above, initialised to 0.
for i in s:
    play[i] = 0

In [None]:
# Write the per-position head counts of each play into the matching column.
personnel_pattern = re.compile(r"([0-9])\s([A-Z][A-Z])")

for ind in play.index:
    personnel = play['personnelD'][ind]
    # Integer entries are imputed placeholders; their counts stay at 0.
    if isinstance(personnel, int):
        continue
    for part in str(personnel).split(', '):
        x1 = personnel_pattern.search(part)
        # Skip fragments that do not look like "<digit> <CODE>".
        if x1 is None:
            continue
        play.at[ind, x1[2]] = int(x1[1])

Selecting the attributes needed

# Offensive attributes:
yards to go\
play type\
yard Line number\
offense formation\
type of Drop back\
pre Snap visitor score\
pre Snap home score\
absolute yard line number\
is Defensive pi\
personnel O

----

# Defensive reactions:
defenders in the box\
number of pass rushers\
personnel D

----

# Other attributes(Determining attributes)
quarter\
down\
pass result\
offensive play result\
play result

1. How does the defense react to different offensive formations?

In [None]:
# Keep only the columns needed to relate the offensive formation to the defensive reaction.
play1 = play [['offenseFormation','defendersInTheBox','numberOfPassRushers','down','quarter','offensePlayResult']]

In [None]:
# Pairwise correlation of the numeric columns. Non-numeric columns such as the
# formation name are excluded explicitly, because recent pandas versions raise
# on them instead of dropping them silently.
play1.select_dtypes(include='number').corr()

In [None]:
# Scatter-plot matrix of the selected columns.
sns.pairplot(play1, kind="scatter")
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt1.png')
plt.show()

In [None]:
# Offensive play result per offensive formation.
g  = sns.catplot(x="offenseFormation", y="offensePlayResult", data=play1 )
plt.savefig('plt2.png')

In [None]:
# Defenders in the box per offensive formation.
g  = sns.catplot(x="offenseFormation", y="defendersInTheBox", data=play1 )
plt.savefig('plt3.png')

In [None]:
# Offensive play result against the number of defenders in the box.
play1.plot.scatter(x='defendersInTheBox',y='offensePlayResult')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt4.png')
plt.show()

In [None]:
# Play result vs. defenders in the box, coloured by offensive formation.
# x and y are passed by keyword; recent seaborn versions no longer accept them positionally.
sns.lmplot(x='defendersInTheBox', y='offensePlayResult', data=play1, hue='offenseFormation', fit_reg=False)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt5.png')
plt.show()

In [None]:
# Play result vs. number of pass rushers, coloured by offensive formation.
# x and y are passed by keyword; recent seaborn versions no longer accept them positionally.
sns.lmplot(x='numberOfPassRushers', y='offensePlayResult', data=play1, hue='offenseFormation', fit_reg=False)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt6.png')
plt.show()

In [None]:
# Play result per quarter, coloured by offensive formation.
# x and y are passed by keyword; recent seaborn versions no longer accept them positionally.
sns.lmplot(x='quarter', y='offensePlayResult', data=play1, hue='offenseFormation', fit_reg=False)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt7.png')
plt.show()

I_FORM

In [None]:
# Restrict the analysis frame to plays run from the I_FORM formation.
play_iform = play1[play1['offenseFormation'] == 'I_FORM']
play_iform

In [None]:
# Distribution of defenders in the box against I_FORM.
play_iform.plot.hist(y='defendersInTheBox')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt8.png')
plt.show()

In [None]:
# Distribution of pass rushers against I_FORM.
play_iform.plot.hist(y='numberOfPassRushers')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt9.png')
plt.show()

Animation

In [None]:
# For every offensive formation, print the truncated mean head count of each position.
form = list(dict(play['offenseFormation'].value_counts()).keys())
for formation in form:
    print(formation)
    formation_rows = play[play['offenseFormation'] == formation]
    for position in ('WR', 'RB', 'LB', 'TE', 'DL', 'OL', 'DB'):
        print(position, int(formation_rows[position].mean()))
    print()

In [None]:
# Background picture of an American football field.
# matplotlib.image.imread no longer accepts URLs, so the PNG is downloaded
# first and decoded from an in-memory buffer.
field_image_url = 'https://upload.wikimedia.org/wikipedia/commons/thumb/c/c5/AmFBfield.svg/1200px-AmFBfield.svg.png'
with urllib.request.urlopen(field_image_url) as response:
    field = mpimg.imread(io.BytesIO(response.read()), format='png')

imgplot = plt.imshow(field)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('mapdiag.png')
plt.show()

In [None]:
# Draw the field image stretched over a 120 x 53.3 coordinate box.
fig, ax = plt.subplots(figsize=(14,8))
imgplot = ax.imshow(field, extent=[0, 120, 0, 53.3])
plt.savefig('resize_field.png')

In [None]:
# Field image with one sample player marker at (70, 20).
fig, ax = plt.subplots(figsize=(14,8))
imgplot = ax.imshow(field, extent=[0, 120, 0, 53.3])
ax.scatter(x = 70, y = 20, c='#FFA500',s=[300],edgecolors ='#000000')
plt.xlabel("X")
plt.ylabel("Y")

# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('map1.png')
plt.show()

In [None]:
# Sketch the defensive personnel of the first play on the field image.
# Only one play is drawn: the loop exits after its first iteration.
for index, row in play.iterrows():
    dl = int(row.DL)
    db = int(row.DB)
    lb = int(row.LB)

    fig, ax = plt.subplots(figsize=(14,8))
    imgplot = ax.imshow(field, extent=[0, 120, 0, 53.3])

    # (x coordinate, number of players, marker colour, offset of the first marker).
    # Two linebackers use a larger offset than any other unit size.
    unit_layout = [
        (70, dl, '#FFA500', 5),
        (85, db, '#FF0000', 5),
        (100, lb, '#0000FF', 10 if lb == 2 else 5),
    ]
    for x_pos, n_players, colour, first_offset in unit_layout:
        # A unit with no players has nothing to draw (and would divide by zero).
        if n_players == 0:
            continue
        spacing = int(50/n_players)
        for slot in range(n_players):
            ax.scatter(x = x_pos, y = spacing - first_offset + slot*spacing,
                       c=colour, s=[300], edgecolors ='#000000')

    plt.xlabel("X")
    plt.ylabel("Y")
    # Save inside the loop, before leaving it; a savefig placed after `break` never runs.
    plt.savefig('map2.png')
    break
    

In [None]:
# Same sketch as above, drawn on a figure created outside the loop.
fig, ax = plt.subplots(figsize=(14,8))
imgplot = ax.imshow(field, extent=[0, 120, 0, 53.3])
for index, row in play.iterrows():
    dl = int(row.DL)
    db = int(row.DB)
    lb = int(row.LB)

    # (x coordinate, number of players, marker colour, offset of the first marker).
    # Two linebackers use a larger offset than any other unit size.
    unit_layout = [
        (70, dl, '#FFA500', 5),
        (85, db, '#FF0000', 5),
        (100, lb, '#0000FF', 10 if lb == 2 else 5),
    ]
    for x_pos, n_players, colour, first_offset in unit_layout:
        # A unit with no players has nothing to draw (and would divide by zero).
        if n_players == 0:
            continue
        spacing = int(50/n_players)
        for slot in range(n_players):
            ax.scatter(x = x_pos, y = spacing - first_offset + slot*spacing,
                       c=colour, s=[300], edgecolors ='#000000')
    # Only the first play is drawn.
    break

plt.xlabel("X")
plt.ylabel("Y")
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('map3.png')
plt.show()

Analysis of each offense formation

In [None]:
# All plays run from the SHOTGUN formation.
play_shotgun = play[play['offenseFormation'] == 'SHOTGUN']

In [None]:
# Play result vs. number of defensive backs against SHOTGUN, coloured by down.
# x and y are passed by keyword; recent seaborn versions no longer accept them positionally.
sns.lmplot(x='DB', y='offensePlayResult', data=play_shotgun, hue='down', fit_reg=False)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt10.png')
plt.show()

In [None]:
# Distribution of the offensive play result for SHOTGUN plays.
play_shotgun.plot.hist(y='offensePlayResult')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt11.png')
plt.show()

In [None]:
# Distribution of the number of defensive backs for SHOTGUN plays.
play_shotgun.plot.hist(y='DB')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt12.png')
plt.show()

In [None]:
# Offensive play result against the number of defensive backs (SHOTGUN plays).
plt.scatter(x=play_shotgun.DB,y=play_shotgun.offensePlayResult)
plt.title("Scatter plot")
plt.xlabel("DB")
plt.ylabel("Offense Score")
plt.savefig('plt13.png')

In [None]:
# Average number of defensive backs fielded against SHOTGUN.
play_shotgun.DB.mean()

Analysis of each defensive personnel combination vs. each offensive formation

In [None]:
# How often each defensive-back count occurs across all plays.
play.DB.value_counts()

In [None]:
# Frequency table of the head count for every position column.
list1 = ['DB','DL','WR','RB','LB','TE','OL']
for position in list1:
    head_counts = play[position].value_counts()
    print(str(position), head_counts)
    

In [None]:
# How often each defensive personnel grouping is used.
play.personnelD.value_counts()

In [None]:
# Offensive formation analysed in the cells below.
form =  "SHOTGUN"

In [None]:
# Plays of the selected formation and the defensive groupings used against it.
play_form = play[play['offenseFormation'] == form]
personnel_counts = play_form.personnelD.value_counts()
print(personnel_counts)
print(len(play_form.personnelD.value_counts()))

In [None]:
# Rounded mean, standard deviation and usage count of the play result for
# every defensive grouping. Series.items() replaces iteritems(), which was
# removed in pandas 2.0.
for i, val in play_form.personnelD.value_counts().items():
    yards = play_form[play_form['personnelD'] == i]['offensePlayResult']
    print(i,
          "  Mean Score:",round(yards.mean()),
          " Standard Deviation:",math.sqrt(yards.var()),val)

In [None]:
# Normal curve fitted to the play result against one defensive grouping.
# Mean and standard deviation are taken from the same grouping and the same
# frame; previously the mean depended on a loop variable left over from
# another cell and the row mask came from a different DataFrame.
personnel = "4 DL, 2 LB, 5 DB"
yards = play_form[play_form['personnelD'] == personnel]['offensePlayResult']
mu = yards.mean()
sigma = math.sqrt(yards.var())
x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
plt.plot(x, stats.norm.pdf(x, mu, sigma))
plt.title('Normal Distribution of offensive points')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt14.png')
plt.show()

In [None]:
# Empty summary table; one row per defensive grouping is added further down.
positions = pd.DataFrame()

In [None]:
# Declare the summary columns on the (still empty) table.
positions['Formation'] = ''
positions['meanYardsGiven']=0
positions['meanEpa']=0
positions['timesUsed']=0

positions.head()

In [None]:
# Preview the play result next to the epa column.
play[['playResult','epa']].head(10)

In [None]:
# Count the plays whose epa exceeds the play result ("Correct") and the rest
# ("Incorrect"); done with one vectorised comparison instead of iterrows.
epa_is_higher = play['epa'] > play['playResult']
count = int(epa_is_higher.sum())
count1 = len(play) - count
print('Correct',count)
print('Incorrect',count1)

In [None]:
# Plain mean and standard deviation of the play result per defensive grouping.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
print("Unfiltered mean")
for i, val in play_form.personnelD.value_counts().items():
    yards = play_form[play_form['personnelD'] == i]['offensePlayResult']
    dict1 = {'Formation': i,
             'meanYardsGiven' : yards.mean(),
             'sdYardsGiven' : math.sqrt(yards.var())}
    print(dict1)

In [None]:
def filtered_mean(data, sd, f=2):
    """Mean of `data` after discarding values far away from the plain mean.

    Parameters
    ----------
    data : pandas.Series
        Observations to average (anything offering ``to_numpy()``).
    sd : float
        Standard deviation used to size the acceptance band.
    f : float, default 2
        Half-width of the band in multiples of `sd`.

    Returns
    -------
    float
        Mean of the values strictly inside ``(mean - f*sd, mean + f*sd)``.
        A single observation is returned unchanged. If no value survives the
        filter (for example when `sd` is 0 or NaN), the unfiltered mean is
        returned instead of NaN.
    """
    data = data.to_numpy()
    if len(data) == 1:
        return data[0]
    mean = np.mean(data)
    lower = mean - f*sd
    upper = mean + f*sd
    # Both bounds are applied together; filtering twice from `data` would
    # silently drop the lower bound.
    final_data = [x for x in data if lower < x < upper]
    if not final_data:
        return mean
    return np.mean(final_data)

In [None]:
# Outlier-filtered mean and standard deviation of the play result per grouping.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
print("Filtered mean")
for i, val in play_form.personnelD.value_counts().items():
    yards = play_form[play_form['personnelD'] == i]['offensePlayResult']
    yards_sd = math.sqrt(yards.var())
    dict1 = {'Formation': i,
             'meanYardsGiven' : filtered_mean(yards, yards_sd),
             'sdYardsGiven' : yards_sd}
    print(dict1)

In [None]:
# One summary row per defensive grouping used against the selected formation.
# The rows are collected first and the table is built once: DataFrame.append
# and Series.iteritems were removed in pandas 2.0. Building the table from
# scratch also keeps the cell safe to re-run.
position_rows = []
for i, val in play_form.personnelD.value_counts().items():
    grouping_rows = play_form[play_form['personnelD'] == i]
    yards = grouping_rows['offensePlayResult']
    position_rows.append({
        'Formation': i,
        'meanYardsGiven': filtered_mean(yards, math.sqrt(yards.var())),
        'meanEpa': grouping_rows['epa'].mean(),
        'timesUsed': val,
    })
positions = pd.DataFrame(position_rows, columns=['Formation', 'meanYardsGiven', 'meanEpa', 'timesUsed'])

In [None]:
# Keep only groupings used more than 10 times. The explicit copy makes the
# result an independent frame, since later cells write new columns into it.
positions = positions[positions.timesUsed > 10].copy()

In [None]:
# Split each grouping string ("<digit> <CODE>, ...") into one count column per position.
personnel_pattern = re.compile(r"([0-9])\s([A-Z][A-Z])")

for ind in positions.index:
    for part in str(positions['Formation'][ind]).split(', '):
        x1 = personnel_pattern.search(part)
        # Skip fragments that do not look like "<digit> <CODE>".
        if x1 is None:
            continue
        positions.at[ind, x1[2]] = int(x1[1])

In [None]:
# Treat a missing mean as 0 yards.
positions.meanYardsGiven = positions.meanYardsGiven.fillna(0)

# A grouping that does not list a position has no entry in that column;
# count it as 0 players so the integer cast cannot fail on NaN.
for position in ('DL', 'LB', 'DB'):
    positions[position] = positions[position].fillna(0).astype(int)

In [None]:
# Weight column, filled in by the next cell.
positions['Weight'] = 0.0

In [None]:
# Weight of a grouping: 200 minus the mean yards given minus the mean epa.
positions['Weight'] = 200.0 - positions['meanYardsGiven'] - positions['meanEpa']

In [None]:
# Highest weight first; ties are broken by how often the grouping was used.
positions = positions.sort_values(['Weight', 'timesUsed'], ascending=[False, False])

Best defensive formations vs. the given offensive formation.
The appropriate DL, LB and DB values are computed as weighted averages over these formations.

In [None]:
# Weight-averaged number of defensive linemen, rounded to a whole player.
dl = sum(weight * linemen for weight, linemen in zip(positions['Weight'], positions['DL']))
dl /= positions.Weight.sum()
# int(round(...)) matches the LB and DB cells; calling .astype on the result
# of round() fails when round() returns a plain Python int.
dl = int(round(dl))
print("Appropriate DL value is:",dl)

In [None]:
# Weight-averaged number of linebackers, rounded to a whole player.
weighted_total = sum(weight * backers for weight, backers in zip(positions['Weight'], positions['LB']))
lb = weighted_total / positions.Weight.sum()
lb = int(round(lb))
print("Appropriate LB value is:",lb)

In [None]:
# Weight-averaged number of defensive backs, rounded to a whole player.
weighted_total = sum(weight * backs for weight, backs in zip(positions['Weight'], positions['DB']))
db = weighted_total / positions.Weight.sum()
db = int(round(db))
print("Appropriate DB value is:",db)

In [None]:
# Report the three rounded averages and check whether they add up to 11 players.
print("The best formation is DL:",dl,"LB:",lb,"DB:",db)
print(dl+lb+db == 11)

In [None]:
# Draw the recommended DL / DB / LB counts on the field image.
fig, ax = plt.subplots(figsize=(14,8))
imgplot = ax.imshow(field, extent=[0, 120, 0, 53.3])

# Starting height of the defensive-line column.
# NOTE(review): the first two branches derive the start from `lb`, not `dl` -
# possibly a copy-paste slip; left unchanged because it only shifts markers.
if dl == 2:
    y1 = int(50/lb)-3
elif dl == 3:
    y1 = int(50/lb)-5
else:
    y1 = int(50/dl)-5

for i in range(dl):
    ax.scatter(x = 70, y =y1 , c='#FFA500',s=[300],edgecolors ='#000000')
    y1+=int(50/dl)

# Starting height of the defensive-back column.
# NOTE(review): also derived from `lb` - confirm this is intended.
if db == 2:
    y2 = int(50/lb)-3
elif db == 6:
    y2 = 6
else:
    y2 = int(50/lb)-5

for i in range(db):
    ax.scatter(x = 85, y =y2 , c='#FF0000',s=[300],edgecolors ='#000000')
    y2+=int(50/db)

# Starting height of the linebacker column.
if lb == 2:
    y3 = int(50/lb)-15
else:
    y3 = int(50/lb)-5

for i in range(lb):
    ax.scatter(x = 100, y =y3 , c='#0000FF',s=[300],edgecolors ='#000000')
    y3+=int(50/lb)

plt.xlabel("X")
plt.ylabel("Y")
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('map5.png')
plt.show()

Interestingness Measure: Lift

In [None]:
# Lift-style score for "play result below epa" together with "DL count equals dl".
# d is the yardage threshold used by the LB and DB cells below.
d = 6
lift=0

below_epa = play['offensePlayResult'] < play['epa']
has_dl = play['DL'] == dl
den2 = int(below_epa.sum())
num = int((below_epa & has_dl).sum())
den1 = int(has_dl.sum())

# Skip the term when either count is zero instead of dividing by zero.
# NOTE(review): a textbook lift also multiplies by the number of rows;
# the formula itself is left as it was.
if den1 != 0 and den2 != 0:
    lift+=(num/(den1*den2))

In [None]:
# Lift-style term for "mean yards given below d" together with "LB count equals lb".
# The counters start from zero so the DL counts do not leak into this term,
# and both conditions compare against `lb` (the denominator used a fixed 2).
low_yards = positions['meanYardsGiven'] < d
has_lb = positions['LB'] == lb
den2 = int(low_yards.sum())
num = int((low_yards & has_lb).sum())
den1 = int(has_lb.sum())

# Skip the term when either count is zero instead of dividing by zero.
if den1 != 0 and den2 != 0:
    lift+=(num/(den1*den2))

In [None]:
# Lift-style term for "mean yards given below d" together with "DB count equals db".
# The counters start from zero so earlier counts do not leak into this term,
# and both conditions compare against `db` (the denominator used a fixed 2).
low_yards = positions['meanYardsGiven'] < d
has_db = positions['DB'] == db
den2 = int(low_yards.sum())
num = int((low_yards & has_db).sum())
den1 = int(has_db.sum())

# Skip the term when either count is zero instead of dividing by zero.
if den1 != 0 and den2 != 0:
    lift+=(num/(den1*den2))

In [None]:
# Sum of the DL, LB and DB terms accumulated above.
print(lift)

# What are coverage schemes (man, zone, etc) that the defense employs? What coverage options tend to be better performing?

In [None]:
# Inputs: week-1 coverage labels, week-1 tracking data and the play table.
cover = pd.read_csv("../input/nfl-big-data-bowl-2021-bonus/coverages_week1.csv")
w1 = pd.read_csv("../input/nfl-big-data-bowl-2021/week1.csv")
plays = pd.read_csv('../input/nfl-big-data-bowl-2021/plays.csv')

# Collapse the detailed coverage labels into the two families Man and Zone.
cover["coverage"] = cover["coverage"].replace({"Cover 0 Man": "Man","Cover 1 Man": "Man","Cover 2 Man": "Man","Cover 2 Zone": "Zone","Cover 3 Zone": "Zone","Cover 4 Zone": "Zone","Cover 6 Zone": "Zone"})

# Attach the coverage label to every play and to every tracking row.
# The join uses the play identifiers; matching playId against the row index
# of `cover` pairs unrelated rows.
# NOTE(review): assumes coverages_week1.csv has gameId and playId columns - confirm.
result = pd.merge(plays, cover, on=['gameId', 'playId'], how='left', sort=False)
result1 = result.dropna(how='any', subset=['coverage'])

res2 = pd.merge(w1, cover, on=['gameId', 'playId'], how='left', sort=False)
res2 = res2.dropna(how='any', subset=['coverage'])

# Pass result per coverage family.
psr = result1[["passResult","coverage"]].copy()
man = psr[psr['coverage'] == 'Man']
zone = psr[psr['coverage'] == 'Zone']

# Empty placeholders kept from the original cell.
man1 = pd.DataFrame()
zone1 = pd.DataFrame()

# Route per coverage family (tracking rows without a route are dropped).
rut = res2[["route","coverage"]].copy()
rut = rut.dropna(how='any', subset=['route'])
man2 = rut[rut['coverage'] == 'Man']
zone2 = rut[rut['coverage'] == 'Zone']

result1.head()
In [None]:
# Rounded mean, standard deviation and usage count of epa per coverage.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
for i, val in result1.coverage.value_counts().items():
    epa_values = result1[result1['coverage'] == i]['epa']
    print(i,
          "  Mean Score:",round(epa_values.mean()),
          " Standard Deviation:",math.sqrt(epa_values.var()),val)

In [None]:
# Empty coverage summary table; rows are added further down.
CS = pd.DataFrame()
CS['timesUsed']=0

In [None]:
# Plain mean and standard deviation of the play result per coverage.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
print("Unfiltered mean")
for i, val in result1.coverage.value_counts().items():
    yards = result1[result1['coverage'] == i]['playResult']
    dict1 = {'Formation': i,
             'meanYardsGiven' : yards.mean(),
             'sdYardsGiven' : math.sqrt(yards.var())}
    print(dict1)

In [None]:
def filtered_mean(data, sd, f=2):
    """Mean of `data` after discarding values far away from the plain mean.

    Parameters
    ----------
    data : pandas.Series
        Observations to average (anything offering ``to_numpy()``).
    sd : float
        Standard deviation used to size the acceptance band.
    f : float, default 2
        Half-width of the band in multiples of `sd`.

    Returns
    -------
    float
        Mean of the values strictly inside ``(mean - f*sd, mean + f*sd)``.
        A single observation is returned unchanged. If no value survives the
        filter (for example when `sd` is 0 or NaN), the unfiltered mean is
        returned instead of NaN.
    """
    data = data.to_numpy()
    if len(data) == 1:
        return data[0]
    mean = np.mean(data)
    lower = mean - f*sd
    upper = mean + f*sd
    # Both bounds are applied together; filtering twice from `data` would
    # silently drop the lower bound.
    final_data = [x for x in data if lower < x < upper]
    if not final_data:
        return mean
    return np.mean(final_data)

In [None]:
# Outlier-filtered mean and standard deviation of the play result per coverage.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
print("Filtered mean")
for i, val in result1.coverage.value_counts().items():
    yards = result1[result1['coverage'] == i]['playResult']
    yards_sd = math.sqrt(yards.var())
    dict1 = {'coverage': i,
             'meanYardsGiven' : filtered_mean(yards, yards_sd),
             'sdYardsGiven' : yards_sd}
    print(dict1)

In [None]:
# One summary row per coverage. The rows are collected first and the table is
# built once: DataFrame.append and Series.iteritems were removed in pandas 2.0.
# Building the table from scratch also keeps the cell safe to re-run.
coverage_rows = []
for i, val in result.coverage.value_counts().items():
    coverage_plays = result1[result1['coverage'] == i]
    yards = coverage_plays['playResult']
    coverage_rows.append({
        'coverage': i,
        'meanYardsGiven': filtered_mean(yards, math.sqrt(yards.var())),
        'meanEpa': coverage_plays['epa'].mean(),
        'timesUsed': val,
    })
CS = pd.DataFrame(coverage_rows, columns=['timesUsed', 'coverage', 'meanYardsGiven', 'meanEpa'])
CS

In [None]:
# Weight of a coverage: 10 minus the mean yards given minus the mean epa.
CS['Weight'] = 10.0 - CS['meanYardsGiven'] - CS['meanEpa']

In [None]:
# Leave out the "Prevent Zone" coverage and discard the Weight column.
keep_rows = CS['coverage'] != "Prevent Zone"
CS = CS[keep_rows].drop(columns='Weight')
CS

In [None]:
# Share (in percent) of each pass result under Man coverage.
man['passResult'].value_counts(normalize=True)*100


In [None]:
# Share (in percent) of each pass result under Zone coverage.
zone['passResult'].value_counts(normalize=True)*100

In [None]:
# Drop the 'R' pass result and missing values from the Zone plays.
zone = zone[zone.passResult != 'R']
zone = zone.dropna(how='any', subset=['passResult'])

# Percentage of each pass result per coverage family. Labels and values both
# come from the same value_counts() result; unique() lists the labels in
# order of appearance, which does not line up with value_counts().
zone_share = zone['passResult'].value_counts(normalize=True)*100
man_share = man['passResult'].value_counts(normalize=True)*100

plt.scatter(zone_share.index, zone_share.values)
plt.scatter(man_share.index, man_share.values, color = 'red')
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt15.png')
plt.show()

In [None]:
# Number of plays per pass result under Man coverage. Labels and values both
# come from the same value_counts() result so that they line up.
man_counts = man['passResult'].value_counts()
plt.scatter(man_counts.index, man_counts.values)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt16.png')
plt.show()

In [None]:
# Number of plays per pass result, Man vs. Zone. Labels and values both come
# from the same value_counts() result so that they line up.
man_counts = man['passResult'].value_counts()
zone_counts = zone['passResult'].value_counts()
plt.plot(man_counts.index, man_counts.values)
plt.plot(zone_counts.index, zone_counts.values)
# Save before showing: once the figure has been shown, savefig would write an empty image.
plt.savefig('plt17.png')
plt.show()

In [None]:
# Share (in percent) of each route run against Man coverage.
man2['route'].value_counts(normalize=True)*100

In [None]:
# Share (in percent) of each route run against Zone coverage.
zone2['route'].value_counts(normalize=True)*100

In [None]:
# Percentage of each route per coverage family. Labels and values both come
# from the same value_counts() result so that they line up.
zone_routes = zone2['route'].value_counts(normalize=True)*100
man_routes = man2['route'].value_counts(normalize=True)*100
plt.scatter(zone_routes.index, zone_routes.values)
plt.scatter(man_routes.index, man_routes.values, color = 'red')
# Save before showing: once the figure has been shown, savefig would write an
# empty image. The extra save to 'plt1.png' was dropped because it overwrote
# the pair plot saved under that name earlier in the notebook.
plt.savefig('plt18.png')
plt.show()

In [None]:
# Attach the coverage label to every play, joining on the play identifiers;
# matching playId against the row index of `cover` pairs unrelated rows.
# NOTE(review): assumes `cover` has gameId and playId columns - confirm.
result = pd.merge(plays, cover, on=['gameId', 'playId'], how='left', sort=False)

# Is there any way to use player tracking data to predict whether or not certain penalties – for example, defensive pass interference – will be called?

## * Importing all dataset files.

In [None]:
# Play table plus the tracking data of all 17 weeks, stacked into one frame.
# The weekly files are read in a loop and concatenated directly, so no
# per-week copies stay alive next to the combined frame.
plays = pd.read_csv('../input/nfl-big-data-bowl-2021/plays.csv')
week = pd.concat(
    (
        pd.read_csv('../input/nfl-big-data-bowl-2021/week{}.csv'.format(week_number))
        for week_number in range(1, 18)
    ),
    ignore_index=True,
)

## Data Preprocessing

In [None]:
# Keep only the tracking columns needed below, then release the full frame.
weekMod=week[['gameId','playId','frameId','team','jerseyNumber','x','y','event','nflId']]
# Not the last expression of the cell, so this preview is not displayed.
weekMod.head(20)
del week

In [None]:
# Reload the play table and keep the identifier and penalty columns.
# Note: this rebinds `play`, which held the cleaned play table earlier in the notebook.
plays = pd.read_csv('../input/nfl-big-data-bowl-2021/plays.csv')
play=plays[['gameId','playId','penaltyJerseyNumbers','isDefensivePI']]

In [None]:
# Plays flagged with isDefensivePI.
play1=play[play.isDefensivePI==True]

In [None]:
# Only the identifiers are needed for the join with the tracking data.
play1=play1[['gameId','playId']]

In [None]:
# Only the last expression of a cell is displayed, i.e. weekMod.
play1
weekMod

In [None]:
# Inner join (the pd.merge default): keeps the tracking rows of the flagged plays only.
newdf=pd.merge(weekMod,play1,on=['gameId','playId'])
newdf

In [None]:
# Continue with the filtered tracking rows and write them to disk.
weekMod=newdf
weekMod.to_csv('all_frames_with_DPI.csv')

## Finding distance from closest opponent player.

In [None]:
# Everyone on the field per frame, keyed by (gameId, playId, frameId).
# Each entry is [nflId, team, x, y].
groupedWeek = weekMod.groupby(['gameId', 'playId', 'frameId'])
playerXY = {
    name: group[['nflId', 'team', 'x', 'y']].values.tolist()
    for name, group in groupedWeek
}

# For every player row, find the closest player of the other team in the same frame.
minOppDist = []
for player in weekMod.itertuples(index=False):
    # The ball has no opponents.
    if player.team == 'football':
        continue
    rivals = [
        entry for entry in playerXY[(player.gameId, player.playId, player.frameId)]
        if entry[1] != player.team and entry[1] != 'football'
    ]
    # A frame without any opponent gets no record; the left merge below then
    # leaves NaN for that row instead of min() failing on an empty list.
    if not rivals:
        continue
    distances = [
        np.sqrt((player.x - entry[2])**2 + (player.y - entry[3])**2)
        for entry in rivals
    ]
    nearest = int(np.argmin(distances))
    closest_rival = rivals[nearest]
    minOppDist.append([
        player.gameId, player.playId, player.frameId, player.nflId,
        distances[nearest], closest_rival[0], closest_rival[2], closest_rival[3],
    ])

minOppDist = pd.DataFrame(minOppDist, columns=['gameId', 'playId', 'frameId', 'nflId', 'oppMinDist', 'closestOpp(nflId)', 'oppX', 'oppY'])
weekMod = pd.merge(weekMod, minOppDist, how='left', on=['gameId', 'frameId', 'playId', 'nflId'])

# Variance and mean of the closest-opponent distance per play, event and player.
oppVar = weekMod.groupby(['gameId', 'playId', 'event', 'nflId'])['oppMinDist'].agg(['var']).reset_index().rename(columns={"var": "oppVar"})
oppMean = weekMod.groupby(['gameId', 'playId', 'event', 'nflId'])['oppMinDist'].agg(['mean']).reset_index().rename(columns={"mean": "oppMean"})

In [None]:
# Tracking rows with the closest-opponent columns attached.
weekMod

In [None]:
# Summary statistics of the closest-opponent distance.
weekMod['oppMinDist'].describe()

In [None]:
# Closest-opponent distance per event (pandas box plot).
boxplot = weekMod.boxplot(by='event', column=['oppMinDist'], grid = False)
plt.savefig("box1.png")

In [None]:
# Same comparison drawn with seaborn on a wide figure.
sns.set_style("whitegrid") 
# Note: sns.set() is called after set_style(), so it may reset the style chosen above.
sns.set(rc={'figure.figsize':(20,7)})
  
sns.boxplot(x = 'event', y = 'oppMinDist', data = weekMod) 
plt.savefig("box2.png")

The pass_tipped event has the lowest values of oppMinDist, which shows that tipping occurs when players are closest to one another. Logically, a tipped pass also has a high chance of being intercepted.