In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt

# Separation & Coverage Area in the NFL 2018-19 Season


<img src="https://media.giphy.com/media/q6GOATYKkWN0VCYK86/giphy.gif">

# Introduction


In this report I aim to investigate two key areas of a team's defense, the separation of receivers, and the areas of the pitch covered by defenders. With respect to separation, I split player separation into x and y components and use these to look at the advantages and disadvantages of a defender's positioning in reference to the receiver. Regarding defensive areas, I introduce a metric 'Total Coverage Area' to determine how a defense is set up and if it can be used to draw conclusions on why a particular defense has an edge over others.

# Separation

In [None]:
# Load the weekly tracking files, the play-level table and the target labels
import glob

week_paths = glob.glob('../input/nfl-big-data-bowl-2021/week*.csv')
weekly_frames = [pd.read_csv(path, encoding='latin1') for path in week_paths]
# Stack every week into one tracking frame with a fresh 0..n-1 index
df = pd.concat(weekly_frames, ignore_index=True)
plays = pd.read_csv("../input/nfl-big-data-bowl-2021/plays.csv")
# targets.csv comes from a separate utility script
targets = pd.read_csv("../input/targets/targets.csv")


Separation between a defender and the targeted receiver is a crucial factor in whether the pass is successful and in turn whether the offence gains yards. For this section of the report we define separation as the distance in yards from the receiver to the closest defender at the time the ball is thrown by the quarterback. The data confirms our observations that separation is a key factor in pass completion. The histograms below show all passes in the dataset split by completion. A much higher proportion of passes with a defender close to the target resulted in an incompletion. 


In [None]:
# Tracking rows at the frame where the pass is thrown
passes = df[df['event']=='pass_forward']
# Route runners are the rows with a route recorded; merge the targets table onto them
has_route = passes['route'].notnull()
route_runners = passes[has_route].merge(
    targets, how='left', on=['playId', 'gameId', 'nflId'], suffixes=['', '_target'])
# Defensive players are picked out by their listed position
defensive_positions = ['CB', 'OLB', 'DB', 'DE', 'DL', 'ILB', 'FS', 'LB', 'MLB', 'NT', 'SS']
d_players = passes[passes['position'].isin(defensive_positions)]
defender_columns = ['x', 'y', 's', 'a', 'dis', 'o', 'dir', 'nflId', 'displayName',
                    'jerseyNumber', 'position', 'team', 'gameId', 'playId']
d_cols = d_players[defender_columns]
# Left join on the play keys: every route runner is paired with every defender
# on the same play, so their co-ordinates can be compared (_off vs _def columns)
join_df = route_runners.merge(
    d_cols, how='left', on=['gameId', 'playId'], suffixes=['_off', '_def'])

In [None]:
#Define our distance metric
def distance(x,y):
    """Return the straight-line distance between two points.

    ``x`` and ``y`` are indexable; elements 0 and 1 of each are used as the
    two co-ordinates of the point.
    """
    delta_0 = x[0] - y[0]
    delta_1 = x[1] - y[1]
    return np.sqrt((delta_0**2) + (delta_1**2))

In [None]:
#Calculate separations between every player, remove null (play w/ no defense info), & find closest defender
# Euclidean distance computed on whole columns at once; a row-wise apply would
# call Python once per receiver/defender pair
join_df['separations'] = np.sqrt(
    (join_df['x_off'] - join_df['x_def'])**2 + (join_df['y_off'] - join_df['y_def'])**2)
join_df = join_df[join_df['separations'].notnull()]
# idxmin returns, per receiver and play, the row label of the nearest defender
closest_rows = join_df.groupby(['displayName_off', 'playId', 'gameId'])['separations'].idxmin()
min_separations = join_df.loc[closest_rows]
#Merge with play data to get pass results
play_columns = ['yardsToGo', 'down', 'quarter', 'offensePlayResult', 'epa',
                'passResult', 'gameId', 'playId']
merge_df = min_separations.merge(plays[play_columns], on=['gameId', 'playId'], how='left')
#Select only the targets
targ_sep = merge_df[merge_df['target']==1]

In [None]:
# The 'seaborn-bright' style was renamed 'seaborn-v0_8-bright' in Matplotlib 3.6;
# use whichever name this installation provides
bright_style = 'seaborn-bright' if 'seaborn-bright' in plt.style.available else 'seaborn-v0_8-bright'
plt.style.use(bright_style)
fig, axs = plt.subplots(1, 1, figsize=(15, 5))
# Separation of the closest defender, split by pass result (computed once, reused below)
incomplete_sep = targ_sep[targ_sep['passResult']=='I']['separations']
complete_sep = targ_sep[targ_sep['passResult']=='C']['separations']
axs.hist(incomplete_sep, alpha = 0.5, density = True, bins = 36, label='Incompletions', color = 'coral')
axs.hist(complete_sep, alpha = 0.5, density = True, bins = 36, label='Completions', color = 'palegreen')
# Dotted vertical lines mark the median separation of each group
axs.vlines(x=complete_sep.median(), ymin=0, ymax=0.28, linestyle=':', color='green', lw=4.0)
axs.vlines(x=incomplete_sep.median(), ymin=0, ymax=0.34, linestyle=':', color='red', lw=4.0)
# NOTE: the 2.2 / 4.5 yard figures in the annotations are hard-coded text, not read from the medians
axs.text(x=2.4, y=0.3, s ='50% of incompletions occured \n when the defenders was within \n 2.2 yards of the receiver', color='red', fontsize=8)
axs.text(x=4.7, y=0.25, s ='Only 50% of completions occured \n when the defenders was within \n 4.5 yards of the receiver', color='green', fontsize=8)
# Densities are only compared visually, so hide the y axis and the frame around it
for side in ('right', 'top', 'left'):
    axs.spines[side].set_visible(False)
axs.get_yaxis().set_visible(False)
axs.set_xlim(0,18)
axs.set_xlabel('Defender\'s Separation from Targeted Receiver (yards)')
axs.legend(loc='center', frameon=False)
plt.show()

The median value for each group is marked on the chart: 2.2 yards for incompletions and 4.5 yards for completions. Using these values and the counts of passes we can calculate the conditional probability of a successful pass given separation. There are 4924 passes in the dataset where separation is less than 2.2 yards, and only 1973 of these were completed. This gives a pass completion rate of 40.1%. A similar method for separations greater than 4.5 yards gives a completion rate of 81.1%.

Further to this, we can split separation into an x and y component. This breakdown gives us more information on the type of separation between the defender and the target. An example of how this changed over time can be seen in the animation at the beginning of the document.

In [None]:
#X Distance Metric
def onex_distance(direction,x1,x2):
    """Return the signed x offset of point 2 from point 1 along the play direction.

    For ``direction == 'right'`` the result is ``x2 - x1``; for ``'left'`` it
    is ``x1 - x2``. Any other direction value gives 0.
    """
    if direction == 'right':
        return x2 - x1
    if direction == 'left':
        return x1 - x2
    return 0
#Y Distance Metric
def oney_distance(direction,y1,y2):
    """Return the signed y offset of point 2 from point 1, measured away from midfield.

    The pitch is split at y = 53.3/2. When ``y1`` is above that line the
    result is ``y2 - y1``; otherwise it is ``y1 - y2``. A positive value
    therefore means point 2 is further from the centre line than point 1.
    ``direction`` is accepted for symmetry with ``onex_distance`` but not used.
    """
    centre_line = 53.3/2
    if y1 > centre_line:
        return y2 - y1
    if y1 <= centre_line:
        return y1 - y2
    # Only reached when y1 compares false both ways (e.g. NaN)
    return 0

In [None]:
#Split each target's separation into its x and y components
targ_sep = targ_sep.reset_index()
# Walk the needed columns in lockstep and apply the scalar helpers to each row
horiz_values = [
    onex_distance(direction, x_off, x_def)
    for direction, x_off, x_def in zip(targ_sep['playDirection'], targ_sep['x_off'], targ_sep['x_def'])
]
vert_values = [
    oney_distance(direction, y_off, y_def)
    for direction, y_off, y_def in zip(targ_sep['playDirection'], targ_sep['y_off'], targ_sep['y_def'])
]
targ_sep['horiz_sep'] = horiz_values
targ_sep['vert_sep'] = vert_values

The importance of the two separation types is clear when we plot the histograms. While the completions are fairly symmetric about zero with respect to y-separation, the x-separation histogram is shifted to the right, showing that completions are much more frequent when the defender is behind the target. This is to be expected: a negative value for x-separation corresponds to the defender being between the receiver and the line of scrimmage, so it would make sense for them to be able to break up the pass more easily from this position. 

In [None]:
# The 'seaborn-bright' style was renamed 'seaborn-v0_8-bright' in Matplotlib 3.6;
# use whichever name this installation provides
bright_style = 'seaborn-bright' if 'seaborn-bright' in plt.style.available else 'seaborn-v0_8-bright'
plt.style.use(bright_style)
fig, axs = plt.subplots(2, 1, sharex=True, figsize=(14,7))
incomplete_passes = targ_sep[targ_sep['passResult']=='I']
complete_passes = targ_sep[targ_sep['passResult']=='C']
# One panel per separation component: (axes, column, negative-side label, positive-side label, x label)
panels = [
    (axs[0], 'horiz_sep', 'Defender in front of target', 'Defender behind target',
     'X Separation from Targeted Receiver (yards)'),
    (axs[1], 'vert_sep', 'Defender outside of target', 'Defender inside target',
     'Y Separation from Targeted Receiver (yards)'),
]
for panel, column, negative_label, positive_label, x_label in panels:
    panel.hist(incomplete_passes[column], alpha = 0.5, density = True, bins = 36, label='Incompletions', color = 'coral')
    panel.hist(complete_passes[column], alpha = 0.5, density = True, bins = 36, label='Completions', color = 'palegreen')
    # Arrows and captions explain what each sign of the separation means
    panel.text(x=-12, y=0.06, s=negative_label, color='black', fontsize=7)
    panel.text(x=7, y=0.06, s=positive_label, color='black', fontsize=7)
    panel.arrow(-4, 0.05, -8, 0, length_includes_head=True, head_width=0.01, head_length=0.5)
    panel.arrow(4, 0.05, 8, 0, length_includes_head=True, head_width=0.01, head_length=0.5)
    for side in ('right', 'top', 'left'):
        panel.spines[side].set_visible(False)
    panel.get_yaxis().set_visible(False)
    panel.set_xlabel(x_label)
# Only the top panel carries the legend; the x limits are shared between panels
axs[0].legend( frameon=False)
axs[1].set_xlim(-18,18)
plt.show()

If we take a similar approach to calculate pass completion percentage, we find that pass completion is only 57.3% when x-separation is less than 0. This is well below the 66.6% success rate when x-separation is greater than 0, which rises further to 76.5% if only x-separation > +2 is considered. It should also be noted that completion percentage also increases if the x-separation is large enough in the negative direction. This could be because the ball can now be thrown over the defender's head, without the risk of the pass being broken up.

## Player differences

We can take these ideas and look at them on a granular player level. The two players below covered the targeted receiver over 90 times each in the dataset. We can see the difference between them in the x-separation, Munnerlyn has a balanced distribution, but Ramsey never strays in front of the target before the ball has been released. 

In [None]:
#Pick out the targets covered by each of the two defenders being compared
covering_defender = targ_sep['displayName_def']
secondary = targ_sep[covering_defender == 'Jalen Ramsey']
mun = targ_sep[covering_defender == 'Captain Munnerlyn']

In [None]:
# The 'seaborn-bright' style was renamed 'seaborn-v0_8-bright' in Matplotlib 3.6;
# use whichever name this installation provides
bright_style = 'seaborn-bright' if 'seaborn-bright' in plt.style.available else 'seaborn-v0_8-bright'
plt.style.use(bright_style)
fig, axs = plt.subplots(2, 2, sharex=True, figsize=(12,8))
# One column of panels per defender; x-separation on the top row, y-separation underneath
defenders = [(secondary, 'Jalen Ramsey'), (mun, 'Captain Munnerlyn')]
components = [
    ('horiz_sep', 'Defender in front of target', 'Defender behind target',
     'X Separation from Targeted Receiver (yards)'),
    ('vert_sep', 'Defender outside of target', 'Defender inside target',
     'Y Separation from Targeted Receiver (yards)'),
]
for col_idx, (player_targets, player_name) in enumerate(defenders):
    for row_idx, (column, negative_label, positive_label, x_label) in enumerate(components):
        panel = axs[row_idx, col_idx]
        # Every target the defender covered is included, whatever the pass result,
        # so no 'Incompletions' label is attached (no legend is drawn here)
        panel.hist(player_targets[column], alpha = 0.5, density = True, bins = 10, color = 'coral')
        panel.text(x=-12, y=0.06, s=negative_label, color='black', fontsize=7)
        panel.text(x=7, y=0.06, s=positive_label, color='black', fontsize=7)
        panel.arrow(-4, 0.05, -8, 0, length_includes_head=True, head_width=0.01, head_length=0.5)
        panel.arrow(4, 0.05, 8, 0, length_includes_head=True, head_width=0.01, head_length=0.5)
        for side in ('right', 'top', 'left'):
            panel.spines[side].set_visible(False)
        panel.get_yaxis().set_visible(False)
        panel.set_xlabel(x_label)
    axs[0, col_idx].set_title(player_name)
    axs[1, col_idx].set_xlim(-18,18)
plt.show()

This could lead us to believe that Ramsey is unlikely to break up as many plays as Munnerlyn, but in fact the Rams’ cornerback causes one of the highest percentages of incompletions in the league when covering a targeted receiver. It is testament to the quality of his play that he is able to disrupt so many passes without the risk of moving in front of the receiver before the ball is thrown.

We can see a full player breakdown by x-separation and pass completion when covering the target below, with notable players highlighted.

In [None]:
#Select players that have defended more than 6 passes
passes_defended = targ_sep.groupby('displayName_def')['displayName_def'].transform('size')
targ_sep1 = targ_sep[passes_defended > 6]
#Find the median and mean of x-separation for each player
xsep_med = targ_sep1.groupby('displayName_def')['horiz_sep'].median().reset_index()
xsep_mean = targ_sep1.groupby('displayName_def')['horiz_sep'].mean().reset_index()
#Calculate completion % of passes defended
# Count of targets per (defender, pass result, defender position)
pres = targ_sep1.groupby(['displayName_def','passResult', 'position_def'])['vert_sep'].count()
# Divide each count by that defender's total. transform keeps the original index,
# so reset_index works whether or not groupby.apply would prepend the group key.
percs = (100 * pres / pres.groupby(level=0).transform('sum')).reset_index()
complete_percs = percs[percs['passResult']=='C'].reset_index()
# 'vert_sep' now holds the completion share, so its complement is the incompletion %
# NOTE(review): a defender listed under more than one position gets one row per position,
# each holding only that position's share of completions - confirm this is intended
complete_percs.loc[:,'inc'] = 100-complete_percs['vert_sep']
#Join these two dfs
fulldf = complete_percs.merge(xsep_med, how='inner', on='displayName_def')
fulldf = fulldf[fulldf['inc']>0]

In [None]:
#Rows of fulldf for the defenders that get highlighted on the scatter plot
defender_name = fulldf['displayName_def']
kazee = fulldf[defender_name == 'Damontae Kazee']
hooker = fulldf[defender_name == 'Malik Hooker']
reid = fulldf[defender_name == 'Eric Reid']
wright = fulldf[defender_name == 'K.J. Wright']
thompson = fulldf[defender_name == 'Tedric Thompson']

In [None]:
fig, axs = plt.subplots(1,1, figsize=(8,8))
# All defenders in grey, highlighted defenders over-plotted in red with a name label
axs.scatter(fulldf['horiz_sep'], fulldf['inc'],s =8, color='grey')
# (rows for the player, label text, vertical offset of the label from the point)
highlighted = [
    (kazee, 'Damontae Kazee', -4),
    (hooker, 'Malik Hooker', -4),
    (reid, 'Eric Reid', 2),
    (wright, 'K.J. Wright', 2),
    (thompson, 'Tedric Thompson', -4),
]
for player_rows, player_name, label_dy in highlighted:
    # A player filtered out of fulldf has nothing to draw or label
    if player_rows.empty:
        continue
    axs.scatter(player_rows['horiz_sep'], player_rows['inc'],s =16, color='red')
    # text() takes one x and one y, so anchor the label on the first row's scalars
    anchor = player_rows.iloc[0]
    axs.text(anchor['horiz_sep']+0.1, anchor['inc']+label_dy, player_name, size=8)
axs.spines['right'].set_visible(False)
axs.spines['top'].set_visible(False)
axs.set_xlabel('x seperation')
axs.set_ylabel('Incompletion %')
plt.show()

This metric is biased by the defender’s playing position. Ideally it would give, no matter the player's position, a fair description of how a player defends a receiver and how successful this method is. As we see in the plot below, there are large differences depending on where the player starts on the pitch. This discrepancy could come from the fact that safeties and cornerbacks are deeper when the ball is snapped, leading them to be much more likely to be behind a receiver when the ball is thrown. This would not then be a fair comparison to linebackers, as receivers would have more time to run behind them, even if the ball was thrown quickly. This position disparity could also come from how we classify the covering defender. They are the player closest to the receiver when the ball is thrown, but this may not always be the right choice given a player's orientation and current speed. 

In [None]:
#Tools for plotting Convex Hull
from scipy.spatial import ConvexHull
def encircle(x,y, ax=None, **kw):
    """Draw the convex hull of the points (x, y) as a polygon patch.

    Parameters
    ----------
    x, y : array-like
        Co-ordinates of the points to enclose; they are stacked column-wise
        before the hull is computed.
    ax : matplotlib Axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    **kw
        Passed straight to ``plt.Polygon`` (e.g. ``ec``, ``fc``, ``alpha``, ``label``).

    Any error raised by ``scipy.spatial.ConvexHull`` for unsuitable input
    propagates to the caller.
    """
    if ax is None:
        ax = plt.gca()
    points = np.c_[x,y]
    hull = ConvexHull(points)
    poly = plt.Polygon(points[hull.vertices,:], **kw)
    # Draw on the requested axes rather than on whatever module-level name is in scope
    ax.add_patch(poly)
#Plot each position group as points plus the convex hull that encloses them
fig, axs = plt.subplots(1,1, figsize=(7,5))
# (positions in the group, colour, legend label)
position_groups = [
    (['OLB','MLB', 'LB', 'ILB'], 'yellow', 'Linebackers'),
    (['FS','SS'], 'blue', 'Safeties'),
    (['CB','DB'], 'red', 'Cornerbacks'),
]
for positions, colour, legend_label in position_groups:
    group = fulldf[fulldf['position_def'].isin(positions)]
    axs.plot(group.horiz_sep, group.inc, marker='.', linestyle='', markersize=7, color=colour, alpha = 0.6)
    # Only the hull patch carries the label, so the legend shows one entry per group
    encircle(group.horiz_sep, group.inc, ec=colour, fc=colour, alpha=0.1, label=legend_label)
for side in ('right', 'top'):
    axs.spines[side].set_visible(False)
axs.set_xlabel('x seperation')
axs.set_ylabel('Incompletion %')
axs.legend()
plt.show()

## Completion - Yards Gained Trade Off

The advantage of the defender staying behind the receiver can be seen when we look at yards gained on completed passes. Using similar histograms, we can show that although a defender being in front of the receiver has a better chance of breaking up the pass, if the pass is completed then the play is likely to go for more yards than if he was positioned behind the receiver. It suggests the ball was thrown over the defender and the receiver is more likely to have space downfield to run into and gain yards. 

In [None]:
#Add x-separation to min_separations df
# Work on an explicit copy so the new column is added to an independent frame
min_separations = min_separations.copy()
min_separations['horiz_dist'] = min_separations.apply(lambda row: onex_distance(row['playDirection'], row['x_off'], row['x_def']), axis=1)
#Select Targets
route = min_separations[(min_separations['target']==1)]
#Find statistics of min, max, sum of each target runner (Think this is duplicating above...)
x_separation_dist = route.groupby(['playId','gameId', 'route']).agg({'horiz_dist' : ['sum','min', 'max','mean']}).reset_index()
# Flatten the two-level column index produced by agg
x_separation_dist.columns = ['playId','gameId','route','sum','min', 'max','mean']
#Merge with plays for yards gained and EPA
x_dist_df = x_separation_dist.merge(plays[['yardsToGo','down','quarter','offensePlayResult', 'epa', 'passResult', 'gameId','playId']], on=['gameId', 'playId'], how='left')
#Look at just first and 10 plays in the first and third quarters
fandtenq13 = x_dist_df[(x_dist_df['yardsToGo']==10)&(x_dist_df['down']==1)&(x_dist_df['quarter'].isin([1,3]))]
#Also Pick out completions
# dropna() returns a new frame; an in-place dropna would modify a slice of fandtenq13
ften_comp = fandtenq13[fandtenq13['passResult']=='C'].dropna()

In [None]:
#Calculate values for lines
import statsmodels.api as sm
import statsmodels.formula.api as smf
#LAD (median regression of yards gained on mean x-separation)
mod = smf.quantreg('offensePlayResult ~ mean', ften_comp)
res = mod.fit(q=.5)
#print(res.summary())
# Take slope and intercept from the fitted model so the plotted line always
# matches the data in ften_comp, rather than coefficients copied from an earlier run
lad_intercept = res.params['Intercept']
lad_slope = res.params['mean']
abline_values_comp = [lad_slope * i + lad_intercept for i in ften_comp['mean']]

In [None]:
fig, axs = plt.subplots(1, 2, figsize=(12,6))
hist_ax, fit_ax = axs[0], axs[1]
# Left panel: distribution of x-separation for completed vs incomplete passes
completed_sep = fandtenq13[fandtenq13['passResult']=='C']['mean']
incomplete_sep = fandtenq13[fandtenq13['passResult']=='I']['mean']
hist_ax.hist(completed_sep, density=True, alpha=0.5, bins=20, label = 'complete', color = 'palegreen')
hist_ax.hist(incomplete_sep, density=True, alpha=0.5, bins=20,label = 'incomplete',color = 'coral')
hist_ax.set_xlabel('X Separation to Target (Yards)')
hist_ax.set_ylabel('Density')
for side in ('right', 'top'):
    hist_ax.spines[side].set_visible(False)
hist_ax.legend()
# Right panel: yards gained against x-separation on completions, with two fitted lines
x_sep = ften_comp['mean']
yards_gained = ften_comp['offensePlayResult']
fit_ax.scatter(x_sep, yards_gained, color='palegreen', s=7, alpha =0.5, label ='plays')
# Degree-1 least-squares polynomial, evaluated at the sorted unique x values
ols_line = np.poly1d(np.polyfit(x_sep, yards_gained, 1))
unique_x = np.unique(x_sep)
fit_ax.plot(unique_x, ols_line(unique_x), label ='OLS Fit')
fit_ax.plot(x_sep, abline_values_comp, ':', color='red', label ='LAD Fit')
fit_ax.set_xlabel('X separation of target when ball is thrown')
fit_ax.set_ylabel('Yards Gained')
# Arrows and captions explain what each sign of the separation means
fit_ax.arrow(-1, -6, -8, 0, head_width=1)
fit_ax.arrow(1, -6, 8, 0, head_width=1)
fit_ax.text(-12, -10.5, 'In front of receiver', fontsize=7)
fit_ax.text(7, -10.5, 'Behind receiver', fontsize=7)
for side in ('right', 'top'):
    fit_ax.spines[side].set_visible(False)
fit_ax.legend()
plt.show()

This may seem an obvious observation, but it highlights the significance of players like Ramsey being able to stay behind the receiver when the ball is thrown and still break up passes. 

# Total Coverage Area

Another way of quantifying a defense is to look at the area that the players are capable of covering. To do this I developed a new metric, ‘Player Coverage Area’. It is calculated by drawing a circle around the defensive player which has a radius proportional to the distance they are from the ball. It represents an area that the defender could reach when the ball is thrown. This area will be larger the further downfield a defender is, and it will grow proportionally with the square of the distance from the ball. It can also be visualised dynamically, as in the animation below. 

<img src="https://media.giphy.com/media/lZxDs0p1uiNGBQrLN9/giphy.gif">

Initially I investigated whether passes that finished inside a defender's coverage circle were more likely to be incomplete, as the defender has a greater chance of reaching the ball and breaking the pass up. In fact, despite varying the scaling factor of the circle’s radius, often the ratio of complete:incomplete passes was exactly the same for passes inside a circle compared to those that finished outside. 

On a team level I was interested to find any patterns in the total area covered by a team’s defense, the ‘Total Coverage Area’ (TCA), which is the sum of each defensive player’s coverage area. To do this I used a grid sampling algorithm to ensure the area estimate did not double count the areas of any overlapping circles. The area was also bounded by the end of the endzone and the sidelines so that any area that was out of bounds was not included in the sum. I looked for any correlations of the TCA to yards gained, EPA or the current game state. No clear patterns emerged and the distributions of each team’s TCA were all very similar.

TCA can be used as an alternative measure to show how defenses line up. Rather than just giving the positions of players, we can give a value of TCA. A larger TCA value signifies a deeper defense with more defenders covering deep passes, since they are further from the quarterback. The differences in TCA when the offense is fielding varying numbers of wide receivers, and also the number of defensive backs lining up in the secondary, are shown in the histograms below. There is a lot of overlap in TCA values for the number of wide receivers faced or defensive backs on the play. This is why I think TCA can be more useful for describing a defense. It gives a sense of how the players are positioned on the pitch in comparison to the quarterback, rather than just a list of positions on the play. TCA can be measured at any point during a play, but the examples below use the moment the ball was thrown.

In [None]:
#Join first-and-ten play info onto the defensive players at the moment of the pass
is_first_and_ten = (plays['yardsToGo']==10) & (plays['down']==1)
first = plays[is_first_and_ten]
# NOTE(review): how='left' keeps every defender row; rows from plays that are not
# first-and-ten stay in defense_p with NaN play columns - confirm that is intended
defense_p = d_players.merge(first, how = 'left', on = ['gameId', 'playId'])
# The ball appears in the tracking data under the display name 'Football'
fball_loc = passes[passes['displayName']=='Football']

In [None]:
#Calculate distance from every player to the ball, and give a radius value dependent on this
# One ball position per play: keep the first tracking row for each (gameId, playId)
ball_xy = (
    fball_loc.drop_duplicates(subset=['gameId', 'playId'])[['gameId', 'playId', 'x', 'y']]
    .rename(columns={'x': 'ball_x', 'y': 'ball_y'})
)
# Left merge keeps one row per defender, in defense_p order; '_merge' records
# whether a ball row was found for that play
with_ball = defense_p[['gameId', 'playId', 'x', 'y']].merge(
    ball_xy, on=['gameId', 'playId'], how='left', indicator=True)
ball_dist = np.sqrt(
    (with_ball['x'] - with_ball['ball_x'])**2 + (with_ball['y'] - with_ball['ball_y'])**2)
# Plays with no football row get a distance of 0 (and therefore a zero radius)
ball_dist = ball_dist.where(with_ball['_merge'] == 'both', 0)
# Assign by position: with_ball has a fresh index, defense_p keeps its own
defense_p['ball'] = ball_dist.to_numpy()
# Coverage radius is half the distance to the ball
defense_p['radius'] = (defense_p['ball']/2)

In [None]:
#Create the Circles for each player and use Grid Sampling to find the total area
from collections import namedtuple
 
# Lightweight record for one coverage circle: centre (x, y) and radius r
Circle = namedtuple("Circle", ["x", "y", "r"])
 
#circles = []
#for i in range(len(test_snap)):
    #circ = Circle(test_snap['x'].iloc[i], test_snap['y'].iloc[i], test_snap['radius'].iloc[i])
    #circles.append(circ)

def main(list_of_circles, box_side=200):
    """Estimate the area covered by a set of circles using grid sampling.

    The bounding box of the circles is clipped to 0 <= x <= 120 and
    0 <= y <= 53.3, then divided into ``box_side`` x ``box_side`` cells. The
    lower-left corner of every cell is tested against each circle, and each
    covered cell is counted once, so overlapping circles are not double counted.

    Parameters
    ----------
    list_of_circles : iterable
        Objects exposing ``x``, ``y`` (centre) and ``r`` (radius) attributes.
    box_side : int, optional
        Number of sample points along each axis. Defaults to 200.

    Returns
    -------
    float
        Estimated area. ``0.0`` when there are no circles or when the clipped
        bounding box has no extent (e.g. every circle lies outside the limits).

    Raises
    ------
    ValueError
        If ``box_side`` is smaller than 1.
    """
    if box_side < 1:
        raise ValueError("box_side must be a positive integer")
    circles = list(list_of_circles)
    if not circles:
        return 0.0

    # compute the bounding box of the circles, clipped to the limits above
    x_min = max(0, min(c.x - c.r for c in circles))
    x_max = min(120, max(c.x + c.r for c in circles))
    y_min = max(0, min(c.y - c.r for c in circles))
    y_max = min(53.3, max(c.y + c.r for c in circles))
    # An empty or inverted box means nothing falls inside the limits
    if x_max <= x_min or y_max <= y_min:
        return 0.0

    dx = (x_max - x_min) / box_side
    dy = (y_max - y_min) / box_side

    # Sample co-ordinates: xs is a row vector and ys a column vector, so the
    # comparison below broadcasts to the full box_side x box_side grid
    xs = x_min + np.arange(box_side) * dx
    ys = (y_min + np.arange(box_side) * dy)[:, np.newaxis]

    covered = np.zeros((box_side, box_side), dtype=bool)
    for circle in circles:
        covered |= (xs - circle.x)**2 + (ys - circle.y)**2 <= (circle.r ** 2)

    return float(covered.sum() * dx * dy)
# One Circle per defender row: centred on the player, sized by the 'radius' column
defense_p['circle'] = defense_p.apply(lambda row: Circle(row['x'], row['y'], row['radius']), axis =1)
#Collect the circles of each play into a list, then estimate the area they cover
circles_by_play = defense_p.groupby(['gameId', 'playId'])['circle'].apply(list)
group_circles = circles_by_play.reset_index()
group_circles['area'] = group_circles['circle'].apply(main)

In [None]:
#Attach play-level info (result, formation, offensive and defensive personnel) to each play's area
play_info_columns = [
    'possessionTeam', 'yardsToGo', 'down', 'offensePlayResult', 'epa', 'passResult',
    'gameId', 'playId', 'offenseFormation', 'defendersInTheBox', 'numberOfPassRushers',
    'personnelO', 'personnelD',
]
players = group_circles.merge(plays[play_info_columns], how='left', on=['gameId', 'playId'])

In [None]:
#remove sacks etc, with no pass point
has_area = players['area']>0
#remove NaNs, missing personnel data in some rows
has_personnel = players['personnelO'].notna()
# Copy so the new columns are added to an independent frame, not a slice of `players`
plot_df = players[has_area & has_personnel].copy()
#Get no. of WRs & DBs on each play
import re  # no longer used here; kept so `re` stays available to later cells
# Take the first number that precedes ' WR' / ' DB' in the personnel string.
# Rows with no such entry (or a missing personnel string) get NaN instead of raising.
plot_df['WRS'] = pd.to_numeric(plot_df['personnelO'].str.extract(r'(\d+) WR', expand=False))
plot_df['DBS'] = pd.to_numeric(plot_df['personnelD'].str.extract(r'(\d+) DB', expand=False))

In [None]:
# The 'seaborn-bright' style was renamed 'seaborn-v0_8-bright' in Matplotlib 3.6;
# use whichever name this installation provides
bright_style = 'seaborn-bright' if 'seaborn-bright' in plt.style.available else 'seaborn-v0_8-bright'
plt.style.use(bright_style)
fig, ax = plt.subplots(figsize=(12, 10), nrows=4, sharey=True, sharex=True)
# One panel per number of wide receivers on the play
for panel, n_receivers in zip(ax, [1, 2, 3, 4]):
    subset = plot_df[plot_df['WRS']==n_receivers]
    # Yards gained against Total Coverage Area
    panel.scatter(subset['area'], subset['offensePlayResult'],alpha=0.15, s=8, color='#00008B')
    # A twin y-axis carries the density histogram of the area so it can share the x-axis
    density_ax = panel.twinx()
    density_ax.hist(subset['area'], bins=50, density = True, alpha = 0.2, color='#8B008B')
    panel.set_title('{} Wide Receiver'.format(n_receivers))
    for side in ('right', 'top'):
        panel.spines[side].set_visible(False)
    panel.set_xlim([0, 3500])
    # The density scale is not of interest, so hide the twin axis decorations
    for side in ('right', 'top', 'left'):
        density_ax.spines[side].set_visible(False)
    density_ax.get_yaxis().set_visible(False)
# Shared axis captions for the whole figure
fig.text(0.5, 0.08, 'Total Coverage Area', ha='center')
fig.text(0.08, 0.5, 'Yards Gained', ha='center', rotation=90)
plt.show()

In [None]:
# The 'seaborn-bright' style was renamed 'seaborn-v0_8-bright' in Matplotlib 3.6;
# use whichever name this installation provides
bright_style = 'seaborn-bright' if 'seaborn-bright' in plt.style.available else 'seaborn-v0_8-bright'
plt.style.use(bright_style)
fig, ax = plt.subplots(figsize=(12, 10), nrows=4, sharey=True, sharex=True)
# One panel per number of defensive backs on the play
for panel, n_backs in zip(ax, [4, 5, 6, 7]):
    subset = plot_df[plot_df['DBS']==n_backs]
    # Yards gained against Total Coverage Area
    panel.scatter(subset['area'], subset['offensePlayResult'],alpha=0.15, s=8, color='#00008B')
    # A twin y-axis carries the density histogram of the area so it can share the x-axis
    density_ax = panel.twinx()
    density_ax.hist(subset['area'], bins=50, density = True, alpha = 0.2, color='#8B008B')
    panel.set_title('{} Defensive Backs'.format(n_backs))
    for side in ('right', 'top'):
        panel.spines[side].set_visible(False)
    panel.set_xlim([0, 3500])
    # The density scale is not of interest, so hide the twin axis decorations
    for side in ('right', 'top', 'left'):
        density_ax.spines[side].set_visible(False)
    density_ax.get_yaxis().set_visible(False)
# Shared axis captions for the whole figure
fig.text(0.5, 0.08, 'Total Coverage Area', ha='center')
fig.text(0.08, 0.5, 'Yards Gained', ha='center', rotation=90)
plt.show()

I believe that this is an interesting, but too simplistic metric. While it is informative for describing a team line-up, further developments would be needed to improve the metric to a stage it could be used to inform choices for a team. I would try to include more information in the circles defined for each defender. This could be changing to an ellipse with the defender at one focal point and incorporating the current speed and orientation of the player into the size and shape of the ellipse.

## Conclusion

This report has highlighted the trade-off defenders face with respect to separation when covering a receiver. By being in front of the receiver when the ball is thrown, they increase the probability of breaking the pass up, but if the pass is completed it is more likely the play will result in more yards gained. Total Coverage Area was introduced as a metric to track defensive coverage during a play and we looked into using it to identify a correlation with yards gained by the offense.