In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import glob
import matplotlib.pyplot as plt
import matplotlib.patches as patches
pd.set_option('max_columns', 1000)
from tqdm import tqdm
from sklearn.neighbors import BallTree
import math
from scipy.spatial import Voronoi, voronoi_plot_2d
from datetime import datetime
import pytz
from IPython.display import HTML
import scipy.stats as stats
import matplotlib as mpl
from matplotlib import animation, rc, use
from matplotlib.patches import Rectangle, Arrow
import tensorflow as tf
from matplotlib.patches import Polygon
import matplotlib.patheffects as pe
import gc


def reduce_mem_usage(df):
    """Downcast the columns of ``df`` in place and return the same frame.

    * ``object`` columns are converted to ``category``.
    * Signed / unsigned NumPy integer columns are cast to the narrowest
      integer type of the same signedness that holds their min and max.
    * NumPy float columns are cast to float16 if their range fits, else
      float32, else float64 (an all-NaN column ends up float64).
    * Every other dtype (bool, datetime, category, pandas extension
      dtypes, ...) is left untouched.

    NOTE: the float16 / float32 downcast only checks the value *range*, so
    it can lose precision. Callers that need exact values should cast the
    column back up before doing arithmetic on it.
    """
    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain NumPy integer/float columns are candidates. Checking the
        # dtype kind (rather than the dtype's name prefix) keeps bool and
        # unsigned columns from being treated as floats.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        if col_type.kind in 'iu':
            if df[col].empty:
                # min()/max() of an empty column are NaN; nothing to decide.
                continue
            c_min = df[col].min()
            c_max = df[col].max()
            candidates = signed_types if col_type.kind == 'i' else unsigned_types
            for candidate in candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: the extreme values of a type fit in it.
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                df[col] = df[col].astype(np.float16)
            elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)

    return df


def get_dx_dy(radian_angle, dist):
    """Split a length ``dist`` at angle ``radian_angle`` (radians) into its
    x and y components and return them as ``(dx, dy)``."""
    return dist * math.cos(radian_angle), dist * math.sin(radian_angle)


def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12*2, 6.33*2)):
    """
    Draw a 120 x 53.3 football field and return ``(fig, ax)``.

    Parameters
    ----------
    linenumbers : bool
        Write the yard numbers along both sidelines.
    endzones : bool
        Shade the two 10-unit end zones; also restricts the hash marks to
        x = 11..109 instead of x = 1..119.
    highlight_line : bool
        Draw a yellow vertical line labelled ``highlighted_name``.
    highlight_line_number : int
        Position of the highlighted line; 10 is added to it before plotting
        (the width of the left end zone).
    highlighted_name : str
        Label written next to the highlighted line.
    fifty_is_los : bool
        Draw a gold line at x = 60 labelled as the player yardline at snap.
    figsize : tuple
        Figure size passed to ``plt.subplots``.
    """
    fig, ax = plt.subplots(1, figsize=figsize)

    # Playing surface.
    rect = patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                             edgecolor='r', facecolor='slategrey', zorder=0)
    ax.add_patch(rect)

    # One continuous polyline tracing every 10-unit line and the outer border.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='white')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')

    # Endzones
    if endzones:
        ez1 = patches.Rectangle((0, 0), 10, 53.3,
                                linewidth=0.3,
                                edgecolor='k',
                                facecolor='royalblue',
                                alpha=0.4,
                                zorder=1)
        # Rectangle takes (xy, width, height): the right end zone starts at
        # x=110 and is 10 wide (it previously received 120 as its width).
        ez2 = patches.Rectangle((110, 0), 10, 53.3,
                                linewidth=0.3,
                                edgecolor='k',
                                facecolor='royalblue',
                                alpha=0.4,
                                zorder=1)
        ax.add_patch(ez1)
        ax.add_patch(ez2)

    ax.set_xlim(0, 120)
    ax.set_ylim(0, 53.3)
    ax.axis('off')

    if linenumbers:
        for x in range(20, 110, 10):
            # Labels count up to 50 and back down; subtracting 10 removes
            # the end-zone offset from the x coordinate.
            numb = x
            if x > 50:
                numb = 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white')
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white', rotation=180)

    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    # Four short tick marks at every unit of x: both sidelines and two
    # interior rows.
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    if highlight_line:
        hl = highlight_line_number + 10
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return fig, ax



class CreateNFLData:
    """Loading and feature-engineering helpers for player-tracking frames."""

    def __init__(self):
        pass

    def LoadData(self, Normal=True):
        """Read every matching CSV and return them concatenated.

        Parameters
        ----------
        Normal : bool
            Truthy: read ``week*.csv`` from the working directory.
            Falsy: read ``Revised Data/*.csv``.

        Returns
        -------
        pandas.DataFrame
            All files concatenated, with the (first-column) index of each
            file turned back into a regular column by ``reset_index``.

        Raises
        ------
        ValueError
            From ``pd.concat`` when no file matches the pattern.
        """
        if Normal:
            print("Loading Original Data")
            pattern = "week*.csv"
        else:
            print("Loading Modified Data")
            pattern = "Revised Data/*.csv"

        frames = [pd.read_csv(path, index_col=0)
                  for path in tqdm(glob.glob(pattern))]
        return pd.concat(frames).reset_index()

    def Standardize(self, W):
        """Add standardized position/direction columns to ``W`` in place.

        Reads the columns ``dir``, ``o``, ``s``, ``x``, ``y``,
        ``playDirection``, ``possessionTeam``, ``PlayerTeam``,
        ``yardlineNumber`` and ``yardlineSide``. Rows whose
        ``playDirection`` is ``"left"`` are mirrored (x -> 120 - x,
        y -> 160/3 - y, angles rotated by pi) so that every play runs in the
        same direction. Returns ``W``.
        """
        print("Standardizing Data..")
        W['Dir_rad'] = np.mod(90 - W.dir, 360) * math.pi/180.0
        W['ToLeft'] = W.playDirection == "left"
        W['TeamOnOffense'] = "home"
        W.loc[W.possessionTeam != W.PlayerTeam, 'TeamOnOffense'] = "away"
        # NOTE(review): this compares PlayerTeam with the "home"/"away" label
        # written just above; it is only meaningful if PlayerTeam holds those
        # labels - confirm against the input data.
        W['IsOnOffense'] = W.PlayerTeam == W.TeamOnOffense  # Is player on offense?

        # Yard line measured from one fixed end: the raw number when the ball
        # is on the possession team's side, 100 - number otherwise.
        W['YardLine_std'] = 100 - W.yardlineNumber
        own_side = W.yardlineSide.fillna('') == W.possessionTeam
        W.loc[own_side, 'YardLine_std'] = W.loc[own_side, 'yardlineNumber']

        to_left = W.ToLeft
        W['X_std'] = W.x
        W.loc[to_left, 'X_std'] = 120 - W.loc[to_left, 'x']
        W['Y_std'] = W.y
        W.loc[to_left, 'Y_std'] = 160/3 - W.loc[to_left, 'y']

        W['Dir_std'] = W.Dir_rad
        W.loc[to_left, 'Dir_std'] = np.mod(np.pi + W.loc[to_left, 'Dir_rad'], 2*np.pi)

        # Velocity components from speed and standardized direction.
        W['dx'] = (W['s'] * np.cos(W['Dir_std'])).round(2)
        W['dy'] = (W['s'] * np.sin(W['Dir_std'])).round(2)
        W['X_std'] = W['X_std'].round(2)
        W['Y_std'] = W['Y_std'].round(2)

        W['Orientation_rad'] = np.mod(-W.o + 90, 360) * math.pi/180.0
        W['Orientation_std'] = W.Orientation_rad
        W.loc[to_left, 'Orientation_std'] = np.mod(np.pi + W.loc[to_left, 'Orientation_rad'], 2*np.pi)

        # NOTE(review): 0.488889 looks like a yards-per-second -> mph factor;
        # confirm the unit of ``s``.
        W['MPH'] = W['s'] / 0.488889
        return W

    def FrameData(self, WeekData1):
        """Attach per-play event frames and line-of-scrimmage features.

        ``WeekData1`` must already contain a ``Code`` column of the form
        ``"<gameId>-<playId>"`` plus ``gameId``, ``playId``, ``frameId``,
        ``event``, ``displayName``, ``X_std`` and ``Y_std``. The frame is
        modified in place and returned.

        Added columns:

        * one column per event in ``Cols`` holding the last ``frameId`` at
          which that event occurred in the play, and ``Contains_<event>``
          flags;
        * ``After_snap``, ``After_Throw``, ``After_PassArrived``,
          ``After_PlayAction``, ``After_Catch`` (1 when the row's frame is
          later than the event frame, else 0);
        * ``LOSX``/``LOSY`` (median football position before the snap) and
          the signed/absolute distances of each row from it.

        Raises ``KeyError`` if one of the events listed in ``Cols`` never
        occurs in the data.
        """
        # Last frame of every named event, pivoted to one row per play.
        NotNone = WeekData1.query('event != "None"')
        NotNone = NotNone.groupby(['gameId','playId','event'])['frameId'].max().reset_index()
        NotNone = NotNone.set_index(['gameId','playId','event'], drop=True).unstack('event').reset_index()
        # Flatten the ('frameId', <event>) column MultiIndex down to <event>.
        NotNone.columns = [' '.join(col).strip() for col in NotNone.columns.values]
        NotNone.columns = NotNone.columns.str.replace('frameId', '')
        NotNone.columns = NotNone.columns.str.replace(' ', '')
        NotNone['Code'] = NotNone['gameId'].astype(str) + "-" + NotNone['playId'].astype(str)
        NotNone = NotNone.set_index('Code')
        NotNone = NotNone.loc[~NotNone.index.duplicated(keep='first')]

        for col in tqdm(NotNone.columns):
            NotNone['Contains_' + str(col)] = np.where(NotNone[col] > 0, True, False)

        Cols = ['ball_snap', 'man_in_motion', 'pass_arrived', 'pass_forward','pass_outcome_caught', 'play_action', 'run_pass_option', 'Contains_man_in_motion', 'Contains_pass_arrived', 'Contains_pass_forward', 'Contains_pass_outcome_caught', 'Contains_play_action','Contains_run_pass_option']
        for col in Cols:
            WeekData1[col] = WeekData1.Code.map(NotNone[col])

        del NotNone
        gc.collect()

        # A missing event frame is NaN, so the comparison is False -> 0.
        WeekData1['After_snap'] = np.where(WeekData1['frameId'] > WeekData1['ball_snap'],1,0)
        WeekData1['After_Throw'] = np.where(WeekData1['frameId'] > WeekData1['pass_forward'],1,0)
        WeekData1['After_PassArrived'] = np.where(WeekData1['frameId'] > WeekData1['pass_arrived'],1,0)
        WeekData1['After_PlayAction'] = np.where(WeekData1['frameId'] > WeekData1['play_action'],1,0)
        WeekData1['After_Catch'] = np.where(WeekData1['frameId'] > WeekData1['pass_outcome_caught'],1,0)

        # Line of scrimmage: median football position over the pre-snap frames.
        LOS = WeekData1.query('displayName == "Football" & After_snap == 0')
        # A list (not a bare tuple) of columns: tuple selection on a groupby
        # is rejected by newer pandas releases.
        LOS = LOS.groupby(['gameId','playId'])[['X_std','Y_std']].agg('median').reset_index()
        LOS.columns = ['gameId','playId','LOSX','LOSY']
        LOS['Code'] = LOS['gameId'].astype(str) + "-" + LOS['playId'].astype(str)
        LOS = LOS.set_index('Code')
        LOS = LOS.loc[~LOS.index.duplicated(keep='first')]
        WeekData1["LOSX"] = WeekData1.Code.map(LOS['LOSX'])
        WeekData1["LOSY"] = WeekData1.Code.map(LOS['LOSY'])
        WeekData1['Distfrom_LOSX'] = WeekData1['X_std'] - WeekData1['LOSX']
        WeekData1['Distfrom_LOSY'] = WeekData1['Y_std'] - WeekData1['LOSY']
        WeekData1['AbsDistfrom_LOSX'] = np.abs(WeekData1['X_std'] - WeekData1['LOSX'])
        WeekData1['AbsDistfrom_LOSY'] = np.abs(WeekData1['Y_std'] - WeekData1['LOSY'])
        del LOS
        gc.collect()
        return WeekData1

    def import_data(self, file, columns=False, cols=""):
        """Read ``file`` with ``pd.read_csv`` and shrink it with
        ``reduce_mem_usage``.

        When ``columns`` is truthy only the columns listed in ``cols`` are
        read (``usecols=cols``); otherwise the whole file is read.
        """
        if columns:
            df = pd.read_csv(file, low_memory=False, usecols=cols)
        else:
            df = pd.read_csv(file, low_memory=False)
        return reduce_mem_usage(df)
    
    






class AnimatePlay:
    """Build a matplotlib ``FuncAnimation`` of one play's tracking frames.

    The finished animation is available as ``self.ani``.

    Parameters
    ----------
    play_df : pandas.DataFrame
        Rows of a single play. The code reads the columns ``time``,
        ``team`` (``'football'`` / ``'home'`` / ``'away'``), ``event``,
        ``jerseyNumber``, ``position``, ``displayName``, ``frameId``,
        ``nflId``, ``s``, ``X_std``, ``Y_std``, ``dx``, ``dy``,
        ``Orientation_std`` and the column named by ``Show``; with
        ``Tri=True`` also ``X_std_COpp``, ``Y_std_COpp``, ``X_std_QB`` and
        ``Y_std_QB``.
    player_id : list, optional
        ``nflId`` values to highlight when ``method == 'single'``.
    Tri : bool
        Draw a triangle player / closest opponent / QB for highlighted
        players (only with ``method == 'single'``).
    MPH : bool
        Label players whose ``s / 0.488889`` exceeds 17.
    Text : str
        Prefix of the value label in ``'single'`` mode.
    Show : str
        Column whose value is written above each player.
    method : str
        ``'single'`` labels only the players in ``player_id``; any other
        value labels every player.
    """

    def __init__(self, play_df, player_id=None, Tri=False, MPH=False, Text="", Show='jerseyNumber', method='all') -> None:
        self._MAX_FIELD_Y = 53.3
        self._MAX_FIELD_X = 120
        self._MAX_FIELD_PLAYERS = 22

        self.Tri = Tri
        self.MPH = MPH
        # A fresh list per instance rather than a shared mutable default.
        self.player_id = [] if player_id is None else player_id
        self.Show = Show
        self.method = method
        self.Text = Text

        self._CPLT = sns.color_palette("husl", 2)
        self._frame_data = play_df
        self._times = sorted(play_df.time.unique())
        # Kept for backward compatibility; update() no longer consumes it.
        self._stream = self.data_stream()
        self._is_set_up = False

        self._date_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        self._mean_interval_ms = self._compute_mean_interval_ms()

        self._fig, self._ax_field = create_football_field()

        self._fig.set_figheight(10)
        self._fig.set_figwidth(15)

        self._fig.tight_layout()

        self._ax_field = plt.gca()

        # Separate overlay axes for each team's markers and for the labels.
        self._ax_home = self._ax_field.twinx()
        self._ax_away = self._ax_field.twinx()
        self._ax_jersey = self._ax_field.twinx()

        self.ani = animation.FuncAnimation(self._fig, self.update, frames=len(self._times), interval=self._mean_interval_ms,
                                           init_func=self.setup_plot, blit=False)

        # Close the figure so it is not also rendered as a static image.
        plt.close()

    def _compute_mean_interval_ms(self):
        """Mean gap between consecutive timestamps, in milliseconds."""
        zone = pytz.timezone('US/Eastern')
        stamps = [zone.localize(datetime.strptime(date_string, self._date_format))
                  for date_string in self._times]
        gaps = np.diff(np.array(stamps))
        if len(gaps) == 0:
            # A single frame has no gap to average; 100 ms is an arbitrary
            # fallback that keeps the animation interval finite.
            return 100.0
        # total_seconds() keeps whole seconds, unlike ``.microseconds``.
        return np.mean([gap.total_seconds() * 1000 for gap in gaps])

    @staticmethod
    def set_axis_plots(ax, max_x, max_y) -> None:
        """Hide both axes of ``ax`` and fix its limits to [0, max_x] x [0, max_y]."""
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)

        ax.set_xlim([0, max_x])
        ax.set_ylim([0, max_y])

    @staticmethod
    def convert_orientation(x):
        """Return ``(90 - x) mod 360``."""
        return (-x + 90)%360

    @staticmethod
    def polar_to_z(r, theta):
        """Return the complex number with modulus ``r`` and argument ``theta`` (radians)."""
        return r * np.exp( 1j * theta)

    @staticmethod
    def deg_to_rad(deg):
        """Convert degrees to radians."""
        return deg*np.pi/180

    @staticmethod
    def _format_value(value):
        """Return ``str(round(value, 2))``, or ``value`` unchanged if it cannot be rounded."""
        try:
            return str(round(value, 2))
        except (TypeError, ValueError):
            return value

    def data_stream(self):
        """Yield the rows of each timestamp in chronological order (one pass only)."""
        for time in self._times:
            yield self._frame_data[self._frame_data.time == time]

    def _frame_at(self, anim_frame):
        """Return the rows of the ``anim_frame``-th timestamp (wrapping around)."""
        stamp = self._times[anim_frame % len(self._times)]
        return self._frame_data[self._frame_data.time == stamp]

    def _artists(self):
        """All scatter and text artists that update() repositions."""
        return (self._scat_field, self._scat_home, self._scat_away, *self._scat_mph_list, *self._scat_jersey_list, *self._scat_number_list, *self._scat_name_list)

    def setup_plot(self):
        """Create the artists once; later calls return the existing ones."""
        if getattr(self, '_is_set_up', False):
            # The animation machinery may call init_func again (e.g. for a
            # second render); re-creating artists would leave stale copies
            # of the old ones on the axes.
            return self._artists()

        self.set_axis_plots(self._ax_field, self._MAX_FIELD_X, self._MAX_FIELD_Y)

        # Dashed line at the football's x position on the snap frame, if the
        # play contains one.
        ball_snap_df = self._frame_data[(self._frame_data.event == 'ball_snap') & (self._frame_data.team == 'football')]
        if len(ball_snap_df) > 0:
            self._ax_field.axvline(ball_snap_df.X_std.to_numpy()[0], color = 'yellow', linestyle = '--')

        self.set_axis_plots(self._ax_home, self._MAX_FIELD_X, self._MAX_FIELD_Y)
        self.set_axis_plots(self._ax_away, self._MAX_FIELD_X, self._MAX_FIELD_Y)
        self.set_axis_plots(self._ax_jersey, self._MAX_FIELD_X, self._MAX_FIELD_Y)

        for idx in range(10,120,10):
            self._ax_field.axvline(idx, color = 'k', linestyle = '-', alpha = 0.05)

        self._scat_field = self._ax_field.scatter([], [], s = 200, color = 'red')
        self._scat_home = self._ax_home.scatter([], [], s = 900, color = self._CPLT[0], edgecolors = 'k')
        self._scat_away = self._ax_away.scatter([], [], s = 900, color = self._CPLT[1], edgecolors = 'k')

        def new_label(**style):
            # Empty centred bold text; update() fills in text and position.
            return self._ax_jersey.text(0, 0, '', horizontalalignment = 'center', verticalalignment = 'center', fontweight='bold', **style)

        self._scat_jersey_list = []
        self._scat_number_list = []
        self._scat_name_list = []
        self._scat_mph_list = []
        self._a_dir_list = []
        self._a_or_list = []
        self._a_tri_list = []
        for _ in range(self._MAX_FIELD_PLAYERS):
            self._scat_jersey_list.append(new_label(c = 'black', fontsize='large', path_effects=[pe.withStroke(linewidth=3, foreground="white")]))
            self._scat_number_list.append(new_label(c = 'white', fontsize=14, path_effects=[pe.withStroke(linewidth=5, foreground="dodgerblue")]))
            self._scat_name_list.append(new_label(c = 'black', fontsize='larger', path_effects=[pe.withStroke(linewidth=5, foreground="gold")]))
            self._scat_mph_list.append(new_label(c = 'lime', fontsize='larger'))

            # Placeholder patches; update() removes and replaces them.
            self._a_dir_list.append(self._ax_field.add_patch(Arrow(0, 0, 0, 0, color = 'k')))
            self._a_or_list.append(self._ax_field.add_patch(Arrow(0, 0, 0, 0, color = 'k')))
            self._a_tri_list.append(self._ax_field.add_patch(Arrow(0, 0, 0, 0, color = 'k')))

        self._is_set_up = True
        return self._artists()

    def update(self, anim_frame):
        """Move every artist to its position at frame ``anim_frame``."""
        # Look the frame up by index instead of consuming a one-shot
        # generator, so the animation can repeat or be rendered again.
        pos_df = self._frame_at(anim_frame)

        scatters = {'football': self._scat_field, 'home': self._scat_home, 'away': self._scat_away}
        for label in pos_df.team.unique():
            if label not in scatters:
                continue
            label_data = pos_df[pos_df.team == label]
            scatters[label].set_offsets(np.vstack([label_data.X_std, label_data.Y_std]).T)

        # Rows with a jersey number are the players (the football has none).
        for (index, row) in pos_df[pos_df.jerseyNumber.notnull()].reset_index().iterrows():
            highlighted = bool(np.isin(row.nflId, self.player_id))

            self._scat_jersey_list[index].set_position((row.X_std, row.Y_std))
            self._scat_jersey_list[index].set_text(row['position'])

            shown = self._format_value(row[self.Show])
            if self.method == 'single':
                self._scat_number_list[index].set_text(str(self.Text) + " " + str(shown) if highlighted else "")
            else:
                self._scat_number_list[index].set_text(shown)
            self._scat_number_list[index].set_position((row.X_std, row.Y_std+2.4))

            # Surnames are only shown during the first ten frames.
            self._scat_name_list[index].set_text(row.displayName.split()[-1] if row.frameId <= 10 else "")
            self._scat_name_list[index].set_position((row.X_std, row.Y_std-1.9))

            if self.MPH == True:
                mph = row.s / 0.488889
                self._scat_mph_list[index].set_text(str(round(float(mph),2)) + " MPH" if mph > 17 else "")
                self._scat_mph_list[index].set_position((row.X_std, row.Y_std+1.9))

            player_vel = np.array([row.dx, row.dy])
            orient_z = self.polar_to_z(3, row.Orientation_std)
            player_orient = np.array([np.real(orient_z), np.imag(orient_z)])

            # Arrows cannot be moved in place: drop the old patch, add a new one.
            self._a_dir_list[index].remove()
            self._a_dir_list[index] = self._ax_field.add_patch(Arrow(row.X_std, row.Y_std, player_vel[0], player_vel[1], color = 'black'))

            self._a_or_list[index].remove()
            self._a_or_list[index] = self._ax_field.add_patch(Arrow(row.X_std, row.Y_std, player_orient[0], player_orient[1], color = 'blue', width = 2))

            if self.Tri == True and self.method == 'single' and highlighted:
                self._a_tri_list[index].remove()
                self._a_tri_list[index] = self._ax_field.add_patch(Polygon([[row.X_std, row.Y_std], [row.X_std_COpp,row.Y_std_COpp],[row.X_std_QB,row.Y_std_QB]], closed=True, fill=False, hatch='/',color='lime'))

        return self._artists()

In [None]:
import pandas as pd
import numpy as np
import os
import seaborn as sns
import glob
from tqdm import tqdm

import matplotlib.pyplot as plt
import matplotlib.patches as patches
pd.set_option('max_columns', 1000)

from sklearn.neighbors import BallTree

#from BDBUtils.Utilities import CreateNFLData
from IPython.core.display import HTML
import time
import math
from scipy.spatial import Voronoi, voronoi_plot_2d
from datetime import datetime
import pytz
from IPython.display import HTML
import scipy.stats as stats
import matplotlib as mpl
from matplotlib import animation, rc
from matplotlib.patches import Rectangle, Arrow
import tensorflow as tf
#from BDBUtils.Utilities import CreateNFLData

import glob
import os

np.set_printoptions(suppress=True)

import gc

def convert_orientation(x):
    """Wrap an angle in degrees into the half-open range [0, 360)."""
    wrapped = x % 360
    return wrapped

def deg_to_rad(deg):
    """Convert an angle (scalar or array-like) from degrees to radians."""
    radians = deg * np.pi / 180
    return radians



Create = CreateNFLData()

start = time.process_time()

Weeks = range(1,18)

# Play-level attributes, keyed by "<gameId>-<playId>". Loaded once: the file
# does not depend on the week being processed.
plays = pd.read_csv('../input/nfl-big-data-bowl-2021/plays.csv', usecols=['gameId', 'playId','down', 'yardsToGo','penaltyCodes', 'penaltyJerseyNumbers', 'passResult', 'offensePlayResult', 'playResult', 'epa', 'isDefensivePI','offenseFormation', 'personnelO', 'defendersInTheBox', 'personnelD', 'typeDropback','playType'])
plays['Code'] = plays['gameId'].astype(str) + "-" + plays['playId'].astype(str)
plays = plays.set_index('Code')
plays = plays.loc[~plays.index.duplicated(keep='first')]
# NOTE(review): 'playId' is re-mapped from plays.csv as well; a frame whose
# Code is missing from plays.csv will end up with a NaN playId.
Cols = ['playId','down', 'yardsToGo','penaltyCodes', 'penaltyJerseyNumbers', 'passResult', 'offensePlayResult', 'playResult', 'epa', 'isDefensivePI','offenseFormation', 'personnelO', 'defendersInTheBox', 'personnelD', 'typeDropback','playType']

#globbed_files = glob.glob("../input/revised-data/*.csv") #creates a list of all csv files
data = []
for n in tqdm(Weeks):
    filename = '../input/revised-data/week' + str(n) + '.csv'
    frame = Create.import_data(filename,columns=True, cols=['week', 'gameId', 'playId', 'frameId', 'time', 'nflId', 'displayName', 'jerseyNumber', 'position', 'team', 'X_std', 'Y_std', 'Dir_std', 'dx', 'dy', 'Orientation_std', 's', 'MPH', 'a', 'dis', 'event','route', 'PlayerTeam','yardlineNumber', 'YardLine_std', 'OnOffense',  'closestOpp_Id', 'Opp_Dist_COpp','route_COpp', 'X_std_COpp', 'Y_std_COpp', 'Dir_std_COpp', 'dx_COpp', 'dy_COpp', 'Orientation_std_COpp', 'MPH_COpp', 's_COpp', 'a_COpp', 'dis_COpp', 'position_COpp', 'Pos_Rank_COpp','closestTeam_Id', 'Team_Dist_CTm', 'nflId_y_CTm', 'route_CTm', 'X_std_CTm', 'Y_std_CTm','QB_Dist_QB', 'X_std_QB', 'Y_std_QB', 'Orientation_std_QB', 'FootDist', 'Targeted'])
    frame['Code'] = frame['gameId'].astype(str) + "-" + frame['playId'].astype(str)
    for col in Cols:
        frame[col] = frame.Code.map(plays[col])
    # Columns mapped in from plays.csv (and Code) arrive as object dtype.
    Obs = frame.select_dtypes(include=['object']).columns.to_list()
    frame[Obs] = frame[Obs].astype('category')
    data.append(frame)
    del frame
    del Obs
    gc.collect()

del plays
del Cols
gc.collect()

print("finish")
WeekData = pd.concat(data).reset_index()
del data
gc.collect()


Finaldf1 = Create.FrameData(WeekData)
del WeekData
gc.collect()
Finaldf1.drop(['index','Code', 'ball_snap', 'man_in_motion', 'pass_arrived', 'pass_forward', 'pass_outcome_caught', 'play_action', 'run_pass_option'], axis=1, inplace=True)

gc.collect()

# Bearing (degrees in [0, 360)) from each player to the QB and to the closest
# opponent; computed once and stored both in radians and in degrees.
_qb_bearing = convert_orientation(np.rad2deg(np.arctan2(Finaldf1['Y_std_QB'] - Finaldf1['Y_std'], Finaldf1['X_std_QB'] - Finaldf1['X_std'])))
_wr_bearing = convert_orientation(np.rad2deg(np.arctan2(Finaldf1['Y_std_COpp'] - Finaldf1['Y_std'],Finaldf1['X_std_COpp'] - Finaldf1['X_std'])))

Finaldf1['QBslope'] = deg_to_rad(_qb_bearing)
Finaldf1['WRslope'] = deg_to_rad(_wr_bearing)

Finaldf1['QBslope1'] = _qb_bearing
Finaldf1['WRslope1'] = _wr_bearing
del _qb_bearing
del _wr_bearing

# Absolute angle (0..180 degrees) between the player's orientation and each
# bearing: wrap the raw difference from (-360, 360) into [0, 180].
Finaldf1['Def_Or'] = convert_orientation(np.rad2deg(Finaldf1['Orientation_std']))
Finaldf1['Diff_QB'] = Finaldf1['QBslope1'] - Finaldf1['Def_Or']
Finaldf1['Diff_WR'] = Finaldf1['WRslope1'] - Finaldf1['Def_Or']

Finaldf1['Diff_QB'] = abs(np.where(Finaldf1['Diff_QB'] < -180,Finaldf1['Diff_QB'] + 360,Finaldf1['Diff_QB'] ))
Finaldf1['Diff_QB'] = abs(np.where(Finaldf1['Diff_QB'] > 180,Finaldf1['Diff_QB'] - 360,Finaldf1['Diff_QB'] ))

Finaldf1['Diff_WR'] = abs(np.where(Finaldf1['Diff_WR'] < -180,Finaldf1['Diff_WR'] + 360,Finaldf1['Diff_WR'] ))
Finaldf1['Diff_WR'] = abs(np.where(Finaldf1['Diff_WR'] > 180,Finaldf1['Diff_WR'] - 360,Finaldf1['Diff_WR'] ))

# The player is taken to be looking at whichever target is closer in angle.
Finaldf1['Player_POV'] = np.where(Finaldf1['Diff_QB'] < Finaldf1['Diff_WR'],"QB",Finaldf1['position_COpp'])
Finaldf1['Looking_AtQB'] = np.where(Finaldf1['Diff_QB'] < Finaldf1['Diff_WR'],1,0)

# NOTE(review): plain difference of two angles in radians, not wrapped.
Finaldf1['diffDir'] = np.absolute(Finaldf1['Dir_std'] - Finaldf1['Dir_std_COpp'])

Finaldf1['disRatio'] = Finaldf1['Opp_Dist_COpp'] / np.sqrt((Finaldf1['X_std_COpp'] - Finaldf1['X_std_CTm'])**2 + (Finaldf1['Y_std_COpp'] - Finaldf1['Y_std_CTm'])**2)

# Phase of the play each frame belongs to.
Finaldf1['Event2'] = np.where(Finaldf1['After_snap'] == 0,"Before Snap","After Snap - Before Throw")
# Each comparison is parenthesised: `&` binds tighter than `==`, so the
# unparenthesised form compared After_Throw with (1 & mask) and also matched
# rows where After_Throw == 0 and After_PassArrived == 1.
Finaldf1['Event2'] = np.where((Finaldf1['After_Throw'] == 1) & (Finaldf1['After_PassArrived'] == 0),"Ball in the Air", Finaldf1['Event2'])

# Frames per phase (rows in the phase / distinct players in the play), the
# frame's rank inside its phase, and that rank as a fraction of the phase.
Finaldf1['EventCount'] = Finaldf1.groupby(['gameId','playId','Event2'])['Event2'].transform('count') / Finaldf1.groupby(['gameId','playId'])['nflId'].transform('nunique')
Finaldf1['EventOrder'] = Finaldf1.groupby(['gameId','playId','Event2'])['frameId'].rank(ascending=True, method='dense').astype(int)
Finaldf1['EventPct'] = Finaldf1['EventOrder'] / Finaldf1['EventCount']

Finaldf1['Partition'] = np.where(Finaldf1['EventPct'] > (1/2), "2nd Phase", "1st Phase")

Finaldf1['Group'] = Finaldf1['Event2'] + "-" + Finaldf1['Partition']

Finaldf1['Player_POV'] = Finaldf1['Player_POV'].astype('category')
Finaldf1['Event2'] = Finaldf1['Event2'].astype('category')
Finaldf1['Partition'] = Finaldf1['Partition'].astype('category')
Finaldf1['Group'] = Finaldf1['Group'].astype('category')

In [None]:
# First 30 post-snap frames of one receiver on one play, keeping only the
# identifying columns and the acceleration column "a".
Example = (
    Finaldf1
    .query('displayName == "Allen Hurns" & gameId == 2018110500 & playId == 1918 & After_snap == 1')
    .filter(items=['gameId','playId','frameId','nflId','displayName','a'], axis=1)
    .head(30)
)
# Full-precision floats for the percent-change computation that follows.
Example['a'] = Example['a'].astype('float64')

def highlight_max(s):
    """Styler callback: yellow background for every entry of ``s`` whose
    value equals one of the two largest values, empty style otherwise."""
    top_two = s.nlargest(n=2, keep='first')
    flags = s.isin(top_two)
    styles = []
    for flag in flags:
        styles.append('background-color: yellow' if flag else '')
    return styles

# Defender Evaluation: One-Cut Routes + Double Moves

Some may argue that the most critical aspect of man-to-man coverage is the cornerback’s ability to stay with the receiver after he makes his cut. The modern-day NFL route tree has evolved to the point where nearly every route involves a cut, or multiple cuts.

We want to quantify how well the defender can stay with the receiver after he makes his initial cut on one-cut routes, and second cut on double-moves.

**Single-Cut Routes:** Hitches, Slants, Outs, Digs (Ins), and Comebacks

**Double-Move Routes:** Out n’ up's, Slant n' Go's (Sluggos), Hitch n’ Go's


![](https://media.giphy.com/media/wdIRaIIikhq7LHRV9a/giphy.gif)

# Identify the Cut

Above, Allen Hurns runs a double move called a "Sluggo" route: his initial cut leads Malcolm Butler to believe he is running a slant, and his second cut upfield takes him right past Butler for an easy touchdown.

This is a perfect example to show how we can pin-point the exact frames Allen Hurns made both of his cuts. We can do this by analyzing Hurns's acceleration percent change frame-by-frame.

Below you will see Hurns's acceleration ("a") frame by frame. We shift the series by -1 and apply the pct_change function; the 2 largest percent increases in acceleration identify the frames at which Hurns made each cut.

-------------------------------------------------------------

In [None]:
# Percent change of the next frame's acceleration, computed separately for
# each (game, play, player). transform() returns a Series aligned to
# Example's own index whatever the number of groups, so the result no longer
# depends on how groupby.apply shapes its output for a single group.
Example['a_pct_change'] = (
    Example.groupby(['gameId','playId','nflId'])['a']
    .transform(lambda accel: accel.shift(-1).pct_change())
    .fillna(0)
)

Example.style.apply(highlight_max, subset=['a_pct_change'])

------------------------------------------------

In [None]:
%%time
import sys
import warnings
from pandas.core.common import SettingWithCopyWarning

warnings.simplefilter(action="ignore", category=SettingWithCopyWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)

# Find outside receivers

# Offensive players on the first frame after the snap; the smallest and
# largest Y_std per play give the two outermost lateral positions.
OutsideWR = Finaldf1.query('OnOffense == True & After_PassArrived == 0 & After_snap == 1 & After_Catch == 0 & EventOrder == 1 & Group == "After Snap - Before Throw-1st Phase"')
# Aggregate min/max directly: this avoids tuple column selection on the
# groupby (['Min','Max']), which newer pandas releases reject.
OutsideWR = (
    OutsideWR.groupby(['gameId','playId'])['Y_std']
    .agg(['min','max'])
    .rename(columns={'min': 'Min', 'max': 'Max'})
    .reset_index()
)

OutsideWR['Code'] = OutsideWR['gameId'].astype(str) + "-" + OutsideWR['playId'].astype(str)
OutsideWR = OutsideWR.set_index('Code')
OutsideWR = OutsideWR.loc[~OutsideWR.index.duplicated(keep='first')]

#Find defender orientation - are they looking at the receiver?

# Bearing from the closest opponent back to the player, that opponent's
# orientation, and the absolute angle (0..180 degrees) between the two.
Finaldf1['WRslope1_Opp'] = convert_orientation(np.rad2deg(np.arctan2(Finaldf1['Y_std'] - Finaldf1['Y_std_COpp'],Finaldf1['X_std'] - Finaldf1['X_std_COpp'])))
Finaldf1['Def_Or_Opp'] = convert_orientation(np.rad2deg(Finaldf1['Orientation_std_COpp']))
Finaldf1['Diff_WR_Opp'] = Finaldf1['WRslope1_Opp'] - Finaldf1['Def_Or_Opp']


Finaldf1['Diff_WR_Opp'] = abs(np.where(Finaldf1['Diff_WR_Opp'] < -180,Finaldf1['Diff_WR_Opp'] + 360,Finaldf1['Diff_WR_Opp'] ))
Finaldf1['Diff_WR_Opp'] = abs(np.where(Finaldf1['Diff_WR_Opp'] > 180,Finaldf1['Diff_WR_Opp'] - 360,Finaldf1['Diff_WR_Opp'] ))

from scipy.signal import argrelextrema

def get_maxima(x):
    """Return the rows of ``x`` at which ``a_pct_change`` is a strict local
    maximum (greater than both neighbouring rows)."""
    series = x['a_pct_change'].values
    peak_positions = argrelextrema(series, np.greater)
    return x.iloc[peak_positions]

def f(x):
    """Return ``a_pct_change + playId + nflId + gameId`` for ``x``."""
    total = x['a_pct_change']
    # Plain ``+`` (never ``+=``) so the values held by ``x`` are not mutated.
    for key in ('playId', 'nflId', 'gameId'):
        total = total + x[key]
    return total
#2018100706

# Frame-level working table: rows selected by the query below, reduced to the
# columns used by the cut-detection and feature steps that follow.
_wr_frame_columns = ['week','gameId','playId','frameId','time','event','nflId','displayName','route','EventOrder','Event2','closestOpp_Id','position_COpp','passResult','down','yardsToGo','offensePlayResult','isDefensivePI','epa','After_Throw','Distfrom_LOSY','a','a_COpp','Opp_Dist_COpp','X_std','Y_std','dx','dx_COpp','dy','dy_COpp','Dir_std','Dir_std_COpp','Diff_WR_Opp']

df = (
    Finaldf1
    .query('Targeted == 1 & position == "WR" & After_PassArrived == 0 & After_snap == 1 & After_Catch == 0')
    .filter(_wr_frame_columns, axis=1)
)

# "gameId-playId" key, the same format as the index of OutsideWR.
df['Code'] = df['gameId'].astype(str) + "-" + df['playId'].astype(str)

# Find outside receivers: attach each play's min / max Y_std from the lookup.
for _target, _source in (('minY', 'Min'), ('maxY', 'Max')):
    df[_target] = df.Code.map(OutsideWR[_source])

# The lookup is no longer needed; release it.
del OutsideWR
gc.collect()

# Cast these columns to float64 before the percentage-change and cumulative
# computations that follow.
for _column in ('a', 'a_COpp', 'X_std', 'Opp_Dist_COpp'):
    df[_column] = df[_column].astype(np.float64)

# Per-route features. A "route" here is one receiver on one play.
_route_keys = ['gameId','playId','nflId']


def _per_route(column, func):
    """Run ``func`` on ``df[column]`` separately for every route.

    Uses ``groupby.transform`` so the result is already aligned to ``df``'s
    row index; no index levels have to be dropped afterwards, and the shape
    of the result does not depend on how many groups there are.
    """
    return df.groupby(_route_keys)[column].transform(func)


def _next_frame_pct_change(values):
    """Percentage change of the series shifted up by one row (next frame vs. current)."""
    return values.shift(-1).pct_change()


def _displacement_from_start(values):
    """Running sum of row-to-row differences."""
    return values.diff(1).cumsum()


# Apply Acceleration pct change function

df['a_pct_change'] = _per_route('a', _next_frame_pct_change).fillna(0)

df['a_COpp_pct_change'] = _per_route('a_COpp', _next_frame_pct_change).fillna(0)

df['Opp_Dist_COpp_change'] = _per_route('Opp_Dist_COpp', _next_frame_pct_change).fillna(0)

# Cumulative sum of yards from line of scrimmage, will help us determine specific routes. i.e cut at 10 yds = 10 yd Hitch

df['XYards'] = _per_route('X_std', _displacement_from_start).fillna(0)

df['YYards'] = _per_route('Y_std', _displacement_from_start).fillna(0)

# Outside receiver: the route's first Y_std equals the play's min or max Y_std.
_first_y = _per_route('Y_std', 'first')
df['OutsideWR'] = np.where((_first_y == df['maxY']) | (_first_y == df['minY']), 1,0)

# Is the receiver on the left or right side of the formation
# (0 when the route's first Distfrom_LOSY is positive, 1 otherwise)

df['Ballsnap_pos'] = np.where(_per_route('Distfrom_LOSY', 'first') > 0,0,1)


# Press or no press coverage: flagged when the first Opp_Dist_COpp of the
# route is at most 3.

df['DefenderInitial'] = _per_route('Opp_Dist_COpp', 'first')

df['PressCoverage'] = np.where(df['DefenderInitial'] <= 3, 1,0)

# Replace infinities (e.g. a percentage change taken from a zero base) with 0.
df = df.replace([np.inf, -np.inf], 0)

# Numeric per-frame key: a_pct_change + playId + nflId + gameId (see f).
df['Code'] = df.apply(f, axis=1)

# We want the 1-2 largest local maxima of a_pct_change for each route. Several
# frames can share the same a_pct_change value, so frames are matched on the
# sum a_pct_change + playId + nflId + gameId instead of on the raw value.
# Matching on that key avoids an iterrows loop, which saves a lot of time.
# NOTE(review): the summed key is not guaranteed to be unique across routes.

# Only frames before the throw and from EventOrder 5 onwards can be cuts.
_cut_window = (df['After_Throw'] == 0) & (df['EventOrder'] >= 5)

# Keys of the (up to) two largest local maxima per route that exceed 0.35.
_cut_codes = (
    df.where(_cut_window)
    .groupby(['gameId','playId','nflId'])
    .apply(get_maxima)
    .reset_index(0,drop=True).reset_index(0,drop=True).reset_index(0,drop=True)
    .groupby(['gameId','playId','nflId'])['a_pct_change']
    .nlargest(2, keep='all')
    .to_frame()
    .reset_index()
    .query('a_pct_change > .35')
    .apply(f, axis=1)
    .to_list()
)
_is_cut_frame = df['Code'].where(_cut_window).isin(_cut_codes)

# Store a Series (non-null only on cut frames). Assigning the filtered
# DataFrame itself to a single column is rejected by current pandas.
df['a_pct_change_max'] = df['a_pct_change'].where(_is_cut_frame)

# Find cut frames: after_receiver_cut counts the cuts seen so far in the
# route (0 before the first cut, 1 from the first cut on, ...).

df['after_receiver_cut'] = np.where(df['a_pct_change_max'].notnull(),1,0)
df['after_receiver_cut'] = df.groupby(['gameId','playId','nflId'])['after_receiver_cut'].cumsum()

# Largest in-window a_pct_change of each cut segment, broadcast to every row
# of the segment.
df['a_pct_change_max'] = df.where(_cut_window).groupby(['gameId','playId','nflId','after_receiver_cut'])['a_pct_change'].transform('max')
df['a_pct_change_max'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['a_pct_change_max'].transform('max')

# frameId at which the receiver reaches that maximum (latest one on ties).
df['receiver_cutframe'] = np.where(df['a_pct_change'] == df['a_pct_change_max'] ,df['frameId'],0)
df['receiver_cutframe'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['receiver_cutframe'].transform('max')

# Same for the closest opponent, restricted to rows at/after the receiver's cut.
# NOTE(review): this compares EventOrder with a frameId-valued column --
# confirm both count frames on the same scale.
df['a_COpp_pct_change_max'] = df.where((df['EventOrder'] >= df['receiver_cutframe'])).groupby(['gameId','playId','nflId','after_receiver_cut'])['a_COpp_pct_change'].transform('max')
df['a_COpp_pct_change_max'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['a_COpp_pct_change_max'].transform('max')


df['Def_cutframe'] = np.where(df['a_COpp_pct_change'] == df['a_COpp_pct_change_max'] ,df['frameId'],0)
df['Def_cutframe'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['Def_cutframe'].transform('max')





# Replace any remaining infinities with 0 before filtering.
df = df.replace([np.inf, -np.inf], 0)

# Keep only frames on/after the receiver's first cut
# (after_receiver_cut is the running count of detected cut frames per route).

df = df.query('after_receiver_cut >= 1')


# Recompute the per-segment maxima on the remaining rows; a segment is one
# (gameId, playId, nflId, after_receiver_cut) group.
df['a_pct_change_max'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['a_pct_change'].transform('max')
df['a_COpp_pct_change_max'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['a_COpp_pct_change'].transform('max')

# frameId where the receiver's pct change equals the segment maximum (0 on
# other rows), then the largest such frameId broadcast over the segment.
df['receiver_cutframe'] = np.where(df['a_pct_change'] == df['a_pct_change_max'] ,df['frameId'],0)
df['receiver_cutframe'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['receiver_cutframe'].transform('max')

# Same construction for the closest opponent.
df['Def_cutframe'] = np.where(df['a_COpp_pct_change'] == df['a_COpp_pct_change_max'] ,df['frameId'],0)
df['Def_cutframe'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['Def_cutframe'].transform('max')

df = df.replace([np.inf, -np.inf], 0)


# Renumber frames 1..n inside each segment (dense rank of frameId) and keep
# the first 25 of them; the write-up refers to this window as 2.5 seconds.
df['EventOrder'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['frameId'].rank(ascending=True, method='dense').astype(int)

df = df.query('EventOrder <= 25')

# Receiver-minus-opponent differences.
# These four columns are assigned again, from the same inputs, further down
# this cell (together with dy_diff).
df['a_diff'] =  df['a'] - df['a_COpp']
df['dx_diff'] =  df['dx'] - df['dx_COpp']
df['a_pct_change_diff'] =  df['a_pct_change'] - df['a_COpp_pct_change']
df['CutFrame_diff'] =  df['receiver_cutframe'] - df['Def_cutframe']

# Route2 temporarily holds the last YYards value of the segment; Route3 codes
# the direction as 1 or 2 (mapped to "IN" / "OUT" below).
df['Route2'] = df.groupby(['gameId','playId','nflId','after_receiver_cut'])['YYards'].transform('last')
df['Route3'] = np.where((df['Ballsnap_pos'] == 0) & (df['Route2'] < 0), 1,2)
# NOTE(review): the next line is a no-op -- every row it targets was already
# set to 2 by the line above, so rows with Ballsnap_pos == 1 can never be
# coded 1 ("IN"). Confirm whether `Route2 > 0 -> 1` was intended here.
df['Route3'] = np.where((df['Ballsnap_pos'] == 1) & (df['Route2'] < 0), 2, df['Route3'])

# Blank out the code on second-cut segments, then give every row of the route
# the largest remaining code, so the second segment inherits its direction
# from the route's other segments.
df['Route3'] = np.where((df['after_receiver_cut'] == 2), np.nan, df['Route3'])
df['Route3'] = df.groupby(['gameId','playId','nflId'])['Route3'].transform('max')

# Anything that is not exactly 1 (including missing values) becomes "OUT".
df['Route3'] = np.where(df['Route3'] == 1, "IN", "OUT")

# Final label: "<IN|OUT>+<route>".
df['Route2'] = df['Route3'] + "+" + df['route']


_cut_keys = ['gameId','playId','nflId','after_receiver_cut']


def _segment_cross_corr(frame, left, right):
    """Correlation between ``left`` and ``right`` inside each cut segment.

    ``groupby(...).corr()`` returns one 2x2 matrix per segment, with the
    matrix row labels in an extra innermost index level. The cross term is
    taken from the row labelled ``right`` and the column ``left``. Choosing it
    by label, rather than by filtering on "value < 1", keeps segments whose
    correlation is exactly 1 and never picks up the self-correlation entry.

    Returns a frame indexed by the "gameId-playId-nflId-after_receiver_cut"
    code, holding the key columns and the correlation in column ``left``.
    """
    matrices = frame.groupby(_cut_keys)[[left, right]].corr()
    cross = matrices.xs(right, level=len(_cut_keys))[[left]].reset_index()
    cross['Code'] = cross['gameId'].astype(str) + "-" + cross['playId'].astype(str) + "-" + cross['nflId'].astype(str) + "-" + cross['after_receiver_cut'].astype(str)
    cross = cross.set_index('Code')
    return cross.loc[~cross.index.duplicated(keep='first')]


# Segment key on the frame-level table, in the same format as the lookup index.
df['Code'] = df['gameId'].astype(str) + "-" + df['playId'].astype(str) + "-" + df['nflId'].astype(str) + "-" + df['after_receiver_cut'].astype(str)

# Receiver vs. closest-opponent correlation of a, dx and dy per segment.
Corr = _segment_cross_corr(df, 'a', 'a_COpp')
df['a_corr'] = df.Code.map(Corr['a'])

Corr = _segment_cross_corr(df, 'dx', 'dx_COpp')
df['dx_corr'] = df.Code.map(Corr['dx'])

Corr = _segment_cross_corr(df, 'dy', 'dy_COpp')
df['dy_corr'] = df.Code.map(Corr['dy'])

# Receiver-minus-closest-opponent differences, per frame.
df['a_diff'] =  df['a'] - df['a_COpp']
df['dx_diff'] =  df['dx'] - df['dx_COpp']
df['dy_diff'] =  df['dy'] - df['dy_COpp']
df['a_pct_change_diff'] =  df['a_pct_change'] - df['a_COpp_pct_change']
df['CutFrame_diff'] =  df['receiver_cutframe'] - df['Def_cutframe']

_cut_keys = ['gameId','playId','nflId','after_receiver_cut']

# Largest Opp_Dist_COpp of each cut segment, and the (latest) EventOrder at
# which it is reached.
df['Opp_Dist_COpp_max'] = df.groupby(_cut_keys)['Opp_Dist_COpp'].transform('max')

df['Opp_Dist_COpp_maxframe'] = np.where(df['Opp_Dist_COpp'] == df['Opp_Dist_COpp_max'] ,df['EventOrder'],0)
df['Opp_Dist_COpp_maxframe'] = df.groupby(_cut_keys)['Opp_Dist_COpp_maxframe'].transform('max')

# Max / mean / variance of Diff_WR_Opp over the first 10 frames of each
# segment (EventOrder <= 10) ...
_first_ten_frames = df.where((df['EventOrder'] <= 10))
df['Diff_Ori_Opp_max_1sec'] = _first_ten_frames.groupby(_cut_keys)['Diff_WR_Opp'].transform('max')
df['Diff_Ori_Opp_mean_1sec'] = _first_ten_frames.groupby(_cut_keys)['Diff_WR_Opp'].transform('mean')
df['Diff_Ori_Opp_var_1sec'] = _first_ten_frames.groupby(_cut_keys)['Diff_WR_Opp'].transform('var')

# ... broadcast to every row of the segment, including frames after the 10th.
df['Diff_Ori_Opp_max_1sec'] = df.groupby(_cut_keys)['Diff_Ori_Opp_max_1sec'].transform('max')
df['Diff_Ori_Opp_mean_1sec'] = df.groupby(_cut_keys)['Diff_Ori_Opp_mean_1sec'].transform('max')
df['Diff_Ori_Opp_var_1sec'] = df.groupby(_cut_keys)['Diff_Ori_Opp_var_1sec'].transform('max')

# Combine each statistic with the segment's mean Opp_Dist_COpp.
# NOTE(review): the statistic is raised to the power of the mean distance
# (`**`); confirm that exponentiation rather than multiplication is intended.
_mean_opp_dist = df.groupby(_cut_keys)['Opp_Dist_COpp'].transform('mean')

df['Diff_WR_Opp_max_1sec'] = (df['Diff_Ori_Opp_max_1sec']**_mean_opp_dist) / 1000
df['Diff_WR_Opp_mean_1sec'] = (df['Diff_Ori_Opp_mean_1sec']**_mean_opp_dist)  / 1000
# Built from the variance column; it previously reused the mean column, which
# made this feature an exact copy of Diff_WR_Opp_mean_1sec.
df['Diff_WR_Opp_var_1sec'] = (df['Diff_Ori_Opp_var_1sec']**_mean_opp_dist) / 1000

# The power above can overflow to infinity; replace those values with 0.
df = df.replace([np.inf, -np.inf], 0)

In [None]:
# Type 2 = the route has more than one detected cut, Type 1 = a single cut.
_cuts_in_route = df.groupby(['week','gameId','playId','nflId'])['after_receiver_cut'].transform("max")
df['Type'] = np.where(_cuts_in_route > 1, 2, 1)

# Keep the second-cut segment of Type 2 routes and the first-cut segment of
# Type 1 routes.
Try = df.query('Type == 2 & after_receiver_cut == 2 | Type == 1 & after_receiver_cut == 1 ')

# One output row per combination of these identifying columns.
_id_columns = ['week','gameId','playId','nflId','displayName','route','Route2','Ballsnap_pos','closestOpp_Id','position_COpp','passResult','down','yardsToGo','offensePlayResult','isDefensivePI','epa','after_receiver_cut', 'Type','OutsideWR','PressCoverage']

# Statistic lists; the order fixes the order of the output columns.
_stats_max_first = ['max', 'var', 'mean', 'first', 'min', 'last']
_stats_first_first = ['first', 'mean', 'max', 'var', 'min', 'last']

_aggregations = {
    'EventOrder': ['max'],
    'XYards': ['mean'],
    'Opp_Dist_COpp': ['first', 'last', 'mean', 'count', 'max'],
    'Opp_Dist_COpp_change': ['var', 'mean', 'min', 'max'],
    'a_pct_change': _stats_max_first,
    'dx_COpp': _stats_max_first,
    'a_COpp': _stats_max_first,
    'Diff_WR_Opp': _stats_max_first,
    'a_diff': _stats_first_first,
    'dx_diff': _stats_first_first,
    'dy_diff': _stats_first_first,
    'a_pct_change_diff': _stats_first_first,
    'Opp_Dist_COpp_maxframe': ['mean'],
    'CutFrame_diff': ['mean'],
    'Diff_WR_Opp_max_1sec': ['max'],
    'Diff_WR_Opp_mean_1sec': ['max'],
    'Diff_WR_Opp_var_1sec': ['max'],
}

MAAE = Try.groupby(_id_columns).agg(_aggregations).reset_index(drop=False)

# Flatten the two-level column labels to "<column>_<statistic>"; identifying
# columns have an empty second level and therefore end in "_".
MAAE.columns = ['_'.join(_label) for _label in MAAE.columns]

# Share of a route's frames attributed to each row's grouping.
# gameId_ is part of the key because playId_ values are only meaningful
# within a game; without it, plays from different games that share a playId_
# (same receiver, same route) would be pooled together.
_play_route_keys = ['gameId_','playId_','nflId_','route_']
MAAE['Total'] = MAAE.groupby(_play_route_keys)['EventOrder_max'].transform('sum')
MAAE['Cov_pct'] = MAAE.groupby(_play_route_keys)['Opp_Dist_COpp_count'].transform('sum') / MAAE['Total']

# Target variable: 0 when the pass was complete ("C"), 1 for any other result.
MAAE['DefenderSuccess'] = np.where(MAAE['passResult_'] == "C",0,1)

Routes = ['SLANT','OUT','HITCH','GO','IN']

MAAE = MAAE.query('Opp_Dist_COpp_first < 10 & Cov_pct > .70 & EventOrder_max > 10 & isDefensivePI_ == False')
MAAE = MAAE.query('position_COpp_ == "CB" | position_COpp_ == "DB"  ')
# .copy() so that the column assignments below act on an independent frame.
MAAE = MAAE.query('route_.isin(@Routes)', engine='python').copy()


MAAE['Opp_Dist_COpp_diff'] = MAAE['Opp_Dist_COpp_last'] - MAAE['Opp_Dist_COpp_first']

# Round the mean XYards to the nearest multiple of 5.
MAAE['XYards_mean'] = 5 * round(MAAE['XYards_mean']/5).astype(int)


# Keep the "IN+GO" / "OUT+GO" labels; every other row falls back to route_.
MAAE['Route2_'] = np.where((MAAE['Route2_'] == "IN+GO") | (MAAE['Route2_'] == "OUT+GO"),MAAE['Route2_'], MAAE['route_'] )


# Drop rows that round to 0 or to more than 20, then cap the rest at 15.
MAAE = MAAE.query('XYards_mean != 0 &  XYards_mean <= 20').copy()

MAAE['XYards_mean'] = np.where(MAAE['XYards_mean'] > 15, 15,MAAE['XYards_mean'])


# Express the max-separation frame relative to the number of frames counted.
MAAE['Opp_Dist_COpp_maxframe_mean'] = MAAE['Opp_Dist_COpp_maxframe_mean'] / MAAE['Opp_Dist_COpp_count']

# HITCH / IN / OUT get the rounded yardage as a prefix, e.g. "10yd+OUT".
MAAE['Route2_'] = np.where((MAAE['Route2_'] == "HITCH") | (MAAE['Route2_'] == "IN") | (MAAE['Route2_'] == "OUT"),MAAE['XYards_mean'].astype(str) + "yd+" + MAAE['Route2_'], MAAE['Route2_'] )


# Mirror the "IN+GO" / "OUT+GO" labels into route_ as well.
MAAE['route_'] = np.where((MAAE['Route2_'] == "IN+GO") | (MAAE['Route2_'] == "OUT+GO"),MAAE['Route2_'], MAAE['route_'] )

# Improving Route Detail For Analysis

Now that we know when the receivers make their cut, we can create a more in-depth view of the route by taking the cumulative sum difference of the receiver's X position from the line of scrimmage and rounding the cut yardage in increments of 5. Here are the new routes for our analysis:


**Single Cuts:**
- Slant
- 5yd + OUT
- 10yd + OUT
- 15yd + OUT
- 5yd + IN
- 10yd + IN
- 15yd + IN
- 5yd + HITCH
- 10yd + HITCH
- 15yd + HITCH

On plays where the receiver makes 2 cuts and heads up field, we can use alignment info and the cumulative sum of the Y coordinate difference from the line of scrimmage to find out which direction the receiver moved to make their first cut either IN or OUT:

**Double Moves:**
- IN + GO - This will contain Slant n' Go's (Sluggos)
- OUT + GO - This will contain Out n' up's/ Hitch n' Go's

-------------------------------------------------------------------------------------

# Model Approach

## Features:

- Outside WR - 1 = Yes / 0 = No
- Press Coverage  - 1 = Yes / 0 = No
- Separation - Mean, Max, Var, Last Frame
- Separation Percent Change- Mean, Max, Min, Var
- Defender Velocity(dx) relative to X-axis - Mean, Max, Min, Var, Last Frame
- Defender Acceleration - Mean, Max, Min, Var, Last Frame
- Defender Orientation Angle Diff from receiver location - Mean, Max, Min, Var, Last Frame
- Defender / Receiver Acceleration Difference - Mean, Max, Min, Var, Last Frame
- Defender / Receiver Velocity(dx) relative to X-axis Difference - Mean, Max, Min, Var, Last Frame
- Defender / Receiver Velocity(dy) relative to Y-axis Difference - Mean, Max, Min, Var, Last Frame
- Defender / Receiver Acceleration Percent Change Difference - Mean, Max, Min, Var, Last Frame
- Max Separation Frame
- Cut Frame Difference - Difference between Defender and receiver cut frames
- Difference between separation at start of cut and 2.5 seconds after



## Model

I will use a similar approach to my other [submission](https://www.kaggle.com/jdruzzi/quantifying-press-coverage-ability), except that this time we will use only the 2.5 seconds of data after the receiver's first cut on single-cut routes, and after the receiver's second cut on double-move routes. 

Instead of throwing all routes into the same model, we will loop through and generate a model for each route, append the score predictions to the corresponding play and then concatenate the dataset back together.

**Logic:** Not all routes can be evaluated the same way. For example, difference in velocity relative to the X-axis may be a non-factor when evaluating coverage on an out route, but it may be a crucial feature when evaluating a hitch/ comeback. 
Another reason we train each route individually is that this scoring metric is the inverse of a completion-probability model, so air yards and other factors may affect the scoring system. Modeling each route individually allows us to isolate just the receiver and cornerback interaction.




Using logistic regression, we will predict Defender Success where completion = 0, and incomplete/interception = 1

We can use the predicted probability of defender success, normalize it, and then multiply by 100 to create a scoring function.

Defender Grade = MinMaxScaler(P(Defender Success)) x 100

Grades range from 0 to 100, with 100 being the best possible grade.

-------------------------

# Model Output
- Route
- Logistic Regression grid search parameters
- Model accuracy predicting Defender Success
- F1 score of the model to ensure model validity
- Avg. Top 5 Feature importance from Tree/Boosting Models

In [None]:
from sklearn.metrics import accuracy_score,f1_score
from IPython.core.display import HTML
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import MinMaxScaler

from catboost import CatBoostClassifier
from scipy.stats import randint, uniform
from sklearn.model_selection import StratifiedKFold,GridSearchCV, RandomizedSearchCV,cross_val_score
from sklearn.metrics import average_precision_score,recall_score,make_scorer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import lightgbm as lgb
import sklearn.model_selection as GridSearchCV

from sklearn.model_selection import cross_val_predict,cross_val_score
import seaborn as sns

from sklearn.metrics import mean_squared_error
from sklearn import svm
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingClassifier, VotingClassifier, RandomTreesEmbedding
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

from tqdm import tqdm
from imblearn.over_sampling import SMOTE
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from imblearn.pipeline import Pipeline
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler
from sklearn.tree import DecisionTreeClassifier

from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.linear_model import RidgeClassifier, SGDClassifier, LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.ensemble import BaggingClassifier, VotingClassifier, RandomTreesEmbedding
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from catboost import CatBoostClassifier
from sklearn.impute import SimpleImputer
from sklearn.naive_bayes import GaussianNB

from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.model_selection import KFold, cross_val_score
from lightgbm import LGBMClassifier


def Predictions(X, y):
    """Benchmark a fixed set of classifiers on ``X`` / ``y``.

    The data is split 67/33 (stratified on ``y``). Each classifier is scored
    with 5-fold cross-validated predictions on the training part and with a
    plain fit/predict on the test part.

    Parameters
    ----------
    X : DataFrame of features (``X.columns`` labels the feature importances).
    y : binary target aligned with ``X``.

    Returns
    -------
    df : DataFrame
        One row per classifier that trained successfully, with columns
        ModelName, TrainAcc, TestAcc, TrainF1, TestF1.
    feats : DataFrame
        Stacked feature importances (columns index, imp, col) of the
        classifiers that expose ``feature_importances_``, min-max scaled
        within each classifier. Empty when no classifier provides them.
    """
    import warnings

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42, stratify=y)

    estimators = [
        ("DecisionTree", DecisionTreeClassifier()),
        ("RandomForestClassifier", RandomForestClassifier(n_estimators = 1000,random_state = 123,max_depth = 9,criterion = "gini")),
        ("ExtraTreesClassifier", ExtraTreesClassifier()),
        ("AdaBoostClassifier", AdaBoostClassifier()),
        ("GradientBoostingClassifier", GradientBoostingClassifier()),
        ("RidgeClassifier", RidgeClassifier()),
        ("BaggingClassifier", BaggingClassifier()),
        ("CatboostClassifier", CatBoostClassifier(logging_level='Silent')),
        ("LGBMClassifier", LGBMClassifier()),
    ]
    # Each estimator is wrapped in a single-step pipeline named after it.
    clfs1 = [(name, Pipeline([(name, estimator)])) for name, estimator in estimators]

    n_folds = 5
    # No random_state: the folds are not shuffled, so a seed has no effect,
    # and recent scikit-learn raises when one is given without shuffle=True.
    kfold = KFold(n_splits=n_folds)

    scores = []
    features = []
    for name, model in clfs1:
        # Best effort: a classifier that fails is reported and skipped. Its
        # row is only recorded once every metric has been computed, so names
        # and metrics can never get out of step.
        try:
            OG = cross_val_predict(model, X_train, y_train, cv=kfold, n_jobs=-1)
            model.fit(X_train,y_train)
            YOne = model.predict(X_test)
            row = (
                name,
                accuracy_score(y_train, OG),
                accuracy_score(y_test, YOne),
                f1_score(y_train, OG),
                f1_score(y_test, YOne),
            )
        except Exception as exc:
            warnings.warn("Predictions: skipping %s (%s)" % (name, exc))
            continue
        scores.append(row)

        # Not every classifier exposes importances (e.g. the ridge model).
        importances = getattr(model.steps[0][1], 'feature_importances_', None)
        if importances is None:
            continue
        fea_imp = pd.DataFrame({'imp': importances, 'col': X.columns})
        fea_imp[['imp']] = MinMaxScaler().fit_transform(fea_imp[['imp']])
        fea_imp = fea_imp.sort_values(['imp','col'], ascending=[False,False]).reset_index()
        features.append(fea_imp)

    df = pd.DataFrame(scores, columns=['ModelName','TrainAcc','TestAcc','TrainF1','TestF1'])
    # pd.concat raises on an empty list; hand back an empty frame with the
    # expected columns instead.
    feats = pd.concat(features) if features else pd.DataFrame(columns=['index','imp','col'])
    return df, feats

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold
import warnings




ROUTES = ['5yd+OUT', '15yd+HITCH', '5yd+IN', '10yd+HITCH', '15yd+OUT', '10yd+OUT', '5yd+HITCH', '15yd+IN', 'SLANT', 'OUT+GO', 'IN+GO', '10yd+IN']

# Model features; the same list is used for every route.
Cols = [ 'OutsideWR_', 'PressCoverage_', 'Opp_Dist_COpp_last', 'Opp_Dist_COpp_mean', 'Opp_Dist_COpp_max', 'Opp_Dist_COpp_change_var', 'Opp_Dist_COpp_change_mean', 'Opp_Dist_COpp_change_min', 'Opp_Dist_COpp_change_max','dx_COpp_max', 'dx_COpp_var', 'dx_COpp_mean', 'dx_COpp_min', 'dx_COpp_last', 'a_COpp_max', 'a_COpp_var', 'a_COpp_mean', 'a_COpp_min', 'a_COpp_last', 'Diff_WR_Opp_max', 'Diff_WR_Opp_var', 'Diff_WR_Opp_mean', 'Diff_WR_Opp_min', 'Diff_WR_Opp_last', 'a_diff_mean', 'a_diff_max', 'a_diff_var', 'a_diff_min', 'a_diff_last', 'dx_diff_mean', 'dx_diff_max', 'dx_diff_var', 'dx_diff_min', 'dx_diff_last',  'dy_diff_mean', 'dy_diff_max', 'dy_diff_var', 'dy_diff_min', 'dy_diff_last', 'a_pct_change_diff_mean', 'a_pct_change_diff_max', 'a_pct_change_diff_var', 'a_pct_change_diff_min', 'a_pct_change_diff_last', 'Opp_Dist_COpp_maxframe_mean', 'CutFrame_diff_mean', 'Opp_Dist_COpp_diff','Diff_WR_Opp_max_1sec_max', 'Diff_WR_Opp_mean_1sec_max', 'Diff_WR_Opp_var_1sec_max']


def _fit_logit_grid(features, target):
    """Grid-search an L2 logistic regression on ``features`` / ``target``.

    Searches three solvers and five values of C with 5x5 repeated stratified
    cross-validation, scored on accuracy, and returns the fitted search
    object (refitted on all of ``features`` with the best parameters).
    """
    model = LogisticRegression()
    solvers = ['newton-cg', 'lbfgs', 'liblinear']
    penalty = ['l2']
    c_values = [100, 10, 1.0, 0.1, 0.01]
    # define grid search
    grid = dict(solver=solvers,penalty=penalty,C=c_values)
    cv = RepeatedStratifiedKFold(n_splits=5, n_repeats=5, random_state=65)
    grid_search = GridSearchCV(estimator=model, param_grid=grid, n_jobs=-1, cv=cv, scoring='accuracy',error_score=0)
    return grid_search.fit(features, target)


dfs = []

data=[]
feat_imp = []


for route in ROUTES:
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        # .copy(): the Grade column is added to this frame further down.
        Group = MAAE.loc[(MAAE['Route2_'] == route)].copy()
        y = Group['DefenderSuccess']

        # A route without rows, or with only one outcome class, cannot be
        # split or fitted; report it and move on instead of failing the run.
        if Group.empty or y.nunique() < 2:
            print('-----------------------------------------------')
            print('Route: ', route, '- skipped (needs plays from both outcome classes)')
            continue

        X = Group[Cols].copy()

        X[Cols] = SimpleImputer().fit_transform(X[Cols])
        X[Cols] = StandardScaler().fit_transform(X[Cols])

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.30, random_state = 42, stratify=y)

        # NOTE: this rebinds the notebook-level name `df` to the benchmark table.
        df, feats = Predictions(X,y)

        # Grading model: tuned and fitted on every play of the route, because
        # every play receives a grade.
        grid_clf_acc = _fit_logit_grid(X, y)

        # Held-out evaluation: repeat the same tuning on the training split
        # only, so the reported test metrics come from plays the evaluated
        # model has never seen. (Scoring the grading model on X_test would be
        # in-sample, since it was fitted on those rows.)
        holdout_clf = _fit_logit_grid(X_train, y_train)
        y_pred_acc = holdout_clf.predict(X_test)

        feats5 = feats.groupby(['col'])['imp'].mean().reset_index().sort_values(by=['imp'], ascending=False).head(5)['col'].to_list()

        print('-----------------------------------------------')
        print('Route: ', route)
        print('Model Parameters: ', grid_clf_acc.best_params_)
        print('Test Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
        print('Test F1 Score : ' + str(f1_score(y_test,y_pred_acc)))
        print('Top 5 Feature Importance : ', feats5)


        # Grade = predicted probability of defender success, min-max scaled
        # within the route and multiplied by 100.
        Group['Grade'] = grid_clf_acc.predict_proba(X)[:,1]
        Group[['Grade']] = MinMaxScaler().fit_transform(Group[['Grade']])
        Group['Grade'] = Group['Grade']*100

        feats['route'] = route
        data.append(df)
        feat_imp.append(feats)


        dfs.append(Group)


Final = pd.concat(dfs)

------------------------------------------

# Let's Visualize


Let's look at Stefon Diggs working Ronald Darby on this 10 yard out. I will highlight a few features to better help visualize this grading system.

![](https://media.giphy.com/media/JtCRRORLG0zzUeA1lq/giphy.gif)

In [None]:
# Pull one graded play out of Final and show it with reader-friendly headers.
_example_play = Final.query('displayName_ == "Stefon Diggs" & gameId_ == 2018100709 & playId_ == 3046 ')
_shown_columns = ['gameId_','playId_','displayName_','route_','Route2_','epa_','Opp_Dist_COpp_first', 'Opp_Dist_COpp_last', 'dy_diff_max','Diff_WR_Opp_max', 'Grade']
_shown_headers = ['gameId','playId','Name','route','route2','epa','Initial_Separation', 'SeparationAfter', 'Velocitydy_diff_max','DefenderOrientation_Max', 'Grade']

ex = _example_play.sort_values(by=['Grade'], ascending=True).filter(_shown_columns)
ex.columns = _shown_headers
ex

Notice how the defender maintains close distance with the receiver up until he makes his cut. We were able to show:
- The magnitude of the separation change
    - The difference from when the receiver makes his cut, to 2.5  seconds after the cut
- The max difference in sideline Velocity(dy)
    - You can see the massive 5 pt difference in velocity moving from sideline to sideline. Diggs pushes towards the sideline while Darby is still moving in the opposite direction.
- Max Defender Orientation
    - Darby gets completely turned around on the route, we can capture this by taking the mean, max, and variance of the defender's orientation relative to the receiver's position on the field.
- A final grade of 12.43 for Ronald Darby, indicating bad coverage after the receiver cut


## Where Most Metrics Fail

Metrics like **average EPA** and **average Separation** are popular when evaluating cornerback talent, but they fail to tell the full story. 

- EPA cannot account for bad coverages on errant throws, drops, and non-targeted receivers

- EPA cannot credit good coverage on plays that end in a miraculous catch anyway, or on non-targeted receivers

- Average Separation is too vague and could potentially be skewed. The cornerback could be inches away from the receiver for the entirety of the route, only to be left in the dust in the final seconds when the receiver makes his break.

What makes this grading system so important is that we evaluate only the most crucial aspect of the route: the cut.

----------------------------------------------------------------------------------

# Targeted Coverage Score After Cut vs. Targeted EPA

In [None]:
# Per-player lookup (indexed by nflId) of name, position and average EPA for
# targeted CB / DB / FS rows. EPA is averaged per playId first and then
# across those per-play means.
# NOTE(review): the per-play step is keyed on playId without gameId; confirm
# that playId values cannot repeat across games for the same player.
CBs = (
    Finaldf1
    .filter(['displayName','nflId','position','Targeted','playId','epa'], axis=1)
    .query('position == "CB" & Targeted == 1 | position == "DB" & Targeted == 1| position == "FS" & Targeted == 1')
    .groupby(['displayName','nflId','position','playId'])['epa'].mean().reset_index()
    .groupby(['displayName','nflId','position'])['epa'].mean().reset_index()
    .set_index('nflId')
)

# Keep the first row for any nflId that appears more than once.
_repeated_id = CBs.index.duplicated(keep='first')
CBs = CBs[~_repeated_id]


In [None]:
import seaborn as sns
# for basic mathematics operation 
import numpy as np
import pandas as pd
from pandas import plotting

# for visualizations
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('fivethirtyeight')

# for interactive visualizations
import plotly.offline as py
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from plotly import tools
init_notebook_mode(connected = True)
import plotly.figure_factory as ff

from sklearn.cluster import KMeans

#var1 = 'Press_Success'
var1 = 'Grade'
var2 = 'epa_'

# Average grade, number of graded routes and average EPA per closest defender.
MAAE1 = Final.groupby(['closestOpp_Id_']).agg({var1:[('mean', 'mean'),('count','count')],
                                                         var2:[('mean', 'mean')],}).reset_index(drop=False)
MAAE1.columns = MAAE1.columns.map('_'.join)

# Shorten the flattened labels: epa__mean -> epa_, Grade_mean -> mean,
# Grade_count -> count, closestOpp_Id__ -> closestOpp_Id_.
MAAE1.columns = MAAE1.columns.str.replace(var2 +'_mean' , var2)
MAAE1.columns = MAAE1.columns.str.replace(var1 + '_mean' , 'mean')
MAAE1.columns = MAAE1.columns.str.replace(var1 + '_count' , 'count')
MAAE1.columns = MAAE1.columns.str.replace('closestOpp_Id__' , 'closestOpp_Id_')

# Rescale the average grades to 0-100 across defenders, attach names, and
# keep defenders with more than 20 graded routes.
MAAE1[['mean']] = MinMaxScaler().fit_transform(MAAE1[['mean']])*100
MAAE1['displayName'] = MAAE1.closestOpp_Id_.map(CBs['displayName'])
MAAE1 = MAAE1.sort_values(by=['mean'], ascending=False).query('count > 20')

print("Total players with more than 20 targeted snaps on breaking routes: ", len(MAAE1))
print("-------------------------------------------------------------------------------")
print("Top 10 Players")


Cols = ['displayName', 'mean', 'count', 'epa_']

# NOTE(review): this rebinds MAAE (the modelling table built earlier) to the
# display table; re-running the modelling cell afterwards needs MAAE rebuilt.
MAAE = MAAE1[Cols]

MAAE.columns = ['Name', 'Avg. Grade', 'Cut Routes', 'Avg. EPA']


display(HTML(MAAE.head(10).to_html()))

fig, ax = plt.subplots(figsize=(15,10))
slope, intercept, r_value, pv, se = stats.linregress(MAAE1['mean'], MAAE1[var2])

print("Correlation:", r_value)

# x / y are passed by keyword: current seaborn no longer accepts them
# positionally. The plot is drawn on the axes created above.
sns.regplot(x=MAAE1['mean'], y=MAAE1[var2], ax=ax, line_kws={'label':'$y=%3.7s*x+%3.7s$'%(slope, intercept)})
plt.xticks(fontsize=15)
plt.xlabel("Average " + var1)
plt.legend()
plt.yticks(fontsize=15)
plt.ylabel("Average " + "EPA")

There is a strong **negative correlation of -.53** between this grading system and EPA, which suggests a relationship between a defender's ability to stay with a receiver after the cut and fewer expected points added.

# One-Cut Grade vs. Double-Move Grade

We can rank the defender among their peers for One-Cuts and Double Moves, and we will check the correlation.

In [None]:
import seaborn as sns

# Rank defenders separately on one-cut routes and double moves, then check
# how the two rankings relate.
#var1 = 'Press_Success'
var1 = 'Grade'
var2 = 'epa_'

# "IN+GO" and "OUT+GO" are treated as double moves; every other route value
# is labelled a one-cut route.
Final['RouteType'] = np.where((Final['route_'] != "IN+GO") & (Final['route_'] != "OUT+GO"), "One_Cut", "Double_Move")

# Per (closest defender, route type): mean and count of the grade column
# plus mean EPA.
MAAE1 = Final.groupby(['closestOpp_Id_','RouteType']).agg({var1:[('mean', 'mean'),('count','count')],
                                                          var2:[('mean', 'mean')],}).reset_index(drop=False)
# Flatten the two-level column index; 'RouteType' becomes 'RouteType_'.
MAAE1.columns = MAAE1.columns.map('_'.join)

# Shorten the flattened names. regex=False keeps every pattern a literal
# string regardless of the pandas version's default.
MAAE1.columns = MAAE1.columns.str.replace(var2 + '_mean', var2, regex=False)
MAAE1.columns = MAAE1.columns.str.replace(var1 + '_mean', 'mean', regex=False)
MAAE1.columns = MAAE1.columns.str.replace(var1 + '_count', 'count', regex=False)
MAAE1.columns = MAAE1.columns.str.replace('closestOpp_Id__', 'closestOpp_Id_', regex=False)

#MAAE1[['mean']] = MinMaxScaler().fit_transform(MAAE1[['mean']])
#MAAE1[['mean']] = MinMaxScaler().fit_transform(MAAE1[['mean']])*100
# CBs is indexed by nflId, so this maps the defender id to a display name.
MAAE1['displayName'] = MAAE1.closestOpp_Id_.map(CBs['displayName'])
#MAAE1['epa'] = MAAE1.closestOpp_Id_.map(CBs['epa'])
#MAAE1 = MAAE1.sort_values(by=['mean'], ascending=False).query('count >= 20')


# One row per player, one column per (statistic, route type). margins=True is
# needed for the *_All columns used below.
MAAE1 = MAAE1.pivot_table(index=['displayName'], columns='RouteType_', values=['mean','count'],aggfunc=[np.mean], fill_value=0, margins=True).reset_index()#.sort_values(by=['All'], ascending=False)
# Flatten the three-level column index; 'displayName' becomes 'displayName__'.
MAAE1.columns = MAAE1.columns.map('_'.join)
MAAE1.columns = MAAE1.columns.str.replace('mean_mean_', 'mean_', regex=False)
MAAE1.columns = MAAE1.columns.str.replace('mean_count_', 'count_', regex=False)

# margins=True also appends a summary ROW labelled "All". It is not a player,
# so drop it before it can be ranked, clustered or regressed with the rest.
MAAE1 = MAAE1[MAAE1['displayName__'] != 'All']

#MAAE1['count'] = MAAE1.iloc[:,-3:-1].apply(lambda s: (s > 0).sum(), axis=1)
#MAAE1 = MAAE1.sort_values(by=['All'], ascending=False).query('count > 6')

MAAE1 = MAAE1.sort_values(by=['mean_All'], ascending=False).query('count_Double_Move >= 4 & count_One_Cut >= 15')
# Rank 1 = highest mean grade within the filtered group.
MAAE1['One_Cut_rank'] = MAAE1['mean_One_Cut'].rank(ascending=False)
MAAE1['DoubleMove_rank'] = MAAE1['mean_Double_Move'].rank(ascending=False)
# Select the two rank columns by name rather than by position.
MAAE1['Avg_Rank'] = MAAE1[['One_Cut_rank', 'DoubleMove_rank']].mean(axis = 1, skipna = True)
# Mean grade weighted by how many routes of each type the player has.
MAAE1['WeightedMean'] = MAAE1['mean_Double_Move']*(MAAE1['count_Double_Move']/(MAAE1['count_Double_Move']+MAAE1['count_One_Cut'])) + MAAE1['mean_One_Cut']*(MAAE1['count_One_Cut']/(MAAE1['count_Double_Move']+MAAE1['count_One_Cut']))
MAAE1['RankDiffMean'] = np.abs(MAAE1['One_Cut_rank'] - MAAE1['DoubleMove_rank'])
MAAE1.drop(['count_Double_Move','count_One_Cut','count_All'], axis=1, inplace=True)

# The wording matches the filter above (>= 15 and >= 4).
print("Total players with 15 or more targeted snaps on breaking routes & 4 or more double move routes: ", len(MAAE1))

Cols = ['displayName__','mean_One_Cut', 'One_Cut_rank',  'mean_Double_Move','DoubleMove_rank',  'WeightedMean']


MAAE = MAAE1[Cols]


# Presentation-only headers for the HTML table.
MAAE.columns = ['Name','Avg. One-Cut Grade', 'One-Cut Rank',  'Avg. Double-Move Grade','Double-Move Rank','Weighted Grade']

display(HTML(MAAE.head(10).sort_values(by=['Weighted Grade'], ascending=False).to_html()))

fig, ax = plt.subplots(figsize=(15,10))
slope, intercept, r_value, pv, se = stats.linregress(MAAE1['One_Cut_rank'], MAAE1['DoubleMove_rank'])

print("Correlation:", r_value)

# x / y must be passed by keyword: seaborn deprecated positional data vectors
# in 0.11 and rejects them with a TypeError from 0.12 onwards.
sns.regplot(x=MAAE1['One_Cut_rank'], y=MAAE1['DoubleMove_rank'], ax=ax,
            line_kws={'label':'$y=%3.7s*x+%3.7s$'%(slope, intercept)})
plt.xticks(fontsize=15)
plt.xlabel('One-Cut Rank')
plt.legend()
plt.yticks(fontsize=15)
plt.ylabel('Double-Move Rank')


We actually see a slight **negative correlation of -.13** between One-Cut Rank and Double-Move Rank!

### How is this possible?

Some defenders could be more lax in coverage to prevent the double move from occurring, thus giving them a bad one-cut rank and a good double-move ranking. Another group of defenders will be overly aggressive and try to jump on every one-cut route, leading them to be burned on double-moves. We can identify each group with the use of clustering.

# Clustering Analysis

In [None]:
# Cluster defenders with K-Means on their two ranks and the gap between them,
# then draw the clusters in an interactive 3-D scatter.
#x = CBalign.drop(['Defender'], axis=1).values
cluster_features = ['One_Cut_rank', 'DoubleMove_rank', 'RankDiffMean']
x = MAAE1[cluster_features].values

km = KMeans(n_clusters=5, init='k-means++', max_iter=1000, n_init=10, random_state=0)
km.fit(x)
labels, centroids = km.labels_, km.cluster_centers_

# Store the cluster id on each row; it also drives the marker colours below.
MAAE1['labels'] = labels

marker_style = {
    'color': MAAE1['labels'],
    'colorscale': 'Plasma',
    'size': 5,
    'line': {'color': MAAE1['labels'], 'width': 10},
    'opacity': 0.8,
}
trace1 = go.Scatter3d(
    surfacecolor='darkgrey',
    x=MAAE1['One_Cut_rank'],
    y=MAAE1['DoubleMove_rank'],
    z=MAAE1['WeightedMean'],
    mode='markers',
    text=MAAE1['displayName__'],
    marker=marker_style,
)
# Name kept as in the notebook: this is the list of plotly traces.
df = [trace1]

scene_axes = {
    'xaxis': {'title': 'Elite on One Cuts -->'},
    'yaxis': {'title': '<-- Elite on Double Moves'},
    'zaxis': {'title': 'Overall Score'},
}
layout = go.Layout(
    title='Corner Types',
    margin={'l': 0, 'r': 0, 'b': 0, 't': 0},
    scene=scene_axes,
)

fig = go.Figure(data=df, layout=layout)
fig.update_layout(autosize=False, width=800, height=500)
py.iplot(fig)

## Cluster     <span style="color:blue;font-size:36px;">          BLUE       </span> - Best All-Around

These players are very rare: they are disciplined enough to stay with the receiver on double moves, and they also perform the best in the league when breaking on single-cut routes.

In [None]:
# Rows assigned to K-Means label 0, highest WeightedMean first.
(
    MAAE1
    .query('labels == 0')
    .filter(items=['displayName__', 'mean_One_Cut', 'One_Cut_rank',
                   'mean_Double_Move', 'DoubleMove_rank', 'WeightedMean'])
    .sort_values(by='WeightedMean', ascending=False)
)

## Cluster     <span style="color:#e377c2;font-size:36px;">          PINK       </span> - Gamblers

These players will perform well on any breaking route thrown to them; however, they pay the price for their knack for jumping routes when a talented receiver runs a crisp double move on them for a big play.

In [None]:
# Rows assigned to K-Means label 2, highest WeightedMean first.
(
    MAAE1
    .query('labels == 2')
    .filter(items=['displayName__', 'mean_One_Cut', 'One_Cut_rank',
                   'mean_Double_Move', 'DoubleMove_rank', 'WeightedMean'])
    .sort_values(by='WeightedMean', ascending=False)
)

## Cluster     <span style="color:#bcbd22;font-size:36px;">          YELLOW       </span> - Conservatives

These players refuse to give up big plays, maintaining strict discipline on double moves; however, their conservative play allows teams to dink and dunk down the field on easy one-cut routes.

In [None]:
# Rows assigned to K-Means label 4, highest WeightedMean first.
(
    MAAE1
    .query('labels == 4')
    .filter(items=['displayName__', 'mean_One_Cut', 'One_Cut_rank',
                   'mean_Double_Move', 'DoubleMove_rank', 'WeightedMean'])
    .sort_values(by='WeightedMean', ascending=False)
)

## Cluster     <span style="color:#9467bd;font-size:36px;">          PURPLE       </span> - Consistently Average

These players should not be feared by opponents; they are middle of the pack in both categories.

In [None]:
# Rows assigned to K-Means label 1, highest WeightedMean first.
(
    MAAE1
    .query('labels == 1')
    .filter(items=['displayName__', 'mean_One_Cut', 'One_Cut_rank',
                   'mean_Double_Move', 'DoubleMove_rank', 'WeightedMean'])
    .sort_values(by='WeightedMean', ascending=False)
)

## Cluster     <span style="color:#ff7f0e;font-size:36px;">          ORANGE       </span> - Liabilities

This group has difficulty staying with a receiver on any single-cut route, as well as on any double move. Teams should be actively targeting these players.

In [None]:
# Rows assigned to K-Means label 3, highest WeightedMean first.
(
    MAAE1
    .query('labels == 3')
    .filter(items=['displayName__', 'mean_One_Cut', 'One_Cut_rank',
                   'mean_Double_Move', 'DoubleMove_rank', 'WeightedMean'])
    .sort_values(by='WeightedMean', ascending=False)
)

# Final Thoughts:

Using this grading system, we can evaluate who is best at defending one-cut routes as well as double moves. What is also unique about this grading system is that it can be used to evaluate cornerbacks who were not targeted on the play, because we are only observing the receiver / cornerback interaction, with no knowledge of the pass outcome. The clustering analysis shows that some defenders have certain tendencies when defending these types of routes, and NFL coaches can exploit these tendencies when creating their game plan for the week. This submission also demonstrated a way to create detailed routes by analyzing the receiver's acceleration change frame-by-frame, and the grading model can be flipped to quantify receiver route-running ability for scouts and offensive coaches.


## Other Work:

1. [Shadow Cornerback + Coverage Analysis](https://www.kaggle.com/jdruzzi/shadow-cornerback-coverage-analysis)

2. [Defender Bite Velocity on Play-Action](https://www.kaggle.com/jdruzzi/defender-bite-velocity-on-play-action)

3. [Pass Coverage Classification](https://www.kaggle.com/jdruzzi/pass-coverage-classification-80-recall)

4. [Quantifying Press Coverage](https://www.kaggle.com/jdruzzi/quantifying-press-coverage-ability)

5. [Defender Tendencies: One-Cut Routes + Double Moves](https://www.kaggle.com/jdruzzi/defender-tendencies-one-cut-routes-double-moves)

## Data:

[Revised BDB Data](https://www.kaggle.com/jdruzzi/revised-bdb-data)

# Bonus Content

--------------------------------------------------------

![](https://media.giphy.com/media/wdIRaIIikhq7LHRV9a/giphy.gif)

In [None]:
# Graded rows for a single Allen Hurns play, lowest Grade first.
play_rows = Final.query('displayName_ == "Allen Hurns" & gameId_ == 2018110500 & playId_ == 1918 ')
ex = play_rows.sort_values(by='Grade').filter(
    items=['gameId_', 'playId_', 'route_', 'Route2_', 'epa_', 'Grade']
)
# Shorter headers for display.
ex.columns = ['gameId', 'playId', 'route', 'route2', 'epa', 'Grade']
ex

-----------------------------------------------------------

![](https://media.giphy.com/media/2UtOjJh635SBmLMou3/giphy.gif)

In [None]:
# Graded rows for a single Odell Beckham play, lowest Grade first.
play_rows = Final.query('displayName_ == "Odell Beckham" & gameId_ == 2018100701 & playId_ == 2460 ')
ex = play_rows.sort_values(by='Grade').filter(
    items=['gameId_', 'playId_', 'route_', 'Route2_', 'epa_', 'Grade']
)
# Shorter headers for display.
ex.columns = ['gameId', 'playId', 'route', 'route2', 'epa', 'Grade']
ex

----------------------------------------------------

![](https://media.giphy.com/media/t2o10PsQUtAUCmFeLq/giphy.gif)

In [None]:
# Graded rows for a single Keenan Allen play, lowest Grade first.
play_rows = Final.query('displayName_ == "Keenan Allen" & gameId_ == 2018110408 & playId_ == 2245 ')
ex = play_rows.sort_values(by='Grade').filter(
    items=['gameId_', 'playId_', 'route_', 'Route2_', 'epa_', 'Grade']
)
# Shorter headers for display.
ex.columns = ['gameId', 'playId', 'route', 'route2', 'epa', 'Grade']
ex

-----------------------------------------------------------

![](https://media.giphy.com/media/r48xl3h1AzQufggLMP/giphy.gif)

In [None]:
# Graded rows for a single Albert Wilson play, lowest Grade first.
play_rows = Final.query('displayName_ == "Albert Wilson" & gameId_ == 2018101404 & playId_ == 2046 ')
ex = play_rows.sort_values(by='Grade').filter(
    items=['gameId_', 'playId_', 'route_', 'Route2_', 'epa_', 'Grade']
)
# Shorter headers for display.
ex.columns = ['gameId', 'playId', 'route', 'route2', 'epa', 'Grade']
ex

------------------------------------------------------------

![](https://media.giphy.com/media/f4naKrzrdy6cNNV6SE/giphy.gif)

In [None]:
# Graded rows for a single Amari Cooper play, lowest Grade first.
play_rows = Final.query('displayName_ == "Amari Cooper" & gameId_ == 2018093009 & playId_ == 5050 ')
ex = play_rows.sort_values(by='Grade').filter(
    items=['gameId_', 'playId_', 'route_', 'Route2_', 'epa_', 'Grade']
)
# Shorter headers for display.
ex.columns = ['gameId', 'playId', 'route', 'route2', 'epa', 'Grade']
ex