# Normalized Closing Ratio: A Metric for Assessing Defensive Players' Closing Effectiveness

The goal of this paper is to answer the following question proposed for the 2021 Big Data Bowl:

*“Which players are the best at closing on receivers when the ball is in the air?”*

To answer this question, I propose a novel metric I’ve dubbed the **Normalized Closing Ratio (NCR)** which describes a defender's ability to close on a ball relative to the targeted receiver. I show how this metric is closely linked to the outcome of passing plays (caught, incomplete, or interception). I then show how it can be used to evaluate defensive players’ ability to close on the ball while it is in the air. Finally, I discuss how I think this metric could be improved upon in future analysis.


In [None]:
"""
Code: This cell holds all the helper functions used by my analysis.
"""
import pandas as pd
import numpy as np

import matplotlib.pylab as plt
import matplotlib.patches as patches
import matplotlib as mpl

import seaborn as sns
from glob import glob
import plotly.express as px

from tqdm.notebook import tqdm

from scipy.spatial import distance_matrix

import os
import seaborn as sns

# Use the fully-qualified option key: the bare "max_columns" pattern also
# matches "styler.render.max_columns" in newer pandas and raises OptionError.
pd.set_option("display.max_columns", 500)


class NFLPlay:
    def __init__(
        self,
        gameId,
        playId,
        plays_df=None,
        players_df=None,
        games_df=None,
        tracking_df=None,
        targeted_df=None,
        compute_play_stats=True,
    ):
        """
        A class for organizing NFL play data.
        """
        self.playId = playId
        self.gameId = gameId

        self.compute_play_stats = compute_play_stats

        if plays_df is not None:
            self._get_play_details(playId, gameId, plays_df)
        if games_df is not None:
            self._get_game_details(gameId, games_df)
            self.gethomeaway()
        if tracking_df is not None:
            self._get_tracking_data(playId, gameId, tracking_df)
            self.add_tracking_isoffense()
        if targeted_df is not None:
            self.targeted_player = (
                targeted_df.loc[
                    (targeted_df["gameId"] == gameId)
                    & (targeted_df["playId"] == playId)
                ]["targetNflId"]
                .values[0]
                .astype("int")
            )
            # Add isTargeted to tracking data
            self.tracking_data["isTargeted"] = (
                self.tracking_data["nflId"].fillna(0).astype("int")
                == self.targeted_player
            )

        self.events = self.tracking_data["event"].unique().tolist()

    def __str__(self):
        return f"NFLPlay - gameId: {self.gameId} - playId: {self.playId}"

    def __repr__(self):
        return f"NFLPlay - gameId: {self.gameId} - playId: {self.playId}"

    def _get_play_details(self, playId, gameId, plays_df):
        play_details = plays_df.loc[
            (plays_df["playId"] == playId) & (plays_df["gameId"] == gameId)
        ]
        # Only one play should exist
        assert len(play_details) == 1
        self.playDetailsDict = play_details.to_dict(orient="records")[0]
        for k, v in self.playDetailsDict.items():
            setattr(self, k, v)

    def _get_game_details(self, gameId, games_df):
        game_details = games_df.loc[games_df["gameId"] == gameId]
        # Only one play should exist
        assert len(game_details) == 1
        self.gameDetailsDict = game_details.to_dict(orient="records")[0]
        for k, v in self.gameDetailsDict.items():
            setattr(self, k, v)

    def gethomeaway(self):
        if self.homeTeamAbbr == self.possessionTeam:
            self.possessionHomeAway = "home"
        else:
            self.possessionHomeAway = "away"

    def _get_tracking_data(self, playId, gameId, tracking_df):
        tracking_data = (
            tracking_df.loc[
                (tracking_df["playId"] == playId) & (tracking_df["gameId"] == gameId)
            ]
            .reset_index(drop=True)
            .copy()
        )

        tracking_data = self._format_tracking(tracking_data)
        if self.compute_play_stats:
            self._compute_play_stats(tracking_data)

        if "Football" in tracking_data["displayName"].unique():
            tracking_data["isFootball"] = tracking_data["displayName"] == "Football"

        self.tracking_data = tracking_data

    def _format_tracking(self, tracking_data):
        tracking_data["time"] = pd.to_datetime(tracking_data["time"])
        #         tracking_data['nflId'] = tracking_data['nflId'].astype('int')
        #         tracking_data['jerseyNumber'] = tracking_data['jerseyNumber'].astype('int')
        return tracking_data

    def _compute_play_stats(self, tracking_data):
        self.playLengthSeconds = (
            tracking_data["time"].max() - tracking_data["time"].min()
        ).seconds

    def get_events(self):
        return self.events

    def get_snap_positions(self):
        self.snap_positions = (
            self.tracking_data.loc[self.tracking_data["event"] == "ball_snap"]
            .reset_index(drop=True)
            .copy()
        )

    def get_event_positions(self, event):
        positions = (
            self.tracking_data.loc[self.tracking_data["event"] == event]
            .reset_index(drop=True)
            .copy()
        )
        return positions

    def get_wr_cusion(self):
        """
        For each wide receiver at the ball snap, get the closest
        defender distance.
        """
        wr_ids = self.tracking_data[
            (self.tracking_data["position"] == "WR")
            & (self.tracking_data["team"] == self.possessionHomeAway)
        ]["nflId"].unique()
        if len(wr_ids) == 0:
            return "No wide receivers in play"
        for wr_id in wr_ids:
            print(wr_id)
        return

    def add_time_relative_snap(self):
        self.tracking_data["time"] = pd.to_datetime(self.tracking_data["time"])
        self.tracking_data["snap_time"] = self.tracking_data.query(
            'event == "ball_snap"'
        )["time"].values[0]
        self.tracking_data["snap_time"] = self.tracking_data[
            "snap_time"
        ].dt.tz_localize(self.tracking_data["time"].dt.tz)

        return

    def plot_snap(self):
        """
        Scatter the ``ball_snap`` positions on a new figure: home in red, away
        in blue, football in green, with a horizontal line at
        ``absoluteYardlineNumber``. The field's ``y`` is drawn on the
        horizontal axis and ``x`` on the vertical axis.
        """
        self.get_snap_positions()
        fig, ax = plt.subplots()
        team_colors = (("home", "red"), ("away", "blue"), ("football", "green"))
        for team_name, dot_color in team_colors:
            team_rows = self.snap_positions.query("team == @team_name")
            team_rows.plot(x="y", y="x", kind="scatter", ax=ax, color=dot_color)
        ax.set_xlim(0, 55)
        ax.axhline(self.absoluteYardlineNumber, color="blue")

    def _animated_slice(self, df):
        """
        Build an animated plotly scatter of player positions, one frame per ``frameId``.

        Parameters
        ----------
        df : DataFrame
            Tracking rows to animate. It is not modified: the string version
            of ``time`` is written to a new frame, so passing
            ``self.tracking_data`` no longer turns its ``time`` column into
            strings.

        Returns
        -------
        The plotly figure, titled with ``self.playDescription``.
        """
        # ``assign`` returns a new frame, leaving the caller's data untouched
        frame_df = df.assign(time=df["time"].astype("str"))
        fig = px.scatter(
            frame_df,
            x="x",
            y="y",
            color="team",
            text="position",
            animation_frame="frameId",
            animation_group="position",
            range_x=[0, 120],
            range_y=[0, 53.3],
            hover_data=[
                "displayName",
                "jerseyNumber",
                "s",
                "a",
                "dis",
                "o",
                "playDirection",
            ],
            title=self.playDescription,
        )
        fig.update_traces(textposition="top center", marker_size=10)
        fig.update_layout(
            paper_bgcolor="darkgreen", plot_bgcolor="darkgreen", font_color="white"
        )
        return fig

    def animate_play(self):
        """Build the animated figure for the whole play's tracking data and show it."""
        animation = self._animated_slice(self.tracking_data)
        animation.show()

    def _get_play_offense_team(self):
        """
        Return which team is offense out of "home" or "away"
        """
        if (
            self.gameDetailsDict["homeTeamAbbr"]
            == self.playDetailsDict["possessionTeam"]
        ):
            return "home"
        elif (
            self.gameDetailsDict["visitorTeamAbbr"]
            == self.playDetailsDict["possessionTeam"]
        ):
            return "away"

        print("Error finding offensive team!!!")

    def add_tracking_isoffense(self):
        """
        Add 'isOffense' column to the tracking data
        """
        offense_team = self._get_play_offense_team()

        self.tracking_data["isOffense"] = self.tracking_data["team"] == offense_team

    def add_play_routes(self, ax, invert_xy=False):
        """
        Add Routes to play plot
        """
        route_nflids = self.tracking_data.dropna(subset=["route"])["nflId"].unique()
        for i in route_nflids:
            route_data = self.tracking_data.query("nflId == @i")
            route_name = route_data["route"].values[0]
            if invert_xy:
                route_data.plot(
                    x="y_", y="x_", ax=ax, label=route_name, lw=4, alpha=0.7
                )
            else:
                route_data.plot(x="x", y="y", ax=ax, label=route_name, lw=4, alpha=0.7)
        return ax

    def plot_field_snap(
        self,
        marker_size=70,
        off_color="tab:orange",
        off_marker="o",
        def_color="tab:red",
        def_marker="X",
        ball_color="tab:brown",
        ball_marker="d",
        ball_marker_rot=90,
        targeted_player_color="tab:olive",
        field_color="forestgreen",
        fieldline_color="white",
        fieldline_alpha=0.8,
        show_los=True,
        los_color="c",
        show_firstdown_line=True,
        first_down_color="yellow",
        figsize=(12, 6.5),
        show_routes=False,
    ):
        """
        Plot the entire field at the moment of the snap.

        Offense, defense and the ball are scattered from the ``ball_snap``
        rows onto a field built by ``create_football_field``. Optionally the
        targeted player is highlighted (``targeted_player_color`` not None),
        and the line of scrimmage, first-down line and routes are drawn.

        The first-down line is skipped (after printing an error) when the
        play direction is neither "right" nor "left"; previously that case
        failed on an unbound variable.

        Returns
        -------
        (fig, ax) as returned by ``create_football_field``.
        """
        fig, ax = create_football_field(
            field_color=field_color,
            linecolor=fieldline_color,
            linealpha=fieldline_alpha,
            figsize=figsize,
        )
        self.get_snap_positions()
        off_tracking = self.snap_positions.query("isOffense")
        def_tracking = self.snap_positions.query('not isOffense and team != "football"')
        ball_tracking = self.snap_positions.query('team == "football"')

        plt.scatter(
            off_tracking["x"],
            off_tracking["y"],
            zorder=10,
            s=marker_size,
            color=off_color,
            marker=off_marker,
        )
        plt.scatter(
            def_tracking["x"],
            def_tracking["y"],
            zorder=10,
            s=marker_size,
            color=def_color,
            marker=def_marker,
        )
        # Rotate ball diamond
        ball_marker = mpl.markers.MarkerStyle(ball_marker)
        ball_marker._transform.rotate_deg(ball_marker_rot)
        plt.scatter(
            ball_tracking["x"],
            ball_tracking["y"],
            zorder=10,
            s=marker_size,
            color=ball_color,
            marker=ball_marker,
        )

        if targeted_player_color is not None:
            # Drawn one z-level higher so it covers the regular offense marker
            targeted_player = self.snap_positions.query("isTargeted")
            plt.scatter(
                targeted_player["x"],
                targeted_player["y"],
                zorder=11,
                s=marker_size,
                color=targeted_player_color,
                marker=off_marker,
            )

        if show_los:
            ax.axvline(
                self.playDetailsDict["absoluteYardlineNumber"],
                color=los_color,
                alpha=0.5,
            )
        if show_firstdown_line:
            play_dir = self.tracking_data["playDirection"].values[0]
            first_down_abs_line = None
            if play_dir == "right":
                first_down_abs_line = (
                    self.playDetailsDict["absoluteYardlineNumber"]
                    + self.playDetailsDict["yardsToGo"]
                )
            elif play_dir == "left":
                first_down_abs_line = (
                    self.playDetailsDict["absoluteYardlineNumber"]
                    - self.playDetailsDict["yardsToGo"]
                )
            else:
                print("Error finding play direction")
            # Only draw the line when the direction was recognised
            if first_down_abs_line is not None:
                ax.axvline(first_down_abs_line, color=first_down_color, alpha=0.5)

        if show_routes:
            ax = self.add_play_routes(ax)
        return fig, ax

    def create_normalized_xy(self):
        if self.tracking_data["playDirection"].values[0] == "left":
            self.tracking_data["x_"] = self.tracking_data["x"] * -1
            self.tracking_data["y_"] = self.tracking_data["y"]
        else:
            self.tracking_data["x_"] = self.tracking_data["x"]
            self.tracking_data["y_"] = (self.tracking_data["y"] * -1) + 53.3

    def plot_routes(
        self,
        ax=None,
        event="ball_snap",
        marker_size=70,
        off_color="tab:orange",
        off_marker="o",
        def_color="tab:red",
        def_marker="X",
        ball_color="tab:brown",
        qb_marker="p",
        ball_marker="d",
        ball_marker_rot=0,
        targeted_player_color="yellow",
    ):
        """
        Plot player positions at ``event`` together with the routes of the
        play, using the normalized coordinates with axes swapped
        (``y_`` horizontal, ``x_`` vertical).

        The quarterback gets its own marker when ``qb_marker`` is not None and
        the targeted player is highlighted when ``targeted_player_color`` is
        not None. Returns ``(fig, ax)`` when no ``ax`` was passed in,
        otherwise just ``ax``.
        """
        self.create_normalized_xy()
        positions = self.get_event_positions(event)

        return_fig = ax is None
        if return_fig:
            fig, ax = plt.subplots()

        # The QB is left out of the generic offense layer when drawn separately
        offense_filter = (
            "isOffense" if qb_marker is None else "isOffense and position != 'QB'"
        )

        # Rotate ball diamond
        ball_marker = mpl.markers.MarkerStyle(ball_marker)
        ball_marker._transform.rotate_deg(ball_marker_rot)

        # (rows, zorder, color, marker), drawn in this order
        layers = [
            (positions.query(offense_filter), 10, off_color, off_marker),
            (
                positions.query('not isOffense and team != "football"'),
                10,
                def_color,
                def_marker,
            ),
            (positions.query('team == "football"'), 10, ball_color, ball_marker),
        ]
        if targeted_player_color is not None:
            layers.append(
                (positions.query("isTargeted"), 11, targeted_player_color, off_marker)
            )
        if qb_marker is not None:
            layers.append((positions.query("position == 'QB'"), 11, off_color, qb_marker))

        for rows, layer_z, layer_color, layer_marker in layers:
            ax.scatter(
                rows["y_"],
                rows["x_"],
                zorder=layer_z,
                s=marker_size,
                color=layer_color,
                marker=layer_marker,
            )

        ax = self.add_play_routes(ax, invert_xy=True)
        # Horizontal lines every 10 units plus the two sidelines
        for yard in range(-200, 200, 10):
            ax.axhline(yard, color="black", lw=0.5)
        for sideline in (0, 53.3):
            ax.axvline(sideline, color="black", lw=0.5)
        ax.set_facecolor("lightgreen")
        if return_fig:
            return fig, ax
        return ax

    def get_seconds_to_sack(self):
        snap_time = self.tracking_data.query('event == "ball_snap"')["time"].values[0]
        sack_time = self.tracking_data.query(
            'event == "qb_sack" or event == "qb_strip_sack"'
        )["time"].values[0]
        tts = sack_time - snap_time
        return tts.astype("timedelta64[ms]").astype("float") / 1000

    def get_closest_defender(self, nflId, event="ball_snap"):
        """
        Get the closest defender to the player at a specific event.
        """
        if nflId == "targeted":
            nflId = self.targeted_player
        event_data = self.tracking_data.query("event == @event")
        targeted_dist = distance_matrix(
            event_data.query("nflId == @nflId")[["x", "y"]].values,
            event_data.query("not isOffense")[["x", "y"]].values,
        )
        return np.min(targeted_dist)

    def add_event_distances(self, event):
        """
        Add distances of defensive players from ball and targeted receiver
        at the time of a given event.
        """
        targeted_xy_event = self.tracking_data.query(
            "event == @event & isTargeted == True"
        )[["x", "y"]].values
        ball_xy_event = self.tracking_data.query(
            'event == @event & displayName == "Football"'
        )[["x", "y"]].values
        defenders_xy_event = self.tracking_data.query(
            'event == @event & isOffense == False and displayName != "Football"'
        )[["x", "y"]].values
        off_xy_event = self.tracking_data.query(
            'event == @event & isOffense == False and displayName != "Football"'
        )[["x", "y"]].values

        self.tracking_data.loc[
            (self.tracking_data["event"] == event)
            & (~self.tracking_data["isOffense"])
            & (self.tracking_data["displayName"] != "Football"),
            f"dist_targeted_{event}",
        ] = distance_matrix(targeted_xy_event, defenders_xy_event)[0]

        self.tracking_data.loc[
            (self.tracking_data["event"] == event)
            & (~self.tracking_data["isOffense"])
            & (self.tracking_data["displayName"] != "Football"),
            f"dist_ball_{event}",
        ] = distance_matrix(ball_xy_event, defenders_xy_event)[0]

    def add_event_distances_all(self):
        for e in self.events:
            self.add_event_distances(e)

    def plot_targeted_coverage(
        self,
        ax=None,
        alpha=1,
        event="ball_snap",
        plot_event=None,
        marker_size=70,
        off_color="tab:orange",
        off_marker="o",
        def_color="tab:red",
        def_marker="X",
        ball_color="tab:brown",
        qb_marker="p",
        ball_marker="d",
        ball_marker_rot=0,
        targeted_player_color="yellow",
        lines_color="white",
        grass_color="lightgreen",
        include_qb=True,
        show_ticks=False,
    ):

        self.add_event_distances_all()

        if "pass_forward" not in self.events:
            print(f"No pass on this play, events: {self.events}")
            return

        closing_defender = (
            self.tracking_data.dropna(subset=["dist_ball_pass_arrived"])
            .sort_values("dist_ball_pass_arrived")
            .head(1)["nflId"]
            .values[0]
        )
        self.tracking_data["isClosingDefender"] = (
            self.tracking_data["nflId"] == closing_defender
        )

        positions = self.get_event_positions(event)
        positions = positions.query(
            "isFootball or isClosingDefender or isTargeted"
        ).copy()
        if self.tracking_data["playDirection"].values[0] == "left":
            self.tracking_data["x_"] = self.tracking_data["x"] * -1
            self.tracking_data["y_"] = self.tracking_data["y"]
            positions["x_"] = positions["x"] * -1
            positions["y_"] = positions["y"]
        else:
            self.tracking_data["x_"] = self.tracking_data["x"]
            self.tracking_data["y_"] = (self.tracking_data["y"] * -1) + 53.3
            positions["x_"] = positions["x"]
            positions["y_"] = (positions["y"] * -1) + 53.3

        if ax is None:
            fig, ax = plt.subplots()
            return_fig = True
        else:
            return_fig = False

        if qb_marker is not None:
            off_tracking = positions.query("isOffense and position != 'QB'")
        else:
            off_tracking = positions.query("isOffense")
        def_tracking = positions.query('not isOffense and team != "football"')
        ball_tracking = positions.query('team == "football"')

        ball_snap_frame = self.tracking_data.query('event == "ball_snap"')[
            "frameId"
        ].values[0]
        pass_forward_frame = self.tracking_data.query('event == "pass_forward"')[
            "frameId"
        ].values[0]
        pass_arrived_frame = self.tracking_data.query('event == "pass_arrived"')[
            "frameId"
        ].values[0]
        ballinair_tracking = self.tracking_data.query(
            "frameId >= @pass_forward_frame and frameId <= @pass_arrived_frame"
        ).copy()
        snap_arrived_tracking = self.tracking_data.query(
            "frameId >= @ball_snap_frame and frameId <= @pass_arrived_frame"
        ).copy()

        if plot_event is not None:
            snap_arrived_tracking = snap_arrived_tracking.query("event == @plot_event")
            ballinair_tracking = ballinair_tracking.query("event == @plot_event")
            snap_arrived_tracking.query("isClosingDefender").plot(
                x="y_",
                y="x_",
                kind="scatter",
                lw=5,
                ax=ax,
                color="red",
                label="Closing Defender",
            )
            snap_arrived_tracking.query("isTargeted").plot(
                x="y_",
                y="x_",
                kind="scatter",
                lw=5,
                ax=ax,
                color="orange",
                label="Targeted Receiver",
            )
            ballinair_tracking.query("isFootball").plot(
                x="y_",
                y="x_",
                kind="scatter",
                lw=2,
                ax=ax,
                color="grey",
                label="Football",
            )

        else:
            snap_arrived_tracking.query("isClosingDefender").plot(
                x="y_",
                y="x_",
                kind="line",
                lw=3,
                alpha=alpha,
                ax=ax,
                color="red",
                label="Closing Defender",
            )
            snap_arrived_tracking.query("isTargeted").plot(
                x="y_",
                y="x_",
                kind="line",
                lw=3,
                alpha=alpha,
                ax=ax,
                color="orange",
                label="Targeted Receiver",
            )
            ballinair_tracking.query("isFootball").plot(
                x="y_",
                y="x_",
                style="--",
                lw=2,
                alpha=alpha,
                ax=ax,
                color="grey",
                label="Football",
            )
        if include_qb:
            ballinair_tracking.query(
                'position == "QB" and event == "pass_forward"'
            ).plot(
                x="y_",
                y="x_",
                kind="scatter",
                alpha=alpha,
                lw=3,
                ax=ax,
                color="blue",
                label="Quarterback",
            )

        for x in range(
            -200,
            200,
            10,
        ):
            ax.plot([0, 53.3],[x,x], color=lines_color, lw=0.5)
        ax.axvline(0, color=lines_color, lw=0.5)
        ax.axvline(53.3, color=lines_color, lw=0.5)
        ax.set_facecolor(grass_color)
        if return_fig:
            return fig, ax
        
        if show_ticks:
            hash_range = range(-200, 200)
            for x in hash_range:
                ax.plot([0.4, 0.7], [x, x], color="white", alpha=0.5)
                ax.plot([53.0, 52.5], [x, x], color="white", alpha=0.5)
                ax.plot([22.91, 23.57], [x, x], color="white", alpha=0.5)
                ax.plot([29.73, 30.39], [x, x], color="white", alpha=0.5)     
        return ax

    def get_closing_track(self):
        self.create_normalized_xy()
        self.add_event_distances_all()
        self.add_dist_ball_destination()
        if "pass_forward" not in self.events:
            print(f"No pass on this play, events: {self.events}")
            return

        closing_defender = (
            self.tracking_data.dropna(subset=["dist_ball_pass_arrived"])
            .sort_values("dist_ball_pass_arrived")
            .head(1)["nflId"]
            .values[0]
        )
        self.tracking_data["isClosingDefender"] = (
            self.tracking_data["nflId"] == closing_defender
        )
        self.tracking_data["isFootball"] = (
            self.tracking_data["displayName"] == "Football"
        )

        ball_snap_frame = self.tracking_data.query('event == "ball_snap"')[
            "frameId"
        ].values[0]
        pass_forward_frame = self.tracking_data.query('event == "pass_forward"')[
            "frameId"
        ].values[0]
        pass_arrived_frame = self.tracking_data.query('event == "pass_arrived"')[
            "frameId"
        ].values[0]
        ballinair_tracking = self.tracking_data.query(
            "frameId >= @pass_forward_frame and frameId <= @pass_arrived_frame"
        ).copy()
        snap_arrived_tracking = self.tracking_data.query(
            "frameId >= @ball_snap_frame and frameId <= @pass_arrived_frame"
        ).copy()
        df1 = snap_arrived_tracking.query("isClosingDefender")
        df2 = snap_arrived_tracking.query("isTargeted")
        df3 = ballinair_tracking.query("isFootball")
        df4 = ballinair_tracking.query('position == "QB"')
        return pd.concat([df1, df2, df3, df4])

    def get_ball_dist_downfield(self, return_dist=False):
        """
        Calculate the distance downfield
        """
        #         try:
        self.pass_dist_downfield = np.abs(
            self.tracking_data.query(
                'event == "pass_forward" and displayName == "Football"'
            )["x"].values[0]
            - self.tracking_data.query(
                'event == "pass_arrived" and displayName == "Football"'
            )["x"].values[0]
        )
        #         except Exception as e:
        #             print(e)
        #             self.pass_dist_downfield = np.nan
        if return_dist:
            return self.pass_dist_downfield

    def add_dist_ball_destination(self):
        self.tracking_data["pass_destination_x"] = self.tracking_data.query(
            'isFootball and event == "pass_arrived"'
        )["x"].values[0]
        self.tracking_data["pass_destination_y"] = self.tracking_data.query(
            'isFootball and event == "pass_arrived"'
        )["y"].values[0]

        for event in self.events:
            self.tracking_data.loc[
                self.tracking_data["event"] == event, "dist_pass_destination"
            ] = distance_matrix(
                self.tracking_data.query("event == @event")[["x", "y"]].values,
                self.tracking_data.query('isFootball and event == "pass_arrived"')[
                    ["x", "y"]
                ].values,
            )


def get_tracking_data_deep_coverage_plays(plays_df, tracking_df, targeted_df, games_df):
    """
    Build the closing-track rows for every deep-pass play.

    Each play flagged ``isDeepPass`` in ``plays_df`` is loaded as an
    ``NFLPlay`` and its ``get_closing_track()`` result is collected. Plays
    that raise ``IndexError`` or ``ValueError`` are skipped, as are plays for
    which no track is returned.

    Returns
    -------
    DataFrame with all collected tracks stacked, or an empty DataFrame when
    no play produced a track (previously ``pd.concat`` raised in that case).
    """
    deep_plays = plays_df.query("isDeepPass").reset_index(drop=True)
    tracks = []
    for _, play in tqdm(deep_plays.iterrows(), total=len(deep_plays)):
        try:
            myplay = NFLPlay(
                gameId=play["gameId"],
                playId=play["playId"],
                plays_df=plays_df,
                games_df=games_df,
                tracking_df=tracking_df,
                targeted_df=targeted_df,
                compute_play_stats=True,
            )
            track = myplay.get_closing_track()
        except (IndexError, ValueError):
            # Some expected tracking data does not exist
            continue
        if track is not None:
            tracks.append(track)

    if not tracks:
        return pd.DataFrame()
    return pd.concat(tracks)


def normalize_xy_deepcoverage_relativeqb(deep_coverage):
    """
    Express the normalized coordinates of each play relative to its quarterback.

    For every play (``gameId`` + ``playId``), the anchor is the QB's
    ``x_``/``y_`` on the QB row with the lowest ``frameId``. The frame is
    modified in place and returned with these added columns:
    ``game_play``, ``x__anchor``, ``y__anchor``, ``x_norm``, ``y_norm``.
    Plays without a QB row get NaN anchors.
    """
    deep_coverage["game_play"] = (
        deep_coverage["gameId"].astype("str")
        + "_"
        + deep_coverage["playId"].astype("str")
    )

    # One pass: earliest QB row per play for both normalized coordinates
    qb_anchor = (
        deep_coverage.sort_values("frameId", ascending=True)
        .query('position == "QB"')
        .groupby("game_play")[["x_", "y_"]]
        .first()
    )
    x__ref_dict = qb_anchor["x_"].to_dict()
    y__ref_dict = qb_anchor["y_"].to_dict()

    deep_coverage["x__anchor"] = deep_coverage["game_play"].map(x__ref_dict)
    deep_coverage["y__anchor"] = deep_coverage["game_play"].map(y__ref_dict)

    deep_coverage["x_norm"] = deep_coverage["x_"] - deep_coverage["x__anchor"]
    deep_coverage["y_norm"] = deep_coverage["y_"] - deep_coverage["y__anchor"]
    return deep_coverage


def _count_position(personnel, position):
    """Extract the head-count for ``position`` (e.g. "RB") from personnel strings."""
    pattern = (
        rf"(?:([1-9]?[0-9])[a-zA-Z ]{{0,20}}(?:{position}))"
        rf"|(?:(?:{position})[a-zA-Z ]{{0,20}}([1-9]?[0-9]))"
    )
    groups = personnel.str.extract(pattern)
    # Group 0 is the "count before position" form, group 1 the "position
    # before count" form; use whichever matched, 0 when neither did.
    return groups[0].fillna(groups[1]).fillna(0).astype("int")


def prep_plays_df(plays_df):
    """
    Add derived columns to the plays frame (in place) and return it.

    Added columns:
    - ``game_play``: "<gameId>_<playId>".
    - ``isDeepPass``: whether ``playDescription`` contains "deep"
      (case-insensitive); False when the description is missing.
    - ``Ocount_RB/WR/TE/OL``: counts parsed from ``personnelO``.
    - ``Dcount_DL/LB/DB``: counts parsed from ``personnelD``.
    - ``ScoreDifferential``: absolute pre-snap score difference.
    """
    plays_df["game_play"] = (
        plays_df["gameId"].astype("str") + "_" + plays_df["playId"].astype("str")
    )

    # na=False keeps the column boolean so it can be used directly as a filter
    plays_df["isDeepPass"] = (
        plays_df["playDescription"].str.lower().str.contains("deep", na=False)
    )

    # Offensive Personnel Counts
    for position in ("RB", "WR", "TE", "OL"):
        plays_df[f"Ocount_{position}"] = _count_position(
            plays_df["personnelO"], position
        )

    # Defensive Personnel Counts
    for position in ("DL", "LB", "DB"):
        plays_df[f"Dcount_{position}"] = _count_position(
            plays_df["personnelD"], position
        )

    plays_df["ScoreDifferential"] = (
        plays_df["preSnapVisitorScore"] - plays_df["preSnapHomeScore"]
    ).abs()

    return plays_df


# ----- Modeling Code ------------- #

import lightgbm as lgb
import shap


def model_deep_pass_probability(plays_df):
    """
    Fit a LightGBM deep-pass classifier and explain it with SHAP.

    The features are the box/pass-rusher counts plus the offensive and
    defensive personnel-count columns; the target is ``isDeepPass`` cast to
    int. A 5-fold cross-validation with early stopping decides how many
    boosting rounds to use, then a final booster is trained on every row.

    Parameters
    ----------
    plays_df : pd.DataFrame
        Plays table holding the feature columns listed below and an
        ``isDeepPass`` column.

    Returns
    -------
    tuple
        ``(X, explainer, shap_values)``: the feature frame (missing values
        filled with 0), the ``shap.TreeExplainer`` built on the final
        booster, and the SHAP values it yields for that frame.
    """
    # Situational columns (quarter, down, yardsToGo, absoluteYardlineNumber,
    # epa, ScoreDifferential) are deliberately not part of the feature set.
    feature_names = [
        "defendersInTheBox",
        "numberOfPassRushers",
        "Ocount_RB",
        "Ocount_WR",
        "Ocount_TE",
        "Ocount_OL",
        "Dcount_DL",
        "Dcount_LB",
        "Dcount_DB",
    ]

    lgb_params = {
        "max_bin": 10,
        "learning_rate": 0.01,
        "boosting_type": "gbdt",
        "objective": "binary",
        "metric": "auc",
        "sub_feature": 0.50,
        "bagging_fraction": 0.85,
        "bagging_freq": 40,
        "num_leaves": 512,
        "min_data": 50,
        "min_hessian": 0.05,
        "verbose": 0,
    }

    features = plays_df[feature_names].fillna(0)
    target = plays_df["isDeepPass"].astype("int")
    training_set = lgb.Dataset(data=features, label=target)

    # The length of the CV metric history is the round count that early
    # stopping settled on.
    cv_history = lgb.cv(lgb_params, training_set, early_stopping_rounds=50, nfold=5)
    n_rounds = len(cv_history["auc-mean"])

    booster = lgb.train(lgb_params, training_set, num_boost_round=n_rounds)

    # Computed for inspection while debugging; it is not part of the return.
    feature_importance_df = pd.DataFrame(
        index=feature_names, data=booster.feature_importance(), columns=["importance"]
    )

    tree_explainer = shap.TreeExplainer(booster)
    contributions = tree_explainer.shap_values(features)

    return features, tree_explainer, contributions


def plot_play_defender(
    ax,
    gameId,
    playId,
    plays_df,
    games_df,
    tracking_df,
    targeted_df,
    show_ddrt=False,
    show_ddra=False,
    show_dddt=False,
    show_ddda=False,
    show_tddt=False,
    show_tdda=False,
    yaxis_pad=(-10, 45),
    ddrt_pad=(0, 0),
    ddra_pad=(0, 0),
    dddt_pad=(0, 0),
    ddda_pad=(0, 0),
    tddt_pad=(0, 0),
    tdda_pad=(0, 0),
    add_description=False,
    description_cutoff=99999,
    markersize=50,
    title=None,
    title_fs=15,
    marker_alpha=0.5,
    show_ticks=True,
):
    """
    Plot one play's targeted coverage and optionally label metric distances.

    An ``NFLPlay`` is built for the play, its targeted coverage is drawn
    three times (at ``pass_forward``, at ``pass_arrived`` and once more with
    ``alpha=0.3``), and the ``ball_snap`` positions of both teams are added
    as grey markers. Each ``show_*`` flag then adds one labelled distance:
    a text label with two arrows, one to each end of the measured segment.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    gameId, playId : identifiers of the play.
    plays_df, games_df, tracking_df, targeted_df : frames passed to ``NFLPlay``.
    show_dddt, show_ddda : label the closing defender's
        ``dist_pass_destination`` at ``pass_forward`` / ``pass_arrived``.
    show_ddrt, show_ddra : label the closing defender's
        ``dist_targeted_pass_forward`` / ``dist_targeted_pass_arrived``
        (presumably the distance to the targeted receiver).
    show_tddt, show_tdda : label the targeted receiver's
        ``dist_pass_destination`` at ``pass_forward`` / ``pass_arrived``.
    yaxis_pad : (lower, upper) offsets added to the play's
        ``absoluteYardlineNumber`` (negated when the mean ``x_`` is below
        zero) to set the y-limits; used as-is when the yardline is NaN.
    ddrt_pad, ddra_pad, dddt_pad, ddda_pad, tddt_pad, tdda_pad :
        (dx, dy) offsets of each label from its base point.
    add_description : add the play description as figure text.
    description_cutoff : maximum number of description characters shown.
    markersize, marker_alpha : size and alpha of the snap markers.
    title, title_fs : axes title and its font size.
    show_ticks : draw short white tick segments at every integer y from
        -200 to 199 at four fixed x positions.

    Returns
    -------
    The axes that were drawn on.
    """
    myplay = NFLPlay(
        gameId=gameId,
        playId=playId,
        plays_df=plays_df,
        games_df=games_df,
        tracking_df=tracking_df,
        targeted_df=targeted_df,
        compute_play_stats=True,
    )

    # Coverage at the throw (with the QB), at arrival, and a faint layer
    # drawn without a plot_event.
    coverage_layers = (
        dict(plot_event="pass_forward", include_qb=True),
        dict(plot_event="pass_arrived", include_qb=False),
        dict(alpha=0.3, include_qb=False),
    )
    for layer in coverage_layers:
        ax = myplay.plot_targeted_coverage(ax=ax, **layer)

    # Positions at the snap: circles for the offense, crosses for the defense.
    snap_layers = (
        ('event == "ball_snap" and isOffense', "o"),
        ('event == "ball_snap" and not isOffense', "x"),
    )
    for selector, marker in snap_layers:
        myplay.tracking_data.query(selector).plot(
            x="y_",
            y="x_",
            kind="scatter",
            ax=ax,
            alpha=marker_alpha,
            marker=marker,
            s=markersize,
            color="grey",
        )

    ax.set_facecolor("lightgreen")
    ax = format_ax(ax)
    ax.set_title(title, fontsize=title_fs)
    ax.legend([])

    # Centre the vertical window on the play's yardline; the sign follows
    # the side of the origin the tracking data sits on.
    if np.isnan(myplay.absoluteYardlineNumber):
        window_origin = 0
    elif myplay.tracking_data["x_"].mean() < 0:
        window_origin = -myplay.absoluteYardlineNumber
    else:
        window_origin = myplay.absoluteYardlineNumber
    ax.set_ylim(window_origin + yaxis_pad[0], window_origin + yaxis_pad[1])

    myplay.add_dist_ball_destination()

    def _plot_position(selector):
        # (y_, x_) of the first closing-track row matching the selector.
        return myplay.get_closing_track().query(selector)[["y_", "x_"]].values[0]

    cda = _plot_position('isClosingDefender and event == "pass_arrived"')
    cdt = _plot_position('isClosingDefender and event == "pass_forward"')
    tpa = _plot_position('isTargeted and event == "pass_arrived"')
    tpt = _plot_position('isTargeted and event == "pass_forward"')
    fbdest = _plot_position('isFootball and event == "pass_arrived"')

    def _label_distance(name, selector, column, arrow_targets, text_base, pad):
        # One label position, two annotations: each draws an arrow from the
        # shared label to one end of the measured segment.
        distance = myplay.tracking_data.query(selector)[column].values[0]
        caption = f"{name} = {distance:0.2f} yards"
        text_xy = (text_base[0] + pad[0], text_base[1] + pad[1])
        for target in arrow_targets:
            ax.annotate(
                caption,
                (target[0], target[1]),
                xytext=text_xy,
                fontsize=12,
                arrowprops=dict(
                    arrowstyle="-|>", connectionstyle="arc3,rad=0.2", fc="b"
                ),
            )

    closer_at_throw = 'event == "pass_forward" and isClosingDefender'
    closer_at_arrival = 'event == "pass_arrived" and isClosingDefender'
    target_at_throw = 'event == "pass_forward" and isTargeted'
    target_at_arrival = 'event == "pass_arrived" and isTargeted'

    if show_dddt:
        # Closing defender -> ball destination, at the throw.
        _label_distance(
            "DDD_t", closer_at_throw, "dist_pass_destination",
            (fbdest, cdt), fbdest, dddt_pad,
        )

    if show_ddda:
        # Closing defender -> ball destination, at arrival.
        _label_distance(
            "DDD_a", closer_at_arrival, "dist_pass_destination",
            (fbdest, cda), fbdest, ddda_pad,
        )

    if show_ddra:
        # Closing defender's dist_targeted_pass_arrived value.
        _label_distance(
            "DR_a", closer_at_arrival, "dist_targeted_pass_arrived",
            (cda, tpa), cda, ddra_pad,
        )

    if show_ddrt:
        # Closing defender's dist_targeted_pass_forward value.
        _label_distance(
            "DR_t", closer_at_throw, "dist_targeted_pass_forward",
            (cdt, tpt), cdt, ddrt_pad,
        )

    if show_tddt:
        # Targeted receiver -> ball destination, at the throw. The label is
        # placed relative to the defender's throw-time position.
        _label_distance(
            "TDD_t", target_at_throw, "dist_pass_destination",
            (fbdest, tpt), cdt, tddt_pad,
        )

    if show_tdda:
        # Targeted receiver -> ball destination, at arrival.
        _label_distance(
            "TDD_a", target_at_arrival, "dist_pass_destination",
            (fbdest, tpa), tpa, tdda_pad,
        )

    if add_description:
        plt.figtext(
            0.5,
            0.07,
            myplay.playDescription[:description_cutoff],
            ha="center",
            fontsize=8,
            bbox={"facecolor": "grey", "alpha": 0.5, "pad": 5},
        )

    if show_ticks:
        # Four short horizontal segments per yard.
        tick_spans = ([0.4, 0.7], [53.0, 52.5], [22.91, 23.57], [29.73, 30.39])
        for yard in range(-200, 200):
            for span in tick_spans:
                ax.plot(span, [yard, yard], color='white', alpha=0.5)

    return ax


def format_ax(ax):
    """
    Strip axis labels, tick labels and tick marks from ``ax``.

    Both axis labels are set to the empty string, both sets of tick labels
    are replaced with an empty list, and ``tick_params`` is called once per
    axis to switch the tick marks and their labels off. The same axes
    object is returned so calls can be chained.
    """
    for clear_label in (ax.set_xlabel, ax.set_ylabel):
        clear_label("")
    for clear_ticklabels in (ax.set_yticklabels, ax.set_xticklabels):
        clear_ticklabels([])

    # Per-axis switches; ``which="both"`` covers major and minor ticks.
    switches_by_axis = {
        "x": dict(bottom=False, top=False, labelbottom=False),
        "y": dict(left=False, bottom=False, top=False, labelleft=False),
    }
    for axis_name, switches in switches_by_axis.items():
        ax.tick_params(axis=axis_name, which="both", **switches)

    return ax


def calc_coverage_stats(deep_coverage, plays_df, players_df):
    """
    Build a one-row-per-play table of closing statistics.

    For every (gameId, playId) in ``deep_coverage`` this collects the throw
    and arrival times, the closing defender's and targeted receiver's
    distances at both events, play details from ``plays_df`` and player
    names/positions from ``players_df``, then derives the closing metrics
    (``closing_score``, ``blanket_score``, ``ThrowRatio``, ``ArrivalRatio``,
    ``ClosingRatio`` and the normalized closing ratio with quartile bins).

    Parameters
    ----------
    deep_coverage : pd.DataFrame
        Tracking rows with ``gameId``, ``playId``, ``event``, ``time``,
        ``isClosingDefender``, ``isTargeted``, ``nflId``, ``route``,
        ``dist_pass_destination``, ``dist_targeted_pass_forward`` and
        ``dist_targeted_pass_arrived`` columns.
    plays_df : pd.DataFrame
        Plays table; must be unique per (gameId, playId).
    players_df : pd.DataFrame
        Players table with ``nflId``, ``position`` and ``displayName``.

    Returns
    -------
    pd.DataFrame
        One row per play. The two player joins use pandas' default inner
        merge, so plays whose defender or receiver id is missing from
        ``players_df`` are dropped.

    Raises
    ------
    pandas.errors.MergeError
        When a per-play lookup is not unique (all of them are merged with
        ``validate="1:1"``).
    """
    play_keys = ["gameId", "playId"]

    def _per_play(selector, column, new_name, dedupe=False, take_first=False):
        # Pull one column for the selected rows and rename it. ``dedupe``
        # and ``take_first`` reproduce the two ways duplicates are collapsed.
        picked = deep_coverage.query(selector)[play_keys + [column]]
        if dedupe:
            picked = picked.drop_duplicates()
        if take_first:
            picked = picked.groupby(play_keys).first().reset_index()
        return picked.rename(columns={column: new_name})

    def _attach(stats, extra):
        return stats.merge(extra, on=play_keys, how="left", validate="1:1")

    stats = deep_coverage[play_keys].drop_duplicates().reset_index(drop=True)

    # --- Timing ---------------------------------------------------------
    # A play can carry several pass_forward events; keep the earliest one.
    throw_times = (
        deep_coverage.query('event == "pass_forward"')[play_keys + ["time"]]
        .drop_duplicates()
        .sort_values("time")
    )
    throw_times = throw_times.groupby(play_keys).first().reset_index()
    stats = _attach(
        stats, throw_times.rename(columns={"time": "time_at_pass_forward"})
    )
    stats = _attach(
        stats,
        _per_play('event == "pass_arrived"', "time", "time_at_pass_arrived", dedupe=True),
    )
    air_time = stats["time_at_pass_arrived"] - stats["time_at_pass_forward"]
    stats["seconds_ball_in_air"] = air_time.dt.total_seconds()

    # --- Distances ------------------------------------------------------
    # (selector, source column, output column, dedupe, take_first)
    distance_lookups = [
        (
            'event == "pass_forward" and isClosingDefender',
            "dist_pass_destination",
            "ClosingDefender_dist_PassDestination_at_PassForward",
            False,
            True,
        ),
        (
            'event == "pass_forward" and isTargeted',
            "dist_pass_destination",
            "Targeted_dist_PassDestination_at_PassForward",
            True,
            True,
        ),
        (
            'event == "pass_arrived" and isClosingDefender',
            "dist_pass_destination",
            "ClosingDefender_dist_PassDestination_at_PassArrived",
            False,
            True,
        ),
        (
            'event == "pass_arrived" and isTargeted',
            "dist_pass_destination",
            "Targeted_dist_PassDestination_at_PassArrived",
            True,
            False,
        ),
        (
            'event == "pass_forward" and isClosingDefender',
            "dist_targeted_pass_forward",
            "ClosingDefender_dist_Targeted_at_PassForward",
            False,
            True,
        ),
        (
            'event == "pass_arrived" and isClosingDefender',
            "dist_targeted_pass_arrived",
            "ClosingDefender_dist_Targeted_at_PassArrived",
            False,
            True,
        ),
    ]
    for selector, column, new_name, dedupe, take_first in distance_lookups:
        stats = _attach(
            stats, _per_play(selector, column, new_name, dedupe, take_first)
        )

    # --- Play details ---------------------------------------------------
    play_columns = [
        "gameId",
        "playId",
        "playResult",
        "passResult",
        "isDefensivePI",
        "playDescription",
    ]
    stats = stats.merge(
        plays_df[play_columns], how="left", on=play_keys, validate="1:1"
    )
    # Floor the air time so the divisions below never see zero or negatives.
    stats["seconds_ball_in_air"] = stats["seconds_ball_in_air"].clip(0.01, np.inf)

    # --- Player ids, route, names --------------------------------------
    identity_lookups = [
        ("isClosingDefender", "nflId", "ClosingDefender_nflId"),
        ("isTargeted", "nflId", "Targeted_nflId"),
        ("isTargeted", "route", "route"),
    ]
    for selector, column, new_name in identity_lookups:
        stats = _attach(stats, _per_play(selector, column, new_name, dedupe=True))

    roster = players_df[["nflId", "position", "displayName"]]
    roster_renames = (
        {
            "position": "ClosingDefender_position",
            "displayName": "ClosingDefenderName",
            "nflId": "ClosingDefender_nflId",
        },
        {
            "position": "Targeted_position",
            "displayName": "TargetedName",
            "nflId": "Targeted_nflId",
        },
    )
    for renames in roster_renames:
        stats = stats.merge(roster.rename(columns=renames), on=[renames["nflId"]])

    # --- Custom metrics -------------------------------------------------
    defender_dest_throw = "ClosingDefender_dist_PassDestination_at_PassForward"
    defender_dest_arrival = "ClosingDefender_dist_PassDestination_at_PassArrived"
    receiver_dest_throw = "Targeted_dist_PassDestination_at_PassForward"
    receiver_dest_arrival = "Targeted_dist_PassDestination_at_PassArrived"

    # Distance gained on the destination per second in the air, times 10.
    stats["closing_score"] = (
        (stats[defender_dest_throw] - stats[defender_dest_arrival])
        / stats["seconds_ball_in_air"]
    ) * 10

    # Scores of 120 or more fall outside every band and stay unlabelled.
    stats.loc[stats["closing_score"] < 40, "ClosingScoreGroup"] = "Poor"
    for lower, upper, grade in ((40, 60, "Fair"), (60, 80, "Good"), (80, 120, "Excellent")):
        in_band = (stats["closing_score"] >= lower) & (stats["closing_score"] < upper)
        stats.loc[in_band, "ClosingScoreGroup"] = grade

    mean_separation = stats[
        [
            "ClosingDefender_dist_Targeted_at_PassForward",
            "ClosingDefender_dist_Targeted_at_PassArrived",
        ]
    ].mean(axis=1)
    stats["blanket_score"] = (
        stats[defender_dest_arrival] * stats["seconds_ball_in_air"]
    ) / mean_separation

    # Defender-to-receiver distance ratios at the throw and at arrival.
    stats["ThrowRatio"] = stats[defender_dest_throw] / stats[receiver_dest_throw]
    stats["ArrivalRatio"] = stats[defender_dest_arrival] / stats[receiver_dest_arrival]
    stats["ClosingRatio"] = stats["ArrivalRatio"] / stats["ThrowRatio"]

    quartile_names = ["Low", "Mid-Low", "Mid-High", "High"]
    stats["ClosingRatioBin"] = pd.qcut(stats["ClosingRatio"], 4, labels=quartile_names)

    stats["logClosingRatio"] = np.log(stats["ClosingRatio"])
    # NOTE(review): once logClosingRatio drops below -9 the reciprocal turns
    # negative and the clip maps it to 0 rather than 100 - confirm intended.
    stats["normClosingRatio"] = ((1 / (stats["logClosingRatio"] + 9)) * 700).clip(
        0, 100
    )
    stats["normClosingRatioBin"] = pd.qcut(
        stats["normClosingRatio"], 4, labels=quartile_names
    )

    return stats

def add_play_targeted_closing(deep_coverage):
    """
    Tag every row with its play's targeted and closing-defender ids.

    A ``game_play`` -> ``nflId`` lookup is built from the rows flagged
    ``isTargeted`` and another from the rows flagged ``isClosingDefender``;
    both are then mapped onto every row as the integer columns
    ``targeted_nflId`` and ``closing_nflId``.

    The frame is modified in place and also returned. The integer cast
    fails if some ``game_play`` has no flagged row, because the mapped
    value is then missing.
    """

    def _id_by_play(role_flag):
        pairs = deep_coverage.query(role_flag)[["game_play", "nflId"]]
        return pairs.drop_duplicates().set_index("game_play")["nflId"].to_dict()

    # Build both lookups before any column is added to the frame.
    lookups = (
        ("targeted_nflId", _id_by_play("isTargeted")),
        ("closing_nflId", _id_by_play("isClosingDefender")),
    )
    for column, id_by_play in lookups:
        deep_coverage[column] = deep_coverage["game_play"].map(id_by_play).astype("int")
    return deep_coverage

if __name__ == "__main__":
    pass


In [None]:
"""
Data Loading and Preperation
"""

# Load the six input tables from the first source that exists:
#   1. pre-processed parquet files in the attached "-pp" dataset,
#   2. parquet files already present in the working directory,
#   3. the raw competition CSVs.
if os.path.exists("../input/nfl-big-data-bowl-2021-pp/games.parquet"):
    games_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/games.parquet")
    players_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/players.parquet")
    plays_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/plays.parquet")
    tracking_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/tracking.parquet")
    targeted_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/targeted.parquet")
    coverage_df = pd.read_parquet("../input/nfl-big-data-bowl-2021-pp/coverage.parquet")

elif os.path.exists("games.parquet"):
    games_df = pd.read_parquet("games.parquet")
    players_df = pd.read_parquet("players.parquet")
    plays_df = pd.read_parquet("plays.parquet")
    tracking_df = pd.read_parquet("tracking.parquet")
    targeted_df = pd.read_parquet("targeted.parquet")
    coverage_df = pd.read_parquet("coverage.parquet")
else:
    games_df = pd.read_csv("../input/nfl-big-data-bowl-2021/games.csv")
    players_df = pd.read_csv("../input/nfl-big-data-bowl-2021/players.csv")
    plays_df = pd.read_csv("../input/nfl-big-data-bowl-2021/plays.csv")
    # Tracking data is split into one CSV per week; stack them into one frame.
    tracking_csvs = glob("../input/nfl-big-data-bowl-2021/week*.csv")
    tracking_df = pd.concat([pd.read_csv(f) for f in tracking_csvs])
    targeted_df = pd.read_csv(
        "../input/nfl-big-data-bowl-2021-bonus/targetedReceiver.csv"
    )
    # Only the week-1 coverage file is read here.
    coverage_df = pd.read_csv(
        "../input/nfl-big-data-bowl-2021-bonus/coverages_week1.csv"
    )
    # prep_plays_df is applied on the CSV path only; the parquet branches
    # load plays_df as stored.
    plays_df = prep_plays_df(plays_df)

# Write every table to parquet in the working directory. This runs on all
# three paths, including when the data was just read from these same files.
games_df.to_parquet("games.parquet")
players_df.to_parquet("players.parquet")
plays_df.to_parquet("plays.parquet")
tracking_df.to_parquet("tracking.parquet")
targeted_df.to_parquet("targeted.parquet")
coverage_df.to_parquet("coverage.parquet")


# Closing Defender vs Targeted Receiver

To evaluate defensive players' ability to close on receivers, I first identify which defensive player is most responsible for closing in each play. Defenders have various responsibilities at the beginning of a play, but after the ball is thrown, the defensive team becomes focused on attacking the location of the ball.

Using the provided NGS (Next Gen Stats) player tracking data, I am able to determine the defender that is closest to the targeted receiver at the moment the ball reaches its destination. I call this defender the **Closing Defender**. All other defensive players are ignored for that play because I want to focus on the defender/receiver relationship.

Below is a brief description of the terminology I use for this analysis:

Players:

- **Targeted Receiver**: The quarterback’s intended receiver of the throw. 
- **Closing Defender**: The defensive player closest to the Targeted Receiver at the moment the ball arrives.

Play Events:

- **pass_forward** : The moment within the play when the quarterback released the ball (I've also referred to this as the “throw” moment).
- **pass_arrived**: The moment when the ball arrives at its final location. In the case of a completed pass, this is the location of the receiver; in the case of an interception, this is the location of the defensive player. For incomplete passes, this could be anywhere on the field.


## Visualizing the Closing Defender

In the previous section, I coined the term “Closing Defender”. Before proceeding I want to validate that these players are in fact ones that follow or close upon the targeted receivers.

For purposes of plotting the data, I created normalized X and Y position features such that all offenses are moving in the same direction and all players' movements are relative to the quarterback's location at the time of the throw. This allows me to easily plot different routes on top of each other and see if patterns emerge in the movement of defenders. Each plot below displays a different targeted receiver's route. Note that these plots only include deep passes (passes attempted for >15 yards). One thing that becomes apparent is that the closing defenders do, in fact, generally seem to follow very similar paths to their receivers. This is good confirmation that I have defined my "Closing Defender" term appropriately.

In [None]:
# Overlay receiver and closing-defender paths for the six most frequent
# targeted routes, one subplot per route. Paths use the QB-relative
# normalized coordinates (x_norm / y_norm) added by
# normalize_xy_deepcoverage_relativeqb.
plt.style.use("default")

deep_coverage = pd.read_parquet(
    "../input/nfl-big-data-bowl-2021-pp/deep_coverage_tracking.parquet"
)
# A single string key per play makes grouping and dictionary mapping easier.
deep_coverage["game_play"] = (
    deep_coverage["gameId"].astype("str") + "_" + deep_coverage["playId"].astype("str")
)
# Copy the first non-null route of each play onto all of that play's rows.
route_dict = (
    deep_coverage.dropna(subset=["route"])
    .groupby("game_play")["route"]
    .first()
    .to_dict()
)
deep_coverage["targeted_route"] = deep_coverage["game_play"].map(route_dict)
deep_coverage = normalize_xy_deepcoverage_relativeqb(deep_coverage)
fig, axs = plt.subplots(3, 2, figsize=(15, 15))
axs = axs.flatten()
idx = 0

for route in deep_coverage["targeted_route"].value_counts().index[:6]:
    # for route, d in deep_coverage.groupby("targeted_route"):
    d = deep_coverage.query("targeted_route == @route")
    dc_gb = d.groupby("game_play")
    # One faint line per play and role; many overlapping lines reveal the
    # typical path for the route.
    for i, d in dc_gb:
        d.query("isTargeted").plot(
            x="y_norm",
            y="x_norm",
            style="-",
            alpha=0.1,
            ax=axs[idx],
            label=None,
            color="white",
        )
        d.query("isClosingDefender").plot(
            x="y_norm",
            y="x_norm",
            style="-",
            alpha=0.1,
            ax=axs[idx],
            label=None,
            color="yellow",
        )
        # Drop the legend pandas adds on each plot call; a single legend is
        # built after the loop.
        axs[idx].get_legend().remove()

    axs[idx].set_title(route.title() + " Routes", fontsize=12)
    axs[idx].set_facecolor("black")
    axs[idx].grid(color="white", alpha=0.1)
    axs[idx].set_ylim(-5, 60)
    axs[idx].set_xlim(-30, 30)

    axs[idx].set_xlabel("")
    axs[idx].set_ylabel("")
    axs[idx].set_yticklabels([])
    axs[idx].set_xticklabels([])
    axs[idx].tick_params(
        axis="x",  # changes apply to the x-axis
        which="both",  # both major and minor ticks are affected
        bottom=False,  # ticks along the bottom edge are off
        top=False,  # ticks along the top edge are off
        labelbottom=False,
    )  # labels along the bottom edge are off

    leg = axs[idx].legend(
        ["Receiver", "Defender"], facecolor="black", loc="upper right"
    )
    # Matplotlib 3.7 renamed ``Legend.legendHandles`` to ``legend_handles``
    # and later removed the old name, so accept whichever one exists.
    legend_handles = getattr(leg, "legend_handles", None)
    if legend_handles is None:
        legend_handles = leg.legendHandles
    # The plotted lines are nearly transparent; make the legend swatches and
    # text readable on the black background.
    for lh in legend_handles:
        lh.set_alpha(1)
    for text in leg.get_texts():
        text.set_color("white")
    idx += 1
fig.suptitle("Closing Defender Paths by Route Type", y=0.93, fontsize=16)

plt.show()

# Defensive Closing Patterns

To better understand the movements that players make when defending the pass, I've provided three examples below. In these plots, I have isolated the players' paths while the ball is in the air. In the first example, the receiver is running an Out Route. The Closing Defender is quick to cover and the pass ends up incomplete. The second example shows a Go Route, where the receiver creates enough separation to catch the pass and ultimately haul in the ball for a touchdown. The third example is another Go Route, but, in this case, the Closing Defender tracks the ball while it is in the air, cutting off the receiver for an interception.

The question is: "How do we quantify if a defender is doing a good job at closing in on the ball?" I could simply look at the player's average speed while the ball is in the air, but that doesn't account for the player's direction or whether they are actually moving in the best direction. I also considered looking at the change in distance of the Closing Defender to the targeted receiver at different moments in the play, but distance to the receiver does not indicate the player is necessarily closing on the ball. I've concluded, after many different iterations, that the metric must account for where the ball is thrown relative to both the closing defender and the targeted receiver. My solution incorporates not only the defender's location at the moments the ball is thrown and arrives, but also the ratio of the two players' distances to the ball's destination. I'll describe this metric in the next section and explain why I think it is a strong one.


In [None]:
# Draw three example plays, one per subplot, showing the targeted coverage
# plus every player's position at the snap.
plt.style.use("default")

# Example Plays
example_plays = {
    0: {"mygameId": 2018102107, "myplayId": 1007},
    1: {"mygameId": 2018123004, "myplayId": 2619},
    2: {"mygameId": 2018111809, "myplayId": 845},
}
fig, axs = plt.subplots(3, 1, figsize=(8, 15))

for i, example in example_plays.items():
    myplay = NFLPlay(
        gameId=example["mygameId"],
        playId=example["myplayId"],
        plays_df=plays_df,
        games_df=games_df,
        tracking_df=tracking_df,
        targeted_df=targeted_df,
        compute_play_stats=True,
    )

    axs[i] = myplay.plot_targeted_coverage(ax=axs[i], show_ticks=True)

    # Snap positions: default markers for the offense, crosses for the defense.
    myplay.tracking_data.query('event == "ball_snap" and isOffense').plot(
        x="y_",
        y="x_",
        kind="scatter",
        ax=axs[i],
        alpha=0.5,
        color="grey",
        s=100,
    )
    myplay.tracking_data.query('event == "ball_snap" and not isOffense').plot(
        x="y_",
        y="x_",
        kind="scatter",
        ax=axs[i],
        alpha=0.5,
        marker="x",
        color="grey",
        s=100,
    )

    axs[i].set_facecolor("lightgreen")
    axs[i] = format_ax(axs[i])
    # This title is replaced by the "Example N" titles set after the loop.
    axs[i].set_title(myplay.playDescription, fontsize=9)
    # Show 15 yards behind to 55 yards beyond the play's yardline; the sign
    # is flipped when the mean x_ of the tracking data is negative.
    if myplay.tracking_data["x_"].mean() < 0:
        axs[i].set_ylim(
            -myplay.absoluteYardlineNumber - 15, -myplay.absoluteYardlineNumber + 55
        )
    else:
        axs[i].set_ylim(
            myplay.absoluteYardlineNumber - 15, myplay.absoluteYardlineNumber + 55
        )

axs[0].set_title("Example 1: Incomplete Pass", fontsize=15)
axs[1].set_title("Example 2: Complete Pass", fontsize=15)
axs[2].set_title("Example 3: Intercepted", fontsize=15)

# Print the first 90 characters of each play description just inside the
# lower-left corner of its subplot.
for img in range(3):
    play_desc = plays_df.loc[
        (plays_df["gameId"] == example_plays[img]["mygameId"])
        & (plays_df["playId"] == example_plays[img]["myplayId"])
    ]["playDescription"].values[0][0:90]
    axs[img].text(axs[img].get_xlim()[0]+ 1, axs[img].get_ylim()[0] + 1.5, "*" + play_desc, fontsize=8)


axs[0].legend(loc="upper left")
axs[1].legend(loc="upper left")
axs[2].legend(loc="upper left")

plt.tight_layout()
plt.show()


# The NCR Metric

The Normalized Closing Ratio is my solution for quantifying how well a defender closed on a ball while it is in the air. It can be broken down into two parts. First, I define the targeted receiver and closing defender’s individual distance to the ball's arrival location. I do this at both the moment the ball is thrown and the moment the ball reaches this location. I call these the **Throw Ratio** and **Arrival Ratio**.


$$ ThrowRatio = \dfrac{DDD_t}{TDD_t} $$


$$ ArrivalRatio = \dfrac{DDD_a}{TDD_a} $$

where: $DDD_t$ is the defender's distance to the ball's destination at the moment of the throw, $TDD_t$ is the targeted receiver's distance to the ball's destination at the moment of the throw, $DDD_a$ is the defender's distance to the ball's destination at the moment the ball arrives, and $TDD_a$ is the targeted receiver's distance to the ball's destination at the moment the ball arrives.

Note that, when the defender is lagging the receiver with respect to the ball’s destination, these values will be large. When the defender is closer to the ball’s destination than the receiver, these values will be small. On their own, these values do not tell us anything about how the player has closed on the ball’s destination during the time it was in the air.

To account for the player's change of position while the ball is in the air, I divide the arrival ratio by the throw ratio. I call this the closing ratio:

$$ ClosingRatio = \dfrac{ArrivalRatio}{ThrowRatio} $$

The only problem with my raw closing ratio, as described above, is that it is not easy to interpret. To normalize it, I apply a log transformation and some constants. You can think of this as “grading on a curve” where an A+ would be a score of 100, and a C (average) play has a score of 75. I call this the Normalized Closing Ratio (NCR). The NCR is clipped so the maximum possible value is 100.

$$ normClosingRatio = \frac{700}{\log_{10}(ClosingRatio) + 9} $$


**Play restrictions**: There are some unique situations that restrict which plays are appropriate for my Normalized Closing Ratio. Plays must meet the following criteria:

- The ball must be in the air for more than 0.3 seconds. This removes quick shuffle passes where I don’t think this metric should apply.
- At the point of arrival, both the defender and receiver must be within 10 yards of the ball. This excludes some passes where the quarterback severely over- or underthrows their receiver. It also excludes some plays where the quarterback intentionally throws the ball away.

I decided **not** to exclude plays resulting in penalties, as I believe that those plays are still legitimate with respect to closing speed, but this could be debated.


In [None]:
# Re-draw the third example play with the distance annotations switched on.
example = example_plays[2]
plt.style.use("default")
fig, ax = plt.subplots(figsize=(14, 8))

# Keyword groups forwarded to plot_play_defender: the show_* switches and
# the matching *_pad offsets (presumably label offsets; see the helper).
annotation_toggles = dict(
    show_ddrt=False,
    show_ddra=False,
    show_ddda=True,
    show_dddt=True,
    show_tddt=True,
    show_tdda=True,
)
annotation_pads = dict(
    ddrt_pad=(-20, 10),
    ddra_pad=(10, 10),
    ddda_pad=(-15, 5),
    dddt_pad=(-20, -10),
    tddt_pad=(10, 5),
    tdda_pad=(10, 5),
)
ax = plot_play_defender(
    ax,
    example["mygameId"],
    example["myplayId"],
    plays_df,
    games_df,
    tracking_df,
    targeted_df,
    yaxis_pad=(-15, 60),
    add_description=False,
    description_cutoff=90,
    markersize=150,
    title="Distances used in Normalized Closing Ratio",
    **annotation_toggles,
    **annotation_pads,
)

# Footnote the plot with the first 90 characters of the play description.
same_game = plays_df["gameId"] == example["mygameId"]
same_play = plays_df["playId"] == example["myplayId"]
play_desc = plays_df.loc[same_game & same_play]["playDescription"].values[0][:90]
ax.text(10, 23, "*" + play_desc)

# Rebuild the legend from the first five handles plus the seventh (the
# sixth is skipped), relabelled with reader-friendly names.
hl = ax.get_legend_handles_labels()
legend_handles = hl[0][:5] + [hl[0][6]]
legend_names = [
    "Closing Defender Path",
    "Targeted Receiver Path",
    "Football Path",
    "Defender at Throw and Arrival",
    "Receiver at Throw and Arrival",
    "Quarterback at Throw",
]
plt.legend(
    legend_handles,
    legend_names,
    loc="upper left",
    bbox_to_anchor=(1, 0.95),
)
plt.tight_layout()
plt.show()

In the plot above, I’ve focused on the third example from the previous section. I’ve added all the distances used to create the NCR metric. Now, I will calculate the NCR for this play. While the math is straightforward, I would like to emphasize that this metric is designed to be intuitive. Essentially, we are calculating how much ground the defender gained or lost compared to the receiver on the play.

With that in mind, I’ll calculate using the distance values:

$ DDD_a = 0.2 \quad  DDD_t = 25.86 \quad   TDD_a = 1.01 \quad  TDD_t = 28.21 $

The $ThrowRatio$ is 0.917. This value shows that *at the moment the ball is thrown* both players are roughly equidistant from the ball's final location. The $ArrivalRatio$ is 0.198, which tells us that when the ball arrives, the defender is much closer to it than the targeted receiver is.

$$ ThrowRatio = \dfrac{DDD_t}{TDD_t} = \dfrac{25.86}{28.21} = 0.917 $$


$$ ArrivalRatio = \dfrac{DDD_a}{TDD_a} = \dfrac{0.2}{1.01} = 0.198 $$

The closing ratio is simply :

$$ ClosingRatio = \dfrac{0.198}{0.917} = 0.216 $$

Finally, after normalizing, we have a metric where a value of 0 is a horrible defensive play while 100 is a perfect closing by the defensive player. In this play, Marshon Lattimore scores 83.98, which is well above average in terms of closing:

$$ \frac{700}{\log_{10}(ClosingRatio) + 9} = \frac{700}{\log_{10}(0.216) + 9} = 83.98 $$


In [None]:
def get_coverage_tracking_data_plays(plays_df, tracking_df, targeted_df, games_df):
    """
    Build the closing-track data for every play in ``plays_df``.

    For each row of ``plays_df`` an ``NFLPlay`` is constructed (with
    ``compute_play_stats=True``) and the frame returned by its
    ``get_closing_track()`` is collected. The per-play frames are then
    concatenated with ``pd.concat`` and returned.

    Plays for which ``NFLPlay`` or ``get_closing_track()`` raises
    ``IndexError`` or ``ValueError`` are skipped, because some expected
    tracking data does not exist. The number of skipped plays is reported
    once through ``warnings.warn`` so the loss is visible.

    Raises:
        ValueError: if no play at all could be processed.
    """
    import warnings

    frames = []
    skipped = 0
    # Only the row contents are used, so the index labels are irrelevant
    # and plays_df can be iterated directly without copying it first.
    for _, play in tqdm(plays_df.iterrows(), total=len(plays_df)):
        mygameId = play["gameId"]
        myplayId = play["playId"]
        try:
            myplay = NFLPlay(
                gameId=mygameId,
                playId=myplayId,
                plays_df=plays_df,
                games_df=games_df,
                tracking_df=tracking_df,
                targeted_df=targeted_df,
                compute_play_stats=True,
            )
            frames.append(myplay.get_closing_track())
        except (IndexError, ValueError):
            # Some expected tracking data does not exist
            skipped += 1

    if skipped:
        warnings.warn(
            f"Skipped {skipped} of {len(plays_df)} plays with missing tracking data"
        )
    if not frames:
        # pd.concat would raise a less helpful ValueError on an empty list.
        raise ValueError("No plays with usable tracking data were found")
    coverage = pd.concat(frames)
    return coverage


# Build the coverage tracking table only when the preprocessed parquet file
# is not available in the input dataset; otherwise just read it.
# The freshly built table is written to the working directory, which is a
# different path from the one checked above.
if not os.path.exists("../input/nfl-big-data-bowl-2021-pp/coverage_tracking.parquet"):
    coverage = get_coverage_tracking_data_plays(
        plays_df, tracking_df, targeted_df, games_df
    )
    coverage.to_parquet("coverage_tracking.parquet")
else:
    coverage = pd.read_parquet(
        "../input/nfl-big-data-bowl-2021-pp/coverage_tracking.parquet"
    )

# Calculate play stats with NCR Metric

# Give both frames the same composite "<gameId>_<playId>" string key.
plays_df["game_play"] = (
    plays_df["gameId"].astype("str") + "_" + plays_df["playId"].astype("str")
)
coverage["game_play"] = (
    coverage["gameId"].astype("str") + "_" + coverage["playId"].astype("str")
)

# Copy each play's passResult onto its coverage rows through that key.
pass_result_by_play = plays_df.set_index("game_play")["passResult"].to_dict()
coverage["passResult"] = coverage["game_play"].map(pass_result_by_play)

# Run the two coverage preprocessing helpers in order, then derive the
# per-play statistics.
coverage = normalize_xy_deepcoverage_relativeqb(add_play_targeted_closing(coverage))
play_coverage_stats = calc_coverage_stats(coverage, plays_df, players_df)

# Play restrictions: defender and targeted receiver both under 10 from the
# pass destination at arrival, and the ball in the air for more than 0.3.
ncr_play_filter = "ClosingDefender_dist_PassDestination_at_PassArrived < 10 and Targeted_dist_PassDestination_at_PassArrived < 10 and seconds_ball_in_air > 0.3"
restricted_stats = play_coverage_stats.query(ncr_play_filter).copy()


# Normalized Closing Ratio and Pass Result

Up until this point, I've explained the math behind my Normalized Closing Ratio. Next, I'd like to show how this metric is related to passing outcomes. The results are striking: you can clearly see a distinction in the distributions! Complete passes typically have lower NCR scores, while interceptions are, on average, much higher. Incomplete passes occur close to the middle of the distribution. This further bolsters the idea that the NCR is a good metric for determining how well a defender plays against the pass while the ball is in the air.


In [None]:
plt.style.use("fivethirtyeight")
sns.set_context("paper", font_scale=1.2)
fig, ax = plt.subplots(figsize=(12, 6))

# Display names for the pass-result codes. Each curve is labelled from its
# own code, so the legend cannot drift out of step with the groupby order
# if another passResult value appears in the data.
# NOTE(review): assumes passResult uses the codes C / I / IN - confirm
# against the plays data; unknown codes fall back to the raw value.
result_names = {"C": "Complete", "I": "Incomplete", "IN": "Interception"}

# This plot uses a tighter receiver cutoff (< 5) than the < 10 restriction
# already applied when restricted_stats was built.
plotted_plays = restricted_stats.query(
    "seconds_ball_in_air > 0.2 and Targeted_dist_PassDestination_at_PassArrived < 5"
)
for result_code, result_plays in plotted_plays.groupby("passResult"):
    sns.kdeplot(
        result_plays["normClosingRatio"],
        shade=True,
        label=result_names.get(result_code, result_code),
        ax=ax,
    )
ax.set_xlim(40, 120)
ax.set_title("Normalized Closing Ratio is Closely Related to Pass Result", fontsize=16)
ax.set_xlabel(
    "Normalized Closing Ratio \n <------ Receiver Closes Faster / Defender Closes Faster ---->"
)
ax.set_ylabel("Density")
# Build the legend from the labels attached to the curves above.
plt.legend()
plt.show()

# Top Defenders by Closing Ratio

Hopefully, by now I've demonstrated that the NCR is a helpful metric for determining how well defensive players close on the ball while it is in the air. The final step of the process is to find each player's average NCR score across all the available plays. I take the average NCR score across all plays meeting the restriction requirement (>0.3 seconds of ball in the air, and both players within 10 yards of the ball when it arrives) for each player. I've also removed players who have 20 or fewer plays as closing defenders (I only wanted to include players with a large enough sample size to be meaningful).

It's interesting to note that the top players list is quite diverse: no one position dominates this list. It consists of both well-known and not-so-well-known players who are exceptional at closing in on the ball while it is in the air. Anthony Walker, who tops my NCR list, was the subject of an [article about his impressive play in the 2018 season](https://www.sportskeeda.com/gridiron/anthony-walker-jr-has-quietly-become-a-really-good-young-linebacker). After watching some of his highlights, I'm not surprised by his top performance.

**Anthony Walker Jr.'s amazing closing speed in action**
![](https://i.imgur.com/yxUvFj4.gif)


In [None]:
# One row per closing defender: id, position and name together.
defender_keys = [
    "ClosingDefender_nflId",
    "ClosingDefender_position",
    "ClosingDefenderName",
]
# Columns reported as per-defender averages.
averaged_columns = [
    "ThrowRatio",
    "ArrivalRatio",
    "normClosingRatio",
    "ClosingRatio",
    "logClosingRatio",
    "ClosingDefender_dist_PassDestination_at_PassArrived",
    "ClosingDefender_dist_PassDestination_at_PassForward",
    "ClosingDefender_dist_Targeted_at_PassArrived",
]

# Group once and derive every statistic from the same groupby, instead of
# re-grouping and merging separately for each statistic.
by_defender = restricted_stats.groupby(defender_keys)

defender_means = by_defender[averaged_columns].mean().reset_index()
defender_extras = by_defender.agg(
    DefenderPlayCount=("playId", "count"),
    max_normClosingRatio=("normClosingRatio", "max"),
    min_normClosingRatio=("normClosingRatio", "min"),
    median_normClosingRatio=("normClosingRatio", "median"),
    std_normClosingRatio=("normClosingRatio", "std"),
).reset_index()

# The two frames share only the three key columns, so the default merge
# joins on exactly those.
cr_playerstats = defender_means.merge(defender_extras)

# One-standard-deviation band around each defender's mean NCR.
cr_playerstats["normClosingRatio_plus_std"] = (
    cr_playerstats["normClosingRatio"] + cr_playerstats["std_normClosingRatio"]
)
cr_playerstats["normClosingRatio_min_std"] = (
    cr_playerstats["normClosingRatio"] - cr_playerstats["std_normClosingRatio"]
)

# Keep defenders with more than 20 qualifying plays, ordered by mean NCR
# (ascending). set_index + reset_index moves the name into the first column
# and leaves a fresh 0..n-1 index.
ranked_players = (
    cr_playerstats.sort_values("normClosingRatio")
    .query("DefenderPlayCount > 20")
    .set_index("ClosingDefenderName")
    .reset_index()
)

# Collapse the detailed position codes into broader groups for plotting.
ranked_players["position"] = ranked_players["ClosingDefender_position"].replace(
    {"ILB": "LB", "OLB": "LB", "MLB": "LB", "SS": "S", "FS": "S", "DB": "CB"}
)


In [None]:
plt.style.use("fivethirtyeight")
color_pal = plt.rcParams["axes.prop_cycle"].by_key()["color"]

# The twenty defenders with the highest average NCR, best first.
top_twenty = (
    ranked_players.reset_index()
    .sort_values("normClosingRatio", ascending=False)
    .head(20)
)

fig, ax = plt.subplots(figsize=(8, 6))
# dodge=False keeps one bar per player; hue only colours by position group.
ax = sns.barplot(
    data=top_twenty,
    x="normClosingRatio",
    y="ClosingDefenderName",
    hue="position",
    palette=color_pal,
    dodge=False,
    ax=ax,
)
ax.set_title("Top Players by Normalized Closing Ratio", fontsize=16)
ax.set_xlabel("Average Normalized Closing Ratio", fontsize=12)
ax.set_ylabel("")
ax.set_xlim(75, 77.25)
plt.show()

In [None]:
fig, ax = plt.subplots(1, 3, figsize=(15, 10))

# One (row filter, panel title) pair per subplot, left to right.
position_panels = [
    ('position == "CB"', "Top Closing Cornerbacks"),
    ('position == "LB"', "Top Closing Linebackers"),
    ('position == "S"', "Top Closing Safeties"),
]

for panel_idx, (position_filter, panel_title) in enumerate(position_panels):
    panel = ax[panel_idx]
    # Sorted ascending and cut with tail(25), so the 25 highest values are
    # kept and the best player ends up on the top bar of the barh chart.
    top_for_position = (
        ranked_players.query(position_filter)
        .set_index("ClosingDefenderName")
        .query("DefenderPlayCount > 15")
        .sort_values("normClosingRatio", ascending=True)
        .tail(25)
    )
    top_for_position["normClosingRatio"].plot(
        kind="barh", ax=panel, color=color_pal[panel_idx]
    )
    panel.set_ylabel("")
    panel.set_xlabel("Normalized Closing Ratio", fontsize=10)
    panel.set_xlim(72, 78)
    panel.grid(False)
    panel.set_title(panel_title, fontsize=14)

plt.suptitle("Top Closing Players by Position", y=1.03, fontsize=20)
plt.tight_layout()
plt.show()

# Conclusion and Further Research

Being an NFL defensive player requires both physical and mental toughness. As opposed to offensive players, who are executing a play that they know, defenders must recognize and respond in an instant! A crucial time for a defender's swift response is while the ball is in the air.

In this paper, I've presented a new metric called the **Normalized Closing Ratio** (NCR). I created the NCR to identify how well defensive players close on the ball while it is in the air. This metric is only possible to compute because of the detailed tracking data provided by NextGen Stats. Using the NGS data, I show how I can calculate players' distances to the pass's destination. I show how this simple metric is closely linked to play outcome, and display some of the top NCR players from 2018.

I believe this metric, and ones like it, hold significant potential to change the ways players, coaches and fans understand defensive players' abilities. It could be used as a tool for identifying players for offensive coaches to avoid. Defensive coaches could use it as an additional metric when determining who makes the starting lineup. Coaches and fans alike could look back at high-scoring NCR plays to see what schemes or techniques were used to produce positive outcomes.

More testing and evaluation are, of course, required for the NCR. I believe, though, that further serious analysis could very likely help sharpen the entire football community's understanding of how defenders make decisions while the football is in the air.


# Appendix

## Examples of NCR by Route Type

While creating the Normalized Closing Ratio metric, I visually inspected numerous plays using plots like the ones shown below. Below are two columns of plots: on the left side are “bad” NCR plays (where the score is below 60). On the right are “good” NCR plays (where the score is above 80). These plots show how the metric captures the complexities of football while staying simple enough to quickly gain insights.

In [None]:
seed = 900
plt.style.use("default")

# Routes that appear more than 1000 times. The counts Series is filtered
# directly so the result does not depend on the column name that
# value_counts().to_frame() produces, which differs between pandas versions.
route_counts = play_coverage_stats["route"].value_counts()
frequent_routes = route_counts[route_counts > 1000].index

# Options shared by the "bad" and "good" panels.
panel_options = dict(
    show_ddrt=False,
    show_ddra=False,
    show_ddda=False,
    show_dddt=False,
    ddrt_pad=(20, 10),
    ddra_pad=(10, 10),
    ddda_pad=(-10, 5),
    dddt_pad=(-10, 0),
    yaxis_pad=(-12, 25),
    add_description=False,
    description_cutoff=90,
    markersize=80,
    title_fs=10,
)
legend_names = [
    "Closing Defender Path",
    "Targeted Receiver Path",
    "Football Path",
    "Defender at Throw and Arrival",
    "Receiver at Throw and Arrival",
    "Quarterback at Throw",
]

for route_type in frequent_routes:
    bad_candidates = play_coverage_stats.query(
        'seconds_ball_in_air > 0.5 and normClosingRatio < 60 and route == @route_type'
    )
    good_candidates = play_coverage_stats.query(
        'seconds_ball_in_air > 0.5 and normClosingRatio > 80 and route == @route_type'
    )
    if bad_candidates.empty or good_candidates.empty:
        # sample(1) raises on an empty frame; skip routes that lack either
        # a "bad" or a "good" example instead of aborting the whole loop.
        continue
    bad_example = bad_candidates.sample(1, random_state=seed)
    good_example = good_candidates.sample(1, random_state=seed)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

    # Left panel: low-NCR play. Right panel: high-NCR play.
    drawn_axes = []
    for panel_ax, example_row in ((ax1, bad_example), (ax2, good_example)):
        crat = example_row["normClosingRatio"].values[0]
        drawn_axes.append(
            plot_play_defender(
                panel_ax,
                example_row["gameId"].values[0],
                example_row["playId"].values[0],
                plays_df,
                games_df,
                tracking_df,
                targeted_df,
                title=f"Normalized Closing Ratio : {crat:0.2f}",
                **panel_options,
            )
        )
    ax1, ax2 = drawn_axes

    # Drop both per-panel legends, then add one shared legend to the right
    # of the second panel.
    ax1.get_legend().remove()
    ax2.get_legend().remove()

    hl = ax2.get_legend_handles_labels()
    ax2.legend(
        hl[0][:5] + [hl[0][6]],
        legend_names,
        loc="upper left",
        bbox_to_anchor=(1.1, 0.95),
    )

    plt.suptitle(route_type.title() + ' Route', y=1.01, fontsize=12)
    plt.show()

The End. Thanks for reading!