# Analytics: Returns For Loss 

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import json
import ipywidgets as widgets

from matplotlib import pyplot as plt
from typing import Dict, List, Any, Callable, Optional
from tqdm.notebook import tqdm

In [None]:
# Field geometry used by plot_field (units presumably yards -- confirm).
SIDE = 5          # margin added around the field when setting axis limits
ENDZONE = 10      # x of the first yard line drawn by plot_field
FIELD_L = 120     # field extent along x
FIELD_W = 53.3    # field extent along y
FIELD_MID = 60    # x of the midfield line
# Axis limits: the field padded by SIDE on every side.
FIELD_X = (-SIDE, FIELD_L + SIDE)
FIELD_Y = (-SIDE, FIELD_W + SIDE)
# Padded axis extents divided by 10; these drive the figure size below.
FIELD_XD, FIELD_YD = ((hi - lo) / 10 for lo, hi in (FIELD_X, FIELD_Y))
FIELD_RATIO = 1.25
# (width, height) handed to Figure.set_size_inches in plot_field.
FIELD_DIM = tuple(FIELD_RATIO * extent for extent in (FIELD_XD, FIELD_YD))


def plot_field(plt):
    """Draw the field outline and yard lines on the current figure.

    Args:
        plt: pyplot-like object; only xlim, ylim, plot, text and gcf are used.

    Side effects:
        Sets the axis limits to FIELD_X / FIELD_Y, draws the four border
        lines and one vertical line every 10 units from ENDZONE up to (not
        including) FIELD_L, labels each line below the field (y = -2), and
        resizes the current figure to FIELD_DIM inches.
    """
    line_style = {"c": "black", "alpha": 0.5}

    plt.xlim(*FIELD_X)
    plt.ylim(*FIELD_Y)

    # Border segments as (xs, ys): bottom, top, left, right.
    borders = (
        ([0, FIELD_L], [0, 0]),
        ([0, FIELD_L], [FIELD_W, FIELD_W]),
        ([0, 0], [0, FIELD_W]),
        ([FIELD_L, FIELD_L], [0, FIELD_W]),
    )
    for xs, ys in borders:
        plt.plot(xs, ys, **line_style)

    # These x positions are drawn solid; every other yard line is dashed.
    solid_at = {FIELD_MID, ENDZONE, FIELD_L - ENDZONE}
    for x in range(ENDZONE, FIELD_L, 10):
        # Label counts up to 50 at x == 60 and back down on the far side.
        yard = 50 - abs(x - 10 - 50)
        dashes = [] if x in solid_at else [2, 2]
        plt.plot([x, x], [0, FIELD_W], dashes=dashes, **line_style)
        plt.text(s=yard, x=x, y=-2, ha="center")

    plt.gcf().set_size_inches(*FIELD_DIM)
    


# Load Data

In [None]:
# Input locations: raw competition data, patched play/PFF tables, and model predictions.
DIR = "../input/nfl-big-data-bowl-2022"
DIR_VT = "../input/process-punt-return-decision-data"
DIR_PRED = "../input/model-training-returns-for-loss"

# Predictions: the "players" column is stored as JSON text, so decode it per row.
df_preds = pd.read_csv(f"{DIR_PRED}/predictions.csv")
df_preds["players"] = df_preds["players"].apply(json.loads)
print(f"Loaded {len(df_preds):,d} plays with predictions.")

df_games = pd.read_csv(f"{DIR}/games.csv")
df_players = pd.read_csv(f"{DIR}/players.csv")

# Plays and PFF tables come from our own patched output rather than the raw files.
df_plays = pd.read_csv(f"{DIR_VT}/plays_patched.csv")
df_pff = pd.read_csv(f"{DIR_VT}/pff_patched.csv")

In [None]:
# Inspect which columns the predictions file provides.
df_preds.columns

In [None]:
# Peek at the first rows of the predictions table.
df_preds.head()

# Heat Map

In [None]:
# Columns prepended to every display-column list below
# (presumably they identify a play uniquely -- confirm).
PLAY_KEYS = ["gameId", "playId"]

In [None]:
# Preview the landing/result columns used in this section.
# NOTE(review): the original note read "ballLanding is ball is returnable" --
# presumably the ballLanding* fields only apply to returnable punts; confirm.
df_preds[PLAY_KEYS + ['returnerNflId', 'ballLandingYardline','penaltyResultYardline', 'specialTeamsResult']].head()

In [None]:
# List the distinct specialTeamsResult values; result_classifier below maps
# 'Fair Catch', 'Return' and 'Muffed' explicitly and everything else to 'Bail'.
df_preds['specialTeamsResult'].unique()

In [None]:
#Helper Functions
def result_classifier(x):
    """Collapse a specialTeamsResult value into one of three decision classes.

    Args:
        x: a single result value (any object; compared with ==).

    Returns:
        'Fair Catch' for 'Fair Catch', 'Return' for 'Return' or 'Muffed',
        and 'Bail' for anything else (including missing values).
    """
    if x == 'Fair Catch':
        return 'Fair Catch'
    return 'Return' if x in ('Return', 'Muffed') else 'Bail'

In [None]:
# Columns shown when eyeballing intermediate results in the cells below.
DISPLAY_KEYS = PLAY_KEYS + ['returnerNflId', 'ballLandingYardline', 'specialTeamsResult', 'penaltyResultYardline']
# Work on a copy so df_preds stays untouched. copy() already carries
# 'ballLandingYardline', so re-assigning that column from df_preds was a no-op
# and has been dropped.
df_ball_lands = df_preds.copy()
df_ball_lands[DISPLAY_KEYS].head()

In [None]:
# Label every play with its decision class (Fair Catch / Return / Bail).
df_ball_lands['classifiedResult'] = df_ball_lands['specialTeamsResult'].map(result_classifier)
# Show the new label next to the usual display columns.
df_ball_lands[[*DISPLAY_KEYS, 'classifiedResult']]

In [None]:
# Guarded so that re-running this cell does not append 'classifiedResult' a
# second time (which would duplicate the column in every later display).
if 'classifiedResult' not in DISPLAY_KEYS:
    DISPLAY_KEYS = DISPLAY_KEYS + ['classifiedResult']
print(df_ball_lands[DISPLAY_KEYS].reset_index())
# New column: yardage from where the punt lands to where the offense starts,
# computed as penaltyResultYardline - ballLandingYardline.
df_ball_lands['netDecisionYards'] = df_ball_lands['penaltyResultYardline'] - df_ball_lands['ballLandingYardline']

In [None]:
# Check the newly computed netDecisionYards next to the display columns.
df_ball_lands[DISPLAY_KEYS + ['netDecisionYards']]

In [None]:
# Snap each landing coordinate to the nearest whole number so plays can be
# binned into unit cells.
df_ball_lands['landX'] = df_ball_lands['ballLandingX'].apply(np.round)
df_ball_lands['landY'] = df_ball_lands['ballLandingY'].apply(np.round)
# Count plays per (cell, result); after reset_index the count lives in 'playId'.
df_bl_x_y = df_ball_lands.groupby(['landX', 'landY', 'classifiedResult'])['playId'].count().reset_index()
# Share of ALL plays in df_ball_lands (not of the plays within one result class).
df_bl_x_y['fraction'] = df_bl_x_y['playId'] / len(df_ball_lands)
# NOTE(review): this groups by the same three keys as the count above, so each
# group holds exactly one row and max() returns the counts unchanged. If the
# goal was "most common result per cell", the grouping should presumably be
# on ['landX', 'landY'] only -- confirm intent before relying on df_bl_x_y_j.
df_bl_x_y_mc = df_bl_x_y.groupby(['landX', 'landY', 'classifiedResult'])['playId'].max().reset_index()
# Join back on all four columns to re-attach 'fraction'; 'classifiedResult' is
# then exposed under the name 'result'.
df_bl_x_y_j = df_bl_x_y_mc.join(df_bl_x_y.set_index(['landX', 'landY', 'playId', 'classifiedResult'])\
                                , on = ['landX', 'landY', 'playId', 'classifiedResult'], )\
                                .rename(columns = {'classifiedResult' : 'result'})
# NOTE(review): 'result' is a group key carried over from the left frame, so as
# written this check does not look like it can ever fail.
assert df_bl_x_y_j['result'].isna().sum() == 0, "some yards don't have a classified result"


In [None]:
# Preview a 3-colour 'hls' palette.
# NOTE(review): the plots visible in this section use hand-picked colours, not this palette.
sns.palplot(sns.color_palette('hls',3))

In [None]:
# Scatter every landing spot on the field, coloured by decision class.
plot_field(plt)
sns.scatterplot(
    data=df_ball_lands,
    x='ballLandingX',
    y='ballLandingY',
    hue='classifiedResult',
    # NOTE(review): these colours differ from the green/red/blue scheme used
    # by the later heatmap and line plots.
    palette={'Fair Catch': 'blue', 'Return': 'yellow', 'Bail': 'black'},
    alpha=0.5,
)
plt.legend()
plt.show()

In [None]:
# Peek at df_ball_lands with the derived columns added so far.
df_ball_lands.head()

In [None]:

def df_to_heatmap_gp(df):
    """Count plays per floored landing cell and classified result.

    Args:
        df: DataFrame with 'ballLandingX', 'ballLandingY', 'classifiedResult'
            and 'playId' columns. It is not modified.

    Returns:
        DataFrame with columns 'ballLandingXFloor', 'ballLandingYFloor',
        'classifiedResult' and 'playCount' (number of non-null playId values
        in each group), one row per observed combination.
    """
    cell_keys = ['ballLandingXFloor', 'ballLandingYFloor', 'classifiedResult']
    # assign() returns a new frame, so the caller's df keeps its columns.
    binned = df.assign(
        ballLandingXFloor=np.floor(df['ballLandingX']).astype(int),
        ballLandingYFloor=np.floor(df['ballLandingY']).astype(int),
    )
    play_counts = binned.groupby(cell_keys)['playId'].count()
    return play_counts.reset_index().rename(columns={'playId': 'playCount'})

In [None]:
# plot_field(plt)
# sns.scatterplot(data = gp_hm, x = 'ballLandingXFloor', y = 'ballLandingYFloor', opacity = 5)

In [None]:
def join_to_matrix(gp, xSize, ySize, xCol, yCol, valCol):
    """Spread per-cell values from a long table onto a dense 2-D grid.

    Args:
        gp: DataFrame holding the cell coordinates and the value column.
        xSize: number of grid columns.
        ySize: number of grid rows.
        xCol: name of the column in gp matched against the grid column index.
        yCol: name of the column in gp matched against the grid row index.
        valCol: name of the column in gp whose values fill the grid.

    Returns:
        Array of shape (ySize, xSize) where element [row, col] is the value
        for (yCol == row, xCol == col), or 0 where gp has no (or a missing)
        value. Rows of gp whose coordinates fall outside the grid are
        ignored because the join is driven by the grid.
    """
    # One (row, column) pair per grid cell, in row-major order.
    row_grid, col_grid = np.indices((ySize, xSize))
    grid = pd.DataFrame({'rows': row_grid.ravel(), 'columns': col_grid.ravel()})
    # Left join from the grid keeps exactly one line per cell as long as the
    # (yCol, xCol) pairs in gp are unique; duplicates would break the reshape.
    values_by_cell = gp.set_index([yCol, xCol])
    cell_values = grid.join(values_by_cell, on=['rows', 'columns'])[valCol]
    return cell_values.fillna(0).values.reshape(ySize, xSize)

In [None]:
# One 54 x 80 matrix of play fractions per decision class, built in the order
# Fair Catch, Return, Bail.
# NOTE(review): the grid is 80 columns wide here but 120 in
# plot_kicks_from_yardline; join_to_matrix drops cells outside the grid, so
# landings with landX >= 80 are not shown -- confirm this is intended.
mat_fc, mat_ret, mat_bail = (
    join_to_matrix(
        df_bl_x_y[df_bl_x_y['classifiedResult'] == result],
        80,
        54,
        'landX',
        'landY',
        'fraction',
    )
    for result in ('Fair Catch', 'Return', 'Bail')
)

In [None]:
# Heatmap of the fraction of plays ending in a fair catch, per landing cell.
plot_field(plt)
# 'Blues' (was 'Greens') so Fair Catch has the same colour as in
# plot_kicks_from_yardline and the netDecisionYards line plot.
plt.imshow(mat_fc, cmap = 'Blues')
plt.colorbar()
plt.show()

In [None]:
# Heatmap of the fraction of plays ending in a return, per landing cell.
plot_field(plt)
# 'Greens' (was 'Blues') so Return has the same colour as in
# plot_kicks_from_yardline and the netDecisionYards line plot.
plt.imshow(mat_ret, cmap = 'Greens')
plt.colorbar()
plt.show()

In [None]:
# Heatmap of the fraction of plays classified as 'Bail', per landing cell.
plot_field(plt)
plt.imshow(mat_bail, cmap='Reds')
plt.colorbar()
plt.show()

In [None]:
# TODO: create a groupby using a dataframe
# TODO: build analysis on this model
# TODO: think about applications where this could be useful

In [None]:
def plot_kicks_from_yardline(y, r):
    """Plot a landing-spot heatmap for one result class and one kicking band.

    Reads the module-level df_ball_lands at call time, keeps plays with
    kickingYardline in (y - 10, y], bins their landing spots into unit cells
    and draws the play counts for result ``r`` over the field.

    Args:
        y: inclusive upper bound of the kickingYardline band.
        r: classified result to show: 'Return', 'Fair Catch' or 'Bail'.
    """
    colors = {'Return' : 'Greens', 'Bail' : 'Reds', 'Fair Catch' : 'Blues'}
    df_query = df_ball_lands.query(f"kickingYardline <= {y} and kickingYardline > {y-10}")
    df_gp = df_to_heatmap_gp(df_query)
    # Only the cells of the selected result feed the matrix, the guard and the title.
    df_gp_r = df_gp[df_gp['classifiedResult'] == r]
    mat = join_to_matrix(
        df_gp_r,
        120,
        54,
        'ballLandingXFloor',
        'ballLandingYFloor',
        'playCount'
    )
    plot_field(plt)
    plt.axvline(120 - y)
    # NOTE(review): the band is (y - 10, y], so the second marker may have been
    # meant as 120 - (y - 10); left unchanged pending confirmation.
    plt.axvline(120 - y - 10 )
    # Fix: test the selected result, not all results, so an empty class does
    # not draw an all-zero image with a colorbar.
    if not df_gp_r.empty:
        plt.imshow(mat, cmap = colors.get(r))
        plt.colorbar()
    # Fix: n now counts the punts in the band that ended in r; it used to
    # report every punt in the band regardless of result.
    n_result = int(df_gp_r['playCount'].sum())
    plt.title(f"n = {n_result} returnable punts ending in {r}, punts from {y-10} to {y}")
    plt.show()

In [None]:
# Interactive controls: y is given as (10, 80, 10) and r as a list of the three result classes.
_ = widgets.interact(plot_kicks_from_yardline, y = (10,80,10), r = ['Return', 'Fair Catch', 'Bail'])

In [None]:
# Floor (not round) the landing yardline down to a whole number.
df_ball_lands['ballLandingYardline'] = np.floor(df_ball_lands['ballLandingYardline'])
# Keep only punts whose floored landing yardline is at most 65
# (original note: punts that landed further than the opponent's 35).
# NOTE(review): this rebinds df_ball_lands, so the interactive heatmap above
# will use the filtered data from now on -- confirm that is intended.
df_ball_lands = df_ball_lands[df_ball_lands['ballLandingYardline'] <= 65]

In [None]:
def netDecisionYards_based_on_kickingYardline(y) :
    """Plot net decision yards against landing yardline for one kicking band.

    Reads the module-level df_ball_lands at call time, keeps plays with
    kickingYardline in (y - 10, y], drops (landing yardline, result) groups
    with fewer than 4 plays, and draws one line per classified result.
    The per-play values are passed to seaborn, which does the aggregation
    at each landing yardline.

    Args:
        y: inclusive upper bound of the kickingYardline band.
    """
    group_keys = ['ballLandingYardline', 'classifiedResult']
    df_heats = df_ball_lands.query(f"kickingYardline > {y-10} and kickingYardline <= {y}")

    # Number of plays in each (landing yardline, result) group, aligned to the
    # rows of df_heats. This replaces the earlier groupby + join-back; the
    # unused per-group mean table was removed.
    play_count = df_heats.groupby(group_keys)['playId'].transform('count')

    df_heat_zones_ud = (
        df_heats
        .assign(playCount=play_count)
        .rename(columns={'netDecisionYards': 'yardsPlusMinus'})
        # Groups with fewer than 4 plays are left out of the plot.
        .query('playCount >= 4')
    )

    print(f"Decision plus/minus yardage when punted between {y - 10} and {y} of kicking team")

    sns.lineplot(
        data = df_heat_zones_ud,
        x = 'ballLandingYardline',
        y = 'yardsPlusMinus',
        hue = 'classifiedResult',
        palette = {'Fair Catch' : 'blue', 'Return' : 'green', 'Bail' : 'red'}
    )
    plt.gcf().set_size_inches(12,6)
    plt.legend()
    plt.show()

In [None]:
# Interactive control: y is given as (10, 80, 10), matching the 10-yard bands used in the function.
_ = widgets.interact(netDecisionYards_based_on_kickingYardline, y = (10,80,10))