In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from tqdm import tqdm
import base64
import os
import re
import math
import plotly.express as px
import plotly.graph_objects as go
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns
from ipywidgets import widgets, interactive, interact, interact_manual
! pip install mplcyberpunk
import mplcyberpunk
plt.style.use("cyberpunk")
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the three competition metadata tables (plays, games, players) in one pass.
plays, games, players = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/nfl-big-data-bowl-2021/plays.csv',
        '../input/nfl-big-data-bowl-2021/games.csv',
        '../input/nfl-big-data-bowl-2021/players.csv',
    )
)

In [None]:
dataDir = "/kaggle/input/nfl-big-data-bowl-2021/"
# Read the 17 weekly tracking files and concatenate them in ONE call.
# Growing a frame with pd.concat inside the loop copies everything read so far
# on every iteration, and seeding the result with an empty, column-only frame
# can force the numeric columns (x, y, s, ...) to object dtype.
weeks = pd.concat(
    [pd.read_csv(dataDir + "week{}.csv".format(i)) for i in range(1, 18)],
    ignore_index=True,
)

In this notebook, I demonstrate some interesting insights using visualization. Mostly I will focus on NFL teams and individuals' defence performance. As for the team logo images that I used in this notebook, please feel free to refer to my dataset: https://www.kaggle.com/anzhemeng/nfl-team-logos.

**NOTE**: some figures were created with ipywidgets' interactive module and are unavailable in the static Kaggle notebook view. Please copy and edit this notebook if you are interested in them.

**Helper functions:**

In [None]:
def getImage(path):
    """Read the image file at ``path`` and wrap it in a half-size OffsetImage."""
    pixels = plt.imread(path)
    return OffsetImage(pixels, zoom=0.5)

## credit to https://www.kaggle.com/hungnguyen95/nfl-big-data-bowl-2021-plotting-player-heatmap
def create_football_field(ax=None,
                          linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          facecolor='darkgreen'):
    """
    Draw a 120 x 53.3 football field on a matplotlib axes.

    Everything is drawn on ``ax`` itself (not on pyplot's "current" axes), so
    the function can be used with a grid of subplots.

    Args:
    -----
        * ax: axes to draw on; defaults to ``plt.gca()``
        * linenumbers (bool): write the yard numbers along both sidelines
        * endzones (bool): shade the two 10-unit-wide end zones
        * highlight_line (bool): draw a yellow vertical line at
          ``highlight_line_number`` (+10 for the end zone offset)
        * highlight_line_number (int): yard line to highlight
        * highlighted_name (str): label written next to the highlighted line
        * fifty_is_los (bool): mark x=60 in gold as the player's yard line at snap
        * facecolor: fill colour of the field

    Returns:
    --------
        The axes the field was drawn on
    """
    if ax is None:
        ax = plt.gca()

    field = patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                              edgecolor='r', facecolor=facecolor, zorder=0)
    ax.add_patch(field)

    # Field outline and the 10-yard lines as one continuous polyline.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='black')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')
    # Endzones: x in [0, 10] and [110, 120], i.e. each 10 units wide.
    if endzones:
        for left_edge in (0, 110):
            ax.add_patch(patches.Rectangle((left_edge, 0), 10, 53.3,
                                           linewidth=0.1,
                                           edgecolor='r',
                                           facecolor='blue',
                                           alpha=0.2,
                                           zorder=0))
    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')
    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to 50 at midfield and back down again.
            numb = x
            if x > 50:
                numb = 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='black')
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='black', rotation=180)
    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    # Four rows of one-unit tick marks: both sidelines and the two inner rows.
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='black')
        ax.plot([x, x], [53.0, 52.5], color='black')
        ax.plot([x, x], [22.91, 23.57], color='black')
        ax.plot([x, x], [29.73, 30.39], color='black')

    if highlight_line:
        hl = highlight_line_number + 10
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return ax

## credit to https://medium.com/kenlok/how-to-create-sankey-diagrams-from-dataframes-in-python-e221c1b4d6b0
def genSankey(df, cat_cols=None, value_cols='', title='Sankey Diagram'):
    """
    Build a plotly figure dict for a Sankey diagram from a dataframe.

    Args:
    -----
        * df (DataFrame): one row per flow
        * cat_cols (list of str): at least two categorical columns, ordered
          from the left-most to the right-most level of the diagram
        * value_cols (str): name of the column holding the flow size
        * title (str): figure title

    Returns:
    --------
        dict with ``data`` and ``layout`` keys, suitable for ``go.Figure``

    Raises:
    -------
        ValueError: if fewer than two categorical columns are given
    """
    cat_cols = [] if cat_cols is None else list(cat_cols)
    if len(cat_cols) < 2:
        raise ValueError('genSankey needs at least two cat_cols to form source-target pairs')

    # one colour per level; the palette is cycled if there are more levels
    colorPalette = ['#4B8BBE', '#306998', '#FFE873', '#FFD43B', '#646464']

    # Collect the node labels in order of first appearance and give each the
    # colour of the first level it appears in, so labels and colours always
    # have the same length and order, even when a label occurs in two columns.
    label_colors = {}
    for level, cat_col in enumerate(cat_cols):
        level_color = colorPalette[level % len(colorPalette)]
        for label in pd.unique(df[cat_col]):
            label_colors.setdefault(label, level_color)
    labelList = list(label_colors)
    colorList = list(label_colors.values())
    label_ids = {label: position for position, label in enumerate(labelList)}

    # transform df into source-target pairs, one frame per adjacent column pair
    pair_frames = []
    for source_col, target_col in zip(cat_cols, cat_cols[1:]):
        pair = df[[source_col, target_col, value_cols]].copy()
        pair.columns = ['source', 'target', 'count']
        pair_frames.append(pair)
    sourceTargetDf = (pd.concat(pair_frames)
                      .groupby(['source', 'target'])
                      .agg({'count': 'sum'})
                      .reset_index())

    # add node index for each end of every source-target pair
    sourceTargetDf['sourceID'] = sourceTargetDf['source'].map(label_ids)
    sourceTargetDf['targetID'] = sourceTargetDf['target'].map(label_ids)

    # creating the sankey diagram
    data = dict(
        type='sankey',
        node=dict(
            pad=15,
            thickness=20,
            line=dict(
                color="black",
                width=0.5
            ),
            label=labelList,
            color=colorList
        ),
        link=dict(
            source=sourceTargetDf['sourceID'],
            target=sourceTargetDf['targetID'],
            value=sourceTargetDf['count']
        )
    )

    layout = dict(
        title=title,
        font=dict(
            size=10
        )
    )

    fig = dict(data=[data], layout=layout)
    return fig

# General Defence Performance
---
I compared the defensive results of all teams in the league, measured by EPA, offensive play result, and play result, respectively. Generally speaking, the Baltimore Ravens are the most outstanding.

## Matplotlib version

In [None]:
# Attach the two teams of each game to every play, then derive the defending
# team as "whichever of home/visitor does not have possession".
play_result_teams = pd.merge(plays[['possessionTeam', 'gameId', 'playResult', 'offensePlayResult', 'epa']], games[['homeTeamAbbr', 'visitorTeamAbbr', 'gameId']], on='gameId')
play_result_teams['defensiveTeam'] = None
home_has_ball = play_result_teams['homeTeamAbbr'] == play_result_teams['possessionTeam']
visitor_has_ball = play_result_teams['visitorTeamAbbr'] == play_result_teams['possessionTeam']
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
play_result_teams.loc[home_has_ball, 'defensiveTeam'] = play_result_teams.loc[home_has_ball, 'visitorTeamAbbr']
play_result_teams.loc[visitor_has_ball, 'defensiveTeam'] = play_result_teams.loc[visitor_has_ball, 'homeTeamAbbr']
play_result_teams = pd.merge(play_result_teams, games[['gameId', 'week']], on='gameId')

In [None]:
measure = widgets.Dropdown(
    options=['playResult', 'offensePlayResult', 'epa'],
    value='playResult',
    description='Measure',
)
week = widgets.Dropdown(
    options=['Overall'] + ['Week {}'.format(i) for i in range(1, 18)],
    value='Overall',
    description='Period',
)

def barit(week, measure):
    """
    Filters and plot the dataframe as a bar plot of teams' defence performance

    Args:
    -----
        * week (str): the period to filter on, or "Overall" to display the entire season
        * measure (str): column of ``play_result_teams`` to average per defensive team

    Returns:
    --------
        None; draws a matplotlib bar plot, or prints a message when the
        selection contains no plays

    """
    plot = play_result_teams
    if week != 'Overall':
        week_num = int(week.split(' ')[1])
        plot = plot[plot.week == week_num]

    # Plot it (only if there's data to plot)
    if len(plot) == 0:
        print("No data to show for current selection")
        return

    defence = plot.groupby('defensiveTeam')[measure].mean()

    # Look logos up by team instead of pairing them with bars by position:
    # in a single week not every team plays, so positional pairing would put
    # logos on the wrong bars. File stems are assumed to be team abbreviations,
    # which the team dropdown of the pass heat map in this notebook also relies on.
    logo_dir = '../input/nfl-team-logos/'
    logo_by_team = {name.split('.')[0]: logo_dir + name for name in os.listdir(logo_dir)}

    positions = list(range(1, len(defence) + 1))
    fig, ax = plt.subplots(figsize=(15, 15))
    ax.barh(positions, defence.tolist())
    ax.set_xlabel('Opponent\'s Average Gain', size=15)
    ax.set_yticklabels([])
    ax.set_title('Team Defence Performance', size=15)
    for bar_pos, (team_abbr, mean_gain) in zip(positions, defence.items()):
        logo_path = logo_by_team.get(team_abbr)
        if logo_path is None:
            continue
        ab = AnnotationBbox(getImage(logo_path), (mean_gain, bar_pos), xybox=(-25, 0), frameon=False,
                            xycoords='data', boxcoords="offset points", pad=0)
        ax.add_artist(ab)

interact(barit, week=week, measure=measure)

## Plotly version

In [None]:
fig = go.Figure()
# One bar trace per measure; the dropdown below toggles their visibility.
for column in ['playResult', 'offensePlayResult', 'epa']:
    defence = play_result_teams.groupby('defensiveTeam').agg({column: 'mean'})
    fig.add_trace(
        go.Bar(
            x = list(range(1, defence.shape[0] + 1)),
            y = defence[column],
            name = 'Overall {}'.format(column)
        )
    )
# Blank tick values hide the numeric x ticks (logos label the bars instead).
# The x axis is the same for every trace, so this is set once.
fig.update_xaxes(tickvals=['' for i in range(defence.shape[0])])

# add images
# NOTE(review): logos are paired with bars by position, which assumes the
# sorted logo file names follow the same order as the grouped team
# abbreviations -- confirm against the logo dataset.
for i, src in zip(range(defence.shape[0]), sorted(os.listdir('../input/nfl-team-logos'))):
    with open('../input/nfl-team-logos/' + src, 'rb') as logo_file:
        logo = base64.b64encode(logo_file.read())
    fig.add_layout_image(
        source='data:image/png;base64,{}'.format(logo.decode()),
        xref="x",
        yref="y",
        x=i+1,
        y=0,
        xanchor="center",
        yanchor="middle",
        sizex=1,
        sizey=1,
    )

# NOTE(review): all three traces start visible while button 1 is marked
# active -- confirm whether the initial view should show a single measure.
fig.update_layout(
    template='plotly_dark',
    updatemenus=[{
        "active": 1,
        "showactive": True,
        "buttons": list(
            [
             dict(label = 'playResult',
                  method = 'update',
                  args = [{'visible': [True, False, False]}, # the index of True aligns with the indices of plot traces
                          {'title': 'Overall Play Result',
                           'showlegend': True}]),
             dict(label = 'offensePlayResult',
                  method = 'update',
                  args = [{'visible': [False, True, False]},
                          {'title': 'Offense Play Result',
                           'showlegend': True}]),
             dict(label = 'epa',
                  method = 'update',
                  args = [{'visible': [False, False, True]},
                          {'title': 'EPA',
                           'showlegend':True}])
            ]
        )
    }
    ],
     title="Team Defence Performance",
     xaxis_title="",
     yaxis_title="Opponent's Average Gain",
     legend_title="Measure"
)

fig.show()

# Offence Options
----
In this league, most offensive sides would choose to attack by passing the ball.

In [None]:
# Attach the two teams of each game to every play, then derive the defending
# team as "whichever of home/visitor does not have possession".
opposite_offence_teams = pd.merge(plays[['possessionTeam', 'gameId', 'playType']], games[['homeTeamAbbr', 'visitorTeamAbbr', 'gameId']], on='gameId')
opposite_offence_teams['defensiveTeam'] = None
home_has_ball = opposite_offence_teams['homeTeamAbbr'] == opposite_offence_teams['possessionTeam']
visitor_has_ball = opposite_offence_teams['visitorTeamAbbr'] == opposite_offence_teams['possessionTeam']
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
opposite_offence_teams.loc[home_has_ball, 'defensiveTeam'] = opposite_offence_teams.loc[home_has_ball, 'visitorTeamAbbr']
opposite_offence_teams.loc[visitor_has_ball, 'defensiveTeam'] = opposite_offence_teams.loc[visitor_has_ball, 'homeTeamAbbr']
opposite_offence_teams = pd.merge(opposite_offence_teams, games[['gameId', 'week']], on='gameId')

## Matplotlib version

In [None]:
def stackit(week):
    """
    Filters and plot the dataframe as a stack plot of the play types each defence faced

    Args:
    -----
        * week (str): the period to filter on, or "Overall" to display the entire season

    Returns:
    --------
        None; draws a matplotlib stacked bar plot, or prints a message when
        the selection contains no plays

    """
    plot = opposite_offence_teams
    if week != 'Overall':
        week_num = int(week.split(' ')[1])
        plot = plot[plot.week == week_num]

    # Plot it (only if there's data to plot)
    if len(plot) == 0:
        print("No data to show for current selection")
        return

    # rows: defensive team, columns: play type, values: number of plays faced
    play_counts = (plot.groupby(['defensiveTeam', 'playType'])['possessionTeam']
                   .count()
                   .unstack())

    # Look logos up by team instead of pairing them with bars by position:
    # in a single week not every team plays, so positional pairing (and a
    # hard-coded 32 teams) would put logos on the wrong bars. File stems are
    # assumed to be team abbreviations, which the team dropdown of the pass
    # heat map in this notebook also relies on.
    logo_dir = '../input/nfl-team-logos/'
    logo_by_team = {name.split('.')[0]: logo_dir + name for name in os.listdir(logo_dir)}

    fig, ax = plt.subplots(figsize=(15, 15))
    play_counts.plot(kind='bar', stacked=True, ax=ax)
    ax.set_xlabel('Faced Play Types', size=15)
    ax.set_xticklabels([])
    ax.set_title('Team Defence Performance', size=15)
    for bar_pos, (team_abbr, pass_count) in enumerate(play_counts['play_type_pass'].items()):
        logo_path = logo_by_team.get(team_abbr)
        # a team that faced no pass play has no bar top to anchor the logo to
        if logo_path is None or pd.isna(pass_count):
            continue
        ab = AnnotationBbox(getImage(logo_path), (bar_pos + .8, pass_count), xybox=(-25, 0), frameon=False,
                            xycoords='data', boxcoords="offset points", pad=0)
        ax.add_artist(ab)

interactive(stackit, week=week)

## Plotly version

In [None]:
fig = go.Figure()
defence = opposite_offence_teams.groupby(['defensiveTeam', 'playType']).agg({'possessionTeam': 'count'})
defence = defence.reset_index()

# Pivot to one row per team and one column per play type, filling missing
# combinations with 0. Filtering each play type separately would shorten the
# y series whenever a team has no play of that type and shift every later
# team's bar onto the wrong x position.
play_type_colors = [
    ('play_type_pass', 'rgb(55, 83, 109)'),
    ('play_type_sack', 'rgb(26, 118, 255)'),
    ('play_type_unknown', 'rgb(35, 63, 19)'),
]
counts = (defence.pivot(index='defensiveTeam', columns='playType', values='possessionTeam')
          .reindex(columns=[play_type for play_type, _ in play_type_colors])
          .fillna(0))
team_positions = list(range(1, counts.shape[0] + 1))

for play_type, bar_color in play_type_colors:
    fig.add_trace(
        go.Bar(
            x = team_positions,
            y = counts[play_type],
            marker_color=bar_color,
            name=play_type
        )
    )

# Blank tick values hide the numeric x ticks (logos label the bars instead).
fig.update_xaxes(tickvals=['' for i in team_positions])
fig.update_layout(yaxis_range=[-50,700],
                 title="Team Defence Performance",
                 xaxis_title="Faced Play Types",
                 yaxis_title="Amount",
                 legend_title="Play Type")

# add images
# NOTE(review): logos are paired with bars by position, which assumes the
# sorted logo file names follow the same order as the team abbreviations --
# confirm against the logo dataset.
for i, src in zip(range(counts.shape[0]), sorted(os.listdir('../input/nfl-team-logos'))):
    with open('../input/nfl-team-logos/' + src, 'rb') as logo_file:
        logo = base64.b64encode(logo_file.read())
    fig.add_layout_image(
        source='data:image/png;base64,{}'.format(logo.decode()),
        xref="x",
        yref="y",
        x=i+1,
        y=0,
        xanchor="center",
        yanchor="middle",
        sizex=50,
        sizey=50,
    )

fig.update_layout(
    template='plotly_dark',
    barmode='stack'
)

fig.show()

# How do teams usually play the games?

In [None]:
# Count plays for every (offensive personnel, play type, defensive personnel)
# combination and show the flows between the three levels as a Sankey diagram.
personnel_levels = ['personnelO', 'playType', 'personnelD']
plot = plays.groupby(personnel_levels)['gameId'].count().reset_index()
go.Figure(genSankey(plot, personnel_levels, 'gameId', 'Distribution of Offensive Personnel vs. Defensive Personnel'))

# Team Unallowed Pass Completion Percentage
---
Since passing is the type of offence an NFL team faces most often, let's look at which team is the most effective at dealing with it. It turns out that Baltimore outperforms all its opponents this season, which seems to suggest that whoever is good at stopping passes is good at defence.

In [None]:
# Attach the two teams of each game to every play, then derive the defending
# team as "whichever of home/visitor does not have possession".
play_teams = pd.merge(plays[['possessionTeam', 'gameId', 'passResult']], games[['homeTeamAbbr', 'visitorTeamAbbr', 'gameId']], on='gameId')
play_teams['defensiveTeam'] = None
home_has_ball = play_teams['homeTeamAbbr'] == play_teams['possessionTeam']
visitor_has_ball = play_teams['visitorTeamAbbr'] == play_teams['possessionTeam']
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
play_teams.loc[home_has_ball, 'defensiveTeam'] = play_teams.loc[home_has_ball, 'visitorTeamAbbr']
play_teams.loc[visitor_has_ball, 'defensiveTeam'] = play_teams.loc[visitor_has_ball, 'homeTeamAbbr']
play_teams = pd.merge(play_teams, games[['gameId', 'week']], on='gameId')

In [None]:
def scatterit(week):
    """
    Filters and plot the dataframe as a scatter plot of teams' defence performance

    Args:
    -----
        * week (str): the period to filter on, or "Overall" to display the entire season

    Returns:
    --------
        None; draws a matplotlib scatter plot, or prints a message when the
        selection contains no plays

    """
    plot = play_teams
    if week != 'Overall':
        week_num = int(week.split(' ')[1])
        plot = plot[plot.week == week_num]

    # Plot it (only if there's data to plot)
    if len(plot) == 0:
        print("No data to show for current selection")
        return

    # share of passes faced whose result is anything other than 'C'
    passes_faced = plot.groupby('defensiveTeam')['passResult'].count()
    not_completed = plot[plot['passResult'] != 'C'].groupby('defensiveTeam')['passResult'].count()
    defence = not_completed / passes_faced

    # Look logos up by team instead of pairing them with points by position:
    # in a single week not every team plays, so positional pairing would put
    # logos on the wrong points. File stems are assumed to be team
    # abbreviations, which the team dropdown of the pass heat map in this
    # notebook also relies on.
    logo_dir = '../input/nfl-team-logos/'
    logo_by_team = {name.split('.')[0]: logo_dir + name for name in os.listdir(logo_dir)}

    positions = list(range(1, len(defence) + 1))
    fig, ax = plt.subplots(figsize=(15, 15))
    ax.scatter(positions, defence.tolist())
    ax.set_ylabel('Unallowed Passes %', size=15)
    ax.set_xticklabels([])
    ax.set_title('Team Defence Performance', size=15)
    for x0, (team_abbr, y0) in zip(positions, defence.items()):
        logo_path = logo_by_team.get(team_abbr)
        # NaN rate: the team had no non-complete pass in the selection
        if logo_path is None or pd.isna(y0):
            continue
        ab = AnnotationBbox(getImage(logo_path), (x0, y0), frameon=False)
        ax.add_artist(ab)

interactive(scatterit, week=week)

In [None]:
# Share of passes faced by each defence whose result is anything other than 'C'.
defence = play_teams[play_teams['passResult'] != 'C'].groupby('defensiveTeam').agg({'passResult': 'count'}) / play_teams.groupby('defensiveTeam').agg({'passResult': 'count'})
fig = px.scatter(x=[i+1 for i in range(defence.shape[0])], 
                 y=defence['passResult'].tolist(), 
                 template='plotly_dark',
                 labels=dict(x="", y="Unallowed Passes %"))

# Blank tick values hide the numeric x ticks (logos label the points instead).
fig.update_xaxes(tickvals=['' for i in range(defence.shape[0])])
# The x axis enumerates teams, not play types, so it keeps the empty label
# requested in px.scatter above (the previous "Faced Play Types" title was
# carried over from the play-type chart).
fig.update_layout(
                 title="Team Defence Performance",
                 xaxis_title="",
                 yaxis_title="Unallowed Passes %"
                )

# add images
# NOTE(review): logos are paired with points by position, which assumes the
# sorted logo file names follow the same order as the grouped team
# abbreviations -- confirm against the logo dataset.
for i,src,yy in zip(range(defence.shape[0]), sorted(os.listdir('../input/nfl-team-logos')), defence['passResult'].tolist()):
    with open('../input/nfl-team-logos/'+src, 'rb') as logo_file:
        logo = base64.b64encode(logo_file.read())
    fig.add_layout_image(
        source='data:image/png;base64,{}'.format(logo.decode()),
        xref="x",
        yref="y",
        x=i+1,
        y=yy,
        xanchor="center",
        yanchor="middle",
        sizex=1,
        sizey=1,
    )

fig.show() 

# Where did the passes go?
---
Thanks to [this notebook](https://www.kaggle.com/beomjunbae/parsing-targets), I managed to discover the potential target player's location. My idea is that assuming the receiver was determined when the ball was thrown by the passer (usually the quarterbacks), the location of the target player can be considered as the destination of the ball.
According to my visualization, most passes clustered within the two 20-yardlines.

In [None]:
# Keep each week's tracking data as its own frame (ws[0] is week 1) so a play
# only has to be searched for within its own week.
ws = [pd.read_csv(dataDir+"week{}.csv".format(week_no)) for week_no in range(1, 18)]
    
def get_week_df(gameID, playID):
    """
    Return the tracking rows of one play together with the week it was played in.

    The week is looked up in ``games``; the rows come from that week's entry
    in ``ws``.
    """
    game_row = games[games.gameId == gameID].iloc[0]
    wk = game_row.week
    week_df = ws[wk-1]
    on_this_play = (week_df.playId==playID)&(week_df.gameId==gameID)
    return week_df[on_this_play], wk

# Raw strings keep the regex escapes (\. and \s) intact without relying on
# Python passing unknown string escapes through; the group is non-capturing
# because only the whole match is used.
initial_re = r" (?:to|for) [A-Z]{1}[a-z]*\.\s?[A-Z]{1}[-'a-zA-Z]+[\s\.$]+"
name_re =              r"[A-Z]{1}[a-z]*\.\s?[A-Z]{1}[-'a-zA-Z]+[\s\.$]+"
initial_pattern = re.compile(initial_re)
name_pattern = re.compile(name_re)

# True where the description mentions a "to/for <Initial>.<Surname>" target.
filt = plays.playDescription.str.contains(initial_re, regex=True)
full_names = []
# Iterate the needed columns directly instead of building plays.iloc[i]
# several times per play.
play_rows = zip(plays.playDescription, plays.gameId, plays.playId, filt)
for description, game_id, play_id, has_target in tqdm(play_rows, total=len(plays)):
    full_name = "-"
    if "E.St. Brown" in description:
        # the abbreviated-name pattern cannot parse this two-word surname
        full_name = "Equanimeous St. Brown"
    elif has_target:
        target_phrase = initial_pattern.search(description).group(0)
        # split "I.Surname" into [initial(s), surname]
        target = name_pattern.search(target_phrase).group(0).split(".")

        if target[-1] == '':
            target.pop(-1)

        # Get the appropriate week\d.csv rows for this play
        result, wk = get_week_df(game_id, play_id)

        # filter by the qb's team, if there is a qb on the field
        qb_rows = result.query("position=='QB'")
        if len(qb_rows) > 0:
            qb_team = qb_rows.team.iloc[0]
            result = result[result.team==qb_team]

        # filter for names that match
        spec_name_re = r"^{}[-'\.a-zA-Z]+ {}$".format(target[0].strip(), target[1].strip())
        result = result[result.displayName.str.contains(spec_name_re, regex=True)].displayName.unique().tolist()
        if len(result) == 0:
            players_result = players[players.displayName.str.contains(spec_name_re, regex=True)]
            if len(players_result) > 1:
                # several players share the name: use the one who appears in this game
                week_df = ws[wk-1]
                potential = pd.Series(week_df[week_df.gameId==game_id].displayName.unique().tolist())
                potential = potential[potential.str.contains(spec_name_re, regex=True)]
                if len(potential) != 1:
                    print(target)
                    print(potential)
                    print(wk, game_id, play_id)
                    print(description)
                    print("UNCAUGHT ERROR 11111111111111111111111111111111111111111111111111111111111")
                else:
                    full_name = potential.iloc[0]
            elif len(players_result) == 1:
                full_name = players_result.iloc[0].displayName
            # When len(players_result)==0, it means that the targeted player is not a usually targeted player (LB and etc)
            # so we ignore this case

        else:
            full_name = result[0]
        # there aren't any cases where len(result) > 1. I've checked already
    full_names.append(full_name)
plays = plays.assign(target=full_names)

In [None]:
# Attach the two teams of each game to every play, then derive the defending
# team as "whichever of home/visitor does not have possession".
pass_dest = pd.merge(plays[['possessionTeam', 'gameId', 'target', 'playId', 'quarter']], games[['homeTeamAbbr', 'visitorTeamAbbr', 'gameId', 'week']], on='gameId')
pass_dest['defensiveTeam'] = None
home_has_ball = pass_dest['homeTeamAbbr'] == pass_dest['possessionTeam']
visitor_has_ball = pass_dest['visitorTeamAbbr'] == pass_dest['possessionTeam']
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
pass_dest.loc[home_has_ball, 'defensiveTeam'] = pass_dest.loc[home_has_ball, 'visitorTeamAbbr']
pass_dest.loc[visitor_has_ball, 'defensiveTeam'] = pass_dest.loc[visitor_has_ball, 'homeTeamAbbr']

In [None]:
# Keep only plays with a parsed target. The mask is built from pass_dest's own
# 'target' column: a mask taken from `plays` only lines up if the merge
# happened to preserve row order and index.
truncated_plays = pass_dest.loc[pass_dest['target'] != '-', ['gameId', 'playId', 'target', 'defensiveTeam', 'quarter']]

In [None]:
# Tracking rows (position and identity columns only) for the frames whose
# event is one of the listed pass/snap events.
pass_events = ['pass_forward', 'pass_shovel', 'pass_lateral', 'snap_direct', 'qb_spike']
truncated_weeks = weeks.loc[weeks.event.isin(pass_events),
                            ['x', 'y', 'event', 'displayName', 'gameId', 'playId', 'nflId']]

In [None]:
# Position of the targeted player on each play: match the parsed target name
# against the tracking rows of the same game and play.
target_frames = pd.merge(
    truncated_plays,
    truncated_weeks,
    left_on=['target', 'gameId', 'playId'],
    right_on=['displayName', 'gameId', 'playId'],
)
location = target_frames[['x', 'y', 'defensiveTeam', 'quarter']]

In [None]:
# assume the offensive side always moves from left to right: mirror the
# coordinates of plays in quarters 2, 3 and 5 through the centre of the field
# NOTE(review): the tracking data also has a 'playDirection' column, which may
# be a more reliable basis for this flip than the quarter -- confirm.
flip = location['quarter'].isin([2, 3, 5])
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
location.loc[flip, 'x'] = 120 - location.loc[flip, 'x']
location.loc[flip, 'y'] = 53.3 - location.loc[flip, 'y']

In [None]:
# Dropdown of team choices: 'All' plus the stem of every logo file name.
logo_stems = [item.split('.')[0] for item in sorted(os.listdir('../input/nfl-team-logos'))]
team = widgets.Dropdown(
    options=['All'] + logo_stems,
    value='All',
    description='Team',
)

def heatit(team):
    """
    Draw a density map of the pass target locations faced by a defence

    Args:
    -----
        * team (str): the defensive team to filter on, or "All" to display the entire league

    Returns:
    --------
        None; draws a seaborn density plot on a football field, or prints a
        message when the selection contains no passes

    """
    plot = location.copy()
    if team != 'All':
        plot = plot[plot.defensiveTeam == team]

    # Nothing to draw for an empty selection
    if not len(plot) > 0:
        print("No data to show for current selection")
        return

    plt.figure(figsize=(20, 11))
    ax = create_football_field()
    ax.set_title('Passes Faced by {}'.format(team), size=15)
    sns.kdeplot(plot.x, plot.y, shade = True, color='green',
                shade_lowest=False, n_levels=100, ax=ax)

interactive(heatit, team=team)

# Player Efficiency
---
Similar to the team evaluation, I checked how many pass attempts were stopped by individual defensive players. Here is the list of the top ten players, who managed to achieve an unallowed pass rate of more than 50% with more than 30 potential passes around them (the threshold used in the code below).

In [None]:
# Positions of everyone on the field at the pass/snap frames of each play.
# The tracking rows are filtered to those frames BEFORE the merge, so the merge
# no longer materialises every tracking frame of the season; the resulting rows
# and their order are the same, only the index is a fresh RangeIndex.
pass_events = ['pass_forward', 'pass_shovel', 'pass_lateral', 'snap_direct', 'qb_spike']
pass_frames = weeks.loc[weeks.event.isin(pass_events),
                        ['gameId', 'playId', 'event', 'team', 'x', 'y', 'displayName']]
player_loc = pd.merge(plays[['gameId', 'playId', 'target']], pass_frames, on=['gameId', 'playId'])

In [None]:
# For every play with a parsed target, find the opposing player closest to the
# targeted receiver at the pass/snap frame(s) kept in player_loc.
closest_players = player_loc[['gameId', 'playId']].drop_duplicates()
closest_by_play = {}
# Group once and walk the groups; rebuilding the groupby for every play made
# this loop quadratic in the number of rows.
for (game_id, play_id), frame in tqdm(player_loc.groupby(['gameId', 'playId'], sort=False)):
    if frame['target'].iloc[0] == '-':
        continue
    target_rows = frame[frame['target'] == frame['displayName']]
    if target_rows.empty:
        # the parsed target has no tracking row on this play
        continue
    target_row = target_rows.iloc[0]
    defenders = frame[(frame.team != target_row['team']) & (frame.team != 'football')]
    if defenders.empty:
        continue
    # squared distance is enough to rank by closeness
    dist_sq = ((defenders.x - target_row['x'])**2 + (defenders.y - target_row['y'])**2).astype(float)
    if dist_sq.isna().all():
        continue
    nearest = int(np.nanargmin(dist_sq.to_numpy()))
    closest_by_play[(game_id, play_id)] = defenders['displayName'].iloc[nearest]

# Plays without a usable target keep None, as before.
closest_players['playerName'] = [
    closest_by_play.get(play_key)
    for play_key in zip(closest_players.gameId, closest_players.playId)
]

In [None]:
# Add the pass result and quarter of each play to its closest defender.
closest_players = closest_players.merge(
    plays[['gameId', 'playId', 'passResult', 'quarter']],
    on=['gameId', 'playId'],
)

In [None]:
# Per closest defender: share of passes whose result is anything other than
# 'C', plus the number of passes the share is based on ('num').
passes_by_player = closest_players.groupby('playerName').agg({'passResult': 'count'})
not_completed_by_player = (closest_players[closest_players['passResult'] != 'C']
                           .groupby('playerName')
                           .agg({'passResult': 'count'}))
unallowed_pass_rate = not_completed_by_player / passes_by_player
unallowed_pass_rate['num'] = passes_by_player['passResult']

In [None]:
# Ten players with the highest unallowed pass rate among those with more than
# 30 passes; the rate is scaled by 10000 so it can double as the marker area.
qualified = unallowed_pass_rate[unallowed_pass_rate['num'] > 30]
plot = qualified.reset_index().sort_values('passResult', ascending=False).head(10)
plot['passResult'] = plot['passResult'] * 10000
fig, ax = plt.subplots(figsize=(15, 15))
labelled = plot.reset_index()
labelled.plot(kind='scatter', x='index', y='num', s='passResult', ax=ax)
ax.set_xticklabels([])
ax.set_ylabel('Entries')
ax.set_xlabel('')
ax.set_title('Players with Highest Unallowing Pass Rate')
# Label every marker with the player's name and rate (scaled back to percent).
for _, row in labelled.iterrows():
    caption = '{}\n{:.2f}%'.format(row['playerName'], row['passResult']/100)
    ax.text(row['index'], row['num'], caption,
            horizontalalignment='center', size='medium', color='black', weight='semibold')

In [None]:
# All tracking positions of the selected top players, matched by display name.
tracked_positions = weeks[['x', 'y', 'displayName', 'gameId', 'playId']]
location = plot.merge(tracked_positions, left_on='playerName', right_on='displayName')
location = location[['x', 'y', 'displayName', 'gameId', 'playId']]

In [None]:
# Add the quarter of each play, then mirror the coordinates of plays in
# quarters 2, 3 and 5 through the centre of the field.
location = pd.merge(location, plays[['gameId', 'playId', 'quarter']], left_on=['gameId', 'playId'], right_on=['gameId', 'playId'])
flip = location['quarter'].isin([2, 3, 5])
# Assign through a single .loc call on the frame: chained `df[col].loc[mask] = ...`
# may write to a temporary copy and leave the frame unchanged.
location.loc[flip, 'x'] = 120 - location.loc[flip, 'x']
location.loc[flip, 'y'] = 53.3 - location.loc[flip, 'y']

In [None]:
# One density map per selected player on a 5 x 2 grid of fields.
fig, ax = plt.subplots(5, 2, figsize=(20, 25))

player_names = location['displayName'].unique()
panels = list(ax.flat)  # row-major order, i.e. panel 2*i+j is ax[i][j]
for panel, player_name in zip(panels, player_names):
    player_frames = location[location['displayName'] == player_name]
    panel.set_title('{}\'s location'.format(player_name), size=15)
    sns.kdeplot(player_frames.x, player_frames.y, shade = True, color='green',
                shade_lowest=False, n_levels=100, ax=panel)
    # Make the panel pyplot's current axes first, so the field lands on this
    # panel even where the helper draws through the pyplot interface.
    plt.sca(panel)
    create_football_field(panel)

# Hide panels left over when fewer than ten players were selected.
for unused_panel in panels[len(player_names):]:
    unused_panel.axis('off')

# References
* https://www.kaggle.com/hungnguyen95/nfl-big-data-bowl-2021-plotting-player-heatmap
* https://www.kaggle.com/beomjunbae/parsing-targets
* https://medium.com/kenlok/how-to-create-sankey-diagrams-from-dataframes-in-python-e221c1b4d6b0