In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import ipywidgets as wg
from IPython.core.display import HTML
import matplotlib.animation as anim
import matplotlib.pyplot as plt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the input directory, one per line.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Load plays data and see how many plays were kickoffs

In [None]:
# Load plays.csv from the Kaggle input directory.
plays_path = '/kaggle/input/nfl-big-data-bowl-2022/plays.csv'
plays_df = pd.read_csv(plays_path)

# Print the (rows, columns) shape, then show the first rows as the cell output.
n_rows, n_cols = plays_df.shape
print((n_rows, n_cols))
plays_df.head()

In [None]:
# Count plays per special-teams play type and draw the counts as horizontal bars.
play_type_counts = plays_df['specialTeamsPlayType'].value_counts()
play_type_counts.plot.barh(title="Special teams plays by type", figsize=(8, 4))

Out of 19,979 total special teams plays in the dataset, nearly 8,000 were kickoffs, the most frequent type of special teams play.

In [None]:
# Keep only the plays whose special-teams play type is 'Kickoff'.
is_kickoff = plays_df['specialTeamsPlayType'].eq('Kickoff')
kickoffs_df = plays_df.loc[is_kickoff]
kickoffs_df.head()

### How many kickoffs were returned?

In [None]:
# Count kickoffs per special-teams result and draw the counts as horizontal bars.
kickoff_result_counts = kickoffs_df['specialTeamsResult'].value_counts()
kickoff_result_counts.plot.barh(title="Kickoffs by result", figsize=(9, 4))

Well over half (4,773) of all kickoffs (7,843) went for a touchback. Only 2,921, or 37%, were returned.

In [None]:
# Keep only the kickoffs whose special-teams result is 'Return'.
was_returned = kickoffs_df['specialTeamsResult'].eq('Return')
kickoff_returns_df = kickoffs_df.loc[was_returned]
kickoff_returns_df.head()

### What's the average and maximum return yardage given up by each team?

Let's start by looking at the distribution of return yards overall, then look at a boxplot for each team.

In [None]:
# Histogram of return yardage across all returned kickoffs.
return_yards = kickoff_returns_df[['kickReturnYardage']]
return_yards.plot.hist(title="Kickoff Return Yards Frequency", figsize=(14, 6))

In [None]:
# Horizontal boxplots of return yardage, one box per possessionTeam value.
yards_by_team = kickoff_returns_df[['possessionTeam', 'kickReturnYardage']]
ax = yards_by_team.boxplot(by='possessionTeam', vert=False, figsize=(14, 10))

# Clear the x-axis label and the subplot title on the returned axes.
ax.set_xlabel(None)
ax.set_title(None)

From both the histogram and the boxplot, we can see that most kickoff returns were concentrated around 20 yards, but there were several kickoffs that were returned for more than 100 yards. Those must have been for touchdowns, so let's look at that subset to see if there's anything in the description that indicates a TD. We can use that to get all kickoffs that were returned for a touchdown.

In [None]:
# Returns of 100 yards or more.
is_long_return = kickoff_returns_df['kickReturnYardage'].ge(100.0)
long_returns = kickoff_returns_df.loc[is_long_return]
long_returns

So, only 9 kickoffs in the dataset were returned for 100 yards or more. If we look at the `playDescription` field, we can see that the play result of `TOUCHDOWN` is recorded there. We can use this to find all kickoffs that were returned for a touchdown, regardless of return length.

In [None]:
# Keep the returns whose description mentions a touchdown.
# regex=False matches 'TOUCHDOWN' as plain text; na=False treats a missing
# description as "no touchdown" so the mask stays strictly boolean (a NaN in
# the mask would make the .loc selection raise).
is_td_return = kickoff_returns_df['playDescription'].str.contains('TOUCHDOWN', regex=False, na=False)
td_returns = kickoff_returns_df.loc[is_td_return]
td_returns.head()

So we can see that only 21 out of 2,921 kickoff returns resulted in a touchdown. That's only about 0.7%.

### Which teams gave up the most kickoff returns for a touchdown?

In [None]:
# Number of touchdown returns per possessionTeam value, drawn as horizontal bars.
td_counts_by_team = td_returns.groupby('possessionTeam').size()
ax = td_counts_by_team.plot.barh(title="Kickoff returns for a TD by kicking team", figsize=(14, 6))
ax.set_ylabel('Kicking Team')

### Visualize a Kickoff Return

Credit to [Joseph Mohr](https://www.kaggle.com/josephvm) for the following animation. I took the `getPlayAnimation` function from his [NFL BDB 2022 - Play Animation & Tracking EDA](https://www.kaggle.com/josephvm/nfl-bdb-2022-play-animation-tracking-eda) notebook and modified it to look slightly more like a football field.

In [None]:
# Read one of the tracking files; the animation below draws its frames from this table.
tracking_path = '/kaggle/input/nfl-big-data-bowl-2022/tracking2018.csv'
tracking_df = pd.read_csv(tracking_path)

In [None]:
def getPlayAnimation(df, playId=2956, gameId=2018090903, color_dict={'home': 'red', 'away': 'blue', 'football': 'brown'}):
    """Render one play from tracking data as an HTML5 video of a football field.

    Parameters
    ----------
    df : pandas.DataFrame
        Tracking data. The columns read here are ``gameId``, ``playId``,
        ``frameId``, ``x``, ``y`` and ``team``.
    playId, gameId : int
        Together select the rows of ``df`` that make up the play to animate.
    color_dict : dict
        Maps every value found in the ``team`` column to a matplotlib color.
        It is only read, never modified.

    Returns
    -------
    IPython.display.HTML
        Wraps the markup produced by ``FuncAnimation.to_html5_video()``.

    Raises
    ------
    ValueError
        If no row of ``df`` matches ``playId`` and ``gameId``.
    KeyError
        If a ``team`` value of the play has no entry in ``color_dict``.
    """
    # Get df of tracking data for one play
    one_play = df[(df['playId'] == playId) & (df['gameId'] == gameId)]
    if one_play.empty:
        raise ValueError(
            'No tracking rows found for gameId={} and playId={}'.format(gameId, playId)
        )

    # Animate the frame ids that are actually present, in ascending order.
    # Counting frames as max(frameId) - 1 skipped the final frame and assumed
    # the ids were contiguous starting at 1.
    frame_ids = [int(frame_id) for frame_id in sorted(one_play['frameId'].dropna().unique())]

    fig, ax = plt.subplots(figsize=(12, 6))

    # Don't show y-axis ticks or tick labels
    ax.get_yaxis().set_visible(False)

    # Set background colors
    fig.patch.set_facecolor('xkcd:leaf green')
    ax.set_facecolor('xkcd:leaf green')

    # Set the field border color and line width
    for spine in ax.spines.values():
        spine.set_edgecolor('white')
        spine.set_linewidth(2.5)

    # function to draw one frame
    def plotFrame(frame_id):
        one_frame = one_play[one_play['frameId'] == frame_id]
        ax.cla()

        # Tick every 10 units along x; empty strings leave both ends unlabeled.
        ax.set_xticks(range(0, 121, 10))
        ax.set_xticklabels(
            ['', 'G', '10', '20', '30', '40', '50', '40', '30', '20', '10', 'G', ''],
            fontsize=15,
        )
        ax.tick_params(axis='x', colors='white')

        # Constants from frame to frame
        ax.set_xlim([0, 120])
        ax.set_ylim([0, 53.3])
        for yardline in range(10, 111, 10):
            ax.plot([yardline, yardline], [0, 53.3], c='white')

        # What changes
        # Direct dict lookup so an unknown team value raises KeyError.
        marker_colors = [color_dict[team] for team in one_frame['team']]
        ax.scatter(one_frame['x'], one_frame['y'], c=marker_colors, s=64)
        ax.set_title('Frame: {}'.format(frame_id), fontsize=15, c='white')

    play_anim = anim.FuncAnimation(fig, plotFrame, frames=frame_ids, interval=100)
    html = play_anim.to_html5_video()
    # Close the figure so only the video is displayed, not a static plot as well.
    plt.close(fig)
    return HTML(html)

Let's look at the first kickoff return for a touchdown in our data set, which has TEN kicking to MIA.

In [None]:
# Description of the play stored under index label 134, fetched with one row/column lookup.
play_desc = td_returns.loc[134, 'playDescription']
play_desc

In [None]:
# Marker colors keyed by the values of the tracking data's `team` column.
team_colors = dict(
    home='xkcd:light turquoise',
    away='xkcd:cobalt',
    football='xkcd:mud',
)
getPlayAnimation(
    tracking_df,
    playId=2956,
    gameId=2018090903,
    color_dict=team_colors,
)

### Kick type, direction, hang-time, and return formation

Let's look at more attributes of the play by merging in the scouting data with the data on kickoffs that were returned.

In [None]:
# Load PFFScoutingData.csv from the Kaggle input directory and preview it.
scouting_path = '/kaggle/input/nfl-big-data-bowl-2022/PFFScoutingData.csv'
scouting_df = pd.read_csv(scouting_path)
scouting_df.head()

In [None]:
# Keep the columns that are related to kickoffs: the two play keys first,
# then the scouting attributes.
kickoff_cols = ['gameId', 'playId'] + [
    'hangTime',
    'kickType',
    'kickDirectionIntended',
    'kickDirectionActual',
    'missedTackler',
    'returnDirectionIntended',
    'returnDirectionActual',
    'kickoffReturnFormation',
    'specialTeamsSafeties',
]
kickoff_scouting_df = scouting_df.loc[:, kickoff_cols]
kickoff_scouting_df.head()

In [None]:
# Merge the scouting data with the kickoffs that were returned.
merge_keys = ['gameId', 'playId']

# This cell overwrites kickoff_returns_df with its own merge result, so running
# it a second time would merge again and produce `_x`/`_y` suffixed columns.
# Dropping scouting columns left over from an earlier run keeps it re-runnable.
scouting_value_cols = [col for col in kickoff_scouting_df.columns if col not in merge_keys]
kickoff_returns_df = (
    kickoff_returns_df
    .drop(columns=scouting_value_cols, errors='ignore')
    # validate raises MergeError if a (gameId, playId) pair appears more than
    # once in the scouting data, which would silently duplicate return rows.
    .merge(kickoff_scouting_df, how='left', on=merge_keys, validate='many_to_one')
)
kickoff_returns_df.head()

### What are the most common kick types and kickoff return formations?

In [None]:
# Number of returned kickoffs per kick type.
kick_type_counts = kickoff_returns_df['kickType'].value_counts()
kick_type_counts

In [None]:
# Horizontal boxplots of return yardage, one box per kick type.
yards_by_kick_type = kickoff_returns_df[['kickType', 'kickReturnYardage']]
ax = yards_by_kick_type.boxplot(by='kickType', vert=False, figsize=(14, 6))

# Clear the x-axis label and the subplot title on the returned axes.
ax.set_xlabel(None)
ax.set_title(None)

Since the deep kick is the most common, it's also the kick type with the most returns for long yardage.

The kickoff formation is a 3-digit code indicating the number of players in the Front Wall, Mid Wall, and Back Wall during the kick. Let's look at which formations are most popular, then at the distribution of return yards for each formation.

In [None]:
# Number of returned kickoffs per return formation.
formation_counts = kickoff_returns_df['kickoffReturnFormation'].value_counts()
formation_counts

In [None]:
# Horizontal boxplots of return yardage, one box per return formation.
yards_by_formation = kickoff_returns_df[['kickoffReturnFormation', 'kickReturnYardage']]
ax = yards_by_formation.boxplot(by='kickoffReturnFormation', vert=False, figsize=(14, 8))

# Clear the x-axis label and the subplot title on the returned axes.
ax.set_xlabel(None)
ax.set_title(None)

Not surprisingly, most of the long returns were when the return team was in the most popular formation.

### Is hang time related to return yardage?

Let's look at the distribution of hang times of kicks and how it relates to return yards.

In [None]:
# Kernel density estimate of hang time across returned kickoffs.
hang_times = kickoff_returns_df['hangTime']
hang_times.plot.kde(title="Distribution of Kickoff Hang Times (s)", figsize=(14, 3))

In [None]:
# Scatter of return yardage (y) against hang time (x), one point per returned kickoff.
kickoff_returns_df.plot.scatter(
    x='hangTime',
    y='kickReturnYardage',
    title="Kickoff Return Yards vs. Kick Hang Time (s)",
    figsize=(14, 6),
)

Again, there doesn't appear to be much of a relationship here. Since the vast majority of kickoffs have a hang time between 3.5 and 4.5 seconds, it makes sense that all of the long returns would be on this type of kick.

### Is kick direction related to return yards?

In [None]:
# Count returned kickoffs per intended kick direction and draw horizontal bars.
intended_direction_counts = kickoff_returns_df['kickDirectionIntended'].value_counts()
intended_direction_counts.plot.barh(title="Intended Kick Direction", figsize=(9, 3))

In [None]:
# Count returned kickoffs per actual kick direction and draw horizontal bars.
actual_direction_counts = kickoff_returns_df['kickDirectionActual'].value_counts()
actual_direction_counts.plot.barh(title="Actual Kick Direction", figsize=(9, 3))

It makes sense that most kicks would be straight up the middle, since it's harder to kick the ball out of bounds that way, and easier to kick it through the end zone, which is what most teams are trying to do most of the time. Let's look at how often the intended kick direction doesn't match the actual direction.

In [None]:
# Share of returned kickoffs whose actual kick direction matched the intended one.
intended = kickoff_returns_df['kickDirectionIntended']
actual = kickoff_returns_df['kickDirectionActual']

# Only score rows where both directions are recorded: NaN == NaN is False, so
# a row with no direction data would otherwise be counted as a missed kick.
direction_known = intended.notna() & actual.notna()
(intended[direction_known] == actual[direction_known]).mean()

98.9% accuracy at anything is pretty impressive, but let's take a closer look at just those plays where the kick direction doesn't match the intended direction.

In [None]:
intended = kickoff_returns_df['kickDirectionIntended']
actual = kickoff_returns_df['kickDirectionActual']

# NaN != NaN is True, so rows without direction data would be classed as
# missed kicks; require both directions to be recorded before comparing.
direction_known = intended.notna() & actual.notna()
missed_direction = kickoff_returns_df[direction_known & (intended != actual)]
missed_direction['kickReturnYardage'].plot.kde(figsize=(14,3), title="Distribution of Return Yards on Missed Directional Kicks")

That looks like the distribution for most of the other types of kicks. It looks like these small mistakes in kick direction aren't that costly in terms of return yards.

### How do missed tackles impact returns?

The last thing I want to look at is plays where there are missed tackles. This should correlate to longer returns more than any other feature of this data set.

In [None]:
# Returns that have a value recorded in the missedTackler column.
has_missed_tackle = kickoff_returns_df['missedTackler'].notna()
missed_tackles_df = kickoff_returns_df[has_missed_tackle]

# Kernel density estimate of return yardage for those returns.
missed_tackle_yards = missed_tackles_df['kickReturnYardage']
missed_tackle_yards.plot.kde(title="Distribution of Return Yards on Missed Tackles", figsize=(14, 3))

It looks like the average return is longer (25 yards) when tackles are missed, plus there are small bumps in the distribution at 50 and 100 yards. This is as expected. It looks like most of the really long returns we saw earlier are in this subset.

In [None]:
# Count the ';'-separated entries in missedTackler for each return.
# missed_tackles_df is a filtered selection of kickoff_returns_df, so assigning
# a column to it in place triggers SettingWithCopyWarning; .assign builds a new
# frame with the extra column instead.
missed_tackles_df = missed_tackles_df.assign(
    missedTackles=missed_tackles_df['missedTackler'].str.split(';').str.len()
)
missed_tackles_df['missedTackles'].value_counts().plot.barh(figsize=(9,4), title="Returns with Missed Tackles")

In [None]:
# Horizontal boxplots of return yardage, one box per missed-tackle count.
yards_by_missed_tackles = missed_tackles_df[['missedTackles', 'kickReturnYardage']]
ax = yards_by_missed_tackles.boxplot(by='missedTackles', vert=False, figsize=(14, 8))

# Clear the x-axis label and the subplot title on the returned axes.
ax.set_xlabel(None)
ax.set_title(None)

### Summary

Most kickoffs are basically the same. Teams try to kick the ball out of the back of the endzone to avoid a return. When they fail to do this, the majority of returns are around 20 yards. Onside kicks and returns for touchdowns are extremely rare. The best things kicking teams can do to avoid long returns are:

1. Kick the ball through the end zone
2. Don't miss tackles on returns