In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for folder, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

1. Game Data
Game data: The games.csv contains the teams playing in each game. The key variable is gameId.

gameId: Game identifier, unique (numeric)

gameDate: Game Date (time, mm/dd/yyyy)

gameTimeEastern: Start time of game (time, HH:MM:SS, EST)

homeTeamAbbr: Home team three-letter code (text)

visitorTeamAbbr: Visiting team three-letter code (text)

week: Week of game (numeric)

In [None]:
# Load the game schedule table (one row per game) and display it.
games = pd.read_csv('../input/nfl-big-data-bowl-2022/games.csv')
games

Function for making feature summary

In [None]:
def resumetable(df):
    """Print the shape of ``df`` and return a per-column summary table.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the columns ``Feature``,
        ``Data Type``, ``Num of null``, ``Num of unique``, ``First value``,
        ``Second value`` and ``Third value``.

    Notes
    -----
    The example values are taken by row *position*, so the function works for
    any index (not only the default ``0..n-1``). When ``df`` has fewer than
    three rows, the example columns that cannot be filled contain NaN.
    """
    print(f'Shape : {df.shape}')

    # rename_axis/reset_index(name=...) fixes the header names regardless of
    # whether df.columns carries its own name.
    summary = df.dtypes.rename_axis('Feature').reset_index(name='Data Type')
    summary['Num of null'] = df.isnull().sum().values
    summary['Num of unique'] = df.nunique().values

    example_labels = ('First value', 'Second value', 'Third value')
    for position, label in enumerate(example_labels):
        # Positional access; fall back to NaN when the row does not exist.
        summary[label] = df.iloc[position].values if position < len(df) else np.nan
    return summary

In [None]:
# Count games per calendar date (ascending by count) and draw them as horizontal bars.
check = (
    games['gameDate']
    .value_counts()
    .reset_index()
    .set_axis(['date', 'games'], axis='columns')
    .sort_values('games')
)

fig = px.bar(
    check,
    x='games',
    y='date',
    orientation='h',
    title='Number of games for every date',
    width=800,
    height=900,
)
fig.show()


In [None]:
# Count games per kick-off time (ascending by count) and draw them as horizontal bars.
check = (
    games['gameTimeEastern']
    .value_counts()
    .reset_index()
    .set_axis(['time', 'games'], axis='columns')
    .sort_values('games')
)

fig = px.bar(
    check,
    x='games',
    y='time',
    orientation='h',
    title='Number of games for every time',
    width=800,
    height=400,
)
fig.show()


In [None]:
# Count home games per team (ascending by count) and draw them as horizontal bars.
check = (
    games['homeTeamAbbr']
    .value_counts()
    .reset_index()
    .set_axis(['team', 'games'], axis='columns')
    .sort_values('games')
)

fig = px.bar(
    check,
    x='games',
    y='team',
    orientation='h',
    title='Number of games for every team (home)',
    width=800,
    height=700,
)
fig.show()

In [None]:
# Count away games per team (ascending by count) and draw them as horizontal bars.
check = (
    games['visitorTeamAbbr']
    .value_counts()
    .reset_index()
    .set_axis(['team', 'games'], axis='columns')
    .sort_values('games')
)

fig = px.bar(
    check,
    x='games',
    y='team',
    orientation='h',
    title='Number of games for every team (away)',
    width=800,
    height=700,
)
fig.show()

In [None]:
# Count games per week (ascending by count) and draw them as horizontal bars.
# The week number is turned into a string with a trailing '-' before plotting,
# so the y values passed to plotly are text labels rather than numbers.
check = (
    games['week']
    .value_counts()
    .reset_index()
    .set_axis(['week', 'games'], axis='columns')
    .sort_values('games')
    .assign(week=lambda frame: frame['week'].astype(str) + '-')
)

fig = px.bar(
    check,
    x='games',
    y='week',
    orientation='h',
    title='Number of games for every week',
    width=800,
    height=500,
)
fig.show()


2. Player Data
Player data: The players.csv file contains player-level information from players that participated in any of the tracking data files. The key variable is nflId.

nflId: Player identification number, unique across players (numeric)

height: Player height (text)

weight: Player weight (numeric)

birthDate: Date of birth (YYYY-MM-DD)

collegeName: Player college (text)

position: Player position (text)

displayName: Player name (text)

In [None]:
# Load the player table (one row per player) and display it.
players = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
players

In [None]:
# Per-column summary of the player table (dtype, nulls, unique counts, sample values).
resumetable(players)

Convert all heights to feet.

In [None]:
# Split the raw height text on '-' into two parts. Rows that have a second part
# are combined as first * 12 + second; rows without a '-' keep their original
# value in 'first' (presumably already inches -- verify against the data).
check = players['height'].str.split('-', expand=True)
check.columns = ['first', 'second']

has_second_part = check['second'].notnull()
first_part = check.loc[has_second_part, 'first'].astype(np.int16)
second_part = check.loc[has_second_part, 'second'].astype(np.int16)
check.loc[has_second_part, 'first'] = first_part * 12 + second_part

In [None]:
# Replace the textual height with the combined value from `check`, as float32 divided by 12.
# NOTE: this overwrites players['height'], so the split cell above cannot be re-run afterwards.
players['height'] = check['first'].astype(np.float32) / 12

players

In [None]:
# Side-by-side histograms of player height and weight.
# Each spec is (column name, number of bins); one subplot column per spec.
hist_specs = [('height', 20), ('weight', 50)]

fig = make_subplots(rows=1, cols=len(hist_specs))
traces = [
    go.Histogram(x=players[column], nbinsx=bins, name=column)
    for column, bins in hist_specs
]

# add_trace(row=, col=) replaces the deprecated append_trace; subplot
# coordinates are 1-based.
for col_idx, trace in enumerate(traces, start=1):
    fig.add_trace(trace, row=1, col=col_idx)

fig.update_layout(
    title_text='Height & weight distributions',
    height=400,
    width=800
)

fig.show()

In [None]:
# Count players per college, keep the 50 largest counts (ascending order) and draw horizontal bars.
check = (
    players['collegeName']
    .value_counts()
    .reset_index()
    .set_axis(['college', 'players'], axis='columns')
    .sort_values('players')
    .tail(50)
)

fig = px.bar(
    check,
    x='players',
    y='college',
    orientation='h',
    title='Top 50 colleges by number of players',
    width=800,
    height=900,
)
fig.show()

Player position abbreviations

WR: Wide Receiver;
    
CB: Cornerback;
    
RB: Running Back;
    
TE: Tight End;
    
OLB: Outside Linebacker;
    
QB: Quarterback;
    
FS: Free Safety;
    
LB: Linebacker;
    
SS: Strong Safety;
    
ILB: Inside Linebacker;
    
DE: Defensive End;
    
DB: Defensive Back;
    
MLB: Middle Linebacker;
    
DT: Defensive Tackle;
    
FB: Fullback;
    
P: Punter;
    
LS: Long snapper;
    
S: Safety;
    
K: Kicker;
    
HB: Halfback (a running back);
    
NT: Nose Tackle

3. Play Data

Play data: The plays.csv file contains play-level information from each game. The key variables are gameId and playId.

gameId: Game identifier, unique (numeric)

playId: Play identifier, not unique across games (numeric)

playDescription: Description of play (text)

quarter: Game quarter (numeric)

down: Down (numeric)

yardsToGo: Distance needed for a first down (numeric)

possessionTeam: Team on offense (text)

playType: Outcome of dropback: sack or pass (text)

yardlineSide: 3-letter team code corresponding to line-of-scrimmage (text)

yardlineNumber: Yard line at line-of-scrimmage (numeric)

offenseFormation: Formation used by possession team (text)

personnelO: Personnel used by offensive team (text)

defendersInTheBox: Number of defenders in close proximity to line-of-scrimmage (numeric)

numberOfPassRushers: Number of pass rushers (numeric)

personnelD: Personnel used by defensive team (text)

typeDropback: Dropback categorization of quarterback (text)

preSnapHomeScore: Home score prior to the play (numeric)

preSnapVisitorScore: Visiting team score prior to the play (numeric)

gameClock: Time on clock of play (MM:SS)

absoluteYardlineNumber: Distance from end zone for possession team (numeric)

penaltyCodes: NFL categorization of the penalties that occurred on the play. For purposes of this contest, the most important penalties are Defensive Pass Interference (DPI), Offensive Pass Interference (OPI), Illegal Contact (ICT), and Defensive Holding (DH). Multiple penalties on a play are separated by a ; (text)

penaltyJerseyNumber: Jersey number and team code of the player committing each penalty. Multiple penalties on a play are separated by a ; (text)

passResult: Outcome of the passing play (C: Complete pass, I: Incomplete pass, S: Quarterback sack, IN: Intercepted pass, text)

offensePlayResult: Yards gained by the offense, excluding penalty yardage (numeric)

playResult: Net yards gained by the offense, including penalty yardage (numeric)

epa: Expected points added on the play, relative to the offensive team. Expected points is a metric that estimates the average of every next scoring outcome given the play's down, distance, yardline, and time remaining (numeric)

isDefensivePI: An indicator variable for whether or not a DPI penalty occurred on a given play (TRUE/FALSE)

In [None]:
# Load the play-level table (one row per play) and display it.
plays = pd.read_csv('../input/nfl-big-data-bowl-2022/plays.csv')

plays

In [None]:
# Per-column summary of the play table (dtype, nulls, unique counts, sample values).
resumetable(plays)

In [None]:
# Share of plays per quarter, shown as a pie chart.
check = (
    plays['quarter']
    .value_counts()
    .reset_index()
    .set_axis(['quarter', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.pie(
    check,
    values='plays',
    names='quarter',
    title='Number of plays of every quarter',
    width=800,
    height=500,
)
fig.show()

In [None]:
# Share of plays per down, shown as a pie chart.
check = (
    plays['down']
    .value_counts()
    .reset_index()
    .set_axis(['down', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.pie(
    check,
    values='plays',
    names='down',
    title='Number of plays of every down',
    width=800,
    height=500,
)
fig.show()


In [None]:
# Count plays per yards-to-go value and draw horizontal bars.
# The value is turned into a string with a trailing '-' before plotting,
# so the y values passed to plotly are text labels rather than numbers.
check = (
    plays['yardsToGo']
    .value_counts()
    .reset_index()
    .set_axis(['yardsToGo', 'plays'], axis='columns')
    .assign(yardsToGo=lambda frame: frame['yardsToGo'].astype(str) + '-')
    .sort_values('plays')
)

fig = px.bar(
    check,
    x='plays',
    y='yardsToGo',
    orientation='h',
    title='Number of plays for every yards to go category',
    width=800,
    height=800,
)
fig.show()

In [None]:
# Count plays per team in possession (ascending by count) and draw horizontal bars.
check = (
    plays['possessionTeam']
    .value_counts()
    .reset_index()
    .set_axis(['team', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.bar(
    check,
    x='plays',
    y='team',
    orientation='h',
    title='Number of plays for every team',
    width=800,
    height=800,
)
fig.show()


In [None]:
# Count plays per yardlineSide team code (ascending by count) and draw horizontal bars.
check = (
    plays['yardlineSide']
    .value_counts()
    .reset_index()
    .set_axis(['yardlineSide', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.bar(
    check,
    x='plays',
    y='yardlineSide',
    orientation='h',
    title='Number of plays for every team yard side',
    width=800,
    height=800,
)
fig.show()


In [None]:
# Count plays per yard-line number and draw vertical bars (yard line on the x axis).
check = (
    plays['yardlineNumber']
    .value_counts()
    .reset_index()
    .set_axis(['yardline', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.bar(
    check,
    y='plays',
    x='yardline',
    title='Number of plays for every yardline',
    width=800,
    height=500,
)
fig.show()

In [None]:
# Histogram of absoluteYardlineNumber over all plays, requesting 50 bins.
fig = px.histogram(
    data_frame=plays,
    x='absoluteYardlineNumber',
    nbins=50,
    title='Absolute Yardline Number distribution',
    height=500,
    width=800,
)
fig.show()

In [None]:
# Share of plays per pass result code, shown as a pie chart.
check = (
    plays['passResult']
    .value_counts()
    .reset_index()
    .set_axis(['passResult', 'plays'], axis='columns')
    .sort_values('plays')
)

fig = px.pie(
    check,
    values='plays',
    names='passResult',
    title='Number of plays for every pass result',
    width=800,
    height=500,
)
fig.show()

In [None]:
# Histogram of playResult over all plays, requesting 50 bins.
fig = px.histogram(
    data_frame=plays,
    x='playResult',
    nbins=50,
    title='Play result distribution',
    height=500,
    width=800,
)
fig.show()

In [None]:
# Histogram of preSnapVisitorScore over all plays, requesting 50 bins.
fig = px.histogram(
    data_frame=plays,
    x='preSnapVisitorScore',
    nbins=50,
    title='Pre Snap Visitor Score distribution',
    height=500,
    width=800,
)
fig.show()

In [None]:
# Histogram of preSnapHomeScore over all plays, requesting 50 bins.
fig = px.histogram(
    data_frame=plays,
    x='preSnapHomeScore',
    nbins=50,
    title='Pre Snap Home Score distribution',
    height=500,
    width=800,
)
fig.show()

4. Tracking Data

In [None]:
# Load the 2018 tracking table and preview its first rows.
tracking2018 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2018.csv')
tracking2018.head()

gameId == 2018123000 and playId == 36

In [None]:
# Scatter the (x, y) positions of every tracking row of this play on one
# axes, one point series per distinct `team` value; the legend is removed.
fig, ax = plt.subplots(figsize=(12, 8))
selection = tracking2018.query('gameId == 2018123000 and playId == 36')
for team_name, team_rows in selection.groupby('team'):
    team_rows.plot(x='x', y='y', ax=ax, style='.')
ax.legend().remove();

gameId == 2018091001 and playId == 4033

In [None]:
# Scatter the (x, y) positions of every tracking row of this play on one
# axes, one point series per distinct `team` value; the legend is removed.
fig, ax = plt.subplots(figsize=(12, 8))
selection = tracking2018.query('gameId == 2018091001 and playId == 4033')
for team_name, team_rows in selection.groupby('team'):
    team_rows.plot(x='x', y='y', ax=ax, style='.')
ax.legend().remove();

gameId == 2018091609 and position == "CB"

In [None]:
# Scatter the (x, y) positions of all rows with position "CB" in this game on
# one axes, one point series per distinct `team` value; the legend is removed.
fig, ax = plt.subplots(figsize=(12, 8))
selection = tracking2018.query('gameId == 2018091609 and position == "CB"')
for team_name, team_rows in selection.groupby('team'):
    team_rows.plot(x='x', y='y', ax=ax, style='.')
ax.legend().remove();

gameId == 2018091609 and position == "LB"

In [None]:
# Scatter the (x, y) positions of all rows with position "LB" in this game on
# one axes, one point series per distinct `team` value; the legend is removed.
fig, ax = plt.subplots(figsize=(12, 8))
selection = tracking2018.query('gameId == 2018091609 and position == "LB"')
for team_name, team_rows in selection.groupby('team'):
    team_rows.plot(x='x', y='y', ax=ax, style='.')
ax.legend().remove();

gameId == 2018091609 and position == "RB"

In [None]:
# Scatter the (x, y) positions of all rows with position "RB" in this game on
# one axes, one point series per distinct `team` value; the legend is removed.
fig, ax = plt.subplots(figsize=(12, 8))
selection = tracking2018.query('gameId == 2018091609 and position == "RB"')
for team_name, team_rows in selection.groupby('team'):
    team_rows.plot(x='x', y='y', ax=ax, style='.')
ax.legend().remove();

5. Football field visualization

In [None]:
import matplotlib.patches as patches
from matplotlib.patches import Arc
from matplotlib import pyplot as plt
import matplotlib.patches as mpatches

# Default size for every matplotlib figure created from here on
plt.rcParams['figure.figsize'] = [12, 8]


def drawPitch(width, height, color="w"):
    """Draw a football field with matplotlib and return ``(fig, ax)``.

    The playing field occupies x in [10, width + 10]; a 10-unit end zone is
    drawn on each side ("HOME" on the left in blue, "AWAY" on the right in
    red), so the whole pitch spans x in [0, width + 20] and y in [0, height].

    Parameters
    ----------
    width : int
        Length of the playing field between the two end zones. Must be an
        integer because it is used as a ``range()`` bound; the 5-unit lines
        and tick marks are derived from it (``width // 5`` segments).
    height : float
        Extent of the field along the y axis.
    color : str, optional
        Edge colour of the pitch boundary and end-zone rectangles. Interior
        lines, numbers and tick marks are always drawn in white.

    Returns
    -------
    (matplotlib.figure.Figure, matplotlib.axes.Axes)
        The new figure and the axes holding the drawing (axis frame hidden).
    """
    # Number of 5-unit segments between the end zones (20 for width=100).
    n_segments = width // 5

    fig = plt.figure()
    ax = plt.axes(xlim=(-10, width + 30), ylim=(-15, height + 5))
    plt.axis('off')

    # Grass around pitch
    grass = patches.Rectangle((-10, -5), width + 40, height + 10, linewidth=1,
                              facecolor='#3f995b', capstyle='round')
    ax.add_patch(grass)

    # Pitch boundaries
    boundary = patches.Rectangle((0, 0), width + 20, height, ec=color, fc="None", lw=2)
    ax.add_patch(boundary)

    # vertical lines - every 5 yards (both ends of the playing field included)
    for i in range(n_segments + 1):
        line_x = 10 + 5 * i
        plt.plot([line_x, line_x], [0, height], c="w", lw=2)

    # distance markers - every 10 yards, counting up to midfield and back down
    for yards in range(10, width, 10):
        yards_text = yards if yards <= width / 2 else width - yards
        # top markers
        plt.text(10 + yards - 2, height - 7.5, yards_text, size=15, c="w", weight="bold")
        # bottom markers (rotated to face the opposite sideline)
        plt.text(10 + yards - 2, 7.5, yards_text, size=15, c="w", weight="bold", rotation=180)

    def _draw_yard_ticks(y_from, y_to):
        """Draw a short tick at every single yard that is not on a 5-yard line."""
        for segment in range(n_segments):
            for offset in range(1, 5):
                tick_x = 10 + segment * 5 + offset
                plt.plot([tick_x, tick_x], [y_from, y_to], color="w", lw=2)

    # yards markers - every yard
    _draw_yard_ticks(1, 3)                    # bottom markers
    _draw_yard_ticks(height - 1, height - 3)  # top markers
    inner_y = (height - 18.5) / 2
    _draw_yard_ticks(inner_y, inner_y + 2)                    # middle bottom markers
    _draw_yard_ticks(height - inner_y, height - inner_y - 2)  # middle top markers

    # draw home end zone
    plt.text(2.5, (height - 15) / 2, "HOME", size=30, c="w", weight="bold", rotation=90)
    home_zone = patches.Rectangle((0, 0), 10, height, ec=color, fc="#0064dc", lw=2)
    ax.add_patch(home_zone)

    # draw away end zone (label sits 1 unit inside the right end zone)
    plt.text(width + 11, (height - 15) / 2, "AWAY", size=30, c="w", weight="bold", rotation=-90)
    away_zone = patches.Rectangle((width + 10, 0), 10, height, ec=color, fc="#c80014", lw=2)
    ax.add_patch(away_zone)

    # draw extra spot point: a 3-unit mark, 2 units inside each end of the field
    spot_y = (height - 3) / 2
    plt.plot([10 + 2, 10 + 2], [spot_y, spot_y + 3], c="w", lw=2)              # left
    plt.plot([width + 10 - 2, width + 10 - 2], [spot_y, spot_y + 3], c="w", lw=2)  # right

    # draw goalpost at the outer edge of each end zone
    goal_width = 6  # yards
    goal_y = (height - goal_width) / 2
    plt.plot([0, 0], [goal_y, goal_y + goal_width], "-", c="y", lw=10, ms=20)  # left
    plt.plot([width + 20, width + 20], [goal_y, goal_y + goal_width], "-", c="y", lw=10, ms=20)  # right

    return fig, ax

In [None]:
# Draw an empty field with width 100 and height 53.3 to check the layout.
fig, ax = drawPitch(100, 53.3)


Function to create animation

In [None]:
# Map every gameId to the sorted list of distinct playIds found in the tracking data.
# Grouping by the scalar column name (not a one-element list) keeps the group
# keys plain gameId values; a length-1 list key yields 1-tuples on pandas >= 2.0.
games_tracking2018 = tracking2018.groupby("gameId")
games_ids = {
    game: sorted(play_ids.unique().tolist())
    for game, play_ids in games_tracking2018["playId"]
}

In [None]:
def extract_one_game(game_id, play_id, df):
    """Collect the (x, y) tracks of one play from a tracking DataFrame.

    Rows of ``df`` matching ``game_id`` and ``play_id`` are ordered by
    ``frameId`` and grouped by ``nflId``.

    Returns
    -------
    (home, away, balls)
        ``home`` and ``away`` map a jersey number (int) to the list of
        ``(x, y)`` tuples of that player, for groups whose first ``team``
        value is "home" / "away" respectively. ``balls`` is the list of
        ``(x, y)`` tuples of the rows whose ``team`` is "football".
    """
    in_play = (df.gameId == game_id) & (df.playId == play_id)
    frames = df[in_play].sort_values(['frameId'], ascending=True)

    tracks = {"home": {}, "away": {}}
    for _nfl_id, rows in frames.groupby('nflId'):
        jersey = int(rows.jerseyNumber.iloc[0])
        side = rows.team.iloc[0]
        if side in tracks:
            tracks[side][jersey] = list(zip(rows.x.tolist(), rows.y.tolist()))

    football = frames[frames.team == "football"]
    ball_track = list(zip(football.x.tolist(), football.y.tolist()))
    return tracks["home"], tracks["away"], ball_track

In [None]:
from matplotlib import animation
from IPython.display import HTML
def animate_one_play(game_id, play_id, df):
    """Animate one play on a drawn field and return it as an HTML5 video.

    Parameters
    ----------
    game_id, play_id
        Identify the play; passed to ``extract_one_game`` together with ``df``.
    df : pandas.DataFrame
        Tracking data to take the play from.

    Returns
    -------
    IPython.display.HTML
        The animation encoded with ``FuncAnimation.to_html5_video()``. One
        animation frame is produced per ball position of the play. Home
        players are drawn as red markers, away players as blue markers and
        the ball as a smaller black marker.
    """
    fig, ax = drawPitch(100, 53.3)

    home, away, balls = extract_one_game(game_id, play_id, df)

    outline = dict(markeredgewidth=2, markeredgecolor="white", zorder=7)
    team_left, = ax.plot([], [], 'o', markersize=20, markerfacecolor="r", **outline)
    team_right, = ax.plot([], [], 'o', markersize=20, markerfacecolor="b", **outline)
    ball, = ax.plot([], [], 'o', markersize=10, markerfacecolor="black", **outline)
    drawings = [team_left, team_right, ball]

    def init():
        # Start every artist empty so blitting has a clean background.
        for artist in drawings:
            artist.set_data([], [])
        return drawings

    def positions_at(tracks, i):
        """Return the x list and y list of all players that have a frame ``i``."""
        # Players with fewer recorded frames than the ball are skipped for the
        # missing frames instead of raising IndexError.
        points = [track[i] for track in tracks.values() if i < len(track)]
        return [p[0] for p in points], [p[1] for p in points]

    def animate(i):
        team_left.set_data(*positions_at(home, i))
        team_right.set_data(*positions_at(away, i))

        x, y = balls[i]
        # set_data expects one sequence for x and one for y; passing [x, y] as
        # a single argument would be unpacked into scalar x and y data.
        ball.set_data([x], [y])
        return drawings

    # !May take a while!
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(balls), interval=100, blit=True)

    video = HTML(anim.to_html5_video())
    # Close the figure so the notebook does not also render a static copy of
    # the field below the video and the figure's memory is released.
    plt.close(fig)
    return video

In [None]:
# Render play 36 of game 2018123000 as a video (the same play plotted statically in section 4).
animate_one_play(2018123000, 36, tracking2018)