In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
from typing import Union, Sequence
from numbers import Number
from datetime import datetime, timedelta

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import cm
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
from plotly.subplots import make_subplots

## Load the data

In [None]:
# Root folder of the competition dataset. Every file below is read from here,
# so this is the single value to change when running outside Kaggle.
DATA_DIR = "/kaggle/input/nfl-big-data-bowl-2022"

players = pd.read_csv(f"{DATA_DIR}/players.csv")
plays = pd.read_csv(f"{DATA_DIR}/plays.csv")
games = pd.read_csv(f"{DATA_DIR}/games.csv")
scouting = pd.read_csv(f"{DATA_DIR}/PFFScoutingData.csv")
tracking2018 = pd.read_csv(f"{DATA_DIR}/tracking2018.csv")
# Only the 2018 tracking file is loaded; the other seasons are left disabled.
#tracking2019 = pd.read_csv(f"{DATA_DIR}/tracking2019.csv")
#tracking2020 = pd.read_csv(f"{DATA_DIR}/tracking2020.csv")

### Format Datetime

In [None]:
# Build one kickoff timestamp per game from its separate date and time strings.
kickoff_text = games['gameDate'] + "-" + games['gameTimeEastern']
games['gameDateTime'] = pd.to_datetime(kickoff_text, format="%m/%d/%Y-%H:%M:%S")

# Parse the remaining date-like string columns in place.
players['birthDate'] = pd.to_datetime(players['birthDate'])
tracking2018['time'] = pd.to_datetime(tracking2018['time'])

# The game clock is parsed as a timestamp first, then reduced to a plain duration
# built from that timestamp's hour / minute / second components.
# NOTE(review): this overwrites the string column, so re-running the cell on the
# already-converted values will presumably fail — confirm before re-executing.
parsed_clock = pd.to_datetime(plays['gameClock'])
plays['gameClock'] = parsed_clock.apply(
    lambda stamp: timedelta(hours=stamp.hour, minutes=stamp.minute, seconds=stamp.second)
)

### games data
games.csv consists of the following fields:
* **gameId**: `int`
* **season**: `int`
* **week**: `int`
* **gameDate**: `str`
* **gameTimeEastern**: `str`
* **homeTeamAbbr**: `str`
* **visitorTeamAbbr**: `str`

In [None]:
# Print the number of rows in the games table, then preview its first 10 rows.
print(len(games))
games.head(10)

### [WIP] calculate game score

### Players data
players.csv consists of the following fields:
* **nflId**: `int`
* **height**: `str`. Either feet-inches (`X-Y`) or total inches (`XX`); converted to centimetres below.
* **weight**: `int`
* **birthDate**: `str`. Converted to `datetime` in the "Format Datetime" step above.
* **collegeName**: `str`
* **Position**: `str`
* **displayName**: `str`

In [None]:
# Print the number of rows in the players table, then preview its first 10 rows.
print(len(players))
players.head(10)

#### Format height & birthDate

In [None]:
def format_height(height: Union[str, Number]):
    """Convert a player height to centimetres.

    Parameters
    ----------
    height : str or Number
        Either ``'F-I'`` (feet-inches, e.g. ``'6-2'``), ``'II'`` (total inches,
        e.g. ``'74'``), or a number. A number is assumed to be already
        converted and is returned unchanged.

    Returns
    -------
    Number
        The height in centimetres, or the numeric input as-is.

    Raises
    ------
    ValueError
        If ``height`` is neither a number nor a string, or if the string parts
        cannot be parsed as integers.
    """
    # Numbers (NaN included) pass through untouched, which also makes it safe
    # to apply this function to a column that was already converted.
    if isinstance(height, Number):
        return height
    # Reject anything that is not text before using string operations on it;
    # otherwise `'-' in height` would fail with an unrelated TypeError.
    if not isinstance(height, str):
        raise ValueError(f"invalid height format: {height!r}")
    # 'F-I' format: feet and inches separated by a dash.
    if '-' in height:
        feet, inch = map(int, height.split('-'))
        return (feet * 12 + inch) * 2.54
    # 'II' format: total inches.
    return int(height) * 2.54

In [None]:
# Convert every height entry to centimetres (numeric entries pass through unchanged).
height_cm = players['height'].apply(format_height)
players['height'] = height_cm

# Age in whole days: time elapsed between each birth date and the moment this cell runs.
elapsed = datetime.today() - players['birthDate']
players['age'] = elapsed.apply(lambda delta: delta.days)

In [None]:
# Pairwise scatter plots of the numeric player attributes, grouped by position,
# with histograms on the diagonal.
scatter_columns = ["Position", "height", "weight", "age"]
ff.create_scatterplotmatrix(
    players[scatter_columns],
    index='Position',
    diag='histogram',
    height=800,
    width=1000,
)

### Tracking
* **time**: `str`. Time stamp of play (time, yyyy-mm-dd, hh:mm:ss)
* **x**: `float`. Player position along the long axis of the field, 0 - 120 yards.
* **y**: `float`. Player position along the short axis of the field, 0 - 53.3 yards.
* **s**: `float`. Speed in yards/second (numeric)
* **a**: `float`. Acceleration in yards/second^2 (numeric)
* **dis**: `float`. Distance traveled from prior time point, in yards (numeric)
* **o**: `float`. Player orientation (deg), 0 - 360 degrees (numeric)
* **dir**: `float`. Angle of player motion (deg), 0 - 360 degrees (numeric)
* **event**: `str`. Tagged play details, including moment of ball snap, pass release, pass catch, tackle, etc (text)
* **nflId**: `int`. Player identification number, unique across players (numeric)
* **displayName**: `str`. Player name (text)
* **jerseyNumber**: `float`. Jersey number of player (numeric)
* **position**: `str`. Player position group (text)
* **team**: `str`. Team (away or home) of corresponding player (text)
* **frameId**: `int`. Frame identifier for each play, starting at 1 (numeric)
* **gameId**: `int`. Game identifier, unique (numeric)
* **playId**: `int`. Play identifier, not unique across games (numeric)
* **playDirection**: `str`. Direction that the offense is moving (left or right)

In [None]:
# Preview the first 5 rows of the 2018 tracking data.
tracking2018.head(5)

In [None]:
# Game and play to visualise. `gameid` is reused by later cells.
gameid = 2018123000
playid = 36

fig, ax = plt.subplots(figsize=(15, 10))
for team, cmap_name in zip(('home', 'away', 'football'), ('Reds', 'Blues', 'Greens')):
    frames = tracking2018.query(f"(gameId=={gameid}) & (playId == {playid}) & (team == '{team}')")
    if frames.empty:
        # Nothing to draw for this team on the selected play.
        continue

    # plt.get_cmap replaces plt.cm.get_cmap, which newer matplotlib releases removed.
    cmap = plt.get_cmap(cmap_name)
    # Colour encodes time: later frames sit further along the colormap.
    colors = cmap(frames['frameId'].to_numpy() / frames['frameId'].max())

    x = frames['x'].to_numpy()
    y = frames['y'].to_numpy()
    if team != 'football':
        # Velocity arrows: speed `s` decomposed along the angle of motion `dir`.
        # The orientation column `o` is where a player faces, not where he moves,
        # so it must not be used for the velocity components.
        # The sin -> x / cos -> y decomposition is kept from the original cell.
        # NOTE(review): confirm it against the dataset's angle convention.
        heading = np.deg2rad(frames['dir'].to_numpy())
        speed = frames['s'].to_numpy()
        v_x = speed * np.sin(heading)
        v_y = speed * np.cos(heading)
        ax.quiver(x, y, v_x, v_y, angles="xy", zorder=5, alpha=0.7, color=colors)
    else:
        # The ball is drawn as points only.
        ax.scatter(x, y, color=colors)

ax.set(xlabel="x (yards)", ylabel="y (yards)",
       title=f"Tracking for game {gameid}, play {playid}")
# plt.show() works with the inline notebook backend, where fig.show() only warns.
plt.show()

### Plays
* **gameId**: `int`. Game identifier, unique (numeric)
* **playId**: `int`.  Play identifier, not unique across games (numeric)
* **playDescription**: `str`. Description of play (text)
* **quarter**: `int`. Game quarter (numeric)
* **down**: `int`. Down (numeric)
* **yardsToGo**: `int`. Distance needed for a first down (numeric)
* **possessionTeam**: `str`. Team punting, placekicking or kicking off the ball (text)
* **specialTeamsPlayType**: `str`. Formation of play: Extra Point, Field Goal, Kickoff or Punt (text)
* **specialTeamsPlayResult**: `str`. Special Teams outcome of play dependent on play type: Blocked Kick Attempt, Blocked Punt, Downed, Fair Catch, Kick Attempt Good, Kick Attempt No Good, Kickoff Team Recovery, Muffed, Non-Special Teams Result, Out of Bounds, Return or Touchback (text)
* **kickerId**: `float`. nflId of placekicker, punter or kickoff specialist on play (numeric)
* **returnerId**: `str` or `None`. nflId(s) of returner(s) on play if there was a special teams return. Multiple returners on a play are separated by a ; (text)
* **kickBlockerId**: `float`. nflId of blocker of kick on play if there was a blocked field goal or blocked punt (numeric)
* **yardlineSide**: `str`. 3-letter team code corresponding to line-of-scrimmage (text)
* **yardlineNumber**: `int`. Yard line at line-of-scrimmage (numeric)
* **gameClock**: `str`. Time on clock of play (MM:SS)
* **penaltyCodes**: `str` or `None`. NFL categorization of the penalties that occurred on the play. A standard penalty code followed by a d means the penalty was on the defense. Multiple penalties on a play are separated by a ; (text)
* **penaltyJerseyNumber**: `str` or `None`. Jersey number and team code of the player committing each penalty. Multiple penalties on a play are separated by a ; (text)
* **penaltyYards**: `float`. yards gained by possessionTeam by penalty (numeric)
* **preSnapHomeScore**: `int`. Home score prior to the play (numeric)
* **preSnapVisitorScore**: `int`. Visiting team score prior to the play (numeric)
* **passResult**: `str` or `None`. Scrimmage outcome of the play if specialTeamsPlayResult is "Non-Special Teams Result" (C: Complete pass, I: Incomplete pass, S: Quarterback sack, IN: Intercepted pass, R: Scramble, ' ': Designed Rush, text)
* **kickLength**: `float`. Kick length in air of kickoff, field goal or punt (numeric)
* **kickReturnYardage**: `float`. Yards gained by return team if there was a return on a kickoff or punt (numeric)
* **playResult**: `int`. Net yards gained by the kicking team, including penalty yardage (numeric)
* **absoluteYardlineNumber**: `int`. Location of ball downfield in tracking data coordinates (numeric)

In [None]:
# Select the plays that belong to the game chosen above (`gameid`) and preview the first 5.
play = plays.query(f"gameId == {gameid}")
play.head(5)

#### Merge tracking data with plays & games according to playId

In [None]:
# Play-level columns copied onto every tracking row.
_play_context_cols = ['quarter', 'possessionTeam']

# If this cell already ran, tracking2018 carries these columns. Drop them first
# so a re-run does not create suffixed duplicates (quarter_x / quarter_y) that
# break the cells below.
_stale_cols = [c for c in _play_context_cols if c in tracking2018.columns]
if _stale_cols:
    tracking2018 = tracking2018.drop(columns=_stale_cols)

# Inner join on (gameId, playId). validate='many_to_one' fails loudly if `plays`
# has duplicate keys, which would otherwise silently duplicate tracking rows.
tracking2018 = tracking2018.merge(
    plays[['gameId', 'playId'] + _play_context_cols],
    on=['gameId', 'playId'],
    validate='many_to_one',
)

In [None]:
# Keep only the tracking rows of the selected game (`gameid`) and preview the first 5.
game = tracking2018.query(f"gameId == {gameid}")
game.head(5)

#### Visualize ball heatmap using kernel density estimation
Note that play side (direction) changes every quarter

In [None]:
# Ball rows of the selected game. .copy() makes the column assignments below
# act on an independent frame rather than a slice of `game`.
ball = game.query("displayName == 'football'").copy()

# Label each row by whether the possessing team is the home or the away side.
home_abbr = games.query(f"gameId == {gameid}")['homeTeamAbbr'].iloc[0]
ball['possession'] = np.where(ball['possessionTeam'] == home_abbr, 'home', 'away')

# Mirror x along the 120-yard axis so every possession points the same way,
# assuming the sides swap every quarter. Rows are mirrored when the home team
# has the ball in an odd quarter or the away team has it in an even quarter.
# This is the net effect of the two chained .where() calls it replaces.
# NOTE(review): the tracking column `playDirection` may be a more direct source
# for this normalisation — confirm before switching.
odd_quarter = ball['quarter'] % 2 != 0
is_home = ball['possession'] == 'home'
flip = (odd_quarter & is_home) | (~odd_quarter & ~is_home)
ball['x'] = ball['x'].where(~flip, 120 - ball['x'])

fig, ax = plt.subplots(1, 2, figsize=(15, 10))
for axis, side in zip(ax, ('home', 'away')):
    # Field length (x) is on the vertical axis, field width (y) on the horizontal.
    sns.kdeplot(ax=axis, data=ball.query(f"possession == '{side}'"),
                x='y', y='x', levels=100, fill=True, cmap="rocket_r")
    axis.set_xlim([-20, 80])
    axis.set_ylim([-20, 140])
    # Dashed guide lines every 20 yards along the field length.
    axis.hlines(y=list(range(0, 140, 20)), xmin=-20, xmax=80,
                linewidth=1, linestyles='--', color='gray')
    axis.set_title(f"Ball position density: {side} possession")
# No plt.legend(): none of the artists has a label, so the call only warned.
plt.show()

## Possession in each quarter

In [None]:
# One donut per panel: the whole game first, then each of the four quarters.
# Slice size is the number of ball tracking rows recorded while that team had possession.
_teams = pd.unique(ball['possessionTeam'])
panel_titles = ['Total Possession',] + [f"Possession in quarter:{q}" for q in range(1, 5)]
# An empty suffix selects the whole game; each other suffix narrows the query to one quarter.
quarter_filters = [""] + [f" & quarter == {quarter}" for quarter in range(1, 5)]

fig = make_subplots(
    rows=1,
    cols=5,
    specs=[[{'type':'domain'} for _ in range(5)]],
    subplot_titles=panel_titles,
)
for col, quarter_filter in enumerate(quarter_filters, start=1):
    frame_counts = [len(ball.query(f"possessionTeam == '{t}'" + quarter_filter)) for t in _teams]
    fig.add_trace(go.Pie(labels=_teams, values=frame_counts, sort=False), 1, col)
fig.update_traces(hole=.4, hoverinfo="label+percent")
fig.show()

### Scouting [WIP]

In [None]:
# Print the number of rows in the PFF scouting table, then preview its first 5 rows.
print(len(scouting))
scouting.head(5)