## <center>Imports</center>

In [None]:
# Linear Algebra, Data Manipulation
import numpy as np
import pandas as pd

# Plots
import seaborn as sns
pal_hls = sns.hls_palette(100, l=.3, s=.8).as_hex() # Seaborn Color Palette for colors
import matplotlib as mpl
import matplotlib.pyplot as plt
sns.set_style('darkgrid')

# Interactive Plots
import plotly.express as px
from plotly.offline import init_notebook_mode
import plotly.graph_objects as go
init_notebook_mode()

# Ignore Warnings
import warnings
warnings.filterwarnings("ignore")

# <center>1. Game Data</center>
### <b>Game Data:</b> The <i>games.csv</i> contains the teams playing in each game. The key variable is gameId

In [None]:
# Load the per-game table; the cells below derive month/day/hour columns from it.
games = pd.read_csv("../input/nfl-big-data-bowl-2022/games.csv")
games  # display the frame as the cell output

### Make Derived Features (month, day, hour)

In [None]:
# gameDate is split on "/" once: the first field becomes the month and the
# second the day. gameTimeEastern is split on ":" and its first field is the hour.
date_fields = games["gameDate"].map(lambda date_text: date_text.split("/"))
games["month"] = date_fields.map(lambda fields: int(fields[0]))
games["day"] = date_fields.map(lambda fields: int(fields[1]))
games["hour"] = games["gameTimeEastern"].map(lambda clock_text: int(clock_text.split(":")[0]))

### Data Visualization

In [None]:
def plotlybar(data, column, title):
    """Show an interactive bar chart of how often each value of ``column`` occurs.

    Args:
        data: DataFrame holding the rows to count.
        column: Name of the column whose distinct values become the bars.
        title: Chart title; it is wrapped in ``<b>`` tags.

    Every bar is annotated with its share of ``len(data)`` in percent. The
    figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Count once and reuse the result for both the bar labels and heights.
    counts = data[column].value_counts()

    df = pd.DataFrame()
    df[column] = counts.index  # the distinct values of the column
    df["gamesCount"] = counts.values  # how many rows carry each value
    df["percent"] = (df["gamesCount"] / len(data) * 100).round(1)  # share of all rows

    # One colour label per bar. The palette is cycled so the list always has
    # exactly len(df) entries, even when there are more bars than palette
    # colours (a plain slice would then be too short for px.bar).
    bar_colors = [pal_hls[i % len(pal_hls)] for i in range(len(df))]

    # Bar plot using plotly bar plot
    fig = px.bar(df,
                 y='gamesCount',
                 x=column,
                 text='percent',
                 height=600, width=950,
                 title=f"<b>{title}</b>", color=bar_colors)
    fig.update_traces(texttemplate='%{text}%', textposition='outside')  # The text at the top of the bar plot
    fig.update_layout(
        xaxis=dict(dtick=1),  # Fixing the problem with the x ticks here
        showlegend=False)
    fig.show()

In [None]:
# Bar chart of the number of games (and their share of all games) per season.
plotlybar(games, "season", "Number of Games Played Every Season")

In [None]:
import calendar

# Shared title styling, reused by the matplotlib plots in later cells.
title_font_dict = {"fontsize": 16, "fontweight": "bold"}

# Build the legend labels from the months actually present in the data rather
# than hard-coding them. The labels are listed in ascending month order, which
# is the order in which the numeric hue levels are expected to be drawn.
month_labels = [calendar.month_name[int(month)] for month in sorted(games["month"].unique())]

plt.figure(figsize=(10, 5), dpi=100)
sns.countplot(x='season', data=games, hue='month', lw=2)
plt.title('Games in Year/Season', fontdict=title_font_dict)
plt.legend(month_labels,
           loc='center right', bbox_to_anchor=(1.2, 0.5), ncol=1)
plt.show()

#### As the Years Pass the Number of Games Held Every Year/Season Increased

In [None]:
# Bar chart of the number of games per calendar month (the derived "month" column).
plotlybar(games, "month", "Number of Games Played Every Month")

#### The Games were held from September to January. There are especially many games held in December, and rarely any games held in January

In [None]:
# Bar chart of the number of games per day of the month (the derived "day" column).
plotlybar(games, "day", "Number of Games Held Every Day")

#### Very few Games were Held on the 5th, 12th, 19th, 26th and 31st day of the month.

In [None]:
# Bar chart of the number of games per exact kickoff time string.
plotlybar(games, "gameTimeEastern", "Number of Games played on different gameTimeEastern")

#### Most of the Games were Held at 13, 16:25, 20:20, 16:05 and 20:15 gameTime Eastern

In [None]:
# Same distribution, grouped by the derived "hour" column (minutes dropped).
plotlybar(games, "hour", "Number of Games played on different gameTimeEastern(ignoring minutes)")

#### This gives us a better representation and tells us that most of the games started at 13, 16 and 20 hours.

In [None]:
# Bar chart of the number of games per value of the "week" column.
plotlybar(games, "week", "Number of Games Held Every week")

# <center>2. Player Data</center>
### <b>Player Data:</b> The <i>players.csv</i> contains player-level information from players that participated in any of the tracking data files. The key variable is nflId

In [None]:
# Load the player table; later cells convert its height/weight units and parse birthDate.
players = pd.read_csv('../input/nfl-big-data-bowl-2022/players.csv')
players  # display the frame as the cell output

### Convert Height to Centimeters and Weight to Kilograms

In [None]:
def _height_in_inches(parts):
    """Return total inches for a ``[feet, inches]`` pair or a single ``[inches]`` entry."""
    if len(parts) == 2:
        feet, inches = parts
        return int(feet) * 12 + int(inches)
    return int(parts[0])


# Split every raw height on the hyphen ("-"); one field means the value is
# already in inches, two fields mean feet and inches.
players_heights = players["height"].map(lambda raw_height: raw_height.split("-"))

# Inches -> centimeters, written back over the original column.
players["height"] = players_heights.map(_height_in_inches) * 2.54

# Pounds -> kilograms, rounded to two decimals, written back over the original column.
players["weight"] = (players["weight"] * 0.453592).round(2)

players

### Get Birth Year and Birth Month from the birthDate

In [None]:
# Create the two target columns with a placeholder of 0; a later cell fills
# them from birthDate.
players["birthYear"] = 0
players["birthMonth"] = 0

In [None]:
# Some birthDate values are missing. Drop those rows in place so that the
# string parsing in the next cell only sees real date strings.
players.dropna(subset=["birthDate"], inplace=True)

In [None]:
def _parse_birth_year_month(birth_date):
    """Return ``(year, month)`` as integers for one birthDate string.

    Two layouts are recognised: ``MM/DD/YYYY`` and ``YYYY-MM-DD``. Any other
    layout yields ``(0, 0)``, the placeholder the columns were created with.
    """
    slash_parts = birth_date.split('/')
    if len(slash_parts) == 3:  # 05/17/1994
        return int(slash_parts[2]), int(slash_parts[0])

    dash_parts = birth_date.split('-')
    if len(dash_parts) == 3:  # 1995-05-05
        return int(dash_parts[0]), int(dash_parts[1])

    return 0, 0


# Parse every date once and assign whole columns. Storing integers (rather
# than the raw string pieces) keeps the columns numeric, so sorting and
# min/max are by value and "5" and "05" count as the same month.
parsed_births = [_parse_birth_year_month(birth_date) for birth_date in players["birthDate"]]
players["birthYear"] = [year for year, _ in parsed_births]
players["birthMonth"] = [month for _, month in parsed_births]

In [None]:
players  # display the frame, now including birthYear and birthMonth

In [None]:
# 2x2 overview of the player table: birth year, birth month, weight, height.
fig = plt.figure(figsize=(20, 15), dpi=80)

# Top-left: number of players per birth year, ordered by year.
ax1 = fig.add_subplot(2, 2, 1)
birth_year_dist = players["birthYear"].value_counts().sort_index()
sns.barplot(x=birth_year_dist.index, y=birth_year_dist.values, ax=ax1)
ax1.tick_params(axis='x', rotation=45)
ax1.set_title("Birth Year Distribution", fontdict=title_font_dict)

# Top-right: number of players per birth month, ordered by month.
ax2 = fig.add_subplot(2, 2, 2)
birth_month_dist = players["birthMonth"].value_counts().sort_index()
sns.barplot(x=birth_month_dist.index, y=birth_month_dist.values, ax=ax2)
ax2.set_title("Birth Month Distribution", fontdict=title_font_dict)

# Bottom-left: histogram of the converted weights.
ax3 = fig.add_subplot(2, 2, 3)
sns.histplot(players["weight"], ax=ax3)
ax3.set_title("Weight(Kg) Distribution", fontdict=title_font_dict)

# Bottom-right: histogram of the converted heights in 10 bins.
ax4 = fig.add_subplot(2, 2, 4)
sns.histplot(players["height"], bins=10, ax=ax4)
ax4.set_title("Height(cm) Distribution", fontdict=title_font_dict);

In [None]:
# Box plots of player weight (top) and height (bottom).
fig = plt.figure(figsize=(20, 12), dpi=80)
title_font_dict = {"fontsize": 16, "fontweight": "bold"}

# The values are passed as ``x=`` explicitly: a positional Series is read as
# ``x`` by older seaborn releases but as ``data`` by newer ones, which changes
# the orientation of the box.
ax1 = fig.add_subplot(211)
sns.boxplot(x=players.weight, ax=ax1, width=0.5)
ax1.set_title("Weight(Kg) Distribution", fontdict=title_font_dict)

ax2 = fig.add_subplot(212)
sns.boxplot(x=players.height, ax=ax2, width=0.5)
ax2.set_title("Height(cm) Distribution", fontdict=title_font_dict);

#### Most of the Players are Born between 1991 and 1997
#### Most of the Players weigh between 80 and 120 kg
#### Most of the players are between 185 cm and 195 cm tall

In [None]:
# Report the extremes of the birthYear column: the largest year belongs to
# the youngest player, the smallest to the oldest.
birth_years = players["birthYear"]

latest_birth_year = birth_years.max()
print(f"Youngest Player Born in: {latest_birth_year}")

earliest_birth_year = birth_years.min()
print(f"Oldest Player Born in: {earliest_birth_year}")

In [None]:
# Scatter of weight against height with a fitted regression line in orange.
plt.figure(figsize=(10, 6), dpi=100)
sns.regplot(data=players, x="weight", y="height", line_kws={"color": "orange"})
plt.title("Player Weight(Kg) vs Player Height(cm)");

#### It seems that heavier players tend to be taller, and vice versa

In [None]:
# Seeing Which college has given the most number of players
# (only the 25 most frequent colleges are charted).
college_count = players["collegeName"].value_counts().head(25)

fig = px.bar(
    x=college_count.index,
    y=college_count.values,
    labels={"x": "College Name", "y": "Count"},
    title="<b>Players College Count</b>",
)
fig.update_layout(width=1300, height=500)
fig.show()

#### Alabama College has given the most number of Players

In [None]:
# Number of players listed under each value of the Position column.
position_count = players["Position"].value_counts()

fig = px.bar(
    x=position_count.index,
    y=position_count.values,
    labels={"x": "Position", "y": "Count"},
    title="<b>Players Position Count</b>",
)
fig.update_layout(width=1300, height=500)
fig.show()

#### Highest Number of Players play at WR Position and very few Players play at HB Position

# <center>3. Play Data</center>
### <b>Play Data:</b> The <i>plays.csv</i> file contains play-level information from each game. The key variables are gameId and playId

In [None]:
# Load the play-level table used by every plot in this section.
plays = pd.read_csv("../input/nfl-big-data-bowl-2022/plays.csv")
plays  # display the frame as the cell output

In [None]:
# Correlation heat map over the numeric columns of the play table.
plt.figure(figsize=(15, 12))
# Select numeric/boolean columns explicitly: recent pandas releases no longer
# drop non-numeric columns inside corr() and raise on text columns instead.
numeric_plays = plays.select_dtypes(include=["number", "bool"])
sns.heatmap(numeric_plays.corr(), annot=True)

In [None]:
# Print the column names, dtypes and non-null counts of the play table.
plays.info()

In [None]:
# Bar chart of the number of plays per quarter (title typo "quater" corrected).
plotlybar(plays, "quarter", "Number of Plays every quarter")

In [None]:
# Bar chart of the number of plays per value of the "down" column.
plotlybar(plays, "down", "Number of Plays every down")

In [None]:
# Bar chart of the number of plays per value of the "yardsToGo" column.
plotlybar(plays, "yardsToGo", "Number of Plays every yardsToGo")

In [None]:
# Bar chart of the number of plays per special-teams play type (title typo "Playes" corrected).
plotlybar(plays, "specialTeamsPlayType", "Number of Plays per specialTeamsPlayType")

In [None]:
# Bar chart of the number of plays per possession team (title typo "Playes" corrected).
plotlybar(plays, "possessionTeam", "Number of Plays per possessionTeam")

In [None]:
# Kick length shown twice on the same 0-80 x scale: histogram on top, box plot below.
plt.figure(figsize=(10, 12), dpi=100)

ax1 = plt.subplot(211)
sns.histplot(plays.kickLength, ax=ax1)
ax1.set_title("Kick Length Distribution", fontdict=title_font_dict)
ax1.set_xticks(np.arange(0, 85, 10));

ax2 = plt.subplot(212)
# ``x=`` is given explicitly so the box is drawn along the x axis (matching
# the x ticks set below) regardless of how the seaborn release in use
# interprets a positional Series.
sns.boxplot(x=plays.kickLength, ax=ax2, width=0.5)
ax2.set_title("Kick Length Distribution", fontdict=title_font_dict)
ax2.set_xticks(np.arange(0, 85, 10));

In [None]:
# Columns kept for the correlation and pairwise plots that follow.
columns = [
    "quarter",
    "down",
    "preSnapHomeScore",
    "preSnapVisitorScore",
    "kickReturnYardage",
    "kickLength",
    "yardsToGo",
    "kickBlockerId",
    "yardlineNumber",
    "penaltyYards",
    "playResult",
]

plays_temp = plays.loc[:, columns]
plays_temp

In [None]:
# Annotated correlation matrix of the selected play columns.
plt.figure(figsize=(12, 8), dpi=80)
correlation_matrix = plays_temp.corr()
sns.heatmap(correlation_matrix, annot=True)
plt.title("Correlation Heat Map", fontdict=title_font_dict);

In [None]:
# Pre-snap scores grouped by quarter: home team on the left, visitors on the right.
plt.figure(figsize=(15, 6), dpi=100)

ax1 = plt.subplot(1, 2, 1)
sns.boxplot(data=plays_temp, x="quarter", y="preSnapHomeScore", ax=ax1)
ax1.set_title("quarter vs preSnapHomeScore", fontdict=title_font_dict)

ax2 = plt.subplot(1, 2, 2)
sns.boxplot(data=plays_temp, x="quarter", y="preSnapVisitorScore", ax=ax2)
ax2.set_title("quarter vs preSnapVisitorScore", fontdict=title_font_dict);

In [None]:
# Kick return yardage (left) and kick length (right), grouped by down.
plt.figure(figsize=(15, 6), dpi=100)

ax1 = plt.subplot(1, 2, 1)
sns.boxplot(data=plays_temp, x="down", y="kickReturnYardage", ax=ax1)
ax1.set_title("down vs kickReturnYardage", fontdict=title_font_dict)

ax2 = plt.subplot(1, 2, 2)
sns.boxplot(data=plays_temp, x="down", y="kickLength", ax=ax2)
ax2.set_title("down vs kickLength", fontdict=title_font_dict);

In [None]:
# Three panels grouped by yardsToGo: kick return yardage, kick length, kickBlockerId.
plt.figure(figsize=(25, 8))

ax1 = plt.subplot(1, 3, 1)
sns.boxplot(data=plays_temp, x="yardsToGo", y="kickReturnYardage", ax=ax1)
ax1.set_title("yardsToGo vs kickReturnYardage", fontdict=title_font_dict)

ax2 = plt.subplot(1, 3, 2)
sns.boxplot(data=plays_temp, x="yardsToGo", y="kickLength", ax=ax2)
ax2.set_title("yardsToGo vs kickLength", fontdict=title_font_dict);

ax3 = plt.subplot(1, 3, 3)
sns.boxplot(data=plays_temp, x="yardsToGo", y="kickBlockerId", ax=ax3)
ax3.set_title("yardsToGo vs kickBlockerId", fontdict=title_font_dict);

In [None]:
# Regression plots against yardlineNumber: play result (left), kick length (right).
plt.figure(figsize=(20, 10))

ax1 = plt.subplot(1, 2, 1)
sns.regplot(
    data=plays_temp,
    x="yardlineNumber",
    y="playResult",
    scatter_kws={"color": "#396EB0"},
    line_kws={"color": "orange"},
    ax=ax1,
)
ax1.set_title("yardlineNumber vs playResult", fontdict=title_font_dict)

# The right panel swaps the two colours and uses "+" markers.
ax2 = plt.subplot(1, 2, 2)
sns.regplot(
    data=plays_temp,
    x="yardlineNumber",
    y="kickLength",
    marker="+",
    scatter_kws={"color": "orange"},
    line_kws={"color": "#396EB0"},
    ax=ax2,
)
ax2.set_title("yardlineNumber vs kickLength", fontdict=title_font_dict);

In [None]:
# Regression plots against kickLength: kick return yardage (left), play result (right).
plt.figure(figsize=(20, 10))

ax1 = plt.subplot(1, 2, 1)
sns.regplot(
    data=plays_temp,
    x="kickLength",
    y="kickReturnYardage",
    scatter_kws={"color": "#396EB0"},
    line_kws={"color": "orange"},
    ax=ax1,
)
ax1.set_title("kickLength vs kickReturnYardage", fontdict=title_font_dict)

# The right panel swaps the two colours and uses "+" markers.
ax2 = plt.subplot(1, 2, 2)
sns.regplot(
    data=plays_temp,
    x="kickLength",
    y="playResult",
    marker="+",
    scatter_kws={"color": "orange"},
    line_kws={"color": "#396EB0"},
    ax=ax2,
)
ax2.set_title("kickLength vs playResult", fontdict=title_font_dict);

# <center>4. Tracking Data</center>

In [None]:
# Load the 2020 tracking table; the plotting helpers below filter it by
# gameId, playId and position.
tracking2020 = pd.read_csv('../input/nfl-big-data-bowl-2022/tracking2020.csv')
tracking2020.head()

In [None]:
# Downloading and Saving Field Image for BackGround
import os
import urllib.request

from PIL import Image

field_image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/c/c5/AmFBfield.svg/1200px-AmFBfield.svg.png"
image_filename = 'field.png'

# Fetch the image only when no local copy exists, so re-running the cell does
# not hit the network again. The download goes to a temporary name first and
# is renamed afterwards, so an interrupted transfer never leaves a truncated
# field.png behind.
if not os.path.exists(image_filename):
    partial_filename = image_filename + ".part"
    urllib.request.urlretrieve(field_image_url, partial_filename)
    os.replace(partial_filename, image_filename)

#set a local image as a background
plotly_logo = Image.open(image_filename)

In [None]:
def plot_tracking_data(gameId, playId=False, position=False):
    """Scatter-plot tracking coordinates of one game on top of the field image.

    Rows are taken from the module-level ``tracking2020`` frame and selected
    by ``gameId`` plus exactly one further filter:

    * ``playId`` given: all rows of that play; the "home", "away" and
      "football" teams are drawn.
    * ``position`` given: all rows of that position; only "home" and "away"
      are drawn.

    When both are given, ``playId`` takes precedence.

    Args:
        gameId: Game identifier to match against the ``gameId`` column.
        playId: Play identifier, or a falsy value to filter by position instead.
        position: Position label, used only when ``playId`` is falsy.

    Raises:
        ValueError: If neither ``playId`` nor ``position`` is given.
    """
    # Validate first so a call without a filter fails with a clear message
    # instead of an UnboundLocalError further down.
    if not playId and not position:
        raise ValueError("plot_tracking_data requires either playId or position")

    colors = {"home": "#396EB0", "away": "#116530", "football": "red"}

    in_game = tracking2020["gameId"] == gameId
    if playId:
        temp_tracking_df = tracking2020[in_game & (tracking2020["playId"] == playId)]
        team_names = ["home", "away", "football"]
        title = f"<b>Tracking Data - gameId = {gameId} and playId = {playId}</b>"
    else:
        temp_tracking_df = tracking2020[in_game & (tracking2020["position"] == position)]
        team_names = ["home", "away"]
        title = f"<b>Tracking Data - gameId = {gameId} and position = {position}</b>"

    fig = go.Figure()

    for team_name in team_names:
        # Filter once per team and reuse the rows for both coordinates.
        team_rows = temp_tracking_df[temp_tracking_df["team"] == team_name]
        fig.add_trace(go.Scatter(
            x=team_rows.x,
            y=team_rows.y,
            mode="markers",
            name=team_name,
            marker=dict(color=colors[team_name])
        ))

    fig.update_layout(
        template="plotly_white",
        autosize=False,
        width=1000,
        height=550,
        # Field picture stretched over the whole plotting area, behind the markers.
        images=[dict(source=plotly_logo,
                     xref="paper", yref="paper", x=0, y=1, sizex=1, sizey=1,
                     layer="below", opacity=0.5)],
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False),
        title=title,
        xaxis_title="x",
        yaxis_title="y",
    )
    fig.show()

In [None]:
# All tracked points (home, away, football) of play 1186 in game 2020092707.
plot_tracking_data(gameId=2020092707, playId=1186)

In [None]:
# All tracked points (home, away, football) of play 250 in game 2020102600.
plot_tracking_data(gameId=2020102600, playId=250)

In [None]:
# All tracked points (home, away, football) of play 2460 in game 2021010307.
plot_tracking_data(gameId=2021010307, playId=2460)

In [None]:
# Rows of game 2021010307 whose position is "RB".
# NOTE(review): this module-level result is not referenced by any later cell
# visible here (the plotting helpers build their own local frame) - confirm
# whether the cell is still needed.
temp_tracking_df = tracking2020.query(f'gameId == 2021010307 and position == "RB"')


In [None]:
# Home and away tracking points of position "CB" across game 2020102508.
plot_tracking_data(gameId=2020102508, position="CB")

In [None]:
# Home and away tracking points of position "LB" across game 2020101112.
plot_tracking_data(gameId=2020101112, position="LB")

In [None]:
# Home and away tracking points of position "RB" across game 2020102507.
plot_tracking_data(gameId=2020102507, position="RB")

### Animating Tracking Data using Plotly

In [None]:
import base64

def animated_game(gameId, playId, speed):
    """Show an animated scatter of one play, one animation frame per ``time`` value.

    Rows are taken from the module-level ``tracking2020`` frame. Points are
    coloured by ``team``, tracked across frames by ``nflId`` and labelled with
    ``displayName`` on hover.

    Args:
        gameId: Game identifier to match against the ``gameId`` column.
        playId: Play identifier to match against the ``playId`` column.
        speed: Frame duration written into the first animation button of the
            figure (NOTE(review): presumably milliseconds - confirm against
            the plotly animation options).
    """
    # Select the play directly; a separate per-game filter is not needed.
    play_rows = (tracking2020['gameId'] == gameId) & (tracking2020['playId'] == playId)

    temp_tracking_df = (
        tracking2020.loc[play_rows, ['x', 'y', 'time', 'nflId', 'team', 'displayName']]
        .fillna(0.)  # missing values (e.g. an absent nflId) become 0
        .sort_values(['team', 'time'])
    )

    fig = px.scatter(
        temp_tracking_df,
        x='x',
        y='y',
        animation_frame='time',
        color='team',
        animation_group="nflId",
        hover_name="displayName"
    )
    fig.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')),
                      selector=dict(mode='markers'))
    fig.update_layout(
        template="plotly_white",
        autosize=False,
        width=1000,
        height=600,
        title=f'Animation Every Players in the play {playId} of Game {gameId}',
        # Field picture stretched over the whole plotting area, behind the markers.
        images=[dict(source=plotly_logo,
                     xref="paper", yref="paper", x=0, y=1, sizex=1, sizey=1,
                     layer="below", opacity=0.5)],
        xaxis=dict(showgrid=False),
        yaxis=dict(showgrid=False)
    )
    # The figure carries no animation menu when the selection does not produce
    # an animation (for example an empty selection); only adjust the frame
    # duration when the menu exists, instead of failing on updatemenus[0].
    if fig.layout.updatemenus:
        fig.layout.updatemenus[0].buttons[0].args[1]["frame"]["duration"] = speed
    fig.show()

In [None]:
# Animate play 40 of game 2021010300 with a frame duration setting of 60.
gameId = 2021010300
playId = 40
speed = 60

animated_game(gameId, playId, speed)

In [None]:
# Animate play 1987 of game 2020111510 with a frame duration setting of 60.
gameId = 2020111510
playId = 1987
speed = 60

animated_game(gameId, playId, speed)

In [None]:
# Animate play 1320 of game 2020121302 with a frame duration setting of 60.
gameId = 2020121302
playId = 1320
speed = 60

animated_game(gameId, playId, speed)