Data Overview

This notebook provides an overview of the data and some examples of how to access and conduct some initial plotting of the data that has been provided. There are three different types of data provided for this problem:



**Image Data**-Almost 10,000 images and associated helmet labels for the purpose of building a helmet detection computer vision system.

**Video Data** - 120 videos (60 plays) from both a sideline and endzone point of view (one each per play) with associated helmet and helmet impact labels for the purpose of building a helmet impact detection computer vision system.

**Tracking Data**-Tracking data for all players that participate in the provided 60 plays.

This overview provides an example for how to parse and plot each of these data types. It also briefly summarizes the needed steps to submit a solution for scoring.

**import needed Libraries**

In [None]:
import imageio
from PIL import Image
import cv2
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import subprocess

import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline
plt.rcParams['figure.dpi'] = 150

import seaborn as sns

from IPython.display import Video, display

#block those warnings from pandas about setting values on a slice
import warnings
warnings.filterwarnings('ignore') 
import numpy as np
import pandas as pd 

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import patches
from PIL import Image

import os
from tqdm import tqdm
from IPython.display import clear_output

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

**Let's explore the Image Data**

**First, let's define the path to the competition data.**

In [None]:
# Root folder of the competition dataset; later cells build file paths relative to it.
DATA_PATH = '../input/nfl-impact-detection'

**First let's plot some random images with corresponding helmet bounding-boxes.**

In [None]:
def add_bboxes(ax, img, img_df):
    """Overlay the labelled boxes of one image on a matplotlib axis.

    Parameters
    ----------
    ax : matplotlib axis
        Axis that receives the rectangle patches.
    img : str
        Image file name; every row of ``img_df`` whose ``image`` column equals
        this value is drawn.
    img_df : pandas.DataFrame
        Label table providing the ``image``, ``left``, ``top``, ``width`` and
        ``height`` columns.

    Returns
    -------
    None
    """
    rows_for_image = img_df[img_df['image'] == img]
    for _, label_row in rows_for_image.iterrows():
        # The rectangle is anchored at (left, top) and extends by width/height.
        anchor = (label_row['left'], label_row['top'])
        outline = patches.Rectangle(
            anchor,
            label_row['width'],
            label_row['height'],
            linewidth=1,
            edgecolor='r',
            facecolor='None',
            alpha=0.7,
        )
        ax.add_patch(outline)

def plot_random_images(root_path, plot_bboxes=True, verbose=True):
    """Show a 4x2 grid of randomly picked images, one column per camera view.

    The left column is sampled from files whose name contains 'Endzone', the
    right column from files whose name contains 'Sideline'.

    Parameters
    ----------
    root_path : str
        Dataset root containing the ``images`` folder and ``image_labels.csv``.
    plot_bboxes : bool
        When true, the labelled boxes of each picked image are drawn on top.
    verbose : bool
        When true, print the total and per-view image counts.
    """
    images_path = root_path + '/images/'
    img_labels_df = pd.read_csv(root_path + '/image_labels.csv')

    images_list = os.listdir(images_path)
    # The camera view is encoded in the file name.
    endzone_images = [name for name in images_list if 'Endzone' in name]
    sideline_images = [name for name in images_list if 'Sideline' in name]

    if verbose:
        n_images = len(images_list)
        print(f'There are {n_images} images in the `images` folder.')
        print(f'  {len(endzone_images)} - images from endzone.')
        print(f'  {len(sideline_images)} - images from sideline.')

    # Column 0 draws from the endzone pool, column 1 from the sideline pool.
    column_pools = (endzone_images, sideline_images)

    fig, ax = plt.subplots(4, 2, figsize=(14, 12))
    for row_axes in ax:
        for cell_ax, pool in zip(row_axes, column_pools):
            picked_name = pool[np.random.randint(len(pool))]
            picked_img = Image.open(images_path + picked_name)
            cell_ax.imshow(picked_img)
            cell_ax.set_axis_off()
            if plot_bboxes:
                add_bboxes(cell_ax, picked_name, img_labels_df)

    ax[0][0].set_title('Endzone images')
    ax[0][1].set_title('Sideline images')
    fig.tight_layout()

In [None]:

# Draw the random sample grid with the labelled boxes overlaid and print the per-view image counts.
plot_random_images(DATA_PATH, plot_bboxes=True, verbose=True)

**Now let's check the helmet visibility classes distribution**

In [None]:
# Load the image labels and plot a histogram of the values in the `label` column.
img_labels_df = pd.read_csv(DATA_PATH + '/image_labels.csv')
plt.figure(figsize = (12,6))
img_labels_df.label.hist()

In [None]:
# Load the image-level label table into a DataFrame and preview its first rows.
Image_Labels = pd.read_csv('/kaggle/input/nfl-impact-detection/image_labels.csv')
Image_Labels.head()

In [None]:
# Summarise the columns, dtypes and non-null counts of the label table.
Image_Labels.info()

**Let's take an image and add the labels**

In [None]:
# Use the image referenced by the first label row as the running example.
Image_Name = Image_Labels['image'][0]
Image_Name

**Path to our selected image**

In [None]:
# Full path of the example image inside the dataset's images folder.
img_path = f"/kaggle/input/nfl-impact-detection/images/{Image_Name}"

**read and plot the Image**

In [None]:
# Read the example image from disk and show it without any annotation.
img = imageio.imread(img_path)
plt.imshow(img)
plt.show()

**Let's write a function for adding the bounding boxes from the labels to the image. Note that the pixel geometry starts with (0,0) in the top left of the image. To draw a bounding box, we need to specify the top-left pixel location and the bottom-right pixel location of the box.**

In [None]:
# Function to add labels to an Image
def add_img_box(image_name, image_labels, image=None):
    """Draw every labelled box of one image and display the result once.

    Parameters
    ----------
    image_name : str
        Value of the ``image`` column selecting the rows to draw.
    image_labels : pandas.DataFrame
        Label table with ``image``, ``left``, ``top``, ``width`` and ``height``
        columns.
    image : array-like, optional
        Pixel data to annotate. When omitted, the notebook-level ``img`` loaded
        by an earlier cell is used, so existing two-argument calls keep working.

    Returns
    -------
    None
        The annotated picture is shown with matplotlib. The input pixels are
        not modified because the boxes are drawn on a copy.
    """
    helmet_color = (0, 0, 0)  # black outline for every helmet box

    if image is None:
        # Backward-compatible fallback to the image loaded earlier in the notebook.
        image = img

    # Work on a plain, writable copy so re-running the cell never stacks
    # drawings on the shared image.
    annotated = np.array(image)

    boxes = image_labels.loc[image_labels['image'] == image_name]
    for box in boxes.itertuples(index=False):
        # cv2.rectangle needs the top-left and the bottom-right pixel as integer tuples.
        top_left = (int(box.left), int(box.top))
        bottom_right = (int(box.left + box.width), int(box.top + box.height))
        cv2.rectangle(annotated, top_left, bottom_right, helmet_color, thickness=1)

    # Display once, after all boxes are drawn (not once per box).
    plt.imshow(annotated)
    plt.show()
        
        
        

In [None]:
# Draw the labelled boxes on the example image and display it.
add_img_box(Image_Name, Image_Labels)

**now we can see Bounding Boxes around the Helmets.**

**Video Data**

The labeled video dataset provides video for 60 plays observed from both the sideline and endzone perspective (120 videos total). The train_labels.csv file contains labeled bounding boxes for every helmet that is visible in every frame of every video.

In [None]:
# Read the video label file (train_labels.csv) and preview its first rows.
video_labels = pd.read_csv('/kaggle/input/nfl-impact-detection/train_labels.csv')
video_labels.head()

In [None]:
# Summarise the columns, dtypes and non-null counts of the video labels.
video_labels.info()

In [None]:
# Count how often each distinct full row (combination of all column values) occurs.
video_labels.value_counts()



**The gameKey, playID, video, and frame fields facilitate matching the bounding box to the appropriate video file and video frame. The label field corresponds to the player field in the tracking data, providing a unique identifier for the helmets of players that are participating in the play. However, there are also helmets (players) that appear in the videos that are not participating in the play. These players are identified with the labels V00 (non-participant on the visiting team) or H00 (non-participant on the home team). In rare cases that a player cannot be uniquely identified that is participating in the play (for example when only the helmet is visible in a pile-up), the appropriate generic V00 or H00 label is applied to that helmet bounding box.**

The Sideline and Endzone views have been time-synced such that the snap occurs 10 frames into the video. This time alignment should be considered to be accurate to within +/- 3 frames or 0.05 seconds (video data is recorded at approximately 59.94 frames per second).

# **For the purposes of evaluation, definitive helmet impacts are defined as meeting three criteria:**

• impact = 1

• confidence > 1

• visibility > 0

Those labels with confidence = 1 document cases in which human labelers asserted it was possible that a helmet impact occurred, but it was not clear that the helmet impact altered the trajectory of the helmet. Those labels with visibility = 0 indicate that although there is reason to believe that an impact occurred to that helmet at that time, the impact itself was not visible from the view.

**Let's bring in the very first video and display it.**


In [None]:
# Use the video referenced by the first label row as the running example.
video_name = video_labels['video'][0]
video_name

**setting the Path and display the video**

In [None]:
# Build the path of the example video in the train folder and embed it in the notebook output.
video_path = f"/kaggle/input/nfl-impact-detection/train/{video_name}"
display(Video(data=video_path, embed=True))

**Let's create a function that will add bounding boxes to every frame in our video.**

In [None]:
# Create a function to annotate the video at the provided path using labels from the provided dataframe, return the path of the video
def annotate_video(video_path: str, video_labels: pd.DataFrame) -> str:
    """Write a copy of a video with its labelled boxes drawn on every frame.

    Parameters
    ----------
    video_path : str
        Path of the source video. Its base name selects the rows of
        ``video_labels`` through the ``video`` column.
    video_labels : pandas.DataFrame
        Label table with ``video``, ``frame`` (1-based), ``label``, ``left``,
        ``top``, ``width``, ``height``, ``impact``, ``confidence`` and
        ``visibility`` columns.

    Returns
    -------
    str
        Path of the annotated video ("labeled_" + base name), written to the
        current working directory and re-encoded with ffmpeg/libx264.
    """
    VIDEO_CODEC = "MP4V"
    HELMET_COLOR = (0, 0, 0)    # Black
    IMPACT_COLOR = (0, 0, 255)  # Red
    video_name = os.path.basename(video_path)
    output_path = "labeled_" + video_name
    tmp_output_path = "tmp_" + output_path

    # Select this video's labels once and bucket them by frame, instead of
    # scanning the whole label table again for every decoded frame.
    video_rows = video_labels.loc[video_labels["video"] == video_name]
    boxes_by_frame = {frame_idx: rows for frame_idx, rows in video_rows.groupby("frame")}

    vidcap = cv2.VideoCapture(video_path)
    output_video = None
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        output_video = cv2.VideoWriter(tmp_output_path, cv2.VideoWriter_fourcc(*VIDEO_CODEC), fps, (width, height))
        frame = 0
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break

            # We need to add 1 to the frame count to match the label frame index that starts at 1
            frame += 1

            # Let's add a frame index to the video so we can track where we are
            img_name = f"{video_name}_frame{frame}"
            cv2.putText(img, img_name, (0, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, HELMET_COLOR, thickness=2)

            # Now, add the boxes (frames without labels simply get none)
            boxes = boxes_by_frame.get(frame)
            if boxes is not None:
                for box in boxes.itertuples(index=False):
                    if box.impact == 1 and box.confidence > 1 and box.visibility > 0:    # Filter for definitive head impacts and turn labels red
                        color, thickness = IMPACT_COLOR, 4
                    else:
                        color, thickness = HELMET_COLOR, 1
                    # Add a box around the helmet
                    cv2.rectangle(img, (box.left, box.top), (box.left + box.width, box.top + box.height), color, thickness=thickness)
                    cv2.putText(img, box.label, (box.left, max(0, box.top - 5)), cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, thickness=1)
            output_video.write(img)
    finally:
        # Always free the decoder and flush the writer, even if a frame fails.
        vidcap.release()
        if output_video is not None:
            output_video.release()

    # Not all browsers support the codec, we will re-load the file at tmp_output_path and convert to a codec that is more broadly readable using ffmpeg
    if os.path.exists(output_path):
        os.remove(output_path)
    subprocess.run(["ffmpeg", "-i", tmp_output_path, "-crf", "18", "-preset", "veryfast", "-vcodec", "libx264", output_path])
    os.remove(tmp_output_path)

    return output_path

In [None]:
# Annotate the example video with its labels, then embed the resulting file in the notebook output.
labeled_video = annotate_video(f"/kaggle/input/nfl-impact-detection/train/{video_name}", video_labels)
display(Video(data=labeled_video, embed=True))

In [None]:
# Filter for definitive impacts labeled for this video:
# impact == 1, confidence > 1 and visibility > 0 (the same rule annotate_video uses to colour boxes red).
video_impacts = video_labels.loc[(video_labels.video == video_name) & (video_labels.impact == 1) & (video_labels.confidence > 1) & (video_labels.visibility > 0)]
len(video_impacts) # definitive impacts in this play

In [None]:
# Display the label rows of the definitive impacts selected above.
video_impacts

**Note that every play consists of two views - a sideline view and an endzone view. So, to find the other view of this play, we replace "Endzone" with "Sideline" in the video name.**

In [None]:
# The two views of a play share a file name except for the view token, so swap it to get the sideline video.
sideline_video_name = video_name.replace("Endzone", "Sideline")
# Define the path and then display the video in the notebook output
sideline_video_path = f"/kaggle/input/nfl-impact-detection/train/{sideline_video_name}"
display(Video(data=sideline_video_path, embed=True))

# **Tracking Data**

**The player track file in .csv format includes player position, direction, and orientation data for each player during the entire course of the play collected using the Next Gen Stats (NGS) system. This data is indexed by gameKey, playID, and player, with the time variable providing a temporal index within an individual play.**

**Let's filter the tracking data to the first play we looked at earlier.**

In [None]:
# Load the player tracking data. Only a cell's last expression is rendered
# automatically, so the preview needs an explicit display() to be shown.
track_data = pd.read_csv('/kaggle/input/nfl-impact-detection/train_player_tracking.csv')
display(track_data.head())
len(track_data)

In [None]:
# Take the game/play identifiers of the first tracking row and keep only that play's rows.
game_key = track_data['gameKey'][0]
play_id = track_data['playID'][0]
play_track = track_data.loc[(track_data.gameKey == game_key) & (track_data.playID == play_id)]
len(play_track)

In [None]:
# List the distinct event values recorded for this play.
play_track['event'].unique()

In [None]:
# Count how many tracking rows carry each event value.
play_track['event'].value_counts()

**create a DataFrame for Player Position at the Snap**

In [None]:
# Keep only the rows tagged with the 'ball_snap' event, i.e. the player positions at the snap.
at_snap = play_track.loc[play_track['event'] == 'ball_snap']
at_snap

In [None]:
import datetime as dt
import warnings

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from IPython.display import Video
from pathlib import Path

from matplotlib import animation
from matplotlib import patches
from tqdm.notebook import tqdm

warnings.simplefilter("ignore")

**Generation of Football Field**

In [None]:
def create_football_field(linenumbers=True,
                          endzones=True,
                          highlight_line=False,
                          highlight_line_number=50,
                          highlighted_name='Line of Scrimmage',
                          fifty_is_los=False,
                          figsize=(12, 7.33)):
    """
    Draw an empty football field for viewing plays and return its figure and axis.

    The field covers x in [0, 120] and y in [0, 53.3]; the 10-unit strips at
    both ends of the x range are the end zones.

    Parameters
    ----------
    linenumbers : bool
        Draw the yard numbers along both long edges of the field.
    endzones : bool
        Shade the two end zones; also restricts hash marks to x in [11, 109].
    highlight_line : bool
        Draw an extra yellow line at ``highlight_line_number``.
    highlight_line_number : int
        Yard line to highlight; 10 is added to account for the left end zone.
    highlighted_name : str
        Text printed next to the highlighted line.
    fifty_is_los : bool
        Draw a gold line at x = 60 labelled as the yard line at the snap.
    figsize : tuple
        Figure size forwarded to ``plt.subplots``.

    Returns
    -------
    (fig, ax)
        The matplotlib figure and axis holding the field.
    """
    fig, ax = plt.subplots(1, figsize=figsize)

    # Playing surface
    rect = patches.Rectangle((0, 0), 120, 53.3, linewidth=0.1,
                             edgecolor='r', facecolor='forestgreen', zorder=0)  # changed the field color to forestgreen
    ax.add_patch(rect)

    # One continuous white polyline tracing every 10-yard line and the outer border.
    ax.plot([10, 10, 10, 20, 20, 30, 30, 40, 40, 50, 50, 60, 60, 70, 70, 80,
             80, 90, 90, 100, 100, 110, 110, 120, 0, 0, 120, 120],
            [0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3,
             53.3, 0, 0, 53.3, 53.3, 0, 0, 53.3, 53.3, 53.3, 0, 0, 53.3],
            color='white')
    if fifty_is_los:
        ax.plot([60, 60], [0, 53.3], color='gold')
        ax.text(62, 50, '<- Player Yardline at Snap', color='gold')
    # Endzones
    if endzones:
        ez1 = patches.Rectangle((0, 0), 10, 53.3,
                                linewidth=0.1,
                                edgecolor='r',
                                facecolor='blue',
                                alpha=0.2,
                                zorder=0)
        # Width is 10 (x from 110 to 120); the previous width of 120 made the
        # patch extend far beyond the right edge of the field.
        ez2 = patches.Rectangle((110, 0), 10, 53.3,
                                linewidth=0.1,
                                edgecolor='r',
                                facecolor='blue',
                                alpha=0.2,
                                zorder=0)
        ax.add_patch(ez1)
        ax.add_patch(ez2)
    ax.set_xlim(0, 120)
    ax.set_ylim(-5, 58.3)
    ax.axis('off')
    if linenumbers:
        for x in range(20, 110, 10):
            # Numbers count up to the 50 and back down; subtract the 10-unit end zone offset.
            numb = x
            if x > 50:
                numb = 120 - x
            ax.text(x, 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white')
            # Mirrored copy along the opposite edge, rotated to face that sideline.
            ax.text(x - 0.95, 53.3 - 5, str(numb - 10),
                    horizontalalignment='center',
                    fontsize=20,  # fontname='Arial',
                    color='white', rotation=180)
    if endzones:
        hash_range = range(11, 110)
    else:
        hash_range = range(1, 120)

    # Four short tick marks per yard: one at each edge and two inner rows.
    for x in hash_range:
        ax.plot([x, x], [0.4, 0.7], color='white')
        ax.plot([x, x], [53.0, 52.5], color='white')
        ax.plot([x, x], [22.91, 23.57], color='white')
        ax.plot([x, x], [29.73, 30.39], color='white')

    if highlight_line:
        hl = highlight_line_number + 10
        ax.plot([hl, hl], [0, 53.3], color='yellow')
        ax.text(hl + 2, 50, '<- {}'.format(highlighted_name),
                color='yellow')
    return fig, ax

**set format of time column**

In [None]:
# Parse the time column into datetimes so it can be compared and shifted with timedeltas later on.
track_data['time'] = pd.to_datetime(track_data['time'])
# Marker colour per row: 'black' when the player id contains "H", 'white' otherwise.
track_data['color'] = track_data["player"].map(lambda x:'black' if "H" in x else "white")
track_data.head()

In [None]:
# Load the video labels again under the name used by the alignment cells below, and preview them.
train_labels = pd.read_csv("../input/nfl-impact-detection/train_labels.csv")
train_labels.head()

**Let's filter the track data to analyze the same play we displayed above (happens to be the first play in the file).**

In [None]:
# Filter the track data to the play of interest (the game/play of the first tracking row).
# Re-running the filter here picks up the parsed 'time' and the 'color' column added to track_data above.
game_key = track_data['gameKey'][0]
play_id = track_data['playID'][0]
play_track = track_data.loc[(track_data.gameKey == game_key) & (track_data.playID == play_id)]
len(play_track)

In [None]:
# See which distinct event values are stored for this play
play_track['event'].unique()

In [None]:
# Build a dataframe for the player positions at the snap (rows tagged 'ball_snap')

at_snap = play_track.loc[play_track.event == 'ball_snap']
at_snap

In [None]:
# Draw the empty field with the default options to check how it renders.
create_football_field()
plt.show()

**To start, we are going to plot the player positions at the snap. Let's use a helper function to set the color for the home and visiting team.**

In [None]:
# The visiting team *usually* wears white
def set_color(row):
    """Return the marker colour for a tracking row.

    Rows whose ``player`` value contains "H" get "black"; every other row
    gets "white".
    """
    return "black" if 'H' in row['player'] else "white"

# Colour each snap row by team.
# NOTE(review): at_snap is a filtered selection of play_track, so this assignment is the kind of
# "setting values on a slice" that the warnings filter at the top of the notebook silences.
at_snap['color'] = at_snap.apply(lambda row: set_color(row), axis=1)
at_snap

In [None]:
# Plot the positions of players at the snap

fig, ax = create_football_field()
at_snap.plot(x="x", y="y",  kind='scatter', ax=ax, color = at_snap['color'], s=300)
# Split by team so each label can use a text colour that contrasts with its marker.
at_snap_home = at_snap.loc[at_snap['player'].str.contains('H')]
at_snap_away = at_snap.loc[at_snap['player'].str.contains('V')]

# "V" markers are white, so the default text colour is used for their labels.
for index, row in at_snap_away.iterrows():
    ax.annotate(row['player'], (row['x'], row['y']), verticalalignment='center', horizontalalignment='center')
# "H" markers are black, so their labels are drawn in white.
for index, row in at_snap_home.iterrows():
    ax.annotate(row['player'], (row['x'], row['y']), verticalalignment='center', horizontalalignment='center', color = 'white')
# Zoom in on the players with a 5-unit margin on every side.
x_min = min(at_snap['x']) - 5
x_max = max(at_snap['x']) + 5
y_min = min(at_snap['y']) - 5
y_max = max(at_snap['y']) + 5
ax.set_xlim(x_min, x_max)
ax.set_ylim(y_min, y_max)
plt.show()

In [None]:
# Plot the positions of players through the play

# Colour every tracking row of the play by team.
play_track['color'] = play_track.apply(lambda row: set_color(row), axis=1)

# Filter to only include time after the snap
# (snap_time is taken from the first 'ball_snap' row; play_track is overwritten with the filtered rows)
snap_time = at_snap['time'].iloc[0]
play_track = play_track.loc[play_track['time'] > snap_time]

# One small dot per tracking sample, so each player's samples trace out a path.
fig, ax = create_football_field()
play_track.plot(x="x", y="y",  kind='scatter', ax=ax, color = play_track['color'], s= 1)

plt.show()

**Lets play more**

# Create train_player_tracking.csv with impact annotation

Modify train_player_tracking.csv to only include tracking data that falls within the video, and annotate the tracking data points where impacts occurred. To do this, I first align the tracking data with the video frames, and then determine the time at which each impact occurred by picking the tracking timestamp closest to the impact frame.

In [None]:
def make_alignment(train_track: pd.DataFrame, train_label: pd.DataFrame, video_dir: Path, game_key: int, play_id: int):
    """Trim one play's tracking data to its video window and flag the impact samples.

    Parameters
    ----------
    train_track : pandas.DataFrame
        Tracking table with ``gameKey``, ``playID``, ``player``, ``event`` and a
        datetime ``time`` column.
    train_label : pandas.DataFrame
        Video labels with ``gameKey``, ``playID``, ``view``, ``frame``, ``label``,
        ``impact``, ``impactType``, ``confidence`` and ``visibility`` columns.
    video_dir : pathlib.Path
        Folder holding ``{game_key}_{play_id:06}_Endzone.mp4``; only its frame
        rate is read.
    game_key, play_id : int
        Identify the play to process.

    Returns
    -------
    pandas.DataFrame
        The play's tracking rows inside the video window, with ``impact``,
        ``impactType``, ``confidence`` and ``visibility`` columns filled in for
        the sample nearest to each Endzone impact label. The index is reset and
        the previous index is kept as an ``index`` column.

    Raises
    ------
    ValueError
        If no positive frame rate can be read from the video.
    IndexError
        If the play has no 'ball_snap' tracking row.
    """
    # .copy() so the new columns are added to a private frame instead of a
    # selection of the caller's table.
    play_track = train_track.query(f"gameKey == {game_key} & playID == {play_id}").copy()
    play_label = train_label.query(f"gameKey == {game_key} & playID == {play_id}")

    play_track["impact"] = 0
    play_track["impactType"] = ""
    play_track["confidence"] = 0
    play_track["visibility"] = 0

    snap_frame = play_track.query("event == 'ball_snap'")
    snap_time = snap_frame["time"].iloc[0]

    video_name = f"{game_key}_{str(play_id).rjust(6, '0')}_Endzone.mp4"
    video = cv2.VideoCapture(str(video_dir / video_name))
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
    finally:
        # Only the frame rate is needed; free the decoder right away.
        video.release()
    if not fps > 0:
        raise ValueError(f"Could not read a frame rate from {video_dir / video_name}")

    nframes = play_label.frame.nunique()

    # Shift back by 10 frames so that snap_time becomes the start of the video.
    snap_time -= dt.timedelta(seconds=1.0 / fps * 10)

    duration = nframes / fps
    end_time = snap_time + dt.timedelta(seconds=duration)

    play = play_track.loc[(play_track["time"] >= snap_time) & (play_track["time"] < end_time)].copy()

    impact_frames = play_label.query("impact == 1 & view == 'Endzone'")
    for _, row in impact_frames.iterrows():
        time_from_start = row.frame / fps
        time = snap_time + dt.timedelta(seconds=time_from_start)

        # Look only at the impacted player's samples; a label without any
        # tracking rows in this window is skipped instead of raising IndexError.
        player_rows = play.loc[play["player"] == row.label]
        if player_rows.empty:
            continue

        # Tracking sample of that player closest in time to the impact frame.
        impact_point_index = (player_rows["time"] - time).abs().idxmin()
        play.loc[impact_point_index, "impact"] = 1
        play.loc[impact_point_index, "impactType"] = row.impactType
        play.loc[impact_point_index, "confidence"] = row.confidence
        play.loc[impact_point_index, "visibility"] = row.visibility
    play = play.reset_index(drop=False)
    return play

In [None]:
# Every distinct (gameKey, playID) combination present in the tracking data.
pairs = track_data.groupby(["gameKey", "playID"]).count().index.tolist()
video_dir = Path("../input/nfl-impact-detection/train/")

# Align each play separately, then stack the per-play frames into one table.
play_trackings = []
for game_key, play_id in pairs:
    play_trackings.append(make_alignment(track_data, train_labels, video_dir, game_key, play_id))
    
annotated_trackings = pd.concat(play_trackings, axis=0).reset_index(drop=True)
annotated_trackings.head(10)

In [None]:
# Tracking samples flagged as impacts for game 57583, play 82.
annotated_trackings.query("impact == 1 & gameKey == 57583 & playID == 82")

In [None]:
# Endzone impact labels of the same play, for comparison with the flagged tracking samples.
train_labels.query("impact == 1 & gameKey == 57583 & playID == 82 & view == 'Endzone'")

In [None]:
# Compare the number of flagged tracking samples with the number of Endzone impact labels for that play.
len(annotated_trackings.query(
    "impact == 1 & gameKey == 57583 & playID == 82")), len(train_labels.query(
    "impact == 1 & gameKey == 57583 & playID == 82 & view == 'Endzone'"))

In [None]:
# Save the annotated tracking table to disk.
# NOTE(review): the file holds annotated tracking data, not predictions — confirm that the name "submission.csv" is intended.
annotated_trackings.to_csv("submission.csv", index=False)

In [None]:
# List the distinct impactType values; the animation below colours impact markers by this value.
annotated_trackings.impactType.unique()

# Animation

In [None]:
# Marker colour per impact type, keyed by the lower-cased impactType value.
_IMPACT_MARKER_COLORS = {
    "helmet": "red",
    "body": "yellow",
    "shoulder": "blue",
    "hand": "orange",
    "ground": "purple",
}
# Used for impacts whose type is missing or not listed above.
_UNKNOWN_IMPACT_COLOR = "magenta"


def _marker_color(row, show_impact_marker: bool) -> str:
    """Return the scatter colour for one tracking row (impact colour or team colour)."""
    if show_impact_marker and row.impact == 1:
        return _IMPACT_MARKER_COLORS.get(str(row.impactType).lower(), _UNKNOWN_IMPACT_COLOR)
    return row.color


def create_animation(play_track: pd.DataFrame, video_dir: Path, save_dir: Path):
    """Render one play's tracking data as an animated field and save it as an mp4.

    Parameters
    ----------
    play_track : pandas.DataFrame
        Tracking rows of a single play with ``gameKey``, ``playID``, ``player``,
        ``event``, ``x``, ``y``, ``color`` and a datetime ``time`` column. When
        an ``impact`` column is present (together with ``impactType``), impact
        samples are coloured by impact type.
    video_dir : pathlib.Path
        Folder holding ``{gameKey}_{playID:06}_Endzone.mp4``; the video is read
        only to get its frame rate and frame count.
    save_dir : pathlib.Path
        Folder that receives ``{gameKey}_{playID:06}_Tracking.mp4``.

    Raises
    ------
    ValueError
        If no positive frame rate can be read from the video, or if no tracking
        sample falls inside the video window.
    IndexError
        If the play has no 'ball_snap' tracking row.
    """
    snap_frame = play_track.query("event == 'ball_snap'")
    snap_time = snap_frame["time"].iloc[0]
    # Start the animation 0.1 s before the snap.
    snap_time -= dt.timedelta(seconds=0.1)

    game_key = play_track["gameKey"].iloc[0]
    play_id = play_track["playID"].iloc[0]

    video_name = f"{game_key}_{str(play_id).rjust(6, '0')}_Endzone.mp4"
    video = cv2.VideoCapture(str(video_dir / video_name))
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # Count frames by decoding the whole video.
        nframes = 0
        while True:
            worked, _ = video.read()
            if not worked:
                break
            nframes += 1
    finally:
        video.release()
    if not fps > 0:
        raise ValueError(f"Could not read a frame rate from {video_dir / video_name}")

    duration = nframes / fps
    end_time = snap_time + dt.timedelta(seconds=duration)

    play = play_track.loc[(play_track["time"] >= snap_time) & (play_track["time"] < end_time)]

    unique_times = play.time.unique()
    if len(unique_times) == 0:
        raise ValueError(f"No tracking samples fall inside the video window of {video_name}")

    show_impact_marker = "impact" in play_track.columns

    fig, ax = create_football_field(figsize=(24, 12.66))
    try:
        # initialize the plot with the first timestamp: one marker and one label per player
        points = {}
        annotations = {}
        obj_list = []
        start_time = unique_times[0]
        tracking_at_that_moment = play[play["time"] == start_time]
        for _, row in tracking_at_that_moment.iterrows():
            player_id = row.player
            x, y = row.x, row.y
            color = _marker_color(row, show_impact_marker)
            plot_obj = ax.scatter(x, y, color=color, s=70)
            anno_obj = ax.annotate(player_id,
                                   (x, y),
                                   verticalalignment="center",
                                   horizontalalignment="center",
                                   color="white" if color == "black" else "black",
                                   fontsize=10)
            points[player_id] = plot_obj
            annotations[player_id] = anno_obj
            obj_list.append(plot_obj)
            obj_list.append(anno_obj)

        def init():
            return obj_list

        def update(step: int):
            time = unique_times[step]
            tracking_at_that_moment = play[play["time"] == time]
            for _, row in tracking_at_that_moment.iterrows():
                player_id = row.player
                if player_id not in points:
                    # No marker was created for a player absent from the first timestamp.
                    continue
                x, y = row.x, row.y
                points[player_id].set_offsets(np.array([x, y]))
                if show_impact_marker:
                    points[player_id].set_color(_marker_color(row, show_impact_marker))
                annotations[player_id].set_x(x)
                annotations[player_id].set_y(y)
            return obj_list

        ani = animation.FuncAnimation(
            fig, update, frames=len(unique_times), interval=100, init_func=init)
        ani.save(save_dir / f"{game_key}_{str(play_id).rjust(6, '0')}_Tracking.mp4")
    finally:
        # Close this specific figure so animating many plays does not pile up open figures.
        plt.close(fig)

In [None]:
# Select the play of the first tracking row; track_data carries the parsed 'time' and the 'color' column that create_animation reads.
game_key = track_data.loc[0, "gameKey"]
play_id = track_data.loc[0, "playID"]
play_track = track_data.query(f"gameKey == {game_key} & playID == {play_id}")

# Source videos are read from the train folder; the animation is written to the working directory.
video_dir = Path("../input/nfl-impact-detection/train/")
save_dir = Path("./")

In [None]:
# Render the animation for the selected play, then list the working directory to confirm the mp4 was written.
create_animation(play_track, video_dir, save_dir)
!ls

In [None]:
# Embed the saved animation.
# NOTE(review): the file name is hard-coded; it matches the file written above only if the first row of track_data belongs to game 57583, play 82.
Video(data="./57583_000082_Tracking.mp4", embed=True)


# Annotations with impact marker

In [None]:
# Same play, but taken from the annotated table so the 'impact' column is present and impact samples get coloured.
# The output overwrites the mp4 written by the previous animation because the file name is the same.
play_track = annotated_trackings.query(f"gameKey == {game_key} & playID == {play_id}")
create_animation(play_track, video_dir, save_dir)
Video(data="./57583_000082_Tracking.mp4", embed=True)

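Colored markers correspond to impact events: red for helmet, yellow for body, blue for shoulder, orange for hand and purple for ground impacts.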

This animation corresponds to the play of 57583_000082_Endzone.mp4 and 57583_000082_Sideline.mp4. 

# Let's check it out.

In [None]:
# Embed the original endzone video of the same play for comparison with the animation.
Video(data="../input/nfl-impact-detection/train/57583_000082_Endzone.mp4", embed=True)

# for sideline view

In [None]:
# Embed the original sideline video of the same play.
Video(data="../input/nfl-impact-detection/train/57583_000082_Sideline.mp4", embed=True)

# finally I am going to create this animation for entire train set.

In [None]:
# Output folder for the train-set animations; created if it does not exist yet.
save_dir = Path("./train_tracking")
save_dir.mkdir(exist_ok=True, parents=True)

In [None]:
# Every distinct (gameKey, playID) combination in the train tracking data; show the first five.
pairs = track_data.groupby(["gameKey", "playID"]).count().index.tolist()
pairs[:5]

In [None]:
# Render one animation per train play from the raw tracking data (no 'impact' column, so team colours only).
for game_key, play_id in tqdm(pairs):
    play_track = track_data.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)

In [None]:

# Repeat for every train play using the annotated table, so impact samples are coloured; written to a separate folder.
save_dir = Path("./train_tracking_with_impact_marker")
save_dir.mkdir(exist_ok=True, parents=True)
for game_key, play_id in tqdm(pairs):
    play_track = annotated_trackings.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)


# Create this animation for all the test set

In [None]:
# Load the test tracking data and prepare it the same way as the train data:
# parse 'time' into datetimes and add the per-row marker 'color'.
test_track_data = pd.read_csv("../input/nfl-impact-detection/test_player_tracking.csv")
test_track_data["time"] = pd.to_datetime(test_track_data["time"])
test_track_data["color"] = test_track_data["player"].map(lambda x: "black" if "H" in x else "white")
# Output folder for the test-set animations.
save_dir = Path("./test_tracking")
save_dir.mkdir(exist_ok=True, parents=True)

# Every distinct (gameKey, playID) combination in the test tracking data.
pairs = test_track_data.groupby(["gameKey", "playID"]).count().index.tolist()

In [None]:
# Render one animation per test play, reading the matching endzone videos from the test folder.
video_dir = Path("../input/nfl-impact-detection/test")
for game_key, play_id in tqdm(pairs):
    play_track = test_track_data.query(f"gameKey == {game_key} & playID == {play_id}")
    create_animation(play_track, video_dir, save_dir)