# Exploring the dataset

Sources:
1. [NFL Helmet Assignment image_labels view by STPETE_ISHII](https://www.kaggle.com/stpeteishii/nfl-helmet-assignment-image-labels-view)
2. [NFL EDA Yukkuri by PIXYZ0130](https://www.kaggle.com/pixyz0130/nfl-eda-yukkuri)

In [None]:
!ls /kaggle/input/nfl-health-and-safety-helmet-assignment

In [None]:
import os 
import cv2 
import glob
import random
import subprocess
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt 
from matplotlib import animation, rc
from IPython.display import Video, display

In [None]:
# Root directory of the dataset; every other path below is joined onto it.
root_path = "../input/nfl-health-and-safety-helmet-assignment"
# CSV file names located directly under root_path.
image_labels_file = "image_labels.csv"
train_labels_file = "train_labels.csv"
baseline_helmets_file = "train_baseline_helmets.csv"
# Sub-directory names under root_path for the image files and the videos.
image_dir = "images"
train_videos = "train"
test_videos = "test"

In [None]:
# Integer index assigned to each helmet label name.
label_to_idx = {
    'Helmet': 0,
    'Helmet-Blurred': 1,
    'Helmet-Difficult': 2,
    'Helmet-Sideline': 3,
    'Helmet-Partial': 4
}

# Box color for each label name; draw_bboxes passes these tuples unchanged to
# cv2.rectangle.
# NOTE(review): the tuples are interpreted in the channel order of the image
# being drawn on (BGR straight from cv2.imread, RGB after conversion) —
# confirm which order was intended.
label_to_color = {
    'Helmet': (255, 255, 255),
    'Helmet-Blurred': (255, 0, 0),
    'Helmet-Difficult': (0, 255, 0),
    'Helmet-Sideline': (0, 0, 255),
    'Helmet-Partial': (0, 255, 255)
}

def draw_bboxes(image, bboxes, labels):
    """Draw one colored rectangle per bounding box onto ``image``.

    Args:
        image: Image array to draw on. OpenCV draws directly into this array,
            so pass a copy if the original must stay untouched.
        bboxes: Sequence of boxes, each in ``[left, width, top, height]`` order.
        labels: Sequence of label names aligned with ``bboxes``; each name
            must be a key of ``label_to_color``.

    Returns:
        The image with all rectangles drawn.
    """
    for bbox, label in zip(bboxes, labels):
        # Rows coming from a NumPy/pandas array hold NumPy scalars (possibly
        # floats); cv2.rectangle expects plain integer pixel coordinates.
        left, width, top, height = (int(value) for value in bbox[:4])

        image = cv2.rectangle(
            image,
            (left, top),
            (left + width, top + height),
            label_to_color[label],
            thickness=2,
        )
    return image

def read_image(image_path):
    """Load an image from disk and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image converted from BGR to RGB, or ``None`` (after printing a
        message) when the file cannot be read or converted.
    """
    # cv2.imread reports an unreadable file by returning None, not by raising.
    image = cv2.imread(image_path)
    if image is None:
        print("Couldn't load the image!")
        return None

    try:
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    except cv2.error:
        print("Couldn't load the image!")
        return None

In [None]:
image_labels = pd.read_csv(f"{root_path}/{image_labels_file}")
image_labels

In [None]:
# Let's find the unique labels 
labels = image_labels['label'].unique().tolist()
print(f"{len(labels)} labels.\nThey are: {labels}")

In [None]:
image_files = glob.glob(f"{root_path}/{image_dir}/*")

print(f"Total number of images: {len(image_files)}\n")
print(f"Some examples:")
for i, file in enumerate(image_files[:5]):
    print(f"{i} : {file}")

In [None]:
# see labels of a random image
random_image = os.path.basename(
    random.choice(image_files)
)

image_labels[image_labels['image'] == random_image]

In [None]:
# Visualize a random image
def get_random_bboxed_image():
    """Pick a random file from ``image_files`` and return it with its boxes drawn.

    Returns:
        The annotated image in RGB channel order, suitable for ``plt.imshow``.
    """
    # Delegate to get_bboxed_image so the BGR -> RGB conversion is applied;
    # drawing on the raw cv2.imread result would be displayed by matplotlib
    # with the red and blue channels swapped.
    return get_bboxed_image(random.choice(image_files))

# Visualize a particular image
def get_bboxed_image(image_path):
    """Return the image at ``image_path`` in RGB with its labelled boxes drawn.

    The boxes are looked up in ``image_labels`` by the file's base name.

    Args:
        image_path: Path of the image file to annotate.

    Returns:
        An RGB image array with one rectangle per matching row of
        ``image_labels``.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None for a missing or unreadable file; fail here with
    # a clear message instead of inside cvtColor.
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    image_name = os.path.basename(image_path)
    selected_labels = image_labels[image_labels['image'] == image_name]
    # The second column is passed on as the label names and the columns from
    # the third onwards as the box geometry.
    bboxes = selected_labels.iloc[:, 2:].to_numpy()
    labels = selected_labels.iloc[:, 1].to_numpy()

    # cvtColor already produced a fresh array, so no extra copy is needed.
    return draw_bboxes(image, bboxes, labels)

In [None]:
image_bboxed = get_random_bboxed_image()

plt.figure(figsize=(10, 10))
plt.imshow(image_bboxed)

In [None]:
# Create a slideshow of the images
def create_animation(num_frames=10, interval=1000):
    """Build a slideshow animation of randomly chosen annotated images.

    Args:
        num_frames: Number of frames in the animation.
        interval: Delay between frames, in milliseconds.

    Returns:
        The ``animation.FuncAnimation`` object driving the slideshow.
    """
    figure = plt.figure(figsize=(12, 6))
    plt.axis('off')

    # Show a first random image; each later frame replaces its pixel data.
    canvas = plt.imshow(get_random_bboxed_image())

    def animate_func(_frame_index):
        # The frame index is ignored: every frame shows a fresh random image.
        canvas.set_array(get_random_bboxed_image())
        return [canvas]

    return animation.FuncAnimation(
        figure,
        animate_func,
        frames=num_frames,
        interval=interval,  # in ms
    )
rc('animation', html='jshtml')

In [None]:
create_animation()

## Look at the video

In [None]:
image_labels

In [None]:
def get_random_video_name():
    """Return the name of a randomly chosen entry in the training video directory."""
    train_dir = f"{root_path}/{train_videos}"
    candidates = os.listdir(train_dir)
    return random.choice(candidates)

def display_video(video_path, ratio=0.5):
    """Return an embedded IPython ``Video`` for a file in the training video directory.

    Args:
        video_path: File name of the video inside ``root_path/train_videos``.
        ratio: Scale factor applied to a 1280x720 display size.

    Returns:
        An ``IPython.display.Video`` with the file embedded.
    """
    # Use the train_videos constant rather than a hard-coded "train" so this
    # stays in sync with get_random_video_name.
    return Video(
        f"{root_path}/{train_videos}/{video_path}",
        embed=True,
        height=int(720 * ratio),
        width=int(1280 * ratio),
    )

# Takes the first-column value of the second row of image_labels and drops its
# last underscore-separated token.
# NOTE(review): random_video is not used anywhere in the visible code; the
# call below picks its own random video instead.
random_video = "_".join(image_labels.iloc[1, 0].split('_')[:-1])
display_video( get_random_video_name() )

In [None]:
def _boxes_for_video(boxes, video_name):
    """Return the rows of ``boxes`` for ``video_name`` with ``video``/``frame`` columns.

    ``video_frame`` values are split on underscores: the first three tokens
    joined form the video name and the last token is the integer frame number.
    """
    tokens = boxes["video_frame"].str.split("_")
    in_video = tokens.str[:3].str.join("_") == video_name
    selected = boxes[in_video].copy()
    selected["video"] = video_name
    selected["frame"] = tokens[in_video].str[-1].astype("int")
    return selected


def video_with_baseline_boxes(video_path, baseline_boxes, gt_labels, verbose=True):
    """
    Annotates a video with both the baseline model boxes and ground truth boxes.
    Baseline model prediction confidence is also displayed.

    Args:
        video_path: Path of the ``.mp4`` video to annotate.
        baseline_boxes: DataFrame of baseline predictions with the columns
            ``video_frame``, ``left``, ``top``, ``width``, ``height`` and ``conf``.
        gt_labels: DataFrame of ground-truth boxes with the columns
            ``video_frame``, ``left``, ``top``, ``width``, ``height``,
            ``label`` and ``isDefinitiveImpact``.
        verbose: When true, print the name of the video being processed.

    Returns:
        The path of the annotated video written to the working directory, or
        ``None`` (after printing "Boxes incorrect") when some frame has no
        baseline boxes or no ground-truth boxes.

    Raises:
        FileNotFoundError: If the video cannot be opened.
        subprocess.CalledProcessError: If the ffmpeg re-encode fails.
    """
    VIDEO_CODEC = "MP4V"
    HELMET_COLOR = (255, 0, 0)  # Blue
    BASELINE_COLOR = (0, 255, 0)  # Green
    IMPACT_COLOR = (0, 0, 255)  # Red
    video_name = os.path.basename(video_path).replace(".mp4", "")

    if verbose:
        print(f"Running for {video_name}")

    # Select this video's rows once, so each frame only searches a small table
    # instead of querying the full DataFrames.
    baseline_boxes = _boxes_for_video(baseline_boxes, video_name)
    gt_labels = _boxes_for_video(gt_labels, video_name)

    output_path = f"labelled_{video_name}.mp4"
    tmp_output_path = "tmp_" + output_path

    videocap = cv2.VideoCapture(video_path)
    if not videocap.isOpened():
        videocap.release()
        raise FileNotFoundError(f"Could not open video: {video_path}")

    fps = videocap.get(cv2.CAP_PROP_FPS)
    width = int(videocap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(videocap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_video = cv2.VideoWriter(
        tmp_output_path,
        cv2.VideoWriter_fourcc(*VIDEO_CODEC),
        fps,
        (width, height),
    )

    boxes_missing = False
    try:
        frame = 0
        while True:
            it_worked, img = videocap.read()
            if not it_worked:
                break
            # We need to add 1 to the frame count to match the label frame index
            # that starts at 1
            frame += 1

            frame_baseline = baseline_boxes[baseline_boxes["frame"] == frame]
            frame_gt = gt_labels[gt_labels["frame"] == frame]
            if frame_baseline.empty or frame_gt.empty:
                print("Boxes incorrect")
                boxes_missing = True
                break

            # Let's add a frame index to the video so we can track where we are
            img_name = f"{video_name}_frame{frame}"
            cv2.putText(img, img_name, (0, 50), cv2.FONT_HERSHEY_SIMPLEX,
                        1.0, HELMET_COLOR, thickness=2)

            # Baseline predictions, with their confidence above each box
            for box in frame_baseline.itertuples(index=False):
                cv2.rectangle(img, (box.left, box.top),
                              (box.left + box.width, box.top + box.height),
                              BASELINE_COLOR, thickness=1)
                cv2.putText(img, f"{box.conf:.2f}", (box.left, max(0, box.top - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, BASELINE_COLOR, 1)

            # Ground-truth boxes; definitive head impacts are drawn thick and red
            for box in frame_gt.itertuples(index=False):
                if box.isDefinitiveImpact:
                    color, thickness = IMPACT_COLOR, 3
                else:
                    color, thickness = HELMET_COLOR, 1
                cv2.rectangle(img, (box.left, box.top),
                              (box.left + box.width, box.top + box.height),
                              color, thickness=thickness)
                cv2.putText(img, box.label, (box.left + 1, max(0, box.top - 20)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, thickness=1)

            output_video.write(img)
    finally:
        # Always release the reader and writer, including on early exit.
        videocap.release()
        output_video.release()

    if boxes_missing:
        # Do not leave a partially written temporary file behind.
        if os.path.exists(tmp_output_path):
            os.remove(tmp_output_path)
        return None

    # Not all browsers support the codec, we will re-load the file at tmp_output_path
    # and convert to a codec that is more readable using ffmpeg
    if os.path.exists(output_path):
        os.remove(output_path)
    try:
        subprocess.run(
            [
                "ffmpeg",
                "-i",
                tmp_output_path,
                "-crf",
                "18",
                "-preset",
                "veryfast",
                "-vcodec",
                "libx264",
                output_path,
            ],
            check=True,  # fail loudly instead of returning a path that does not exist
        )
    finally:
        os.remove(tmp_output_path)

    return output_path

In [None]:
# Annotate one example training video and show the result inline.
example_video = f"{root_path}/{train_videos}/57584_000336_Sideline.mp4"
train_df = pd.read_csv(f"{root_path}/{train_labels_file}")
train_predict_df = pd.read_csv(f"{root_path}/{baseline_helmets_file}")

output_video = video_with_baseline_boxes(example_video,
                                         train_predict_df, train_df)

frac = 1.0  # scaling factor for display
# video_with_baseline_boxes returns None when it aborts on a frame without
# boxes; there is nothing to display in that case.
if output_video is not None:
    display(Video(data=output_video,
                  embed=True,
                  height=int(720 * frac),
                  width=int(1280 * frac)))

In [None]:
train_df

## Gathering Insights on the Data

#### Number of bounding boxes per image

In [None]:
# Number of label rows (bounding boxes) per image, indexed by image name.
img_bboxes = image_labels.pivot_table(index=["image"], aggfunc='size')

print("BBox Statistics")
print(f"Minimum: {img_bboxes.min()}")
print(f"Maximum: {img_bboxes.max()}")
print(f"Mean: {img_bboxes.mean():0.2f}")
print(f"Median: {img_bboxes.median()}")
# mode() returns a Series (several values can tie); print only its values
# rather than the multi-line Series repr.
print(f"Mode: {img_bboxes.mode().tolist()}")

So, the images have a minimum of **1** and a maximum of **74** bboxes. On average, there are about **19** bboxes in each image.

But there seems to be something fishy about the maximum value. In American football, each team has at most 11 players on the field, so the maximum number of players (wearing helmets) in a frame should be 22. So how can **74** bounding boxes appear in a single frame? We will need to analyze that.

First, let's plot the number of bounding boxes in each image for a better understanding.

In [None]:
# img_bboxes is indexed by image name, so img_bboxes[i] with an integer relied
# on pandas' deprecated positional fallback; take the values directly instead.
img_count = img_bboxes.tolist()
sns.displot(data=img_count, kde=True)

## Curious case of increased helmet count

Most of the frames have helmet counts within the expected range. Before proceeding further, let's visualize a few images with a large number of helmets.

In [None]:
img_bboxes.sort_values(ascending=False).iloc[:5]

In [None]:
image_bboxed = get_bboxed_image(f"{root_path}/{image_dir}/57515_000677_Sideline_frame0892.jpg")

plt.figure(figsize=(20,20))
plt.imshow(image_bboxed)

So, it seems there are people outside the playing area wearing helmets. So, the dataset isn't mislabelled. :Phew:

# The sizes of the bounding boxes compared

In [None]:
# Only looking at the first MAX_LABEL_ROWS label rows. Should give an idea
# Can be increased for more accurate results
MAX_LABEL_ROWS = 100_000

# Running extremes of box height/width as a fraction of the image size.
min_h = 1
min_w = 1
max_h = 0
max_w = 0
cur_video = ""

# Placeholder image size; replaced as soon as the first image is loaded.
img_h = 1e-6
img_w = 1e-6

# head() limits the scan by row position, independent of the index labels.
label_rows = image_labels.head(MAX_LABEL_ROWS).itertuples(index=False, name=None)
for image, label, left, width, top, height in label_rows:
    # Reload the image size only when the video part of the file name changes.
    this_video = "_".join(image.split('_')[:-1])
    if this_video != cur_video:
        img = cv2.imread(f"{root_path}/{image_dir}/{image}")
        # cv2.imread returns None for a missing or unreadable file.
        if img is None:
            raise FileNotFoundError(
                f"Could not read image: {root_path}/{image_dir}/{image}"
            )
        img_h, img_w, _ = img.shape
        cur_video = this_video

    normed_h = height / img_h
    normed_w = width / img_w

    min_h = min(min_h, normed_h)
    min_w = min(min_w, normed_w)
    max_h = max(max_h, normed_h)
    max_w = max(max_w, normed_w)

In [None]:
print(f"Min -> H: {min_h:0.5f}, W: {min_w:0.5f}\nMax -> H: {max_h:0.5f}, W: {max_w:0.5f}")

That means the smallest bounding box (in the subset of images we selected) is at least **0.2%** of the image width and **0.4%** of the image height. The maximum values for height and width are **14.8%** and **10.2%** respectively.
So, for a 1000x1000 image the bounding box dimensions (width x height) can be as low as **2px x 4px** and as high as **102px x 148px**.

These are some considerations to take into account when selecting the model.