I often find inspiration for how a model can be improved by visualizing its predictions. In this notebook I provide a function `video_with_predictions`. This function combines a model's validation predictions with the associated video.

The predictions I'm modeling come from the great baseline here: https://www.kaggle.com/its7171/nfl-baseline-simple-helmet-mapping

Note, in the output videos:
- If incorrect, the ground truth helmet boxes are `black` and predictions are `white`.
- Correct predictions have `green` boxes
- Impact helmet boxes are `red` if incorrect and `yellow` if correct (note these predictions are weighted 1000x compared to a non-impact helmet box prediction)

In [None]:
import numpy as np
import pandas as pd
import itertools
import glob
import os
import cv2
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from multiprocessing import Pool
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
import random


# Run-mode switch: True -> work on the train files (labels available for
# validation), False -> work on the test files.
debug = True
# Helmet detections at or below this confidence are ignored by the mapping step.
CONF_THRE = 0.3
BASE_DIR = '../input/nfl-health-and-safety-helmet-assignment'

# Ground-truth labels are always read from the train split.
labels = pd.read_csv(f'{BASE_DIR}/train_labels.csv')

# Pick the tracking / baseline-helmet files that belong to the selected mode,
# then load them in the same order as before (tracking first, helmets second).
if debug:
    tracking_csv = f'{BASE_DIR}/train_player_tracking.csv'
    helmets_csv = f'{BASE_DIR}/train_baseline_helmets.csv'
else:
    tracking_csv = f'{BASE_DIR}/test_player_tracking.csv'
    helmets_csv = f'{BASE_DIR}/test_baseline_helmets.csv'
tracking = pd.read_csv(tracking_csv)
helmets = pd.read_csv(helmets_csv)
    
    
# copied from https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide
def add_track_features(tracks, fps=59.94, snap_frame=10):
    """
    Add column features helpful for syncing with video data.

    The input is not modified; a copy with these extra/converted columns is
    returned:

    - ``game_play``: ``"<gameKey>_<playID zero-padded to 6 digits>"``
    - ``time``: converted with ``pd.to_datetime``
    - ``snap``: time of the first ``ball_snap`` event of the play
    - ``isSnap``: True on rows whose ``time`` equals the play's snap time
    - ``team``: ``"Home"`` / ``"Away"`` derived from the first character of
      ``player`` (``H`` / ``V``)
    - ``snap_offset``: seconds elapsed since the snap (negative before it)
    - ``est_frame``: estimated video frame, ``round(snap_offset * fps + snap_frame)``

    Args:
        tracks (pd.DataFrame): tracking rows with at least the columns
            ``gameKey``, ``playID``, ``time``, ``event`` and ``player``.
        fps (float, optional): video frame rate used for the frame estimate.
        snap_frame (int, optional): video frame at which the snap is assumed
            to happen.

    Returns:
        pd.DataFrame: copy of ``tracks`` with the columns above.

    Note:
        Rows of a play without a ``ball_snap`` event get a NaN offset, which
        cannot be cast to an integer frame number.
    """
    tracks = tracks.copy()
    tracks["game_play"] = (
        tracks["gameKey"].astype("str")
        + "_"
        + tracks["playID"].astype("str").str.zfill(6)
    )
    tracks["time"] = pd.to_datetime(tracks["time"])
    snap_dict = (
        tracks.query('event == "ball_snap"')
        .groupby("game_play")["time"]
        .first()
        .to_dict()
    )
    tracks["snap"] = tracks["game_play"].map(snap_dict)
    tracks["isSnap"] = tracks["snap"] == tracks["time"]
    tracks["team"] = tracks["player"].str[0].replace("H", "Home").replace("V", "Away")
    # `.astype("timedelta64[ms]") / 1_000` only produced float seconds on
    # pandas < 2; on newer versions it stays a timedelta and the frame
    # arithmetic below raises. `total_seconds()` is a float on every version.
    tracks["snap_offset"] = (tracks["time"] - tracks["snap"]).dt.total_seconds()
    # Estimated video frame
    tracks["est_frame"] = (
        ((tracks["snap_offset"] * fps) + snap_frame).round().astype("int")
    )
    return tracks
tracking = add_track_features(tracking)


if debug:
    # Restrict every table to a fixed handful of games so the notebook runs quickly.
#     sample_keys = random.sample(list(tracking['gameKey'].unique()), 3)
    sample_keys = [57583, 58095, 57682]
    # The game key is the first underscore-separated token of `video_frame`.
    helmets['gameKey'] = helmets['video_frame'].str.split('_').str[0]
    tracking_in_sample = tracking['gameKey'].isin(sample_keys)
    tracking = tracking[tracking_in_sample]
    helmets_in_sample = helmets['gameKey'].astype(int).isin(sample_keys)
    helmets = helmets[helmets_in_sample]
    labels_in_sample = labels['gameKey'].astype(int).isin(sample_keys)
    labels = labels[labels_in_sample]
tracking.shape, helmets.shape, labels.shape


def find_nearest(array, value):
    """
    Return the element of `array` closest to `value`.

    Both `array` and `value` are converted to integers before comparing;
    on ties the element that appears first wins.
    """
    target = int(value)
    candidates = np.asarray(array).astype(int)
    nearest_pos = np.argmin(np.abs(candidates - target))
    return candidates[nearest_pos]

def norm_arr(a):
    """
    Min-max scale `a`: subtract its minimum, then divide by the resulting maximum.
    """
    shifted = a - a.min()
    return shifted / shifted.max()
    
def dist(a1, a2):
    """
    Return the norm (`np.linalg.norm`) of the element-wise difference `a1 - a2`.
    """
    difference = a1 - a2
    return np.linalg.norm(difference)

# Upper bound on how many "which elements to drop" combinations are evaluated.
max_iter = 2000
def dist_for_different_len(a1, a2):
    """
    Distance between two min-max normalised arrays where `a1` may be longer.

    When the lengths match, both arrays are normalised and compared directly.
    Otherwise every way of deleting the surplus elements from `a1` is tried
    (a random sample of `max_iter` of them when there are more than that)
    and the deletion giving the smallest distance is kept.

    Returns:
        (distance, deleted_indices): `deleted_indices` is `()` for equal
        lengths, the winning tuple of `a1` positions otherwise, or `None`
        when no candidate scored below the initial bound of 10000.
    """
    assert len(a1) >= len(a2), f'{len(a1)}, {len(a2)}'
    surplus = len(a1) - len(a2)
    a2 = norm_arr(a2)
    if surplus == 0:
        return dist(norm_arr(a1), a2), ()

    candidates = list(itertools.combinations(range(len(a1)), surplus))
    if len(candidates) > max_iter:
        candidates = random.sample(candidates, max_iter)

    best_dist = 10000
    best_drop = None
    for drop in candidates:
        trimmed = norm_arr(np.delete(a1, drop))
        candidate_dist = dist(trimmed, a2)
        if candidate_dist < best_dist:
            best_dist, best_drop = candidate_dist, drop

    return best_dist, best_drop
        
def rotate_arr(u, t, deg=True):
    """
    Rotate 2-D coordinates about the origin with the standard rotation matrix.

    Args:
        u: array whose first axis has length 2 (row 0 = x, row 1 = y); either
            a single point of shape (2,) or a set of points of shape (2, N).
        t: rotation angle.
        deg (bool, optional): when truthy `t` is given in degrees, otherwise
            in radians. Defaults to True.

    Returns:
        np.ndarray: rotated coordinates, same shape as `u`.
    """
    if deg:
        t = np.deg2rad(t)
    cos_t, sin_t = np.cos(t), np.sin(t)
    R = np.array([[cos_t, -sin_t],
                  [sin_t,  cos_t]])
    return np.dot(R, u)

def dist_rot(tracking_df, a2):
    """
    Find the field rotation under which tracked players best line up with `a2`.

    The player (x, y) positions are rotated from -30 to +30 degrees in 3 degree
    steps; for each angle the sorted rotated x coordinates are compared with
    `a2` through `dist_for_different_len`, and the best-scoring angle is kept.

    Returns:
        (distance, players): the smallest distance found and the `player`
        values ordered by rotated x, with the entries selected for deletion
        by the best match removed.
    """
    ordered = tracking_df.sort_values('x')
    coords = np.array((ordered['x'], ordered['y']))
    best_dist = 10000
    best_drop = None
    best_x_rot = None
    step = 3
    limit = step * 10
    for angle in range(-limit, limit + 1, step):
        rotated = rotate_arr(coords, angle)
        candidate_dist, candidate_drop = dist_for_different_len(np.sort(rotated[0]), a2)
        if candidate_dist < best_dist:
            best_dist = candidate_dist
            best_drop = candidate_drop
            best_x_rot = rotated[0]
    ordered['x_rot'] = best_x_rot
    players_by_rot = ordered.sort_values('x_rot')['player'].values
    return best_dist, np.delete(players_by_rot, best_drop)


def mapping_df(args):
    """
    Assign player labels to the helmet boxes of a single video frame.

    The tracking rows of the play whose `est_frame` is nearest to the video
    frame are selected, and the horizontal centres of the helmet boxes are
    matched against the (rotated) player positions with `dist_rot`. Both the
    left-to-right and the mirrored ordering are tried; the one with the
    smaller distance wins.

    Args:
        args: `(video_frame, df)` pair as produced by
            `helmets.groupby('video_frame')`. `video_frame` has the form
            `<gameKey>_<playID>_<view>_<frame>`; `df` holds that frame's helmet
            boxes (`left`, `width`, `top`, `height`, `conf`). `df` is not
            modified.

    Returns:
        pd.DataFrame with columns
        `video_frame, left, width, top, height, label`. It has no rows when no
        detection exceeds `CONF_THRE`.
    """
    out_cols = ['video_frame', 'left', 'width', 'top', 'height', 'label']
    video_frame, df = args
    gameKey, playID, view, frame = video_frame.split('_')
    gameKey = int(gameKey)
    playID = int(playID)
    frame = int(frame)
    this_tracking = tracking[(tracking['gameKey'] == gameKey) & (tracking['playID'] == playID)]
    est_frame = find_nearest(this_tracking.est_frame.values, frame)
    # Copy so the Endzone axis swap below writes to a private frame instead of
    # a slice of the global `tracking` table.
    this_tracking = this_tracking[this_tracking['est_frame'] == est_frame].copy()
    len_this_tracking = len(this_tracking)

    # Filter first and work on a copy: the helper columns must not leak into
    # the caller's group frame.
    df = df[df['conf'] > CONF_THRE].copy()
    if df.empty:
        # Nothing confident enough to label; normalising an empty array would
        # raise, so return an empty result with the expected columns.
        return df.assign(label=pd.Series(dtype=object))[out_cols]
    center = (df['left'] + df['width'] / 2).astype(int)
    df['center_h_p'] = center
    df['center_h_m'] = center * -1
    if len(df) > len_this_tracking:
        # Never keep more boxes than there are tracked players.
        df = df.tail(len_this_tracking)
    df_p = df.sort_values('center_h_p').copy()
    df_m = df.sort_values('center_h_m').copy()

    if view == 'Endzone':
        # Swap the axes so 'x' is compared against the horizontal box centres.
        this_tracking['x'], this_tracking['y'] = this_tracking['y'].copy(), this_tracking['x'].copy()
    a2_p = df_p['center_h_p'].values
    a2_m = df_m['center_h_m'].values

    min_dist_p, players_p = dist_rot(this_tracking, a2_p)
    min_dist_m, players_m = dist_rot(this_tracking, a2_m)
    if min_dist_p < min_dist_m:
        players = players_p
        tgt_df = df_p
    else:
        players = players_m
        tgt_df = df_m
    tgt_df['label'] = players
    return tgt_df[out_cols]

# Map every video frame to player labels in parallel.
df_list = list(helmets.groupby('video_frame'))
# The context manager shuts the worker processes down even when a task
# raises, so a failing frame no longer leaves the pool running.
with Pool(processes=4) as p:
    submission_df_list = list(
        tqdm(p.imap(mapping_df, df_list), total=len(df_list))
    )

submission_df = pd.concat(submission_df_list)
# submission_df.to_csv('submission.csv', index=False)


# copied from https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide
class NFLAssignmentScorer:
    def __init__(
        self,
        labels_df: pd.DataFrame = None,
        labels_csv="train_labels.csv",
        check_constraints=True,
        weight_col="isDefinitiveImpact",
        impact_weight=1000,
        iou_threshold=0.35,
        remove_sideline=True,
    ):
        """
        Helper class for grading submissions in the
        2021 Kaggle Competition for helmet assignment.
        Version 1.0
        https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide

        Use:
        ```
        scorer = NFLAssignmentScorer(labels)
        scorer.score(submission_df)

        or

        scorer = NFLAssignmentScorer(labels_csv='labels.csv')
        scorer.score(submission_df)
        ```

        Args:
            labels_df (pd.DataFrame, optional):
                Dataframe containing the ground truth label boxes.
            labels_csv (str, optional): CSV of the ground truth label.
            check_constraints (bool, optional): Tell the scorer if it
                should check the submission file to meet the competition
                constraints. Defaults to True.
            weight_col (str, optional):
                Column in the labels DataFrame used to applying the scoring
                weight.
            impact_weight (int, optional):
                The weight applied to impacts in the scoring metrics.
                Defaults to 1000.
            iou_threshold (float, optional):
                The minimum IoU allowed to correctly pair a ground truth box
                with a label. Defaults to 0.35.
            remove_sideline (bool, optional):
                Remove sideline players from the labels DataFrame
                before scoring.

        Raises:
            ValueError: if neither `labels_df` nor `labels_csv` is provided.
        """
        if labels_df is None:
            # Read label from CSV
            if labels_csv is None:
                raise ValueError("labels_df or labels_csv must be provided")
            self.labels_df = pd.read_csv(labels_csv)
        else:
            self.labels_df = labels_df.copy()
        if remove_sideline:
            self.labels_df = (
                self.labels_df.query("isSidelinePlayer == False")
                .reset_index(drop=True)
                .copy()
            )
        self.impact_weight = impact_weight
        self.check_constraints = check_constraints
        self.weight_col = weight_col
        self.iou_threshold = iou_threshold

    def check_submission(self, sub):
        """
        Checks that the submission meets all the requirements.

        1. No more than 22 Boxes per frame.
        2. Only one label prediction per video/frame
        3. No duplicate boxes per frame.

        Args:
            sub : submission dataframe.

        Returns:
            True -> Passed the tests
            False -> Failed the test
        """
        # Maximum of 22 boxes per frame.
        max_box_per_frame = sub.groupby(["video_frame"])["label"].count().max()
        if max_box_per_frame > 22:
            print("Has more than 22 boxes in a single frame")
            return False
        # Only one label allowed per frame.
        has_duplicate_labels = sub[["video_frame", "label"]].duplicated().any()
        if has_duplicate_labels:
            print("Has duplicate labels")
            return False
        # Check for unique boxes
        has_duplicate_boxes = (
            sub[["video_frame", "left", "width", "top", "height"]].duplicated().any()
        )
        if has_duplicate_boxes:
            print("Has duplicate boxes")
            return False
        return True

    def add_xy(self, df):
        """
        Adds `x1`, `x2`, `y1`, and `y2` columns necessary for computing IoU.

        The columns are written into `df` itself, which is also returned.

        Note - for pixel math, 0,0 is the top-left corner so box orientation
        defined as right and down (height)
        """

        df["x1"] = df["left"]
        df["x2"] = df["left"] + df["width"]
        df["y1"] = df["top"]
        df["y2"] = df["top"] + df["height"]
        return df

    def merge_sub_labels(self, sub, labels, weight_col="isDefinitiveImpact"):
        """
        Join submission boxes onto the label boxes of the same `video_frame`.

        The join is a right join on the labels, so every ground truth box is
        kept: frames without any submission box get empty `_sub` columns.
        Creates a `sub_label` dataframe which pairs each ground truth box with
        every submission box of its frame.
        Ground truth values are given the `_gt` suffix, submission values are given `_sub` suffix.
        """
        sub = sub.copy()
        labels = labels.copy()

        sub = self.add_xy(sub)
        labels = self.add_xy(labels)

        base_columns = [
            "label",
            "video_frame",
            "x1",
            "x2",
            "y1",
            "y2",
            "left",
            "width",
            "top",
            "height",
        ]

        sub_labels = sub[base_columns].merge(
            labels[base_columns + [weight_col]],
            on=["video_frame"],
            how="right",
            suffixes=("_sub", "_gt"),
        )
        return sub_labels

    def get_iou_df(self, df):
        """
        This function computes the IOU of submission (sub)
        bounding boxes against the ground truth boxes (gt).

        Returns a copy of `df` with an added `iou` column.
        """
        df = df.copy()

        # 1. get the coordinate of inters
        df["ixmin"] = df[["x1_sub", "x1_gt"]].max(axis=1)
        df["ixmax"] = df[["x2_sub", "x2_gt"]].min(axis=1)
        df["iymin"] = df[["y1_sub", "y1_gt"]].max(axis=1)
        df["iymax"] = df[["y2_sub", "y2_gt"]].min(axis=1)

        df["iw"] = np.maximum(df["ixmax"] - df["ixmin"] + 1, 0.0)
        df["ih"] = np.maximum(df["iymax"] - df["iymin"] + 1, 0.0)

        # 2. calculate the area of inters
        df["inters"] = df["iw"] * df["ih"]

        # 3. calculate the area of union
        df["uni"] = (
            (df["x2_sub"] - df["x1_sub"] + 1) * (df["y2_sub"] - df["y1_sub"] + 1)
            + (df["x2_gt"] - df["x1_gt"] + 1) * (df["y2_gt"] - df["y1_gt"] + 1)
            - df["inters"]
        )
        # 4. calculate the overlaps between pred_box and gt_box
        df["iou"] = df["inters"] / df["uni"]

        return df.drop(
            ["ixmin", "ixmax", "iymin", "iymax", "iw", "ih", "inters", "uni"], axis=1
        )

    def filter_to_top_label_match(self, sub_labels):
        """
        Ensures ground truth boxes are only linked to the box
        in the submission file with the highest IoU.
        """
        return (
            sub_labels.sort_values("iou", ascending=False)
            .groupby(["video_frame", "label_gt"])
            .first()
            .reset_index()
        )

    def add_isCorrect_col(self, sub_labels):
        """
        Adds True/False column if the ground truth label
        and submission label are identical and the IoU reaches
        `self.iou_threshold`.
        """
        sub_labels["isCorrect"] = (
            sub_labels["label_gt"] == sub_labels["label_sub"]
        ) & (sub_labels["iou"] >= self.iou_threshold)
        return sub_labels

    def calculate_metric_weighted(
        self, sub_labels, weight_col="isDefinitiveImpact", weight=1000
    ):
        """
        Calculates weighted accuracy score metric.

        Adds a `weight` column to `sub_labels`: `weight` where `weight_col`
        is truthy, 1 otherwise.
        """
        # Vectorised replacement for a row-wise `apply`; `astype(bool)` keeps
        # the same truthiness rule as `if x[weight_col]`.
        sub_labels["weight"] = np.where(
            sub_labels[weight_col].astype(bool), weight, 1
        )
        y_pred = sub_labels["isCorrect"].values
        # Every ground truth box should be matched, so the target is all-True.
        y_true = np.ones_like(y_pred)
        sample_weight = sub_labels["weight"]
        return accuracy_score(y_true, y_pred, sample_weight=sample_weight)

    def score(self, sub, labels_df=None, drop_extra_cols=True):
        """
        Scores the submission file against the labels.

        Returns the evaluation metric score for the helmet
        assignment kaggle competition.

        If `check_constraints` is set to True, will return -999 if the
            submission fails one of the submission constraints.

        The per-box match table is kept on `self.sub_labels` for review.
        """
        if labels_df is None:
            labels_df = self.labels_df.copy()

        if self.check_constraints:
            if not self.check_submission(sub):
                return -999
        sub_labels = self.merge_sub_labels(sub, labels_df, self.weight_col)
        sub_labels = self.get_iou_df(sub_labels).copy()
        sub_labels = self.filter_to_top_label_match(sub_labels).copy()
        sub_labels = self.add_isCorrect_col(sub_labels)
        score = self.calculate_metric_weighted(
            sub_labels, self.weight_col, self.impact_weight
        )
        # Keep `sub_labels for review`
        if drop_extra_cols:
            drop_cols = [
                "x1_sub",
                "x2_sub",
                "y1_sub",
                "y2_sub",
                "x1_gt",
                "x2_gt",
                "y1_gt",
                "y2_gt",
            ]
            sub_labels = sub_labels.drop(drop_cols, axis=1)
        self.sub_labels = sub_labels
        return score

# Validation is only possible in debug mode, where the ground-truth labels
# cover the frames that were predicted.
if debug:
    scorer = NFLAssignmentScorer(labels_df=labels)
    baseline_score = scorer.score(submission_df)
    # this would be 0.33
    print(f"validation score {baseline_score:0.4f}")

# Video with Predictions

In [None]:
import os
import cv2
import subprocess
from IPython.display import Video, display
import pandas as pd

import os
import cv2
import subprocess
from IPython.display import Video, display
import pandas as pd
import numpy as np

def video_with_predictions(
    video_path: str, sub_labels: pd.DataFrame, max_frame=9999, freeze_impacts=True,
    verbose=True
) -> str:
    """
    Annotates a video with both the baseline model boxes and ground truth boxes.

    Every frame gets the ground truth and prediction box of each matched
    helmet, a `<ground truth label>:<predicted label>` caption, and running
    accuracy statistics for all frames seen so far.

    Args:
        video_path: path of the source `.mp4`; its base name (without
            extension) must match the first three underscore-separated tokens
            of `video_frame` in `sub_labels`.
        sub_labels: match table with the columns `video_frame`, `weight`,
            `isCorrect`, `label_gt`, `label_sub` and the box columns
            `left`/`top`/`width`/`height` with `_gt` and `_sub` suffixes.
            The frame is not modified.
        max_frame: stop after this many frames.
        freeze_impacts: when True, a frame containing an impact box
            (weight > 1) is written 60 times so the video pauses on it.
        verbose: print the video name before processing.

    Returns:
        Path of the annotated video, `pred_<video name>.mp4`, written to the
        current working directory.
    """
    VIDEO_CODEC = "MP4V"
    FREEZE_FRAMES = 60
    # Colours are in OpenCV's BGR order.
    HELMET_COLOR = (0, 0, 0)  # Black

    INCORRECT_IMPACT_COLOR = (0, 0, 255)  # Red
    CORRECT_IMPACT_COLOR = (51, 255, 255)  # Yellow

    CORRECT_COLOR = (0, 255, 0)  # Green
    INCORRECT_COLOR = (255, 255, 255)  # White
    WHITE = (255, 255, 255)  # White

    def put_text(img, text, origin, scale=0.5):
        # One place for the shared font / colour / thickness settings.
        cv2.putText(
            img, text, origin, cv2.FONT_HERSHEY_SIMPLEX, scale, WHITE, thickness=1
        )

    video_name = os.path.basename(video_path).replace(".mp4", "")
    if verbose:
        print(f"Running for {video_name}")

    # Keep only this video's rows, once, instead of re-querying the whole
    # table (and depending on a global `video` variable) on every frame.
    video_ids = sub_labels['video_frame'].str.split('_').str[:3].str.join('_')
    video_labels = sub_labels[video_ids == video_name].copy()
    video_labels['frame'] = (
        video_labels['video_frame'].str.split('_').str[-1].astype('int')
    )

    vidcap = cv2.VideoCapture(video_path)
    fps = vidcap.get(cv2.CAP_PROP_FPS)
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_path = "pred_" + video_name + ".mp4"
    tmp_output_path = "tmp_" + output_path
    output_video = cv2.VideoWriter(
        tmp_output_path, cv2.VideoWriter_fourcc(*VIDEO_CODEC), fps, (width, height)
    )
    frame = 0
    try:
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break
            frame += 1

            put_text(img, f"{frame} : {video_name}", (5, 20))
            # Frame counter in the bottom-right corner, (1230, 710) on 1280x720.
            put_text(img, str(frame), (width - 50, height - 10), scale=0.8)

            # Get stats about current state in frame
            stats = video_labels[video_labels['frame'] <= frame]
            is_correct = stats['isCorrect'].astype(bool)
            is_impact = stats['weight'] > 1
            is_nonimpact = stats['weight'] == 1
            correct_nonimp = int((is_nonimpact & is_correct).sum())
            total_nonimp = int(is_nonimpact.sum())
            correct_imp = int((is_impact & is_correct).sum())
            total_imp = int(is_impact.sum())
            # Use the weights stored in the table rather than assuming 1000.
            correct_weighted = int(stats.loc[is_correct, 'weight'].sum())
            total_weighted = int(stats['weight'].sum())
            acc_imp = correct_imp / max(1, total_imp)
            acc_nonimp = correct_nonimp / max(1, total_nonimp)
            acc_weighted = correct_weighted / max(1, total_weighted)
            put_text(
                img,
                f'{acc_imp:0.4f} Impact Boxes Accuracy :      ({correct_imp}/{total_imp})',
                (5, 40),
            )
            put_text(
                img,
                f'{acc_nonimp:0.4f} Non-Impact Boxes Accuracy: ({correct_nonimp}/{total_nonimp})',
                (5, 60),
            )
            put_text(
                img,
                f'{acc_weighted:0.4f} Weighted Accuracy:     ({correct_weighted}/{total_weighted})',
                (5, 80),
            )

            # A frame without boxes is still written (no early return), so the
            # output video stays complete and the function always returns a path.
            boxes = video_labels[video_labels['frame'] == frame]
            for box in boxes.itertuples(index=False):
                impact_box = box.weight > 1
                if box.isCorrect:
                    box_color = CORRECT_IMPACT_COLOR if impact_box else CORRECT_COLOR
                    gt_color = box_color
                elif impact_box:
                    box_color = INCORRECT_IMPACT_COLOR
                    gt_color = INCORRECT_IMPACT_COLOR
                else:
                    box_color = INCORRECT_COLOR
                    gt_color = HELMET_COLOR
                pred_thickness = 3 if impact_box else 1

                gt_left, gt_top = int(box.left_gt), int(box.top_gt)
                gt_width, gt_height = int(box.width_gt), int(box.height_gt)

                # Ground Truth Box
                cv2.rectangle(
                    img,
                    (gt_left, gt_top),
                    (gt_left + gt_width, gt_top + gt_height),
                    gt_color,
                    thickness=1,
                )
                # Prediction Box - absent (NaN) when the ground truth box has
                # no submission box in its frame.
                if not pd.isna(box.left_sub):
                    cv2.rectangle(
                        img,
                        (int(box.left_sub), int(box.top_sub)),
                        (
                            int(box.left_sub + box.width_sub),
                            int(box.top_sub + box.height_sub),
                        ),
                        box_color,
                        thickness=pred_thickness,
                    )

                put_text(
                    img,
                    f"{box.label_gt}:{box.label_sub}",
                    (max(0, gt_left - gt_width), max(0, gt_top - 5)),
                )

            # Repeat frames that contain an impact so the video pauses on them.
            has_impact = bool((boxes['weight'] > 1).any())
            repeats = FREEZE_FRAMES if (has_impact and freeze_impacts) else 1
            for _ in range(repeats):
                output_video.write(img)

            if frame >= max_frame:
                break
    finally:
        # Release both handles even if annotation fails part-way through.
        vidcap.release()
        output_video.release()

    # Not all browsers support the codec, we will re-load the file at tmp_output_path
    # and convert to a codec that is more broadly readable using ffmpeg
    if os.path.exists(output_path):
        os.remove(output_path)
    subprocess.run(
        [
            "ffmpeg",
            "-i",
            tmp_output_path,
            "-crf",
            "18",
            "-preset",
            "veryfast",
            "-vcodec",
            "libx264",
            output_path,
        ]
    )
    os.remove(tmp_output_path)

    return output_path

## Process prediction videos

In [None]:
# Add video column to `sub_labels`
# (the first three underscore-separated tokens of `video_frame`).
scorer.sub_labels['video'] = scorer.sub_labels['video_frame'] \
    .str.split('_').str[:3].str.join('_')

# Directory holding the source videos, one `<video>.mp4` per video id.
video_dir = '../input/nfl-health-and-safety-helmet-assignment/train'

# Render one annotated video per video id present in the scored labels and
# collect the returned output paths in processing order.
out_videos = []
videos = scorer.sub_labels['video'].unique()
for video in tqdm(videos):
    video_out = video_with_predictions(f'{video_dir}/{video}.mp4',
                           scorer.sub_labels,
                          )
    out_videos.append(video_out)

# Display example videos

In [None]:
frac = 0.65  # scaling factor for display
# Embed the third rendered video, scaled down from 1280x720.
example_video = Video(
    data=out_videos[2],
    embed=True,
    height=int(720 * frac),
    width=int(1280 * frac),
)
display(example_video)

In [None]:
# Embed the second rendered video at the same display scale.
second_example_video = Video(
    data=out_videos[1],
    embed=True,
    height=int(720 * frac),
    width=int(1280 * frac),
)
display(second_example_video)