# Player tracker (WIP)

This notebook uses the pretrained YOLOv5 model to detect persons. Each individual player is then followed by matching every tracked bounding box to the nearest bounding box in the next frame. This approach has several issues: e.g. two bounding boxes are sometimes merged into one, players leave the frame, etc.

In [None]:
!pip install ffmpeg-python

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.auto import tqdm
from PIL import Image, ImageDraw
from scipy.spatial import distance_matrix
import cv2
import torch
from IPython.display import Video
import ffmpeg

In [None]:
# Get the frames

def get_frames(filepath):
    """Decode every frame of a video file into RGB arrays.

    Args:
        filepath: Path to a video file readable by OpenCV.

    Returns:
        A list with one array per decoded frame, in reading order, converted
        from OpenCV's BGR channel order to RGB.

    Raises:
        OSError: If ``filepath`` does not exist or the video cannot be opened.
    """
    if not os.path.exists(filepath):
        raise OSError(f"The filepath {filepath} does not exist.")
    vidcap = cv2.VideoCapture(filepath)
    try:
        # An existing file can still be unreadable (unsupported codec,
        # truncated download, ...); fail loudly instead of returning [].
        if not vidcap.isOpened():
            raise OSError(f"The video {filepath} could not be opened.")
        frames = []
        success, frame = vidcap.read()
        while success:
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # Change to rgb
            success, frame = vidcap.read()  # Get the next frame
    finally:
        # Always hand the capture handle back, even if decoding fails midway.
        vidcap.release()
    return frames


# Location of the competition data and of the example clip analysed below.
root = "/kaggle/input/nfl-health-and-safety-helmet-assignment"
example_filepath = f"{root}/train/57784_001741_Sideline.mp4"

# Decode the whole clip into memory as a list of RGB frames.
frames = get_frames(example_filepath)

# Frame size is taken from the first frame; the leading two entries of its
# shape are (height, width), anything after that is ignored.
frame_height, frame_width, *_ = frames[0].shape

In [None]:
# Load the small pretrained YOLOv5 model ('yolov5s') through torch.hub.
# Conveniently, it has been trained to detect humans (among other things),
# so it can be used as-is to find the players.
model = torch.hub.load('ultralytics/yolov5', 'yolov5s')

# Fix the plotting...
%matplotlib inline

In [None]:
def get_predictions(frames):
    """Run the detector on every frame and keep only the person boxes.

    Uses the module-level ``model``; its result is expected to expose
    ``.pandas().xyxy[0]`` as a DataFrame with at least the columns
    ``xmin``, ``ymin``, ``xmax``, ``ymax`` and ``class``.

    Args:
        frames: Iterable of images accepted by ``model``.

    Returns:
        A list with one DataFrame per frame. Each DataFrame holds the
        detections of class 0 (persons) with a fresh 0..n-1 index and three
        derived columns: ``center_x``, ``center_y`` (box centre, with the
        half-extent floored) and ``area`` (box width times height).
        Frames without any person yield an empty DataFrame that still has
        these columns.
    """
    predictions = []
    for frame in tqdm(frames):
        df = model(frame).pandas().xyxy[0]  # Predict
        # Restrict to persons and renumber the rows from zero.
        df = df[df["class"] == 0].reset_index(drop=True)

        # Column-wise arithmetic instead of a row-wise ``apply``: same values,
        # but it also works when the frame contains no detections at all.
        box_width = df["xmax"] - df["xmin"]
        box_height = df["ymax"] - df["ymin"]
        df["center_x"] = df["xmin"] + box_width // 2
        df["center_y"] = df["ymin"] + box_height // 2
        df["area"] = box_width * box_height
        predictions.append(df)
    return predictions


def draw_bounding_boxes(frames, predictions, color):
    """Return copies of the frames with one rectangle outlined per box.

    Args:
        frames: Iterable of image arrays accepted by ``Image.fromarray``.
        predictions: Iterable of DataFrames, paired with ``frames`` in order;
            each row provides ``xmin``, ``ymin``, ``xmax`` and ``ymax``.
        color: Outline colour handed to PIL's ``rectangle``.

    Returns:
        A list of arrays, one per (frame, DataFrame) pair, with the boxes
        drawn on them.
    """
    annotated = []
    for frame, boxes in tqdm(zip(frames, predictions)):
        canvas = Image.fromarray(frame)
        pen = ImageDraw.Draw(canvas)
        # One rectangle per detected/tracked person in this frame.
        for _, box in boxes.iterrows():
            top_left = (box["xmin"], box["ymin"])
            bottom_right = (box["xmax"], box["ymax"])
            pen.rectangle([top_left, bottom_right], outline=color)
        annotated.append(np.array(canvas))
    return annotated


# Detect the persons in every frame, then outline each detection in black.
predictions = get_predictions(frames=frames)
frames_with_bbox = draw_bounding_boxes(
    frames=frames, predictions=predictions, color="black"
)

In [None]:
# Sanity check: display the first annotated frame to confirm the boxes are drawn.
fig, ax = plt.subplots(figsize=(10, 8))
ax.imshow(frames_with_bbox[0])

In [None]:
# Make a video with the bounding boxes

# Code copied from https://github.com/kkroening/ffmpeg-python/issues/246#issuecomment-520200981
def vidwrite(fn, images, framerate, vcodec='libx264'):
    """Encode a stack of RGB frames into a video file by piping them to ffmpeg.

    Args:
        fn: Output filename; an existing file is overwritten.
        images: Array (or array-like) of shape (n, height, width, channels).
            Each frame is cast to uint8 and sent to ffmpeg as raw ``rgb24``.
        framerate: Output frame rate, passed to ffmpeg as ``r``.
        vcodec: Video codec name passed to ffmpeg.

    Raises:
        ValueError: If ``images`` is not 4-dimensional.
        RuntimeError: If the ffmpeg process exits with a non-zero status.
    """
    images = np.asarray(images)
    if images.ndim != 4:
        raise ValueError(
            "images must have shape (n, height, width, channels), "
            f"got {images.shape}"
        )
    _, height, width, _ = images.shape
    process = (
        ffmpeg
            .input('pipe:', format='rawvideo', pix_fmt='rgb24', s='{}x{}'.format(width, height))
            .output(fn, pix_fmt='yuv420p', vcodec=vcodec, r=framerate)
            .overwrite_output()
            .run_async(pipe_stdin=True)
    )
    try:
        for frame in tqdm(images):
            process.stdin.write(frame.astype(np.uint8).tobytes())
    finally:
        # Whatever happens while writing, close the pipe and reap the child
        # so no ffmpeg process is left waiting on stdin.
        try:
            process.stdin.close()
        finally:
            returncode = process.wait()
    if returncode != 0:
        raise RuntimeError(f"ffmpeg exited with status {returncode} while writing {fn}")

# 60 fps is a guess that may not match the source clip; that does not matter
# for a visual check at this point.
vidwrite(fn="output.mp4", images=frames_with_bbox, framerate=60)

In [None]:
# Display the video with the detection boxes, embedded in the notebook output.
Video("/kaggle/working/output.mp4", embed=True, width=800)

In [None]:
def update_bounding_boxes(df_current, df_next, last_changed,
                          frame_size=None, margin=5, max_unmatched=3):
    """Update the tracked bounding boxes with the detections of the next frame.

    Simple greedy nearest-neighbour tracking: all (tracked box, detection)
    pairs are visited in order of increasing centre distance and a pair is
    accepted when neither the tracked box nor the detection has been used
    yet. Each tracked box therefore follows at most one detection, and each
    detection is claimed by at most one tracked box.

    Args:
        df_current: DataFrame of tracked boxes with columns ``xmin``,
            ``ymin``, ``xmax``, ``ymax``, ``center_x`` and ``center_y``.
            Its index labels identify the tracks. It is not modified.
        df_next: DataFrame of detections in the next frame, with the same
            columns.
        last_changed: Dict mapping a track label to the number of
            consecutive frames in which the track was not matched. It is
            updated in place and also returned.
        frame_size: Optional ``(height, width)`` of the video frame. Defaults
            to the module-level ``frame_height`` and ``frame_width``.
        margin: Distance in pixels from the frame border within which a box
            counts as touching the border.
        max_unmatched: A track is dropped once it has been unmatched for
            more than this many consecutive frames while touching the border.

    Returns:
        Tuple ``(df_current, last_changed)``: a new DataFrame holding the
        updated tracks (dropped tracks removed) and the updated counter dict.
    """
    if frame_size is None:
        frame_size = (frame_height, frame_width)
    height, width = frame_size

    # Work on a copy so the caller's frame is never modified in place.
    df_current = df_current.copy()
    track_labels = df_current.index
    matched_labels = set()

    if len(df_current) and len(df_next):
        d_matrix = distance_matrix(
            df_current[["center_x", "center_y"]].to_numpy(dtype=float),
            df_next[["center_x", "center_y"]].to_numpy(dtype=float),
        )
        n_next = d_matrix.shape[1]
        max_matches = min(d_matrix.shape)

        # Greedy one-to-one assignment, closest pairs first.
        used_rows, used_cols = set(), set()
        pairs = []
        for flat in np.argsort(d_matrix, axis=None, kind="stable"):
            row, col = divmod(int(flat), n_next)
            if row in used_rows or col in used_cols:
                continue
            used_rows.add(row)
            used_cols.add(col)
            pairs.append((row, col))
            if len(pairs) == max_matches:
                break

        # Rows of the distance matrix are positions, not index labels: once a
        # track has been dropped the two differ, so translate before writing.
        for row, col in pairs:
            label = track_labels[row]
            df_current.loc[label] = df_next.iloc[col]
            matched_labels.add(label)

    # Remove bounding boxes that try to leave the frame: tracks that have not
    # been matched for a while and sit at the frame border.
    remove_these = []
    for label in list(last_changed):
        if label in matched_labels:
            last_changed[label] = 0
        else:
            last_changed[label] += 1
        if last_changed[label] > max_unmatched and label in df_current.index:
            box = df_current.loc[label]
            if (box["ymin"] < margin
                    or box["xmin"] < margin
                    or box["ymax"] > height - margin
                    or box["xmax"] > width - margin):
                remove_these.append(label)

    df_current = df_current.drop(index=remove_these)
    for label in remove_these:
        del last_changed[label]

    return df_current, last_changed


# Follow each player: start from the detections of the first frame and carry
# the tracked boxes forward one frame at a time, keeping a snapshot per frame.
df_current = predictions[0].copy()
last_changed = dict.fromkeys(range(len(df_current)), 0)
history = [df_current.copy()]

tmps = []
for df in tqdm(predictions[1:]):
    df_current, last_changed = update_bounding_boxes(df_current, df, last_changed)
    history.append(df_current.copy())

In [None]:
# Overlay the tracked boxes in blue on the frames that already carry the
# black detection boxes. Each blue box tries to represent one individual player.
frames_with_bbox2 = draw_bounding_boxes(
    frames=frames_with_bbox, predictions=history, color="blue"
)

vidwrite(fn="output2.mp4", images=frames_with_bbox2, framerate=60)

In [None]:
# Display the video with the blue tracking boxes, embedded in the notebook output.
Video("/kaggle/working/output2.mp4", embed=True, width=800)

In [None]:
# TODO: Handle multiple boxes trying to represent a single player.
# This seems to be caused by a bounding box leaving one player for another.

In [None]:
# Draw only the tracked box with index label 0 in blue, on the clean frames,
# to follow a single player through the clip.
first_track_only = [tracked.loc[0:0] for tracked in history]
frames_with_bbox3 = draw_bounding_boxes(frames, first_track_only, color="blue")

vidwrite(fn="output3.mp4", images=frames_with_bbox3, framerate=60)

In [None]:
# Display the single-player video, embedded in the notebook output.
Video("/kaggle/working/output3.mp4", embed=True, width=800)

In [None]:
# The beginning goes well, but at the end the bounding boxes jump everywhere.
# Let's investigate a single player and its speed.

# Box of the track labelled 0 in every frame where that track still exists.
player_bbox = [df.loc[0] for df in history if 0 in df.index]

center_columns = ["center_x", "center_y"]
df_player = pd.DataFrame(
    [box[center_columns].to_list() for box in player_bbox],
    columns=center_columns,
)

# Frame-to-frame displacement of the box centre; the first row has no
# predecessor, so its displacement is set to zero.
positions = df_player[center_columns]
df_player[["speed_x", "speed_y"]] = positions - positions.shift(1)
df_player.loc[0, ["speed_x", "speed_y"]] = 0

# NOTE(review): "speed" holds the squared displacement (no square root is
# taken) - confirm whether that is intended.
df_player["speed"] = df_player["speed_x"].pow(2) + df_player["speed_y"].pow(2)
df_player

In [None]:
# Left: the "speed" column over all rows. Right: only the rows up to label 190.
fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(14, 6))
full_ax, zoom_ax = axs
df_player["speed"].plot(ax=full_ax)
df_player.loc[:190, "speed"].plot(ax=zoom_ax)

In [None]:
# I guess this could be smoothed out to get rid of the huge jumps.