In this notebook I will share one idea for merging tracking data with sideline helmet label information.

The main approach is this: if we can find at least 4 pairs of corresponding points (cx, cy) between the `tracking images` and the `sideline or endzone images`, we can compute the homography `H` for a perspective transformation.

In this notebook I'll use the field line numbers to find the homography `H` between the `tracking images` and the `sideline images`.

Reference: 
- https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide
- https://www.kaggle.com/c/nfl-health-and-safety-helmet-assignment/discussion/264361#1467283
- https://www.kaggle.com/coldfir3/camera-tracking-matching-with-gradient-descent
- https://www.kaggle.com/go5kuramubon/merge-label-and-tracking-data

In [None]:
!pip install imageio-ffmpeg
# !pip install clone+https://github.com/luna983/stitch-aerial-photos.git

In [None]:
import os
import cv2
import imageio
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
from tqdm.auto import tqdm

# Prepare

In [None]:
## Data loading, adapted from
## https://www.kaggle.com/go5kuramubon/merge-label-and-tracking-data

# Root folder of the competition dataset; every CSV below is read from it.
BASE_DIR = '../input/nfl-health-and-safety-helmet-assignment'

# Labels and the sample submission.
labels, ss = (
    pd.read_csv(f'{BASE_DIR}/train_labels.csv'),
    pd.read_csv(f'{BASE_DIR}/sample_submission.csv'),
)

# Player tracking data (train / test).
tr_tracking, te_tracking = (
    pd.read_csv(f'{BASE_DIR}/train_player_tracking.csv'),
    pd.read_csv(f'{BASE_DIR}/test_player_tracking.csv'),
)

# Baseline helmet detections (train / test).
tr_helmets, te_helmets = (
    pd.read_csv(f'{BASE_DIR}/train_baseline_helmets.csv'),
    pd.read_csv(f'{BASE_DIR}/test_baseline_helmets.csv'),
)

# Extra image-level labels.
img_labels = pd.read_csv(f'{BASE_DIR}/image_labels.csv')

In [None]:
##https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide

def add_track_features(tracks, fps=59.94, snap_frame=10):
    """
    Add column features helpful for syncing with video data.

    Parameters
    ----------
    tracks : pandas.DataFrame
        Tracking rows. The columns ``gameKey``, ``playID``, ``player``,
        ``time`` and ``event`` are read.
    fps : float
        Video frame rate used to turn seconds into frame counts.
    snap_frame : int
        Video frame at which the ball snap is assumed to occur.

    Returns
    -------
    pandas.DataFrame
        A copy of ``tracks`` (the input is not modified) with ``time``
        converted to datetime and the columns ``game_play``, ``snap``,
        ``isSnap``, ``team``, ``snap_offset`` (seconds relative to the snap)
        and ``est_frame`` (estimated video frame) added.

    Notes
    -----
    Rows of a play without any ``ball_snap`` event get no snap time, so the
    final integer cast of ``est_frame`` is expected to fail for them.
    """
    tracks = tracks.copy()
    # Play identifier in the "<gameKey>_<playID zero-padded to 6>" form.
    tracks["game_play"] = (
        tracks["gameKey"].astype("str")
        + "_"
        + tracks["playID"].astype("str").str.zfill(6)
    )
    tracks["time"] = pd.to_datetime(tracks["time"])
    # First timestamp flagged as "ball_snap" for every play.
    snap_dict = (
        tracks.query('event == "ball_snap"')
        .groupby("game_play")["time"]
        .first()
        .to_dict()
    )
    tracks["snap"] = tracks["game_play"].map(snap_dict)
    tracks["isSnap"] = tracks["snap"] == tracks["time"]
    # The first character of the player id encodes the side (H / V).
    tracks["team"] = tracks["player"].str[0].replace("H", "Home").replace("V", "Away")
    # total_seconds() yields plain floats on every pandas version, whereas
    # astype("timedelta64[ms]") keeps a timedelta dtype on pandas >= 2.0 and
    # makes the frame arithmetic below raise.
    tracks["snap_offset"] = (tracks["time"] - tracks["snap"]).dt.total_seconds()
    # Estimated video frame
    tracks["est_frame"] = (
        ((tracks["snap_offset"] * fps) + snap_frame).round().astype("int")
    )
    return tracks


# Replace both tracking tables with the versions carrying the video-sync
# columns added by add_track_features (default fps / snap_frame).
tr_tracking = add_track_features(tr_tracking)
te_tracking = add_track_features(te_tracking)


In [None]:
def merge_label_and_tracking(tracking_df, label_df):
    """Attach the nearest-in-time tracking row to every helmet label row.

    Labels are processed per (gameKey, playID, view, label) group; each group
    is joined to the tracking rows of the player whose id equals the label,
    pairing the label ``frame`` with the closest tracking ``est_frame``.

    Parameters
    ----------
    tracking_df : pandas.DataFrame
        Tracking data with ``gameKey``, ``playID``, ``player`` and
        ``est_frame`` columns.
    label_df : pandas.DataFrame
        Helmet labels with ``gameKey``, ``playID``, ``view``, ``label``,
        ``frame`` and ``video_frame`` columns.

    Returns
    -------
    pandas.DataFrame
        All merged groups concatenated and sorted by ``video_frame`` and
        ``label`` with a fresh index.
    """
    # Sorting the index once makes the repeated per-player lookups cheap.
    tracking_by_player = tracking_df.set_index(
        ["gameKey", "playID", "player"]
    ).sort_index()

    merged_parts = []
    group_columns = ["gameKey", "playID", "view", "label"]

    for (game_key, play_id, _view, label), label_rows in tqdm(
        label_df.groupby(group_columns)
    ):
        # skip because these labels are sideline players
        if label in ("H00", "V00"):
            continue

        # merge_asof requires BOTH inputs to be sorted on their join key;
        # a stable sort keeps the original order of equal est_frame values.
        player_track = tracking_by_player.loc[
            (game_key, play_id, label)
        ].sort_values("est_frame", kind="mergesort")
        label_rows = label_rows.sort_values("frame")

        # merge with frame and est_frame
        merged_parts.append(
            pd.merge_asof(
                label_rows,
                player_track,
                left_on="frame",
                right_on="est_frame",
                direction='nearest',
            )
        )

    all_merged_df = pd.concat(merged_parts)
    all_merged_df = all_merged_df.sort_values(["video_frame", "label"], ignore_index=True)

    return all_merged_df

In [None]:
# Join the training labels with the training tracking data (one row per label).
merged_df = merge_label_and_tracking(tr_tracking, labels)

In [None]:
# Display the `frame` column of the merged table.
merged_df.frame

In [None]:
unique_gameKeys = merged_df.gameKey.unique()
check_frame = 1

# A homography is only meaningful for ONE camera image. Filtering on gameKey
# alone would mix frame `check_frame` of every play of that game, so also pin
# the play: take the first play that appears for the first game.
first_game_rows = merged_df[merged_df.gameKey == unique_gameKeys[0]]
first_play_id = first_game_rows.playID.iloc[0]
homography_df = first_game_rows[
    (first_game_rows.playID == first_play_id)
    & (first_game_rows.frame == check_frame)
    & (first_game_rows.view == 'Sideline')
].copy()
homography_df.head()

## PerspectiveTransform

If we know matched keypoints in the two images, we can find the homography `H` using `cv2.findHomography`.

The code below shows how we can transform sideline helmet boxes to the tracking-data scale.

In [None]:
# Tracking positions with the y-axis flipped (53.33 - y), shaped (N, 1, 2)
# as expected by the OpenCV calls that consume them.
trakcing_coordinate = (
    np.column_stack([homography_df['x'], 53.33 - homography_df['y']])
    .astype(np.float32)
    .reshape(-1, 1, 2)
)

# Helmet-box centres in image pixels. The box is described by left/top plus
# width/height and image y grows downward, so the centre lies at
# top + height / 2 (the previous `top - height / 2` pointed above the box).
label_coordinate = (
    np.column_stack([
        homography_df['left'] + homography_df['width'] / 2,
        homography_df['top'] + homography_df['height'] / 2,
    ])
    .astype(np.float32)
    .reshape(-1, 1, 2)
)

In [None]:
# Fit a homography that maps helmet-box points (image pixels) onto the
# tracking positions, using every label/tracking pair of the selected frame.
H, mask = cv2.findHomography(label_coordinate, trakcing_coordinate)
# Project the same helmet points with the fitted homography.
transformed_coordinate =  cv2.perspectiveTransform(label_coordinate, H)

In [None]:
# Show the homography fitted from the player positions.
print(H)

In [None]:
# Overlay the projected helmet points and the tracking positions in one plot.
plt.figure(figsize=(12, 10))

projected_x = transformed_coordinate[:, :, 0]
projected_y = transformed_coordinate[:, :, 1]
plt.scatter(projected_x, projected_y)

tracking_x = homography_df['x']
tracking_y = 53.33 - homography_df['y']  # same y-flip as used for the fit
plt.scatter(tracking_x, tracking_y)

legend_entries = [
    'Transformed coordinate from Sideline helmet box',
    'Ground truth tracking data',
]
plt.legend(legend_entries)

But the important thing is that we can't match each keypoint exactly, because we don't have enough information to find the homography.

If we can match specific point pairs between the tracking images and the sideline & endzone images, we might find a good homography.

I'll use the field line numbers to match both images.

## Video to Frame

In [None]:
# Open the first video referenced by the selected label rows.
video_name = homography_df.video.unique()
video_path = f"{BASE_DIR}/train/{video_name[0]}"
vid = imageio.get_reader(video_path, 'ffmpeg')

# NOTE(review): `check_frame - 1` presumably converts a 1-based label frame
# number into the reader's 0-based index - confirm against the label format.
frame_index = check_frame - 1
img = vid.get_data(frame_index)

plt.figure(figsize=(12, 10))
plt.imshow(img)

## Finding field line number points in sideline

In [None]:
# Hand-picked pixel positions (x, y) of painted yard numbers in the frame.
line_numbers = [
    [110, 600],   ## Home Sideline 20
    [550, 630],   ## Home Sideline 30
    [990, 680],   ## Home Sideline 40
    [1150, 200],  ## Victory Sideline 40
    [770, 200],   ## Victory Sideline 30
]

# Mark every landmark on the frame so the picks can be checked visually.
for px, py in line_numbers:
    img = cv2.circle(img, (px, py), radius=2, color=(0, 255, 255), thickness=10)

plt.figure(figsize=(12, 10))
plt.imshow(img)

## 

## Finding field line number points in tracking data
![images](https://drive.google.com/uc?export=view&id=1IdUQHo9G673ifp-mIrwiG_ep0q88H13N)

If we treat the tracking `x`, `y` as coordinates in this image, we can estimate where the field line number points are.

In [None]:
# Field width in yards. The scatter plots in this notebook flip the tracking
# y-axis with 53.33, so the landmarks use the same value instead of 53.3.
FIELD_WIDTH_YD = 53.33
# Estimated distance of the painted numbers from the sideline, in yards.
NUMBER_OFFSET_YD = 10

# Tracking-space (x, y) of each landmark, in the same order as the pixel
# landmarks picked in the sideline frame.
projection_numbers = [[30, FIELD_WIDTH_YD - NUMBER_OFFSET_YD],  ## Home Sideline 20
                      [40, FIELD_WIDTH_YD - NUMBER_OFFSET_YD],  ## Home Sideline 30
                      [50, FIELD_WIDTH_YD - NUMBER_OFFSET_YD],  ## Home Sideline 40
                      [50, NUMBER_OFFSET_YD],                   ## Victory Sideline 40
                      [40, NUMBER_OFFSET_YD]]                   ## Victory Sideline 30

In [None]:
# Fit the image -> tracking homography from the hand-picked landmarks.
# reshape(-1, 2) keeps one (x, y) row per landmark without hard-coding the
# landmark count, so adding or removing a landmark does not break this cell.
image_points = np.float32(line_numbers).reshape(-1, 2)
field_points = np.float32(projection_numbers).reshape(-1, 2)
H, mask = cv2.findHomography(image_points, field_points)

In [None]:
# Show the homography fitted from the yard-number landmarks.
print(H)

In [None]:
# Project the helmet points again, this time with the landmark-based homography.
transformed_coordinate =  cv2.perspectiveTransform(label_coordinate, H)

In [None]:
# Compare the landmark-based projection of the helmet points with the
# tracking positions.
plt.figure(figsize=(12, 10))

projected_x = transformed_coordinate[:, :, 0]
projected_y = transformed_coordinate[:, :, 1]
plt.scatter(projected_x, projected_y)

tracking_x = homography_df['x']
tracking_y = 53.33 - homography_df['y']  # y-axis flipped for display
plt.scatter(tracking_x, tracking_y)

legend_entries = [
    'Transformed coordinate from Sideline helmet box',
    'Ground truth tracking data',
]
plt.legend(legend_entries)

## Next steps
- Matching labels using the homography information
- Build a field number detection model?
- Merging with MOT models like DeepSORT, FairMOT

**Feature Matching**

In [None]:
class SiftKpDesc():
    """Simple holder pairing keypoint locations with their descriptors."""

    def __init__(self, kp, desc):
        # kp:   keypoints as (x, y) coordinates        -> N x 2
        # desc: descriptors computed at the keypoints  -> N x 128
        self.kp, self.desc = kp, desc


class SiftMatching:
    """Match SIFT keypoints between two images and visualise the matches.

    The images are passed in as arrays. ``run`` detects keypoints in both,
    matches them with a nearest / second-nearest ratio test and draws the
    matches side by side.
    """

    # Drawing colours as 3-channel triples in OpenCV's BGR order.
    _BLUE = [255, 0, 0]
    _GREEN = [0, 255, 0]
    _RED = [0, 0, 255]
    _CYAN = [255, 255, 0]

    _line_thickness = 2
    _radius = 5
    _circ_thickness = 2

    def __init__(self, img_1, img_2, results_fldr='', nfeatures=2000, gamma=0.8):
        """Store the two images and the matching parameters.

        Args:
            img_1: First image array.
            img_2: Second image array.
            results_fldr: Folder that receives a ``results`` sub-folder with
                the rendered match image. When empty, nothing is written to
                disk and the image is only kept in ``correspondence_img``.
            nfeatures: Maximum number of SIFT features requested per image.
            gamma: Ratio-test threshold (best / second-best distance).
        """
        self.img_1_bgr = img_1
        self.img_2_bgr = img_2

        self.nfeatures = nfeatures
        self.gamma = gamma

        # draw_correspondence needs both attributes. They used to be derived
        # from image file paths; the images are now arrays, so a fixed prefix
        # and an optional output folder are used instead.
        self.result_fldr = os.path.join(results_fldr, 'results') if results_fldr else ''
        self.prefix = 'img1_img2'

        # Most recent visualisation produced by draw_correspondence.
        self.correspondence_img = None

    def read_image(self, img_path):
        """Load ``img_path`` as a 3-channel colour image via ``cv2.imread``."""
        img_bgr = cv2.imread(img_path, cv2.IMREAD_COLOR)

        return img_bgr

    def get_sift_features(self, img_bgr, nfeatures=2000):
        """Detect SIFT keypoints in ``img_bgr`` and compute their descriptors.

        Returns:
            SiftKpDesc: keypoint (x, y) tuples plus the descriptor array as
            returned by ``detectAndCompute``.
        """
        img_gray = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY)

        # Prefer cv2.SIFT_create when the installed OpenCV provides it and fall
        # back to the contrib-only cv2.xfeatures2d location otherwise.
        sift_factory = getattr(cv2, 'SIFT_create', None)
        if sift_factory is None:
            sift_factory = cv2.xfeatures2d.SIFT_create
        sift_obj = sift_factory(nfeatures)

        # kp_list_obj is a list of "KeyPoint" objects with location stored as tuple in "pt" attribute
        kp_list_obj, desc = sift_obj.detectAndCompute(image=img_gray, mask=None)

        kp = [x.pt for x in kp_list_obj]

        return SiftKpDesc(kp, desc)

    def match_features(self, sift_kp_desc_obj1, sift_kp_desc_obj2, gamma=0.8):
        """Match the keypoints of object 1 against object 2 with a ratio test.

        A keypoint of object 1 is paired with its nearest descriptor in
        object 2 when ``nearest / second_nearest <= gamma``.

        Returns:
            list: ``[x1, y1, x2, y2]`` entries, one per accepted match. Empty
            when either side has no descriptors or object 2 has fewer than
            two (the ratio test needs two candidates).
        """
        correspondence = []  # list of lists of [x1, y1, x2, y2]

        desc_1 = sift_kp_desc_obj1.desc
        desc_2 = sift_kp_desc_obj2.desc
        if desc_1 is None or desc_2 is None or len(desc_2) < 2:
            return correspondence

        for kp_1, d_1 in zip(sift_kp_desc_obj1.kp, desc_1):
            sc = np.linalg.norm(d_1 - desc_2, axis=1)
            best, runner_up = np.argsort(sc)[:2]

            # Two candidates at distance zero are indistinguishable; skipping
            # them also avoids a division by zero.
            if sc[runner_up] == 0:
                continue

            if sc[best] / sc[runner_up] <= gamma:
                correspondence.append([*kp_1, *sift_kp_desc_obj2.kp[best]])

        return correspondence

    def draw_correspondence(self, correspondence, img_1, img_2):
        """Draw the matches on a side-by-side copy of both images.

        The rendered image is stored in ``self.correspondence_img`` and
        returned. It is also written to ``<result_fldr>/<prefix>_sift_corr.jpg``
        when a results folder was configured.
        """
        # Promote single-channel images to 3 channels so colours can be drawn.
        if len(img_1.shape) == 2:
            img_1 = np.repeat(img_1[:, :, np.newaxis], 3, axis=2)

        if len(img_2.shape) == 2:
            img_2 = np.repeat(img_2[:, :, np.newaxis], 3, axis=2)

        h, w, _ = img_1.shape

        img_stack = np.hstack((img_1, img_2))

        for x1, y1, x2, y2 in correspondence:
            x1_d = int(round(x1))
            y1_d = int(round(y1))

            # Points of the second image are shifted by the first image's width.
            x2_d = int(round(x2) + w)
            y2_d = int(round(y2))

            cv2.circle(img_stack, (x1_d, y1_d), radius=self._radius, color=self._BLUE,
                       thickness=self._circ_thickness, lineType=cv2.LINE_AA)

            cv2.circle(img_stack, (x2_d, y2_d), radius=self._radius, color=self._BLUE,
                       thickness=self._circ_thickness, lineType=cv2.LINE_AA)

            cv2.line(img_stack, (x1_d, y1_d), (x2_d, y2_d), color=self._CYAN,
                     thickness=self._line_thickness)

        self.correspondence_img = img_stack

        if self.result_fldr:
            os.makedirs(self.result_fldr, exist_ok=True)
            fname = os.path.join(self.result_fldr, self.prefix + '_sift_corr.jpg')
            cv2.imwrite(fname, img_stack)

        return img_stack

    def run(self):
        """Detect, match and draw; return the list of ``[x1, y1, x2, y2]`` matches."""
        sift_kp_desc_obj1 = self.get_sift_features(self.img_1_bgr, nfeatures=self.nfeatures)
        sift_kp_desc_obj2 = self.get_sift_features(self.img_2_bgr, nfeatures=self.nfeatures)

        correspondence = self.match_features(sift_kp_desc_obj1, sift_kp_desc_obj2, gamma=self.gamma)

        self.draw_correspondence(correspondence, self.img_1_bgr, self.img_2_bgr)

        return correspondence

In [None]:
import numpy as np

In [None]:
import cv2
# Read two consecutive frames from the currently open reader `vid`:
# index check_frame - 1 and index check_frame.
img = vid.get_data(check_frame - 1)
img2 = vid.get_data(check_frame)


In [None]:
import cv2
   
# Not used by the rest of this cell: the path below is hard-coded.
video_name = homography_df.video.unique()
# video_path = f"{BASE_DIR}/train/57905_002404_Sideline.mp4"
video_path = f"{BASE_DIR}/train/57584_000336_Sideline.mp4"

# Rebind `vid` to a reader on the hard-coded sideline clip.
vid = imageio.get_reader(video_path, 'ffmpeg')
    

In [None]:
# Match ORB features between two frames of the same video: frame 0 is the
# "query" image and frame 100 the "train" image.
query_img = np.array(vid.get_data(0))
train_img = np.array(vid.get_data(100))

# Convert to grayscale. imageio returns frames in RGB channel order, so the
# RGB conversion code is the matching one (BGR2GRAY would swap the red and
# blue weights).
query_img_bw = cv2.cvtColor(query_img, cv2.COLOR_RGB2GRAY)
train_img_bw = cv2.cvtColor(train_img, cv2.COLOR_RGB2GRAY)

# Smooth away fine texture before detecting features. The third positional
# argument of GaussianBlur is sigmaX, not the border mode: the earlier call
# passed cv2.BORDER_DEFAULT (== 4) there, i.e. it blurred with sigma 4. The
# same sigma is kept, but now stated explicitly together with the border mode.
BLUR_KERNEL = (35, 35)
BLUR_SIGMA = 4
query_img_bw = cv2.GaussianBlur(
    query_img_bw, BLUR_KERNEL, sigmaX=BLUR_SIGMA, borderType=cv2.BORDER_DEFAULT
)
train_img_bw = cv2.GaussianBlur(
    train_img_bw, BLUR_KERNEL, sigmaX=BLUR_SIGMA, borderType=cv2.BORDER_DEFAULT
)

# Initialize the ORB detector algorithm (at most 150 features per image)
orb = cv2.ORB_create(150)

# Detect the keypoints and compute the descriptors for both images
queryKeypoints, queryDescriptors = orb.detectAndCompute(query_img_bw, None)
trainKeypoints, trainDescriptors = orb.detectAndCompute(train_img_bw, None)

# detectAndCompute yields no descriptors when nothing is detected; fail with a
# clear message instead of an obscure matcher error.
if queryDescriptors is None or trainDescriptors is None:
    raise RuntimeError("ORB found no keypoints in at least one of the frames")

# ORB descriptors are binary strings, so they must be compared with the
# Hamming distance rather than BFMatcher's default L2 norm.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING)
# Best matches first, so that slicing picks the strongest ones.
matches = sorted(
    matcher.match(queryDescriptors, trainDescriptors),
    key=lambda m: m.distance,
)

# Draw the three best matches on a side-by-side image of both frames
final_img = cv2.drawMatches(query_img, queryKeypoints,
                            train_img, trainKeypoints, matches[:3], None)

final_img = cv2.resize(final_img, (1000, 650))

# Show the final image
plt.figure(figsize=(12, 10))
plt.imshow(final_img)

In [None]:
# Print the location (`pt`) of the first keypoint detected in each image.
print(queryKeypoints[0].pt)
print(trainKeypoints[0].pt)

In [None]:
# Not displayed: only a cell's last expression is echoed, and a print follows.
train_img.shape
# Half of 720 and half of 1280.
print(720/2,1280/2)

In [None]:
# Binarise the grayscale query frame with a Gaussian-weighted adaptive
# threshold and display the result.

# Only needed by an optional dilation step, which is currently not applied.
kernel = np.ones((2, 2), 'uint8')

block_size, offset = 51, 28
img = cv2.adaptiveThreshold(
    query_img_bw,
    255,
    cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    cv2.THRESH_BINARY,
    block_size,
    offset,
)

plt.figure(figsize=(12, 10))
plt.imshow(img)

In [None]:
# Run Canny edge detection on a copy of the grayscale query frame.
img = query_img.copy()
gray = query_img_bw.copy()

low_threshold, high_threshold = 50, 150
edges = cv2.Canny(gray, low_threshold, high_threshold, apertureSize=3)

# Display the edge map
plt.figure(figsize=(15, 10))
plt.imshow(edges)

In [None]:
# Print the two keypoint locations paired by the match at index 2:
# its query-image keypoint, then its train-image keypoint.
print(queryKeypoints[matches[2].queryIdx].pt)
print(trainKeypoints[matches[2].trainIdx].pt)
# queryDescriptors,
# trainDescriptors

In [None]:
# Collect the train-side keypoint index of every match, reporting each index
# that was already claimed by an earlier match.
arr = []
for m in matches:
    train_idx = m.trainIdx
    if train_idx in arr:
        print("arr!!!!", train_idx)
    arr.append(train_idx)

In [None]:
# Train keypoint index recorded at position 92 of `arr`
# (raises IndexError when `arr` holds fewer than 93 entries).
arr[92]

In [None]:
# Display the grayscale query frame.
plt.figure(figsize=(15,10))
plt.imshow(query_img_bw)

In [None]:
# List the entries of the dataset's train folder.
os.listdir(f"{BASE_DIR}/train/")