In [None]:
# Install helmet-assignment helper code
!pip install ../input/helmet-assignment-helpers/helmet-assignment-main/ > /dev/null 2>&1
from helmet_assignment.score import NFLAssignmentScorer, check_submission
from helmet_assignment.features import add_track_features

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input as InceptionV3_preprocess_input
from tensorflow.keras.applications.efficientnet import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input as Efficient_preprocess_input
from tensorflow.keras.applications.resnet import ResNet50
from tensorflow.keras.applications.resnet import preprocess_input as ResNet50_preprocess_input
from tensorflow.keras.applications.efficientnet import EfficientNetB7
from tensorflow.keras.applications.vgg16 import preprocess_input as VGG_preprocess_input 
from tensorflow.keras.applications.vgg16 import VGG16
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from sklearn.cluster import SpectralBiclustering
import matplotlib.pylab as plt
from tqdm.auto import tqdm
from PIL import Image
import shutil
import cv2
import os

In [None]:
# Dataset location and debug-run settings.
BASE_DIR = '../input/nfl-health-and-safety-helmet-assignment'
n_debug_samples = 2
random_state = 42

# Run in debug mode unless during submission: debug is on exactly when the
# test folder holds 6 entries.
n_test_videos = len(os.listdir('../input/nfl-health-and-safety-helmet-assignment/test/'))
debug = (n_test_videos == 6)

# Read in the data. Debug runs use the training files (so ground-truth labels
# double as the helmet boxes); otherwise the test tracking and baseline boxes.
labels = pd.read_csv(f'{BASE_DIR}/train_labels.csv')
if debug:
    tracking_path = f'{BASE_DIR}/train_player_tracking.csv'
    helmets_path = f'{BASE_DIR}/train_labels.csv'
else:
    tracking_path = f'{BASE_DIR}/test_player_tracking.csv'
    helmets_path = f'{BASE_DIR}/test_baseline_helmets.csv'
tracking = pd.read_csv(tracking_path)
helmets = pd.read_csv(helmets_path)

tracking = add_track_features(tracking)

In [None]:
def add_cols(df):
    """Derive `game_play` (and `video`, when absent) from the `video_frame` column.

    `game_play` is the first two underscore-separated fields of `video_frame`;
    `video` is the first three fields plus the '.mp4' suffix. An existing
    `video` column is left untouched. The frame is modified in place and
    returned.
    """
    id_parts = df['video_frame'].str.split('_')
    df['game_play'] = id_parts.str[:2].str.join('_')
    if 'video' in df.columns:
        return df
    df['video'] = id_parts.str[:3].str.join('_') + '.mp4'
    return df

if debug:
    helmets = add_cols(helmets)
    labels = add_cols(labels)
    # Select up to `n_debug_samples` Sideline videos to debug with.
    # The view filter is applied BEFORE sampling: filtering the already-sampled
    # videos (as was done previously) could leave fewer than `n_debug_samples`
    # videos -- or none at all, which breaks every later cell.
    sideline_videos = labels.loc[labels['view'] == 'Sideline', 'video'].drop_duplicates()
    sample_videos = sideline_videos \
        .sample(min(n_debug_samples, len(sideline_videos)), random_state=random_state) \
        .to_list()
    # game_play is the first two underscore-separated fields of the video name.
    sample_gameplays = ['_'.join(x.split('_')[:2]) for x in sample_videos]
    tracking = tracking[tracking['game_play'].isin(sample_gameplays)]
    helmets = helmets[helmets['video'].isin(sample_videos)]
    labels = labels[labels['video'].isin(sample_videos)]
tracking.shape, helmets.shape, labels.shape

# Deepsort Postprocessing


In [None]:
import sys
sys.path.append('../input/easydict-master/easydict-master/')
sys.path.append('../input/yolov5-deepsort-pytorch/Yolov5_DeepSort_Pytorch-master/Yolov5_DeepSort_Pytorch-master/deep_sort_pytorch/')
from deep_sort.deep_sort import DeepSort
from utils.parser import get_config

## Deepsort config


In [None]:
%%writefile deepsort.yaml

# DeepSort tracker settings. Every key below is read in `deepsort_helmets`
# as cfg.DEEPSORT.<KEY> and forwarded to the `DeepSort` constructor.
DEEPSORT:
  # Checkpoint path, passed as the first positional argument of DeepSort.
  REID_CKPT: "../input/yolov5-deepsort-pytorch/ckpt.t7"
  MAX_DIST: 0.2
  MIN_CONFIDENCE: 0.3
  NMS_MAX_OVERLAP: 0.5
  MAX_IOU_DISTANCE: 0.9
  MAX_AGE: 15
  # Also used by `deepsort_helmets` as the frame threshold after which
  # plotting starts when plot=True.
  N_INIT: 1
  NN_BUDGET: 40

In [None]:
"""
Helper functions from yolov5 to plot deepsort labels.
"""

def compute_color_for_id(label):
    """
    Map an id to a fixed colour, so the same id always gets the same colour.

    Returns a 3-tuple of ints; each component is a palette constant scaled by
    a quadratic function of the id and reduced modulo 255.
    """
    scale = label ** 2 - label + 1
    return tuple(
        int((base * scale) % 255)
        for base in (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
    )

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    """Plot one bounding box (and an optional text label) on image `im` using OpenCV.

    Args:
        x: box corners as (x1, y1, x2, y2); each value is truncated with int().
        im: contiguous image array; it is drawn on in place.
        color: colour passed to OpenCV. A random 3-component colour is used
            when omitted (or falsy).
        label: optional text drawn on a filled rectangle above the top-left
            corner of the box.
        line_thickness: rectangle line thickness; a falsy value falls back to a
            thickness derived from the image size.

    Returns:
        The same `im` object, after drawing.
    """
    # Imported here because the notebook never imports the `random` module
    # itself (only `from random import randint`, in a later cell); without it
    # the random-colour fallback below raised NameError.
    import random

    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        # Background rectangle sized to the text, sitting just above the box corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return im

In [None]:
def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='deepsort.yaml',
                     plot=False,
                     plot_frames=None):
    """Run DeepSort over one video's helmet boxes and attach a track id to each box.

    The video's frames are extracted with ffmpeg into /kaggle/working/temp,
    each frame's boxes are fed to a fresh DeepSort tracker in ascending frame
    order, and the tracker outputs are joined back onto the boxes (nearest
    `left` coordinate) as `deepsort_cluster`.

    Args:
        video_data: helmet boxes of a single video. Must provide the columns
            `video`, `frame`, `video_frame`, `left`, `top`, `width`, `height`.
        video_dir: directory containing `<video>.mp4`.
        deepsort_config: YAML file merged into the DeepSort configuration.
        plot: when True, plot the tracker output of every frame whose number
            is greater than N_INIT.
        plot_frames: frame numbers to plot regardless of `plot`
            (default: none).

    Returns:
        Concatenation of the per-frame DataFrames with `x`/`y` box centres
        added and, for frames where the tracker produced outputs,
        `top_deepsort` and `deepsort_cluster` columns.

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails to extract the frames.
    """
    import subprocess  # local import: not part of the notebook's import cells

    frames_dir = '/kaggle/working/temp'
    plot_frames = () if plot_frames is None else plot_frames

    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)

    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)

    # Take the video name from the data itself instead of a notebook-global
    # loop variable, so the function works wherever it is called from.
    video_name = str(video_data['video'].iloc[0])
    if video_name.endswith('.mp4'):
        video_name = video_name[:-len('.mp4')]
    video_file = f'{video_dir}/{video_name}.mp4'

    if os.path.exists(frames_dir):
        shutil.rmtree(frames_dir)
    os.mkdir(frames_dir)
    # Extract every frame as <n>.png. The output path is absolute so it always
    # matches the directory the frames are read from below, whatever the
    # current working directory is.
    subprocess.run(['ffmpeg', '-hide_banner', '-loglevel', 'fatal', '-nostats',
                    '-i', video_file, f'{frames_dir}/%d.png'],
                   check=True)

    ds = []
    try:
        # Group by the scalar key 'frame' (not ['frame']): a one-element list
        # makes recent pandas yield 1-tuples, which breaks the file name and
        # the comparisons below.
        for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):
            d = d.copy()  # groupby hands out slices; copy before adding columns
            # Box centres; together with width/height these are what is
            # passed to the tracker.
            d['x'] = (d['left'] + round(d['width'] / 2))
            d['y'] = (d['top'] + round(d['height'] / 2))

            xywhs = d[['x', 'y', 'width', 'height']].values

            with Image.open(f'{frames_dir}/{frame}.png') as frame_image:
                image = np.array(frame_image)

            # Every box is given confidence 1 and class 0.
            confs = np.ones([len(d), ])
            clss = np.zeros([len(d), ])
            outputs = deepsort.update(xywhs, confs, clss, image)

            if (plot and frame > cfg.DEEPSORT.N_INIT) or (frame in plot_frames):
                for output in outputs:
                    bboxes = output[0:4]
                    track_id = output[4]
                    # plot_one_box draws directly on `image`.
                    plot_one_box(bboxes, image, label=f'{track_id}',
                                 color=compute_color_for_id(track_id), line_thickness=2)
                fig, ax = plt.subplots(figsize=(15, 10))
                video_frame = d['video_frame'].values[0]
                ax.set_title(f'Deepsort labels: {video_frame}')
                # Show `image` itself so a frame without tracker outputs is
                # still displayed instead of hitting an undefined variable.
                ax.imshow(image)
                plt.show()
                plt.close(fig)

            preds_df = pd.DataFrame(outputs, columns=['left', 'top', 'right', 'bottom', 'deepsort_cluster', 'class'])
            if len(preds_df) > 0:
                # TODO Fix this messy merge
                # Each box gets the tracker output whose `left` is nearest.
                d = pd.merge_asof(d.sort_values(['left', 'top']),
                                  preds_df[['left', 'top', 'deepsort_cluster']]
                                  .sort_values(['left', 'top']), on='left', suffixes=('', '_deepsort'),
                                  direction='nearest')
            ds.append(d)
    finally:
        # Always remove the extracted frames, even when tracking fails.
        shutil.rmtree(frames_dir, ignore_errors=True)
    dout = pd.concat(ds)
    return dout

def add_deepsort_team_cluster_col(out):
    """Attach the majority `team_cluster` of each DeepSort track to its rows.

    For every `deepsort_cluster` the most frequent `team_cluster` value and
    the number of times it occurs are computed, then mapped back onto the
    rows as `team_cluster_deepsort` and `team_cluster_count_deepsort`.
    Rows whose `deepsort_cluster` is missing get NaN in both columns.

    Args:
        out: DataFrame with `deepsort_cluster` and `team_cluster` columns.

    Returns:
        The same DataFrame object, modified in place.
    """
    # One row per (track, label) pair with its frequency. groupby().size()
    # names its result explicitly here, so this does not depend on how a given
    # pandas version names the output of value_counts().
    label_counts = out.groupby(['deepsort_cluster', 'team_cluster']).size() \
        .rename('team_cluster_count') \
        .reset_index() \
        .sort_values('team_cluster_count', ascending=False, kind='mergesort')
    # After the stable descending sort the first row per track is its top
    # occurring label; ties resolve to the smallest label.
    top_labels = label_counts.drop_duplicates('deepsort_cluster').set_index('deepsort_cluster')
    sortlabel_map = top_labels['team_cluster'].to_dict()
    sortlabelcount_map = top_labels['team_cluster_count'].to_dict()

    out['team_cluster_deepsort'] = out['deepsort_cluster'].map(sortlabel_map)
    out['team_cluster_count_deepsort'] = out['deepsort_cluster'].map(sortlabelcount_map)

    return out


## Deepsort for team player tracking

In [None]:
# Split `video_frame` once and derive the video name (first three fields,
# no extension) and the integer frame number (last field) from it.
video_frame_parts = helmets['video_frame'].str.split('_')
helmets['video'] = video_frame_parts.str[:3].str.join('_')
helmets['frame'] = video_frame_parts.str[-1].astype('int')

# Debug runs read the training videos, submission runs the test videos.
video_dir = (
    '../input/nfl-health-and-safety-helmet-assignment/train/'
    if debug else
    '../input/nfl-health-and-safety-helmet-assignment/test/'
)

# Track the helmets of each video separately and stitch the results together.
# The loop variable keeps the name `myvideo` because it is a notebook-level
# global that code outside this cell may read.
outs = []
for myvideo, video_data in tqdm(helmets.groupby('video'), total=helmets['video'].nunique()):
    print(f'==== {myvideo} ====')
    # An empty `plot_frames` is also the function's default, so debug and
    # submission runs make the same call.
    out = deepsort_helmets(video_data, video_dir, plot_frames=[])
    outs.append(out)
helmets = pd.concat(outs).copy()

In [None]:
def backfill_initial_frames(helmets, source_frame=3, target_frames=(1, 2)):
    """Replace the rows of each video's first frames with a copy of a later frame.

    For every `video_frame` group whose frame number is in `target_frames`,
    the rows are replaced by that video's `source_frame` rows, relabelled with
    the target frame number and `video_frame` id. All other groups pass
    through unchanged.
    NOTE(review): presumably done because the tracker output is not usable on
    the very first frames -- confirm.

    Args:
        helmets: DataFrame with `video`, `frame` and `video_frame` columns.
        source_frame: frame number whose rows are copied.
        target_frames: frame numbers to overwrite.

    Returns:
        A new DataFrame, concatenated in `video_frame` group order.
    """
    pieces = []
    for video_frame, frame_rows in helmets.groupby('video_frame'):
        frame_number = frame_rows['frame'].values[0]
        if frame_number not in target_frames:
            pieces.append(frame_rows)
            continue
        # Look the video up from the current group. It used to be carried over
        # from a previous iteration for frame 2, which could be stale, or
        # undefined on the very first group.
        video = frame_rows['video'].values[0]
        donor = helmets[(helmets['video'] == video) & (helmets['frame'] == source_frame)].copy()
        if donor.empty:
            # Nothing to copy from: keep the original rows instead of silently
            # dropping the whole frame.
            pieces.append(frame_rows)
            continue
        donor['frame'] = frame_number
        donor['video_frame'] = video_frame
        pieces.append(donor)
    return pd.concat(pieces)


helmets_df = backfill_initial_frames(helmets)

# clustering players for each team

In [None]:
# for loading/processing the images  
from keras.preprocessing.image import load_img 
from keras.preprocessing.image import img_to_array 
from keras.applications.vgg16 import preprocess_input 

# models 
from keras.applications.vgg16 import VGG16 
from keras.models import Model

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
from random import randint
import pandas as pd
import pickle

In [None]:
# Debug runs point at the training videos, submission runs at the test videos.
video_root = (
    '../input/nfl-health-and-safety-helmet-assignment/train/'
    if debug == True else
    '../input/nfl-health-and-safety-helmet-assignment/test/'
)
# One path per distinct video, in order of first appearance in `helmets_df`.
video_paths = [video_root + str(video + '.mp4') for video in helmets_df['video'].unique()]

In [None]:
# Start every helmet in cluster 0; the per-video clustering cell further down
# overwrites this value frame by frame.
helmets_df['team_cluster'] = 0

In [None]:
# Backbone shared by every extract_features call. It is built lazily on first
# use, so the network and its weights are loaded once rather than per frame.
_VGG16_FEATURE_MODEL = None


def extract_features(bbox_images):
    """Compute one flattened VGG16 feature vector per helmet crop.

    Args:
        bbox_images: array of crops with the batch on the first axis
            (the caller passes shape (n, 224, 224, 3)).

    Returns:
        A list of `n` one-dimensional float64 arrays, one per crop, in input
        order. An empty batch returns an empty list without touching the model.
    """
    global _VGG16_FEATURE_MODEL

    # A frame without helmets would otherwise reach predict() with batch_size=0.
    if len(bbox_images) == 0:
        return []

    if _VGG16_FEATURE_MODEL is None:
        _VGG16_FEATURE_MODEL = VGG16(weights='imagenet', include_top=False)

    inputs = VGG_preprocess_input(bbox_images)
    # The whole frame is sent through the network as a single batch.
    outputs = _VGG16_FEATURE_MODEL.predict(inputs, batch_size=bbox_images.shape[0])
    # Flatten each sample. float64 matches what the previous
    # tolist()/np.array round trip produced, without building Python lists.
    flat = np.asarray(outputs, dtype=np.float64).reshape(len(outputs), -1)
    return list(flat)

In [None]:
import glob
from tqdm.auto import tqdm

import subprocess  # runs ffmpeg without relying on the IPython `!` shell magic

# Directory the frames of the current video are extracted into.
FRAMES_DIR = '/kaggle/working/temp'


def _team_cluster_error(frame_df, pred_col, visitor_value):
    """Sum of |ground truth - prediction| for one V/H -> cluster-id mapping.

    The ground truth is the first character of `label`, with 'V' replaced by
    `visitor_value` and 'H' by the other id (1 - visitor_value).
    """
    ground_truth = frame_df['label'].str[0] \
        .str.replace('V', str(visitor_value)) \
        .str.replace('H', str(1 - visitor_value))
    diff = [int(v) for v in ground_truth.values] - frame_df[pred_col].values
    return np.absolute(diff).sum()


for video_path in video_paths:
    all_frame_numbers = []
    images_features = []
    training_features = []
    video_file = video_path
    video_name = video_path.split('/')[-1][:-4]  # strip directory and '.mp4'

    # Positional masks/values for this video, computed once instead of
    # re-querying the whole frame for every image.
    video_mask = (helmets_df['video'] == video_name).to_numpy()
    frame_values = helmets_df['frame'].to_numpy()
    video_boxes = helmets_df.loc[video_mask, ['left', 'width', 'top', 'height', 'frame']]

    if os.path.exists(FRAMES_DIR):
        shutil.rmtree(FRAMES_DIR)
    os.mkdir(FRAMES_DIR)
    # Extract every frame as <n>.png. The absolute output path always matches
    # the directory globbed below, whatever the working directory is.
    subprocess.run(['ffmpeg', '-hide_banner', '-loglevel', 'fatal', '-nostats',
                    '-i', video_file, f'{FRAMES_DIR}/%d.png'],
                   check=True)

    ratio = 0.01  # fraction of the box trimmed from each side before cropping
    for image_path in tqdm(glob.glob(f'{FRAMES_DIR}/*')):
        frame_number = int(image_path.split('/')[-1][:-4])
        image_info = video_boxes[video_boxes['frame'] == frame_number]
        if image_info.empty:
            # No helmets on this frame: nothing to crop, and an empty batch
            # must not reach the feature extractor.
            continue

        original_image = np.array(Image.open(image_path))
        bbox_images = []
        for bbox in image_info.itertuples(index=False):
            x_start = int(bbox.left) + int(bbox.width * ratio)
            y_start = int(bbox.top) + int(bbox.height * ratio)
            x_end = int(bbox.left) + int(bbox.width * (1 - ratio))
            y_end = int(bbox.top) + int(bbox.height * (1 - ratio))
            bbox_image = original_image[y_start:y_end, x_start:x_end]
            bbox_images.append(cv2.resize(bbox_image, (224, 224), interpolation=cv2.INTER_AREA))

        bbox_images = np.array(bbox_images).reshape(-1, 224, 224, 3)
        frame_features = extract_features(bbox_images)
        all_frame_numbers.append(frame_number)
        images_features.append(frame_features)
        training_features.extend(np.array(bbox_features) for bbox_features in frame_features)

    # The extracted frames are no longer needed once features are computed.
    shutil.rmtree(FRAMES_DIR, ignore_errors=True)

    if not training_features:
        # Video without a single helmet box: nothing to cluster.
        continue

    training_matrix = np.array(training_features)
    # PCA cannot keep more components than there are samples or features, so
    # cap the 1000 used for large videos.
    pca = PCA(n_components=min(1000, *training_matrix.shape), random_state=22)
    pca.fit(training_matrix)
    new_training_features = pca.transform(training_matrix)
    clusters = SpectralBiclustering(n_clusters=2, random_state=0).fit(new_training_features)

    # Row labels come back in the order the features were appended, so walk
    # the frames in that same order and hand each frame its slice.
    i = 0
    for frame_number, image_features in zip(all_frame_numbers, images_features):
        frame_mask = video_mask & (frame_values == frame_number)
        # Single .loc call: the previous chained `df[col].loc[mask] = ...`
        # assignment is not guaranteed to write into helmets_df.
        helmets_df.loc[frame_mask, 'team_cluster'] = clusters.row_labels_[i:i + len(image_features)]
        i = i + len(image_features)
    print(f'======={video_name}=========')
    if debug == True:
        # Kept for later cells: the column ends up with the case-2 encoding
        # (V -> '1', H -> '0'), as before.
        helmets_df['ground_truth_clusters'] = helmets_df['label'].str[0] \
            .str.replace('V', '1').str.replace('H', '0')

        # Errors are measured on the current video only: cluster ids are
        # assigned independently per video, so pooling videos (some of them
        # not clustered yet) under a single V/H mapping is not meaningful.
        video_helmets = helmets_df.loc[video_mask]
        total_error_case1_without_deepsort = _team_cluster_error(video_helmets, 'team_cluster', 0)
        print(f'video_name{video_name}')
        print(f'total error case1(V:0 & H:1) without deepsort:{total_error_case1_without_deepsort}')
        total_error_case2_without_deepsort = _team_cluster_error(video_helmets, 'team_cluster', 1)
        print(f'total error case2(V:1 & H:0) without deepsort:{total_error_case2_without_deepsort}')

        # Majority vote per DeepSort track, on this video's rows only: each
        # video is tracked by its own DeepSort instance, so the same track id
        # in two videos presumably refers to different players.
        smoothed = add_deepsort_team_cluster_col(video_helmets.copy())
        for col in ('team_cluster_deepsort', 'team_cluster_count_deepsort'):
            if col not in helmets_df.columns:
                helmets_df[col] = np.nan
            helmets_df.loc[video_mask, col] = smoothed[col].to_numpy()
        total_error_case1_with_deepsort = _team_cluster_error(smoothed, 'team_cluster_deepsort', 0)
        print(f'total error case1(V:0 & H:1) with deepsort:{total_error_case1_with_deepsort}')
        total_error_case2_with_deepsort = _team_cluster_error(smoothed, 'team_cluster_deepsort', 1)
        print(f'total error case2(V:1 & H:0) with deepsort:{total_error_case2_with_deepsort}')