### Some parts of my work are taken from: https://www.kaggle.com/its7171/2class-object-detection-inference


In [None]:
!pip install --no-deps '../input/timm-package/timm-0.1.26-py3-none-any.whl' > /dev/null
!pip install --no-deps '../input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl' > /dev/null

In [None]:
import warnings

warnings.filterwarnings("ignore")

import sys
sys.path.insert(0, "../input/timm-efficientdet-pytorch/timm-efficientdet-pytorch/")
sys.path.insert(0, "../input/omegaconf")

import torch
import os
from datetime import datetime
from tqdm import tqdm
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain,DetBenchEval
from effdet.efficientdet import HeadNet

In [None]:
# Notebook-wide configuration.

# Directory the extracted video frames are written to and later listed from.
DATA_ROOT_PATH = 'test_images'
# Seed handed to seed_everything().
SEED = 42
# Cross-validation settings; FOLD only appears in the commented-out metadata cell.
FOLDS = 5
FOLD = 0
# EfficientDet variant number, formatted into 'tf_efficientdet_d{}' by load_net().
EFF_DET = 5 
# Score cut-off passed to make_predictions() by the inference loop.
Threshold = 0.3
# Default number of earlier frames assign_players() looks back through.
LOOK_BACKWARD_LIMIT = 10
# Default IoU threshold assign_players() forwards to match_boxes().
IOU_THRESHOLD = 0.5
# Presumably the network input resolution and class count — confirm against
# the values used when building the transforms and the model.
IMAGE_SIZE = 512
NUM_CLASSES = 2
# Batch size of the DataLoader; the inference loop also uses it to compute row offsets.
BATCH_SIZE = 16
# mk_images() stops after writing this many frames of a video.
FRAME_THRESHOLD = 100
# NOTE(review): the path names "fold-4" while FOLD above is 0 — confirm which fold is intended.
CHECKPOINT = '../input/nfl-512-detection-classification/effdet5-models-fold-4/effdet5-models-fold-4/checkpoint_epoch9.bin'

In [None]:
def seed_everything(seed):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random``, the ``PYTHONHASHSEED`` environment variable,
    NumPy and PyTorch (CPU and all CUDA devices), and configures cuDNN for
    reproducible execution.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The auto-tuner may pick different kernels from run to run, which defeats
    # the deterministic flag above, so it has to be switched off.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

In [None]:
# df = pd.read_json('../input/nfl-2020-video-frames/meta_data_5.json')
# test_df = df.loc[df.fold==FOLD,['video','image','frame','image_path']].reset_index(drop=True)
# print(test_df.shape)
# test_df.sample(2)

In [None]:
def mk_images(video_name, video_labels, video_dir, out_dir):
    """Dump the leading frames of one video to PNG files.

    Frames are numbered from 1 and written to
    ``{out_dir}/{video stem}_{frame}.png``. Extraction stops when the video
    ends or after ``FRAME_THRESHOLD`` frames, whichever comes first.

    Args:
        video_name: File name of the video inside ``video_dir``.
        video_labels: Unused; kept so existing callers keep working.
        video_dir: Directory containing the video.
        out_dir: Directory the PNG frames are written to.
    """
    video_path = f"{video_dir}/{video_name}"
    video_name = os.path.basename(video_path)
    vidcap = cv2.VideoCapture(video_path)
    frame = 0
    try:
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break
            frame += 1
            # Swap the extension on the file name only, so a '.mp4' that
            # happens to appear in out_dir is left untouched.
            image_name = video_name.replace('.mp4', f'_{frame}.png')
            image_path = f'{out_dir}/{image_name}'
            cv2.imwrite(image_path, img)
            if frame == FRAME_THRESHOLD:
                break
    finally:
        # Always hand the decoder/file handle back, even if a write fails.
        vidcap.release()
        
        
# Extract frames once: the whole extraction step is skipped when the output
# directory already exists.
out_dir = DATA_ROOT_PATH
if not os.path.exists(out_dir):
    !mkdir -p $out_dir
    video_dir = '/kaggle/input/nfl-impact-detection/test'
    # Keep only the file name of every .mp4 found in the test directory.
    uniq_video = [path.split('/')[-1] for path in glob(f'{video_dir}/*.mp4')]
    for video_name in tqdm(uniq_video):
        # The labels argument is not used by mk_images, so an empty frame is passed.
        mk_images(video_name, pd.DataFrame(), video_dir, out_dir)
        
# Build the frame table from whatever files are present in the image directory.
image_root = DATA_ROOT_PATH + '/'
df = pd.DataFrame(os.listdir(image_root),columns=['image'])
df['image_path'] = image_root+df['image']
# File names look like '<video stem>_<frame>.png': everything before the last
# underscore is the video stem, the part after it is the frame number.
df['video'] = df.image.apply(lambda x:'_'.join(x.split('_')[:-1])+'.mp4')
df['frame'] = df.image.apply(lambda x:x.split('_')[-1].split('.')[0]).astype(int)
test_df = df
print(test_df.shape)
test_df.sample(2)

In [None]:
class DatasetRetriever(Dataset):
    """Dataset that loads frames listed in a DataFrame as RGB float images.

    Args:
        df: DataFrame with at least the columns ``image`` (identifier) and
            ``image_path`` (file to read).
        transforms: Optional callable invoked as ``transforms(image=...)``
            that returns a mapping with an ``'image'`` entry.
    """

    def __init__(self, df, transforms=None):
        super().__init__()
        self.df = df
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return ``(image, image_id)`` for the row at position ``index``.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        # Positional lookup: the index is a position in [0, len), so this
        # also works when the DataFrame index is not 0..n-1.
        row = self.df.iloc[index]
        image_id = row.image
        image_path = row.image_path
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # BGR -> RGB, then scale pixel values into [0, 1].
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            sample = self.transforms(image=image)
            image = sample['image']
        return image, image_id

    def __len__(self) -> int:
        """Return the number of rows in the underlying DataFrame."""
        return self.df.shape[0]

def get_test_transforms():
    """Build the inference-time augmentation pipeline.

    The pipeline resizes every image to 512x512 and converts it to a tensor;
    both steps are always applied.
    """
    pipeline_steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline_steps, p=1.0)

# Dataset over every extracted frame, using the resize + to-tensor pipeline.
test_dataset = DatasetRetriever(
    df=test_df,
    transforms=get_test_transforms(),
)
# Cell output: number of frames to run inference on.
len(test_dataset)

In [None]:
def load_net(checkpoint_path):
    """Build the EfficientDet evaluation model and load trained weights.

    Args:
        checkpoint_path: Path to a checkpoint file whose ``'model_state_dict'``
            entry holds the network weights.

    Returns:
        The network wrapped in ``DetBenchEval``, in eval mode, moved to the GPU.
    """
    config = get_efficientdet_config('tf_efficientdet_d{}'.format(EFF_DET))
    net = EfficientDet(config, pretrained_backbone=False)
    # The classification head is replaced after construction so its output
    # size matches the notebook's class count.
    config.num_classes = NUM_CLASSES
    config.image_size = IMAGE_SIZE
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    # Deserialize onto the CPU first so the checkpoint loads regardless of the
    # device it was saved from; the model is moved to the GPU below.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    net.load_state_dict(checkpoint['model_state_dict'])
    net = DetBenchEval(net, config)
    net.eval()
    return net.cuda()

net = load_net(CHECKPOINT)

In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    per_field = zip(*batch)
    return tuple(per_field)

# Sequential loader over the test frames. shuffle=False and drop_last=False
# matter: the inference loop writes results back by running row offset, so the
# batches must arrive in dataset order and cover every frame.
data_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn
)
# Cell output: number of batches.
len(data_loader)

In [None]:
def iou(bbox1, bbox2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Both boxes are sequences of four numbers ``(x0, y0, x1, y1)``. The result
    is ``0`` when the boxes do not overlap (touching edges count as no
    overlap), otherwise the overlap area divided by the union area.
    """
    first = tuple(float(value) for value in bbox1)
    second = tuple(float(value) for value in bbox2)
    left_a, top_a, right_a, bottom_a = first
    left_b, top_b, right_b, bottom_b = second

    # Extent of the overlap rectangle along each axis.
    overlap_width = min(right_a, right_b) - max(left_a, left_b)
    overlap_height = min(bottom_a, bottom_b) - max(top_a, top_b)
    if overlap_width <= 0 or overlap_height <= 0:
        return 0

    area_a = (right_a - left_a) * (bottom_a - top_a)
    area_b = (right_b - left_b) * (bottom_b - top_b)
    overlap_area = overlap_width * overlap_height
    return overlap_area / (area_a + area_b - overlap_area)

def filter_duplicates(row):
    """Drop boxes that overlap an already kept box of the same frame.

    Boxes are visited in the order they are stored (the original author
    assumes that is descending score order). A box whose IoU with any kept box
    exceeds 0.5 is discarded; if the discarded box carries label 2, every kept
    box it overlaps is relabelled to 2. The three list fields of ``row`` are
    overwritten with the filtered lists and ``row`` is returned.
    """
    kept_boxes, kept_scores, kept_labels = [], [], []
    candidates = zip(row.box_predictions, row.score_list, row.label_list)
    for candidate_box, candidate_score, candidate_label in candidates:
        # Positions of every kept box this candidate duplicates.
        clashes = [
            position
            for position, kept_box in enumerate(kept_boxes)
            if iou(candidate_box, kept_box) > 0.5
        ]
        if not clashes:
            kept_boxes.append(candidate_box)
            kept_scores.append(candidate_score)
            kept_labels.append(candidate_label)
            continue
        if candidate_label == 2:
            # The duplicate's label 2 is carried over to the boxes that survive.
            for position in clashes:
                kept_labels[position] = 2
    row.box_predictions = kept_boxes
    row.score_list = kept_scores
    row.label_list = kept_labels
    return row

In [None]:
def make_predictions(images, score_threshold=0.5, frame_width=1280,
                     frame_height=720, input_size=512):
    """Run the detector on a batch and return per-image detections.

    Args:
        images: Sequence of image tensors of identical shape; they are stacked
            into one batch and moved to the GPU.
        score_threshold: Only detections with a score strictly above this
            value are kept.
        frame_width: Width in pixels of the original video frames that box
            coordinates are scaled back to.
        frame_height: Height in pixels of the original video frames.
        input_size: Side length of the square network input the boxes are
            expressed in.

    Returns:
        Three lists with one NumPy array per image: boxes as
        ``(x0, y0, x1, y1)`` in original-frame pixels, scores, and labels.
    """
    images = torch.stack(images).cuda().float()
    box_list = []
    score_list = []
    label_list = []
    with torch.no_grad():
        det = net(images, torch.tensor([1]*images.shape[0]).float().cuda())
    for i in range(images.shape[0]):
        # One device-to-host copy per image; the columns are sliced from it.
        detections = det[i].detach().cpu().numpy()
        boxes = detections[:, :4]
        scores = detections[:, 4]
        label = detections[:, 5]
        # Columns 2 and 3 are treated as width/height: adding the origin
        # turns them into the bottom-right corner.
        boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
        # Rescale from network-input coordinates to original-frame pixels.
        boxes[:, 0] = boxes[:, 0] * frame_width / input_size
        boxes[:, 1] = boxes[:, 1] * frame_height / input_size
        boxes[:, 2] = boxes[:, 2] * frame_width / input_size
        boxes[:, 3] = boxes[:, 3] * frame_height / input_size
        keep = scores > score_threshold
        box_list.append(boxes[keep])
        score_list.append(scores[keep])
        label_list.append(label[keep])

    return box_list, score_list, label_list

In [None]:
# Pre-allocate one result row per test frame; rows are filled batch by batch.
predictions = pd.DataFrame(index=range(len(test_df)),
                      columns = [ 'image', 'box_predictions', 'score_list','label_list'])

# test_index is the row offset of the current batch. This bookkeeping relies on
# the loader yielding batches of BATCH_SIZE in dataset order.
test_index = 0
test_batch = BATCH_SIZE
for step, (images, image_ids) in tqdm(enumerate(data_loader)):
    start_index = test_index
    # .loc slicing is inclusive on both ends, hence the "-1"; the last batch is
    # clamped to the final row.
    end_index = min(len(test_df)-1,test_index+test_batch-1)
    n_samples = end_index-start_index+1
    box_list, score_list, label_list = make_predictions(images, score_threshold=Threshold)
    predictions.loc[start_index:end_index,'image'] = image_ids[:n_samples]
    predictions.loc[start_index:end_index,'box_predictions'] = box_list[:n_samples]
    predictions.loc[start_index:end_index,'score_list'] = score_list[:n_samples]
    predictions.loc[start_index:end_index,'label_list'] = label_list[:n_samples]
    test_index += test_batch
    if end_index==(len(test_df)-1): break
# Remove overlapping boxes within each frame.
predictions = predictions.apply(filter_duplicates,axis=1)
# Attach image_path / video / frame from the frame table via the image name.
predictions = pd.merge(predictions,test_df,on=['image'])
# NOTE(review): the file is written in JSON (records) format despite the .csv name.
predictions.to_json('predictions.csv',orient='records')
# Number of boxes labelled 2 per frame, then summed per video as the cell output.
predictions['n_impact'] = predictions.label_list.apply(lambda x:(np.array(x)==2).sum())
predictions.groupby('video').n_impact.sum()

# Visualisation

In [None]:
# Draw the predicted boxes of the first prediction row on its frame.
image = cv2.imread(predictions.loc[0,'image_path'])
# cv2.imread yields BGR while matplotlib interprets arrays as RGB.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pred_boxes = predictions.loc[0,'box_predictions']
image_id = predictions.loc[0,'image']
print(image_id,image.shape)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

for box in pred_boxes:
    # cv2.rectangle needs integer pixel coordinates; the boxes are floats.
    x0, y0, x1, y1 = (int(round(float(value))) for value in box[:4])
    # The image is uint8, so the colour must use the 0-255 range to be visible.
    cv2.rectangle(image, (x0, y0), (x1, y1), (255, 0, 0), 3)

ax.set_axis_off()
ax.imshow(image);

# Postprocessing

In [None]:
import math
def iou(bbox1, bbox2):
    """Compute intersection-over-union for two ``(x0, y0, x1, y1)`` boxes.

    Returns ``0`` if the boxes share no area (including boxes that merely
    touch), otherwise intersection area / union area as a float.
    """
    coords_a = [float(c) for c in bbox1]
    coords_b = [float(c) for c in bbox2]
    ax0, ay0, ax1, ay1 = coords_a
    bx0, by0, bx1, by1 = coords_b

    # Corners of the rectangle shared by both boxes, if there is one.
    ix0, iy0 = max(ax0, bx0), max(ay0, by0)
    ix1, iy1 = min(ax1, bx1), min(ay1, by1)

    if ix1 - ix0 <= 0 or iy1 - iy0 <= 0:
        return 0

    intersection = (ix1 - ix0) * (iy1 - iy0)
    union = (ax1 - ax0) * (ay1 - ay0) + (bx1 - bx0) * (by1 - by0) - intersection
    return intersection / union

def match_boxes(bbox_set1,bbox_set2,diagonal=True,iou_threshold=0.35):
    """Greedily pair boxes of ``bbox_set2`` with boxes of ``bbox_set1`` by IoU.

    Pairs are visited from highest to lowest IoU; a pair is accepted when
    neither of its boxes has been used yet, and the search stops at the first
    pair whose IoU is below ``iou_threshold``. With ``diagonal=False`` pairs
    having equal positions in both sets are not considered.

    Returns:
        A list with one entry per box in ``bbox_set2``: the index of the
        matched box in ``bbox_set1``, or ``None`` when unmatched.
    """
    scored_pairs = [
        ((ref_pos, new_pos), iou(ref_box, new_box))
        for ref_pos, ref_box in enumerate(bbox_set1)
        for new_pos, new_box in enumerate(bbox_set2)
        if ref_pos != new_pos or diagonal
    ]
    # Stable sort: ties keep their enumeration order.
    scored_pairs.sort(key=lambda entry: entry[1], reverse=True)

    relation = [None for _ in bbox_set2]
    used_refs = set()
    used_new = set()
    for (ref_pos, new_pos), overlap in scored_pairs:
        if overlap < iou_threshold:
            break
        if ref_pos in used_refs or new_pos in used_new:
            continue
        relation[new_pos] = ref_pos
        used_refs.add(ref_pos)
        used_new.add(new_pos)
    return relation

def map_classes(row):
    """Return a dict mapping each box of ``row`` (as a tuple) to its label."""
    return {
        tuple(box): label
        for box, label in zip(row.box_predictions, row.label_list)
    }

def assign_players(df,video,LOOK_BACKWARD_LIMIT=LOOK_BACKWARD_LIMIT,IOU_THRESHOLD=IOU_THRESHOLD):
    """Track boxes across the frames of one video and give each track a player id.

    Args:
        df: Predictions table with the columns ``video``, ``frame``,
            ``box_predictions`` and ``row_to_class_map``.
        video: Value of the ``video`` column selecting the rows to process.
        LOOK_BACKWARD_LIMIT: Maximum number of earlier frames searched for a
            matching box (defaults to the module constant of the same name).
        IOU_THRESHOLD: Minimum IoU for two boxes to be linked (defaults to the
            module constant of the same name).

    Returns:
        ``(player_to_bbox, player_to_class_map)``: two frames indexed by frame
        number with one column per player id, holding the player's box and the
        class label of that box respectively. Cells without a box map to -1.

    Note:
        The code indexes frame 1 directly and iterates ``2..number of rows``,
        so it expects the video's frames to be numbered 1..N without gaps.
        The dicts stored in ``row_to_class_map`` are modified in place.
    """
    video_df = df[df.video==video].sort_values(by='frame').set_index('frame')
    row_to_class_map = video_df['row_to_class_map']
    # Add an "empty box" key so cells without a box resolve to class -1 below.
    row_to_class_map.apply(lambda x:x.update({():-1}))
    # Rows: frames, columns: player ids, cells: boxes.
    player_to_bbox = pd.DataFrame()
    # Per frame: box (as tuple) -> player id.
    bbox_to_player = {k+1:{} for k in range(video_df.shape[0])}
    N_PLAYERS = 0
    # Initial Player Prediction
    # Every box of the first frame starts a new player.
    bboxes = video_df.loc[1,'box_predictions']
    for player_id,box in enumerate(bboxes):
        player_to_bbox[player_id] = None
        player_to_bbox.at[1,player_id] = box
        bbox_to_player[1][tuple(box)] = player_id
        N_PLAYERS += 1
        

    # Subsequent Player Prediction
    for frame in range(2,video_df.shape[0]+1):
        player_to_bbox.loc[frame] = None
        bboxes = video_df.loc[frame,'box_predictions']
        # Players that already received a box in the current frame.
        assigned = {}
        look_backward = 1
        # Walk back frame by frame until every box is matched, the look-back
        # limit is hit, or the start of the video is reached.
        while(look_backward<min(frame,LOOK_BACKWARD_LIMIT+1) and len(bboxes)>0):
            # Candidate reference boxes: those of the earlier frame whose
            # player has not been assigned in the current frame yet.
            ref_bbox = []
            for box in video_df.loc[frame-look_backward,'box_predictions']:
                if bbox_to_player[frame-look_backward][tuple(box)] not in assigned:
                    ref_bbox.append(box)
            relation = match_boxes(ref_bbox,bboxes,iou_threshold=IOU_THRESHOLD)
            left_out_bboxes = []
            # Assign Players
            for index,ref_index in enumerate(relation):
                if ref_index is not None:
                    # Read Player ID and box
                    player_id = bbox_to_player[frame-look_backward][tuple(ref_bbox[ref_index])]
                    box = bboxes[index]
                    # Write Player ID
                    player_to_bbox.at[frame,player_id] = box
                    bbox_to_player[frame][tuple(box)] = player_id
                    assigned[player_id] = True
                else:
                    left_out_bboxes.append(bboxes[index])

            # Only the still unmatched boxes are tried against older frames.
            bboxes = left_out_bboxes
            look_backward +=1
        # New Player Addition
        # Boxes that matched nothing within the look-back window open new players.
        for box in bboxes:
            player_id = N_PLAYERS
            player_to_bbox[player_id] = None
            player_to_bbox.at[frame,player_id] = box
            bbox_to_player[frame][tuple(box)] = player_id
            N_PLAYERS += 1
            
    # Map Classes to Players
    # Missing cells become '' -> tuple('') == (), which looks up the -1 entry
    # added above; real boxes look up their label in that frame's map.
    player_to_class_map = player_to_bbox.apply(lambda row: row.fillna('').apply(tuple).apply(
            lambda x:row_to_class_map.loc[row.name][x]),axis=1)
    
    return player_to_bbox,player_to_class_map

def filter_impacts(player_series):
    """Thin out runs of consecutive impact labels for one player.

    ``player_series`` holds one class value per frame (index = frame). Frames
    labelled 2 are grouped into runs, where a gap of more than 9 frames starts
    a new run. In the returned copy every positive value becomes 1 and every
    negative value -1; then, for each run, ``ceil(length / 9)`` evenly spaced
    frames inside the run are set back to 2. The input is not modified.
    """
    impact_rows = pd.DataFrame(player_series.loc[player_series == 2])
    impact_rows['indices'] = impact_rows.index
    frame_ids = impact_rows.indices
    # Distance to the neighbouring impact frame; the outermost frames get 99
    # so they always count as run boundaries.
    gap_to_previous = frame_ids.diff(1).fillna(99).abs()
    gap_to_next = frame_ids.diff(-1).fillna(99).abs()
    run_starts = impact_rows[gap_to_previous > 9].indices.tolist()
    run_ends = impact_rows[gap_to_next > 9].indices.tolist()

    output_series = player_series.copy(deep=True)
    output_series[player_series > 0] = 1
    output_series[player_series < 0] = -1
    for run_start, run_end in zip(run_starts, run_ends):
        run_length = run_end - run_start + 1
        n_medians = math.ceil(run_length / 9)
        spacing = run_length / (n_medians + 1)
        centres = []
        for step in range(1, n_medians + 1):
            centres.append(int(run_start + step * spacing))
        output_series.loc[centres] = 2
    return output_series


# Preparing Submission File

In [None]:
# Build the submission table: one row per (frame, box) kept as an impact.
submission_df = pd.DataFrame(columns=['gameKey','playID','view','video','frame','left','width','top','height'])
# Per frame: dict from box tuple to predicted label, consumed by assign_players.
predictions['row_to_class_map'] = predictions.apply(map_classes,axis=1)
# Running row counter for submission_df.
entries = 0
for video in predictions.video.unique():
    # Assign Players
    assigned_players_boxes,assigned_players_classes = assign_players(predictions,video)
    # Filter interframe bbox
    # filter_impacts is applied column-wise, i.e. once per player.
    assigned_players_classes_filtered = assigned_players_classes.apply(filter_impacts)
    n_filtered = (assigned_players_classes_filtered==2).sum().sum()
    n_classes = (assigned_players_classes==2).sum().sum()
    # Log: impacts after / before filtering and the total number of boxes.
    print(video,n_filtered,n_classes,(~assigned_players_boxes.isna()).sum().sum())
    
    # NOTE(review): the original comment read "Only fill if frame gap between
    # labelled boxes<10", but the forward-fill below has no gap limit.
    all_bboxes = assigned_players_boxes.fillna(method='ffill').values.reshape(-1)
    
    
    all_classes = assigned_players_classes_filtered.values.reshape(-1)
    # Same shape as the box table, every cell holding its row's frame number,
    # flattened the same way so the three arrays line up element by element.
    all_frames = assigned_players_boxes.apply(lambda x: pd.DataFrame(x).apply(
        lambda y: y.name, axis=1)).values.reshape(-1)
    bboxes_with_impact = all_bboxes[np.where(all_classes==2)]
    frames_with_impact = all_frames[np.where(all_classes==2)]
    
    # Write to dataframe
    # The video name must split into exactly four parts:
    # gameKey, playID, view and the file extension.
    gameKey,playID,view,_ = video.replace('.','_').split('_')
    for frame,box in zip(frames_with_impact,bboxes_with_impact):
        submission_df.loc[entries,'gameKey'] = gameKey
        submission_df.loc[entries,'playID'] = playID
        submission_df.loc[entries,'view'] = view
        submission_df.loc[entries,'video'] = video
        submission_df.loc[entries,'frame'] = frame
        # Boxes are (x0, y0, x1, y1); the submission wants left/top/width/height.
        submission_df.loc[entries,'left'] = int(box[0])
        submission_df.loc[entries,'top'] = int(box[1])
        submission_df.loc[entries,'width'] = int(box[2]-box[0])
        submission_df.loc[entries,'height'] = int(box[3]-box[1])
        entries += 1

# Submission

In [None]:
# Keep only impacts reported after frame 20; earlier rows are dropped.
submission_df = submission_df[submission_df.frame>20]
# Cell output: the filtered submission table.
submission_df

In [None]:
# clearing working dir
# be careful when running this code on local environment!
# !rm -rf *
!mv * /tmp/

In [None]:
# Hand the submission table to the nflimpact environment.
# NOTE(review): presumably the competition's scoring module — confirm its
# expected columns match submission_df.
import nflimpact
env = nflimpact.make_env()
env.predict(submission_df) 