This notebook is based on https://www.kaggle.com/artkulak/2class-object-detection-inference-with-filtering <br> which is derived from https://www.kaggle.com/its7171/2class-object-detection-inference
I added a metric from https://www.kaggle.com/nvnnghia/evaluation-metrics 

The model in this notebook was trained on a dataset in which Endzone and Sideline frames are shuffled together, so it is quite difficult to apply this metric to the model.

If this notebook was helpful to you, please give it an upvote; it would be a great help to me.

In [None]:
import pandas as pd
# Load the test player-tracking table; only its shape is inspected below.
d = pd.read_csv('../input/nfl-impact-detection/test_player_tracking.csv')
# The flag is set whenever the table shape differs from (19269, 12)
# (presumably the shape of the public test file -- TODO confirm).
IS_PRIVATE = d.shape != (19269, 12)
print(IS_PRIVATE)

# Unconditional override: the value detected above is printed and then
# discarded, so every `if IS_PRIVATE:` branch below always runs.
IS_PRIVATE = True

In [None]:
# Offline setup from attached input datasets: install the timm wheel and
# unpack the bundled package archive into the working directory.
if IS_PRIVATE:
    !pip install ../input/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz ../input/nfl-lib/pkgs.tgz
    # for pytorch1.6
    # The sed expression rewrites the first " / " on each line of bench.py to " // ".
    cmd = "sed -i -e 's/ \/ / \/\/ /' timm-efficientdet-pytorch/effdet/bench.py"
    !$cmd

In [None]:
import sys
sys.path.insert(0, "timm-efficientdet-pytorch")
sys.path.insert(0, "omegaconf")

import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
import pandas as pd
import gc
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchEval
from effdet.efficientdet import HeadNet
import warnings
from tqdm import tqdm

import seaborn as sns

import numpy as np
from scipy.optimize import linear_sum_assignment
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

warnings.filterwarnings("ignore")

DATA_ROOT_PATH = 'test_images'
SEED = 42

def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Covers Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and CUDA), exports ``PYTHONHASHSEED``, and switches cuDNN to
    deterministic mode with auto-tuning disabled.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    # The seeders are independent of each other, so they can be applied in a loop.
    for apply_seed in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        apply_seed(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
seed_everything(SEED)

In [None]:
#################
# SET CONSTANTS
#################

# Minimum detector score; passed as `score_threshold` to make_predictions.
DETECTION_THRESHOLD = 0.4
# Second score cut applied when the prediction DataFrame is built.
# NOTE(review): being lower than DETECTION_THRESHOLD, it removes nothing
# unless the first threshold is lowered.
DETECTOR_FILTERING_THRESHOLD = 0.3

In [None]:
def iou(bbox1, bbox2):
    """Intersection-over-union of two axis-aligned boxes.

    Each box is a 4-item sequence ``(x0, y0, x1, y1)`` whose items are
    convertible to ``float``. Returns ``0`` when the boxes do not overlap
    (touching edges count as no overlap); otherwise the intersection area
    divided by the union area.
    """
    ax0, ay0, ax1, ay1 = [float(v) for v in bbox1]
    bx0, by0, bx1, by1 = [float(v) for v in bbox2]

    # Width and height of the overlap rectangle; non-positive means disjoint.
    inter_w = min(ax1, bx1) - max(ax0, bx0)
    inter_h = min(ay1, by1) - max(ay0, by0)
    if inter_w <= 0 or inter_h <= 0:
        return 0

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    intersection = inter_w * inter_h
    union = area_a + area_b - intersection

    return intersection / union

In [None]:
def precision_calc(gt_boxes, pred_boxes, max_frame_dist=4, iou_threshold=0.35):
    """Count true positives, false positives and false negatives for one video.

    Parameters
    ----------
    gt_boxes, pred_boxes : sequences of rows ``(frame, x0, y0, x1, y1)``
        Ground-truth and predicted boxes. Lists and 2-D NumPy arrays both work.
    max_frame_dist : int, default 4
        A prediction may match a ground-truth box only when their frame
        numbers differ by at most this many frames.
    iou_threshold : float, default 0.35
        Minimum IoU for a prediction to be allowed to match a ground-truth box.

    Returns
    -------
    (tp, fp, fn) : tuple of int
        One-to-one matching is solved with ``linear_sum_assignment`` on a 0/1
        cost matrix (0 = allowed match). Every box left without an allowed
        match counts as a false positive (prediction) or false negative
        (ground truth).
    """
    n_gt, n_pred = len(gt_boxes), len(pred_boxes)
    # Nothing can match when either side is empty; skip the solver entirely.
    if n_gt == 0 or n_pred == 0:
        return 0, n_pred, n_gt

    cost_matrix = np.ones((n_gt, n_pred))
    for i, gt in enumerate(gt_boxes):
        for j, pred in enumerate(pred_boxes):
            if abs(gt[0] - pred[0]) > max_frame_dist:
                continue
            if iou(gt[1:], pred[1:]) < iou_threshold:
                continue
            cost_matrix[i, j] = 0

    # Minimising the total cost maximises the number of allowed (zero-cost) pairs.
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    tp = int((cost_matrix[row_ind, col_ind] == 0).sum())
    fp = n_pred - tp
    fn = n_gt - tp
    return tp, fp, fn

In [None]:
# NOTE: unpickling can execute arbitrary code -- only load pickles from a trusted source.
video_labels = pd.read_pickle("../input/nfl-pkl/video_label_just.pkl")
# Frame image file name: "<video without .mp4>_<frame zero-padded to 3 digits>.png".
# regex=False makes '.mp4' a literal suffix; on pandas versions where the
# default is regex=True the '.' would otherwise match any character.
video_labels['image_name'] = (
    video_labels['video'].str.replace('.mp4', '', regex=False)
    + '_'
    + video_labels['frame'].astype(str).str.zfill(3)
    + '.png'
)
video_labels

In [None]:
# Reproducible 80/20 split at the level of the 12-character video-name prefix
# (presumably "<gameKey>_<playID>" -- TODO confirm), so that all frames sharing
# a prefix land on the same side of the split.
np.random.seed(0)
video_key = video_labels.video.str[:12]
video_names = np.random.permutation(video_key.unique())
valid_video_len = int(len(video_names)*0.2)
video_valid, video_train = video_names[:valid_video_len], video_names[valid_video_len:]
in_valid = video_key.isin(video_valid)
images_valid = video_labels.loc[in_valid, 'image_name'].unique()
images_train = video_labels.loc[~in_valid, 'image_name'].unique()

In [None]:
def get_valid_transforms():
    """Return the validation pipeline: resize to 512x512, then convert to a tensor."""
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0)

In [None]:
TRAIN_ROOT_PATH = '../input/nfl-impact-detection-train-frames'

class DatasetRetriever(Dataset):
    """Dataset yielding ``(image, image_id, target)`` for each frame image.

    Parameters
    ----------
    marking : pandas.DataFrame
        Label table with columns ``image_name``, ``x``, ``y``, ``w``, ``h``
        and ``impact``.
    image_ids : sequence of str
        Image file names to serve, indexed by position.
    transforms : callable, optional
        Albumentations-style callable invoked with ``image``, ``bboxes`` and
        ``labels`` keyword arguments.
    test : bool
        Stored on the instance; not used by this class.
    """

    def __init__(self, marking, image_ids, transforms=None, test=False):
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        """Return ``(image, image_id, target)`` for the item at ``index``.

        ``target`` holds ``boxes``, ``labels`` and ``image_id``. When a
        transform result contains at least one box, ``image`` is the
        transformed image and ``target['boxes']`` is a tensor in
        (y0, x0, y1, x1) order.
        """
        image_id = self.image_ids[index]

        image, boxes, labels = self.load_image_and_boxes(index)

        target = {}
        target['boxes'] = boxes
        target['labels'] = torch.tensor(labels)
        target['image_id'] = torch.tensor([index])

        if self.transforms:
            # Retry up to 10 times until the transform output keeps at least one box.
            # NOTE(review): if no attempt keeps a box, the untransformed image
            # and the raw box array are returned as-is.
            for _ in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    # Swap (x0, y0, x1, y1) -> (y0, x0, y1, x1).
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  #yxyx: be warning
                    break
        return image, image_id, target

    def __len__(self) -> int:
        # len() works for lists/tuples as well as NumPy arrays.
        return len(self.image_ids)

    def load_image_and_boxes(self, index):
        """Read one image and its labelled boxes.

        Returns ``(image, boxes, labels)``: ``image`` is an RGB float32 array
        scaled to [0, 1], ``boxes`` has one ``(x0, y0, x1, y1)`` row per label
        record, and ``labels`` holds the matching ``impact`` values.

        Raises
        ------
        FileNotFoundError
            If OpenCV cannot read the image file.
        """
        image_id = self.image_ids[index]
        # The containing folder is the image name minus its last 8 characters
        # (presumably the "_NNN.png" frame suffix -- TODO confirm).
        image_path = f'{TRAIN_ROOT_PATH}/{image_id[:-8]}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        records = self.marking[self.marking['image_name'] == image_id]
        boxes = records[['x', 'y', 'w', 'h']].values
        # Convert (x, y, w, h) to corner form (x0, y0, x1, y1).
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        labels = records['impact'].values
        return image, boxes, labels

In [None]:
def load_net(checkpoint_path):
    """Build a 2-class EfficientDet-D5 evaluation model and load its weights.

    ``checkpoint_path`` must point to a ``torch.load``-able file containing a
    ``'model_state_dict'`` entry. Returns the ``DetBenchEval``-wrapped network
    in eval mode, moved to CUDA.
    """
    config = get_efficientdet_config('tf_efficientdet_d5')
    net = EfficientDet(config, pretrained_backbone=False)
    # The config is edited only after the network has been constructed; the
    # classification head is then replaced so it emits `config.num_classes` outputs.
    config.num_classes = 2
    config.image_size=512
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['model_state_dict'])
    net = DetBenchEval(net, config)
    net.eval();
    return net.cuda()
# `net` is only bound when IS_PRIVATE is true; make_predictions reads it as a global.
if IS_PRIVATE:
    net = load_net('../input/nfl-models//best-checkpoint-002epoch.bin')

In [None]:
# Validation frames only (images_valid), served with the resize + to-tensor pipeline.
validation_dataset = DatasetRetriever(
    image_ids=images_valid,
    marking=video_labels,
    transforms=get_valid_transforms(),
    test=True,
)

def collate_fn(batch):
    """Transpose a batch of per-sample tuples into a tuple of per-field tuples."""
    per_field = zip(*batch)
    return tuple(per_field)

# Sequential (unshuffled) loader over the validation frames; collate_fn keeps
# images, ids and targets as per-field tuples instead of stacked tensors.
data_loader = DataLoader(
    validation_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn,
    pin_memory=True
)

In [None]:
def make_predictions(images, score_threshold=0.5):
    """Run the global ``net`` on a batch and keep confident label-2 detections.

    Parameters
    ----------
    images : sequence of torch.Tensor
        Image tensors of identical shape; they are stacked into one batch.
    score_threshold : float, default 0.5
        Only detections with a score strictly above this value are kept.

    Returns
    -------
    (box_list, score_list) : tuple of lists
        One entry per image. Each ``box_list`` entry is an array of
        ``(x0, y0, x1, y1)`` rows in the coordinate space of the network
        output; each ``score_list`` entry holds the matching scores.
    """
    images = torch.stack(images).cuda().float()
    box_list = []
    score_list = []
    with torch.no_grad():
        # Second argument: one scale factor of 1 per image.
        img_scales = torch.tensor([1]*images.shape[0]).float().cuda()
        det = net(images, img_scales)
        for i in range(images.shape[0]):
            # Copy the detections to host memory once instead of once per column.
            det_i = det[i].detach().cpu().numpy()
            boxes = det_i[:, :4]
            scores = det_i[:, 4]
            label = det_i[:, 5]
            # Keep only label 2 (the same label value the ground truth is filtered on).
            indexes = np.where((scores > score_threshold) & (label == 2))[0]
            # Convert (x, y, w, h) to corner form (x0, y0, x1, y1).
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
            box_list.append(boxes[indexes])
            score_list.append(scores[indexes])
    return box_list, score_list
import matplotlib.pyplot as plt

In [None]:
#check prediction

# Visual sanity check: draw predictions and ground truth on frames that have
# at least one detection above DETECTION_THRESHOLD.
cnt = 0
for images, image_ids, labels in data_loader:
    box_list, score_list = make_predictions(images, score_threshold=DETECTION_THRESHOLD)
    for i in range(len(images)):
        # CHW tensor -> HWC array for OpenCV / matplotlib.
        sample = images[i].permute(1,2,0).cpu().numpy()
        # Predicted boxes, clipped to the 512x512 network input.
        boxes = box_list[i].astype(np.int32).clip(min=0, max=511)
        scores = score_list[i]
        if len(scores) >= 1:
            fig, ax = plt.subplots(1, 1, figsize=(16, 8))
            # Display at 1280x720 (presumably the original frame size -- TODO confirm).
            sample = cv2.resize(sample , (int(1280), int(720)))
            for box,score in zip(boxes,scores):
                # Rescale the box from 512x512 to 1280x720 (integer array, so values truncate).
                box[0] = box[0] * 1280 / 512
                box[1] = box[1] * 720 / 512
                box[2] = box[2] * 1280 / 512
                box[3] = box[3] * 720 / 512
                # Predictions are drawn with colour (1, 0, 0).
                cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (1, 0, 0), 3)
            # Ground-truth boxes with label 2, colour (0, 1, 0). They are stored as
            # (y0, x0, y1, x1), hence the swapped indices; they are drawn without rescaling.
            for box in labels[i]['boxes'][labels[i]['labels']==2]:
                cv2.rectangle(sample, (box[1], box[0]), (box[3], box[2]), (0, 1, 0), 2)
            ax.set_axis_off()
            ax.imshow(sample);
            cnt += 1
    # Checked once per batch, so more than 10 figures can be produced.
    if cnt >= 10:
        break

In [None]:
# Run the detector over the whole validation loader and collect, per image,
# the predicted boxes/scores and the ground-truth boxes with label 2.
result_image_ids = []
results_boxes = []
results_scores = []
gt_image_ids = []
gt_boxes = []
for images, image_ids, labels in tqdm(data_loader):
    box_list, score_list = make_predictions(images, score_threshold=DETECTION_THRESHOLD)
    for i in range(len(images)):
        boxes = box_list[i]
        scores = score_list[i]
        image_id = image_ids[i]
        # Rescale corner coordinates from the 512x512 network input to 1280x720.
        boxes[:, 0] = (boxes[:, 0] * 1280 / 512)
        boxes[:, 1] = (boxes[:, 1] * 720 / 512)
        boxes[:, 2] = (boxes[:, 2] * 1280 / 512)
        boxes[:, 3] = (boxes[:, 3] * 720 / 512)
        # Boxes stay in corner form (x0, y0, x1, y1); truncate to whole pixels
        # and clamp them to the 1280x720 frame.
        boxes = boxes.astype(np.int32)
        boxes[:, 0] = boxes[:, 0].clip(min=0, max=1280-1)
        boxes[:, 2] = boxes[:, 2].clip(min=0, max=1280-1)
        boxes[:, 1] = boxes[:, 1].clip(min=0, max=720-1)
        boxes[:, 3] = boxes[:, 3].clip(min=0, max=720-1)
        result_image_ids += [image_id]*len(boxes)
        results_boxes.append(boxes)
        results_scores.append(scores)
        # Ground truth restricted to label 2; rows are in (y0, x0, y1, x1) order.
        gt = labels[i]['boxes'][labels[i]['labels']==2]
        gt_image_ids += [image_id]*len(gt)
        gt_boxes.append(gt)

In [None]:
# One row per ground-truth box: the image name followed by the box columns.
# The column order mirrors the (y0, x0, y1, x1) layout of the stored boxes.
box_df = pd.DataFrame(data=np.concatenate(gt_boxes), columns=['bot','left','top', 'right' ])
gt_df = pd.concat(
    [pd.DataFrame({'image_name':gt_image_ids}), box_df],
    axis=1,
)

In [None]:
# Derive the key columns from names shaped "<gameKey>_<playID>_<view>_<frame>.png".
# The name is split once and the pieces reused.
name_parts = gt_df.image_name.str.split('_')
gt_df['gameKey'] = name_parts.str[0].astype(int)
gt_df['playID'] = name_parts.str[1].astype(int)
gt_df['view'] = name_parts.str[2]
# regex=False: strip the literal ".png" rather than "<any char>png".
gt_df['frame'] = name_parts.str[3].str.replace('.png', '', regex=False).astype(int)
# n is passed by keyword; the positional form is deprecated in recent pandas.
gt_df['video'] = gt_df.image_name.str.rsplit('_', n=1).str[0] + '.mp4'
gt_df = gt_df[["gameKey","playID","view","video","frame",'left', 'bot', 'right', 'top']]
gt_df

In [None]:
# Re-assignment of the constant already set in the constants cell (same value);
# kept here so the score cut below can be tuned without re-running inference.
DETECTOR_FILTERING_THRESHOLD = 0.3

In [None]:
# One row per predicted box: score, image name and corner coordinates.
box_df = pd.DataFrame(np.concatenate(results_boxes), columns=['left', 'bot', 'right', 'top'])
test_df = pd.DataFrame({'scores':np.concatenate(results_scores), 'image_name':result_image_ids})
test_df = pd.concat([test_df, box_df], axis=1)
# .copy() so the column assignments below write to an independent frame and
# not to a slice of the unfiltered one. The original index labels are kept.
test_df = test_df[test_df.scores > DETECTOR_FILTERING_THRESHOLD].copy()
# Names are shaped "<gameKey>_<playID>_<view>_<frame>.png"; split once and reuse.
name_parts = test_df.image_name.str.split('_')
test_df['gameKey'] = name_parts.str[0].astype(int)
test_df['playID'] = name_parts.str[1].astype(int)
test_df['view'] = name_parts.str[2]
# regex=False: strip the literal ".png" rather than "<any char>png".
test_df['frame'] = name_parts.str[3].str.replace('.png', '', regex=False).astype(int)
# n is passed by keyword; the positional form is deprecated in recent pandas.
test_df['video'] = test_df.image_name.str.rsplit('_', n=1).str[0] + '.mp4'
test_df = test_df[["scores","gameKey","playID","view","video","frame",'left', 'bot', 'right', 'top']]
# test_df

In [None]:
# Split the predictions by camera view.
is_endzone = test_df['view'] == 'Endzone'
is_sideline = test_df['view'] == 'Sideline'
test_df_end = test_df.loc[is_endzone]
test_df_side = test_df.loc[is_sideline]

In [None]:
# Metric before the cross-view filter: per-video TP/FP/FN, summed over videos.
# NOTE(review): predictions come from the Sideline view only (test_df_side),
# while the loop covers every ground-truth video, so ground truth in other
# views is counted entirely as false negatives -- confirm this is intended.
BOX_COLUMNS = ["frame",'left', 'bot', 'right', 'top']
ftp, ffp, ffn = [], [], []
# unique() gives a stable iteration order; per-video names keep the global
# `gt_boxes` list intact so the cell that builds gt_df can be re-run.
for video in gt_df['video'].unique():
    video_pred_boxes = test_df_side[test_df_side['video']==video][BOX_COLUMNS].to_numpy()
    video_gt_boxes = gt_df[gt_df['video']==video][BOX_COLUMNS].to_numpy()
    tp, fp, fn = precision_calc(video_gt_boxes, video_pred_boxes)
    ftp.append(tp)
    ffp.append(fp)
    ffn.append(fn)

tp = np.sum(ftp)
fp = np.sum(ffp)
fn = np.sum(ffn)
# The 1e-6 terms guard against division by zero.
precision = tp / (tp + fp + 1e-6)
recall =  tp / (tp + fn +1e-6)
f1_score = 2*(precision*recall)/(precision+recall+1e-6)
print(f'TP: {tp}, FP: {fp}, FN: {fn}, PRECISION: {precision:.4f}, RECALL: {recall:.4f}, F1 SCORE: {f1_score:.4f}')

In [None]:
#################
# FILTER
#################


# A detection is kept only if the other camera view of the same play has at
# least one detection within FRAME_TOLERANCE frames; otherwise its index label
# is collected in dropIDX.
FRAME_TOLERANCE = 4

dropIDX = []
for _, play_df in test_df.groupby(['gameKey', 'playID']):
    # Frame numbers per view are computed once per play, not once per row.
    frames_by_view = {
        view: play_df.loc[play_df['view'] == view, 'frame']
        for view in ('Endzone', 'Sideline')
    }
    for view, other_view in (('Endzone', 'Sideline'), ('Sideline', 'Endzone')):
        other_frames = frames_by_view[other_view].to_numpy()
        for index, frame in frames_by_view[view].items():
            # An empty `other_frames` yields False here, so the row is dropped.
            if not np.any(np.abs(other_frames - frame) <= FRAME_TOLERANCE):
                dropIDX.append(index)

In [None]:
# #################
# # FILTER
# #################


# dropIDX = []
# for keys in test_df.groupby(['gameKey', 'playID']).size().to_dict().keys():
#     tmp_df = test_df.query('gameKey == @keys[0] and playID == @keys[1]')
    
#     for index, row in tmp_df.iterrows():
            
#         currentFrame = row['frame']

#         bboxCount1 = tmp_df.query('view == "Sideline" and abs(frame - @currentFrame) <= 0').shape[0]
#         bboxCount2 = tmp_df.query('view == "Endzone" and abs(frame - @currentFrame) <= 0').shape[0]
#         if bboxCount1 != bboxCount2:
#             dropIDX.append(index)

In [None]:
# Remove the rows collected in dropIDX and renumber the index from 0.
# NOTE(review): not idempotent -- after reset_index the labels in dropIDX refer
# to different rows, so re-running this cell without first rebuilding test_df
# and dropIDX would drop the wrong rows.
test_df = test_df.drop(index = dropIDX).reset_index(drop = True)
test_df

In [None]:
# Metric after the cross-view filter: per-video TP/FP/FN, summed over videos.
BOX_COLUMNS = ["frame",'left', 'bot', 'right', 'top']
ftp, ffp, ffn = [], [], []
# unique() gives a stable iteration order; per-video names keep the global
# `gt_boxes` list intact so the cell that builds gt_df can be re-run.
for video in gt_df['video'].unique():
    video_pred_boxes = test_df[test_df['video']==video][BOX_COLUMNS].to_numpy()
    video_gt_boxes = gt_df[gt_df['video']==video][BOX_COLUMNS].to_numpy()
    tp, fp, fn = precision_calc(video_gt_boxes, video_pred_boxes)
    ftp.append(tp)
    ffp.append(fp)
    ffn.append(fn)

tp = np.sum(ftp)
fp = np.sum(ffp)
fn = np.sum(ffn)
# The 1e-6 terms guard against division by zero.
precision = tp / (tp + fp + 1e-6)
recall =  tp / (tp + fn +1e-6)
f1_score = 2*(precision*recall)/(precision+recall+1e-6)
print(f'TP: {tp}, FP: {fp}, FN: {fn}, PRECISION: {precision:.4f}, RECALL: {recall:.4f}, F1 SCORE: {f1_score:.4f}')