This notebook detects 2 class objects.
- class1: helmet without impact
- class2: helmet with impact

Object Detection part is based on [EfficientDet notebook](https://www.kaggle.com/shonenkov/training-efficientdet) for [global wheat detection competition](https://www.kaggle.com/c/global-wheat-detection) by [shonenkov](https://www.kaggle.com/shonenkov), which is using [github repos efficientdet-pytorch](https://github.com/rwightman/efficientdet-pytorch) by [@rwightman](https://www.kaggle.com/rwightman).

Inference part can be foud [here](https://www.kaggle.com/its7171/2class-object-detection-inference/).

In [1]:
Kaggle = False
Colab = not Kaggle
TRAIN = False

In [2]:
import os, sys
from pathlib import Path

if Colab:
    from google.colab import drive
    drive.mount('/content/drive')

    path = "/content/drive/My Drive"
    os.chdir(path)
    os.listdir(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
if Kaggle:
    !pip install ../input/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz ../input/nfl-lib/pkgs.tgz
else:
    !pip install NFL/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz NFL/nfl-lib/pkgs.tgz    
    !pip install albumentations==0.4.6
# for pytorch1.6
cmd = "sed -i -e 's/ \/ / \/\/ /' timm-efficientdet-pytorch/effdet/bench.py"
!$cmd



In [4]:
import sys
sys.path.insert(0, "timm-efficientdet-pytorch")
sys.path.insert(0, "omegaconf")

import torch
import os
import re
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
import pandas as pd
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain, DetBenchEval
from effdet.efficientdet import HeadNet
from tqdm import tqdm

SEED = 42

def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# Data Preparation

In [5]:
if Kaggle:
    BASEPATH = "../input/nfl-impact-detection"
    outdir = '.'
    TRAIN_IMGPATH = outdir
    MODELS_PATH = '../input/ed6-512-k5-aug3'
else:
    PATH = 'NFL/'
    BASEPATH = PATH + 'Data'
    TRAIN_IMGPATH = BASEPATH
    outdir = Path(PATH+'res')
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    outdir = Path(PATH+'res/efficientDet-res')
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    MODELNAME = "ED5-512"
    # MODELNAME = "ED4-512"
    VERSION = '{}'.format(MODELNAME)
    outdir = os.path.join(outdir, VERSION)
    if not os.path.exists(outdir):
        os.mkdir(outdir)
    # from datetime import datetime, timedelta
    # dateTimeObj = datetime.now()
    # timestampStr = dateTimeObj.strftime("%d-%b-%Y-%H")
    # modelpath = os.path.join(outdir, 'all-kfold-hairaug-456-norm-metanew-b16')
    timestampStr = 'org-epoch20-aug3'
    outdir = os.path.join(outdir, timestampStr)
    if not os.path.exists(outdir):
        os.mkdir(outdir)  
    MODELS_PATH = outdir
SCORE_TH = 0.4
SKIP_SCORE = 0.4        

In [6]:
def mk_images(video_name, video_labels, video_dir, out_dir, only_with_impact=True):
    video_path=f"{video_dir}/{video_name}"
    video_name = os.path.basename(video_path)
    vidcap = cv2.VideoCapture(video_path)
    if only_with_impact:
        boxes_all = video_labels.query("video == @video_name")
        print(video_path, boxes_all[boxes_all.impact == 1.0].shape[0])
    else:
        print(video_path)
    frame = 0
    while True:
        it_worked, img = vidcap.read()
        if not it_worked:
            break
        frame += 1
        if only_with_impact:
            boxes = video_labels.query("video == @video_name and frame == @frame")
            boxes_with_impact = boxes[boxes.impact == 1.0]
            if boxes_with_impact.shape[0] == 0:
                continue
        img_name = f"{video_name}_frame{frame}"
        image_path = f'{out_dir}/{video_name}'.replace('.mp4',f'_{frame}.png')
        _ = cv2.imwrite(image_path, img)

In [7]:
# out_dir = DATA_ROOT_PATH
# if not os.path.exists(out_dir):
#     !mkdir -p $out_dir
#     video_dir = '/kaggle/input/nfl-impact-detection/test'
#     uniq_video = [path.split('/')[-1] for path in glob(f'{video_dir}/*.mp4')]
#     for video_name in uniq_video:
#         mk_images(video_name, pd.DataFrame(), video_dir, out_dir, only_with_impact=False)

In [8]:
# video_labels = pd.read_csv(os.path.join(BASEPATH, 'train_labels.csv')).fillna(0)
# video_labels_with_impact = video_labels[video_labels['impact'] > 0]
# for row in tqdm(video_labels_with_impact[['video','frame','label']].values):
#     frames = np.array([-4,-3,-2,-1,1,2,3,4])+row[1]
#     video_labels.loc[(video_labels['video'] == row[0]) 
#                                  & (video_labels['frame'].isin(frames))
#                                  & (video_labels['label'] == row[2]), 'impact'] = 1
# video_labels['image_name'] = video_labels['video'].str.replace('.mp4', '') + '_' + video_labels['frame'].astype(str) + '.png'
# video_labels = video_labels[video_labels.groupby('image_name')['impact'].transform("sum") > 0].reset_index(drop=True)
# video_labels['impact'] = video_labels['impact'].astype(int)+1
# video_labels['x'] = video_labels['left']
# video_labels['y'] = video_labels['top']
# video_labels['w'] = video_labels['width']
# video_labels['h'] = video_labels['height']
# video_labels.head()

video_labels = pd.read_csv(os.path.join(BASEPATH,'video_labels.csv'), index_col=0)
video_labels.loc[:,'VID'] = ''
video_labels.loc[:, 'VID'] =  ['_'.join(re.split('_|\\.', vid)[0:2]) for vid in video_labels['video']]
display(video_labels.head())
print(video_labels.shape)

Unnamed: 0,gameKey,playID,view,video,frame,label,left,width,top,height,impact,impactType,confidence,visibility,image_name,x,y,w,h,VID
0,57583,82,Endzone,57583_000082_Endzone.mp4,34,V73,655,21,331,15,1,0,0.0,0.0,57583_000082_Endzone_34.png,655,331,21,15,57583_000082
1,57583,82,Endzone,57583_000082_Endzone.mp4,34,H99,583,21,312,30,2,0,0.0,0.0,57583_000082_Endzone_34.png,583,312,21,30,57583_000082
2,57583,82,Endzone,57583_000082_Endzone.mp4,34,V15,1069,22,301,20,1,0,0.0,0.0,57583_000082_Endzone_34.png,1069,301,22,20,57583_000082
3,57583,82,Endzone,57583_000082_Endzone.mp4,34,H97,402,21,313,29,1,0,0.0,0.0,57583_000082_Endzone_34.png,402,313,21,29,57583_000082
4,57583,82,Endzone,57583_000082_Endzone.mp4,34,V72,445,21,328,16,1,0,0.0,0.0,57583_000082_Endzone_34.png,445,328,21,16,57583_000082


(197838, 20)


In [9]:
# display(video_labels.head())
# print(video_labels.shape)
# video_labels.to_csv(os.path.join(BASEPATH,'video_labels.csv'), index=False)

In [10]:
# np.random.seed(0)
# video_names = np.random.permutation(video_labels.video.unique())
# valid_video_len = int(len(video_names)*0.2)
# video_valid = video_names[:valid_video_len]
# video_train = video_names[valid_video_len:]
# images_valid = video_labels[ video_labels.video.isin(video_valid)].image_name.unique()
# images_train = video_labels[~video_labels.video.isin(video_valid)].image_name.unique()


# Stratified K-Fold


In [11]:
# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)
gkf = GroupKFold(n_splits=5)
df_folds = video_labels[['image_name']].copy()
df_folds.loc[:, 'bbox_count'] = 1
df_folds = df_folds.groupby('image_name').count()
df_folds.loc[:, 'video'] = video_labels[['image_name', 'video']].groupby('image_name').min()['video']
# print(display(df_folds))

# df_folds.loc[:, 'stratify_group'] = np.char.add(
#     df_folds['video'].values.astype(str),
#     df_folds['bbox_count'].apply(lambda x: f'_{x // 20}').values.astype(str),
# )
df_folds.loc[:, 'stratify_group'] = video_labels[['image_name', 'VID']].groupby('image_name').min()['VID']
df_folds.loc[:, 'fold'] = 0
# print(display(df_folds))
# for fold_number, (train_index, val_index) in enumerate(skf.split(X=df_folds.index, y=df_folds['stratify_group'])):
#     df_folds.loc[df_folds.iloc[val_index].index, 'fold'] = fold_number
for fold_number, (train_index, val_index) in enumerate(gkf.split(X=df_folds.index, groups=df_folds['stratify_group'])):
    df_folds.loc[df_folds.iloc[val_index].index, 'fold'] = fold_number

# print(display(df_folds))


In [12]:
# print(video_labels[['VID', 'video']])

In [13]:
for i in range(5):
    print(list(df_folds['fold'].values).count(i))

2006
2004
2006
2003
1998


In [14]:
# df_folds.stratify_group.unique()
video_fold_dic = pd.DataFrame(columns=['VID','fold'])
for vid in df_folds.stratify_group.unique():
    fold = df_folds.fold[df_folds.stratify_group==vid].unique()
    # print(vid, fold)
    dftemp = pd.DataFrame({'VID': [vid],
                    'fold': [fold[0]]})
    video_fold_dic = video_fold_dic.append(dftemp, ignore_index= True)

display(video_fold_dic.head())

Unnamed: 0,VID,fold
0,57583_000082,1
1,57584_000336,2
2,57584_002674,2
3,57586_000540,4
4,57586_001934,2


In [15]:
# vid_list = video_fold_dic['VID'][video_fold_dic['fold']==0]
# print(vid_list)

In [16]:
uniq_video = video_labels.video.unique()
# video_dir = '/kaggle/input/nfl-impact-detection/train'
video_dir = os.path.join(BASEPATH, 'train')
out_dir_total = os.path.join(TRAIN_IMGPATH, 'train_images_total')
if not os.path.exists(out_dir_total):
    !mkdir -p $out_dir_total
    uniq_video = [path.split('/')[-1] for path in glob(f'{video_dir}/*.mp4')]
    for video_name in uniq_video:
        mk_images(video_name, pd.DataFrame(), video_dir, out_dir_total, only_with_impact=False)

print(out_dir_total)

NFL/Data/train_images_total


## Albumentations

In [17]:
def get_train_transforms():
    return A.Compose(
        [
            A.HorizontalFlip(p=0.5),
            # A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=10, interpolation=1, border_mode=4, 
            #                    value=None, mask_value=None, always_apply=False, p=0.5),
            A.OneOf([
              A.RandomSizedCrop(min_max_height=(500, 720), height=720, width=720, p=1.0),
              A.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=10, interpolation=1, border_mode=4, 
                                value=None, mask_value=None, always_apply=False, p=1.0),
            ], p=0.5),
            A.OneOf([
                A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit= 0.2, 
                                     val_shift_limit=0.2, p=0.9),
                A.RandomBrightnessContrast(brightness_limit=0.2, 
                                           contrast_limit=0.2, p=0.9),
            ], p=0.4),
            A.JpegCompression(quality_lower=85, quality_upper=95, p=0.2),
            A.Resize(height=512, width=512, p=1),
            A.OneOf([
                A.Blur(blur_limit=3, p=1.0),
                A.MedianBlur(blur_limit=3, p=1.0),
            ],p=0.1),
            A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.3),
            # A.Blur(blur_limit=5, always_apply=False, p=0.1),
            # A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30, val_shift_limit=20, always_apply=False, p=0.1),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0, 
            min_visibility=0,
            label_fields=['labels']
        )
    )

def get_valid_transforms():
    return A.Compose(
        [
            A.Resize(height=512, width=512, p=1.0),
            ToTensorV2(p=1.0),
        ], 
        p=1.0, 
        bbox_params=A.BboxParams(
            format='pascal_voc',
            min_area=0, 
            min_visibility=0,
            label_fields=['labels']
        )
    )

def get_ooftest_valid_transforms():
    return A.Compose([
            A.Resize(height=512, width=512, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.0)

## Dataset

In [18]:
TRAIN_ROOT_PATH = os.path.join(TRAIN_IMGPATH, 'train_images')
TRAIN_VAL_ROOT_PATH = os.path.join(TRAIN_IMGPATH, 'train_images_total')
class DatasetRetriever(Dataset):

    def __init__(self, marking, image_ids, transforms=None, test=False):
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        
        # image, boxes, labels = self.load_image_and_boxes(index)
        if self.test or random.random() > 0.5:
            image, boxes, labels = self.load_image_and_boxes(index)
        elif random.random() > 0.33:
            image, boxes, labels = self.load_cutmix_image_and_boxes(index)
        else:
            image, boxes, labels = self.load_mixup_image_and_boxes(index)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = torch.tensor(labels)
        target['image_id'] = torch.tensor([index])

        if self.transforms:
            for i in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  #yxyx: be warning
                    break
        return image, target, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

    def load_image_and_boxes(self, index):
        image_id = self.image_ids[index]
        # print(f'{TRAIN_ROOT_PATH}/{image_id}')
        image = cv2.imread(f'{TRAIN_ROOT_PATH}/{image_id}', cv2.IMREAD_COLOR).copy().astype(np.float32)
        # img = cv2.imread(f'{TRAIN_ROOT_PATH}/{image_id}', cv2.IMREAD_COLOR)
        # if img is NoneType:
        #     print(f'{TRAIN_ROOT_PATH}/{image_id}')
        # image = img.copy().astype(np.float32)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        records = self.marking[self.marking['image_name'] == image_id]
        boxes = records[['x', 'y', 'w', 'h']].values
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        labels = records['impact'].values
        return image, boxes, labels

    def load_mixup_image_and_boxes(self, index):
        image, boxes, labels = self.load_image_and_boxes(index)
        r_image, r_boxes, r_labels = self.load_image_and_boxes(random.randint(0, self.image_ids.shape[0] - 1))
        return (image+r_image)/2, np.vstack((boxes, r_boxes)).astype(np.int32), np.concatenate((labels, r_labels))
    

    def load_cutmix_image_and_boxes(self, index, imsize=720):
        """ 
        This implementation of cutmix author:  https://www.kaggle.com/nvnnghia 
        Refactoring and adaptation: https://www.kaggle.com/shonenkov
        """
        w, h = imsize, imsize
        s = imsize // 2
    
        xc, yc = [int(random.uniform(imsize * 0.25, imsize * 0.75)) for _ in range(2)]  # center x, y
        indexes = [index] + [random.randint(0, self.image_ids.shape[0] - 1) for _ in range(3)]

        result_image = np.full((imsize, imsize, 3), 1, dtype=np.float32)
        result_boxes = []
        result_labels = np.array([], dtype=np.int)

        for i, index in enumerate(indexes):
            image, boxes, labels = self.load_image_and_boxes(index)
            if i == 0:
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b]
            padw = x1a - x1b
            padh = y1a - y1b

            boxes[:, 0] += padw
            boxes[:, 1] += padh
            boxes[:, 2] += padw
            boxes[:, 3] += padh

            result_boxes.append(boxes)
            result_labels = np.concatenate((result_labels, labels))

        result_boxes = np.concatenate(result_boxes, 0)
        np.clip(result_boxes[:, 0:], 0, 2 * s, out=result_boxes[:, 0:])
        result_boxes = result_boxes.astype(np.int32)
        index_to_use = np.where((result_boxes[:,2]-result_boxes[:,0])*(result_boxes[:,3]-result_boxes[:,1]) > 0)
        result_boxes = result_boxes[index_to_use]
        result_labels = result_labels[index_to_use]
        
        return result_image, result_boxes, result_labels

In [19]:
class TestDatasetRetriever(Dataset):
    def __init__(self, image_ids, transforms=None):
        super().__init__()
        self.image_ids = image_ids
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        image = cv2.imread(f'{TRAIN_VAL_ROOT_PATH}/{image_id}', cv2.IMREAD_COLOR).copy().astype(np.float32)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        if self.transforms:
            sample = {'image': image}
            sample = self.transforms(**sample)
            image = sample['image']
        return image, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

In [20]:
# train_dataset = DatasetRetriever(
#     # image_ids=images_train,
#     image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
#     marking=video_labels,
#     transforms=get_train_transforms(),
#     test=False,
# )

# validation_dataset = DatasetRetriever(
#     # image_ids=images_valid,
#     image_ids=df_folds[df_folds['fold'] == fold_number].index.values,
#     marking=video_labels,
#     transforms=get_valid_transforms(),
#     test=True,
# )

## Fitter

In [21]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [22]:
import warnings

warnings.filterwarnings("ignore")

class Fitter:
    
    def __init__(self, model, device, config, checkpointfile, train_on_checkpoint):
        self.config = config
        self.epoch = 0

        self.base_dir = f'./{config.folder}'
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)
        
        self.log_path = f'{self.base_dir}/log.txt'
        self.best_summary_loss = 10**5

        self.model = model
        self.device = device

        param_optimizer = list(self.model.named_parameters())
        no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay': 0.001},
            {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay': 0.0}
        ] 

        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=config.lr)
        self.scheduler = config.SchedulerClass(self.optimizer, **config.scheduler_params)
        self.log(f'Fitter prepared. Device is {self.device}')
        self.checkpointfile = checkpointfile
        self.train_on_checkpoint = train_on_checkpoint

    def fit(self, train_loader, validation_loader):
        if self.train_on_checkpoint:
            self.load(self.checkpointfile)
            print("Loading from pretrained-model: {}".format(self.checkpointfile))
        for e in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch(train_loader)

            self.log(f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation(validation_loader)

            self.log(f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
                    os.remove(path)

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            self.epoch += 1

    def validation(self, val_loader):
        self.model.eval()
        summary_loss = AverageMeter()
        t = time.time()
        for step, (images, targets, image_ids) in enumerate(val_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(
                        f'Val Step {step}/{len(val_loader)}, ' + \
                        f'summary_loss: {summary_loss.avg:.5f}, ' + \
                        f'time: {(time.time() - t):.5f}', end='\r'
                    )
            with torch.no_grad():
                images = torch.stack(images)
                batch_size = images.shape[0]
                images = images.to(self.device).float()
                boxes = [target['boxes'].to(self.device).float() for target in targets]
                labels = [target['labels'].to(self.device).float() for target in targets]

                loss, _, _ = self.model(images, boxes, labels)
                summary_loss.update(loss.detach().item(), batch_size)

        return summary_loss

    def train_one_epoch(self, train_loader):
        self.model.train()
        summary_loss = AverageMeter()
        t = time.time()
        for step, (images, targets, image_ids) in enumerate(train_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(
                        f'Train Step {step}/{len(train_loader)}, ' + \
                        f'summary_loss: {summary_loss.avg:.5f}, ' + \
                        f'time: {(time.time() - t):.5f}', end='\r'
                    )
            
            images = torch.stack(images)
            images = images.to(self.device).float()
            batch_size = images.shape[0]
            boxes = [target['boxes'].to(self.device).float() for target in targets]
            labels = [target['labels'].to(self.device).float() for target in targets]

            self.optimizer.zero_grad()
            
            loss, _, _ = self.model(images, boxes, labels)
            
            loss.backward()

            summary_loss.update(loss.detach().item(), batch_size)

            self.optimizer.step()

            if self.config.step_scheduler:
                self.scheduler.step()

        return summary_loss
    
    def save(self, path):
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_summary_loss': self.best_summary_loss,
            'epoch': self.epoch,
        }, path)

    def load(self, path):
        checkpoint = torch.load(path)
        self.model.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_summary_loss = checkpoint['best_summary_loss']
        self.epoch = checkpoint['epoch'] + 1
        
    def log(self, message):
        if self.config.verbose:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

In [23]:
class TrainGlobalConfig:
    num_workers = 4
    batch_size = 4 
    val_batch_size = 16
    n_epochs = 5
    lr = 0.0002
    folder = os.path.join(outdir, 'effdet5-models')
    verbose = True
    verbose_step = 1
    step_scheduler = False
    validation_scheduler = True
    SchedulerClass = torch.optim.lr_scheduler.ReduceLROnPlateau
    scheduler_params = dict(
        mode='min',
        factor=0.5,
        patience=1,
        verbose=False, 
        threshold=0.0001,
        threshold_mode='abs',
        cooldown=0, 
        min_lr=1e-8,
        eps=1e-08
    )

In [24]:
def collate_fn(batch):
    return tuple(zip(*batch))

# def run_training():
#     if Kaggle:
#         device = torch.device('cuda:0')
#     else:
#         device = ('cuda' if torch.cuda.is_available() else 'cpu')
#     net.to(device)

#     train_loader = torch.utils.data.DataLoader(
#         train_dataset,
#         batch_size=TrainGlobalConfig.batch_size,
#         sampler=RandomSampler(train_dataset),
#         pin_memory=False,
#         drop_last=True,
#         num_workers=TrainGlobalConfig.num_workers,
#         collate_fn=collate_fn,
#     )
#     val_loader = torch.utils.data.DataLoader(
#         validation_dataset, 
#         batch_size=TrainGlobalConfig.batch_size,
#         num_workers=TrainGlobalConfig.num_workers,
#         shuffle=False,
#         sampler=SequentialSampler(validation_dataset),
#         pin_memory=False,
#         collate_fn=collate_fn,
#     )

#     fitter = Fitter(model=net, device=device, config=TrainGlobalConfig)
#     fitter.fit(train_loader, val_loader)

In [25]:
def make_predictions_withnet(net_, images, score_threshold=0.5):
    images = torch.stack(images).cuda().float()
    box_list = []
    score_list = []
    class_list = []
    with torch.no_grad():
        det = net_(images, torch.tensor([1]*images.shape[0]).float().cuda())
        for i in range(images.shape[0]):
            boxes = det[i].detach().cpu().numpy()[:,:4]    
            scores = det[i].detach().cpu().numpy()[:,4]   
            label = det[i].detach().cpu().numpy()[:,5]
            # useing only label = 2
            indexes = np.where((scores > score_threshold) & (label == 2))[0]
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
            if len(boxes[indexes]) == 0:
                continue
            box_list.append(boxes[indexes])
            score_list.append(scores[indexes])
            class_list.append(label[indexes]) 
    return box_list, class_list, score_list

def make_predictions_withnet_imglist(net_, images, image_ids, score_threshold=0.5):
    images = torch.stack(images).cuda().float()
    box_list = []
    score_list = []
    class_list = []
    img_id_list = []
    with torch.no_grad():
        det = net_(images, torch.tensor([1]*images.shape[0]).float().cuda())
        for i in range(images.shape[0]):
            boxes = det[i].detach().cpu().numpy()[:,:4]    
            scores = det[i].detach().cpu().numpy()[:,4]   
            label = det[i].detach().cpu().numpy()[:,5]
            # useing only label = 2
            indexes = np.where((scores > score_threshold) & (label == 2))[0]
            boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
            
            if len(boxes[indexes]) == 0:
                continue
            box_list.append(boxes[indexes])
            score_list.append(scores[indexes])
            class_list.append(label[indexes]) 
            img_id_list.append(image_ids[i])
            # print(img_id_list[-1])
    return box_list, class_list, score_list, img_id_list    

def make_predictions_withnet_loder(net_, data_loader_, score_threshold=0.5):   
    box_list = []
    score_list = []
    class_list = []
    image_id_list = []
    for images, image_ids in data_loader_:
        box_list_, class_list_, score_list_ = make_predictions_withnet(net_, images, score_threshold=score_threshold)
        box_list = box_list + box_list_
        score_list = score_list + score_list_
        class_list = class_list + class_list_
        image_id_list = image_id_list + [id for id in image_ids]
    return box_list, class_list, score_list, image_id_list

def make_predictions_validationset_withnet_loader(net_, data_loader_, val_gt_loader_, score_threshold=0.5):   
    box_list = []
    score_list = []
    class_list = []
    image_id_list = []
    val_img_id_list = []
    val_targets_list = []
    for images, image_ids in data_loader_:
        box_list_, class_list_, score_list_, img_id_list_ = make_predictions_withnet_imglist(net_, images, image_ids, score_threshold=score_threshold)
        box_list = box_list + box_list_
        score_list = score_list + score_list_
        class_list = class_list + class_list_
        # target_boxes = [target['boxes'].float() for target in targets]
        # targets_list = targets_list + target_boxes
        image_ids_index = [int(re.split('_|\\.',id)[3]) for id in img_id_list_]
        image_id_list = image_id_list + [id for id in image_ids_index]
    
    for images, targets, image_ids in val_gt_loader_:
        boxes = [target['boxes'].float() for target in targets]
        labels = [target['labels'].float() for target in targets]
        image_ids_index = [int(re.split('_|\\.',id)[3]) for id in image_ids]
        val_img_id_list = val_img_id_list + [id for id in image_ids_index]
        val_targets_list = val_targets_list + boxes

    return box_list, class_list, score_list, image_id_list, val_img_id_list, val_targets_list

In [26]:
def transfer_for_f1(pred_list, gt_list, pred_frame_id_list, gt_frame_id_list):
    pred_frame_num = len(pred_frame_id_list)
    print("Pred Frame num: ", pred_frame_num)
    gt_frame_num = len(gt_frame_id_list)
    print("Gt Frame num: ", gt_frame_num)
    if pred_frame_num != len(pred_list) or gt_frame_num != len(gt_list):
        print("ERROR in transfer_for_f1!!!!!!!!")
        return pred_list, gt_list, pred_frame_id_list, gt_frame_id_list

    preds_total = []
    gts_total = []
    preds = []
    gts = []
    new_video = True
    k = 0
    for i in range(len(pred_frame_id_list)):
        frame_id = pred_frame_id_list[i]
        new_video = (frame_id == 1)
        if new_video:
            if len(preds) > 0:
                print("pred {} video processed done, len preds {}:".format(k, len(preds)))
                preds_total.append(preds)
                k = k + 1
            preds = []
        preds.append([[frame_id, pred[0],pred[1],pred[2],pred[3]] for pred in pred_list[i] if len(pred_list[i])])
    if len(preds) > 0:
        preds_total.append(preds)
        print("pred {} video processed done, len preds {}:".format(k, len(preds)))
        k = k + 1

    new_video = True
    k = 0
    for i in range(len(gt_frame_id_list)):
        frame_id = gt_frame_id_list[i]
        new_video = (i==0) or (i>0 and i < len(gt_frame_id_list)-2 and gt_frame_id_list[i] < gt_frame_id_list[i-1] and gt_frame_id_list[i] < gt_frame_id_list[i+1])
        if new_video:
            if len(gts) > 0:
                print("gt {} video processed done, len preds {}:".format(k, len(gts)))
                gts_total.append(gts)
                k = k + 1
            gts = []
        gts.append([[frame_id, gt[0],gt[1],gt[2],gt[3]] for gt in gt_list[i]])
    if len(gts) > 0:
        gts_total.append(gts)
        print("gt {} video processed done, len preds {}:".format(k, len(gts)))
        k = k + 1

    print("preds_total len:", len(preds_total))
    print("gts_total len:", len(gts_total))
    return preds_total, gts_total

In [27]:
# a = []
# asub = []
# # asub.append([0,[g,g,g,g]] for g in range(4))
# # asub.append([0,[g,g,g,g]] for g in range(2))
# asub.append([[0, (g+i for i in range(4))]  for g in range(4)])
# asub.append([[0,g+1,g+2,g+3,g+4] for g in range(2)])
# print(asub)
# a.append(asub)
# # a = [[[0,1,1,1,1],[0,2,2,2,2],[0,4,4,4,4]],[[0,1,1,1,1],[0,2,2,2,2]]]
# np.save('test.npy',a)


In [28]:
a = [[3, id] for id in range(3)]
print(a)
a = []
if len(a)>0:
  print("21")
else:
  print("12")

[[3, 0], [3, 1], [3, 2]]
12


In [29]:
def load_net(checkpoint_path):
    config = get_efficientdet_config('tf_efficientdet_d5')
    net = EfficientDet(config, pretrained_backbone=False)
    config.num_classes = 2
    config.image_size=512
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    checkpoint = torch.load(checkpoint_path)
    net.load_state_dict(checkpoint['model_state_dict'])
    net = DetBenchEval(net, config)
    net.eval();
    return net.cuda()

In [30]:
def select_train_video_ids(fold_number):
    image_ids=np.array([path.split('/')[-1] for path in glob(f'{TRAIN_VAL_ROOT_PATH}/*.png')])
    print("image total number:", len(image_ids))
    # video_labels.loc[:, 'VID'] =  ['_'.join(re.split('_|\\.', vid)[0:2]) for vid in video_labels['video']]
    image_ids_vid = ['_'.join(re.split('_|\\.', imgid)[0:2]) for imgid in image_ids]
    vid_list = video_fold_dic['VID'][video_fold_dic['fold']==fold_number].values
    # print(vid_list)
    print("video list length:", len(vid_list))
    # print(np.unique(image_ids_vid))
    return np.array([image_ids[i] for i in range(len(image_ids)) if image_ids_vid[i] in vid_list])

In [31]:
def run_training_kfold(num_fold, train_or_predict = True):
    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    #device = torch.device('cuda:0')
    TRAIN_ON_CHECKPOINT = True
    if Kaggle:
        device = torch.device('cuda:0')
    else:
        device = ('cuda' if torch.cuda.is_available() else 'cpu')
    boxes = []
    targets = []
    pred_ids = []
    gt_ids = []
    for fold_number in range(num_fold):
        print('Fold: {}'.format(fold_number + 1))
        if fold_number == 1:
            break
        # if fold_number==4:
        #     TRAIN_ON_CHECKPOINT = True
        # else:
        #     TRAIN_ON_CHECKPOINT = False
        print("TRAIN_ON_CHECKPOINT:", TRAIN_ON_CHECKPOINT)
        train_dataset = DatasetRetriever(
            image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
            marking=video_labels,
            transforms=get_train_transforms(),
            test=False,
        )
        # image_ids=df_folds[df_folds['fold'] == fold_number].index.values
        # print("Val dataset imgid:", image_ids)
        # print("Val dataset imgid type:", type(image_ids))
        validation_dataset = DatasetRetriever(
            image_ids=df_folds[df_folds['fold'] == fold_number].index.values,
            marking=video_labels,
            transforms=get_valid_transforms(),
            test=True,
        )
        # print("preparing dataset done!")
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=TrainGlobalConfig.batch_size,
            sampler=RandomSampler(train_dataset),
            pin_memory=False,
            drop_last=True,
            num_workers=TrainGlobalConfig.num_workers,
            collate_fn=collate_fn,
        )
        val_loader = torch.utils.data.DataLoader(
            validation_dataset, 
            batch_size=TrainGlobalConfig.batch_size,
            num_workers=TrainGlobalConfig.num_workers,
            shuffle=False,
            sampler=SequentialSampler(validation_dataset),
            pin_memory=False,
            collate_fn=collate_fn,
        )
        print("selecting test image ids...")
        test_image_ids = select_train_video_ids(fold_number)
        # print(test_image_ids)
        # print(type(test_image_ids))
        print("selecting {} images for fold {}".format(len(test_image_ids),fold_number))
        test_dataset = TestDatasetRetriever(
            # image_ids=np.array([path.split('/')[-1] for path in glob(f'{TRAIN_ROOT_PATH}/*.png')]),
            # image_ids=df_folds[df_folds['fold'] == fold_number].index.values,
            image_ids = test_image_ids,
            transforms=get_ooftest_valid_transforms()
        )
        test_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=16,
            shuffle=False,
            num_workers=4,
            drop_last=False,
            collate_fn=collate_fn
        )
        # print("preparing loader done")
        if train_or_predict:
            net, checkpoint_file = get_net(fold_number, TRAIN_ON_CHECKPOINT)
            net.to(device)
            TrainGlobalConfig.folder = os.path.join(outdir, f'effdet5-models/fold{fold_number}')
            fitter = Fitter(model=net, device=device, config=TrainGlobalConfig, checkpointfile=checkpoint_file, train_on_checkpoint = TRAIN_ON_CHECKPOINT)
            fitter.fit(train_loader, val_loader)
        else:
            print("WE ARE GOING TO CALCULATE CV SCORES FOR EACH VIDEOS!!!!")
            if Colab:
                models_list = os.listdir(os.path.join(MODELS_PATH, f'effdet5-models/fold{fold_number}'))
                # print("all files in models path", models_list)
                models_list = [i for i in models_list if "best" in i]
                fold_best_model = os.path.join(MODELS_PATH, f'effdet5-models/fold{fold_number}', models_list[-1])
            else:
                models_list = os.listdir(os.path.join(MODELS_PATH))
                models_list = [i for i in models_list if f'fold{fold_number}' in i]
                fold_best_model = os.path.join(MODELS_PATH, models_list[-1])
            print("loading model name: ", fold_best_model)
            net_ = load_net(fold_best_model)
            net_.to(device)  
            box_list, class_list, score_list, image_id_list, val_img_id_list, val_targets_list = make_predictions_validationset_withnet_loader(net_, test_loader, val_loader, score_threshold=SCORE_TH)
            boxes = boxes + box_list
            targets = targets + val_targets_list
            pred_ids = pred_ids + image_id_list
            gt_ids = gt_ids + val_img_id_list

    if not train_or_predict:
        print("Transfer to cv score data....")
        preds, gts = transfer_for_f1(boxes, targets, pred_ids, gt_ids)
        np.save(os.path.join(outdir, 'oof-gt.npy'), gts)
        np.save(os.path.join(outdir, 'oof-pred.npy'), preds)
        np.savez(os.path.join(outdir, 'oof'), gts=gts, preds=preds) 
        print("Transmation done!!")            

In [32]:
def iou(bbox1, bbox2):
    bbox1 = [float(x) for x in bbox1]
    bbox2 = [float(x) for x in bbox2]

    (x0_1, y0_1, x1_1, y1_1) = bbox1
    (x0_2, y0_2, x1_2, y1_2) = bbox2

    # get the overlap rectangle
    overlap_x0 = max(x0_1, x0_2)
    overlap_y0 = max(y0_1, y0_2)
    overlap_x1 = min(x1_1, x1_2)
    overlap_y1 = min(y1_1, y1_2)

    # check if there is an overlap
    if overlap_x1 - overlap_x0 <= 0 or overlap_y1 - overlap_y0 <= 0:
            return 0

    # if yes, calculate the ratio of the overlap to each ROI size and the unified size
    size_1 = (x1_1 - x0_1) * (y1_1 - y0_1)
    size_2 = (x1_2 - x0_2) * (y1_2 - y0_2)
    size_intersection = (overlap_x1 - overlap_x0) * (overlap_y1 - overlap_y0)
    size_union = size_1 + size_2 - size_intersection

    return size_intersection / size_union

In [33]:
def precision_calc(gt_boxes, pred_boxes):
    cost_matix = np.ones((len(gt_boxes), len(pred_boxes)))
    for i, box1 in enumerate(gt_boxes):
        for j, box2 in enumerate(pred_boxes):
            dist = abs(box1[0]-box2[0])
            if dist > 4:
                continue
            iou_score = iou(box1[1:], box2[1:])

            if iou_score < 0.35:
                continue
            else:
                cost_matix[i,j]=0

    row_ind, col_ind = linear_sum_assignment(cost_matix)
    fn = len(gt_boxes) - row_ind.shape[0]
    fp = len(pred_boxes) - col_ind.shape[0]
    tp=0
    for i, j in zip(row_ind, col_ind):
        if cost_matix[i,j]==0:
            tp+=1
        else:
            fp+=1
            fn+=1
    return tp, fp, fn

In [34]:
def cal_cvscore(oof_file = os.path.join(outdir, 'oof.npz'), oof_gt_file = os.path.join(outdir, 'oof-gt.npy'), oof_pred_file = os.path.join(outdir, 'oof-pred.npy')):
    #Calculating CV score.
    testdata = np.load(oof_file, allow_pickle=True)
    preds = np.load(oof_pred_file, allow_pickle=True)
    gts = np.load(oof_gt_file, allow_pickle=True)
    ftp, ffp, ffn = [], [], []
    # for count, data in enumerate(testdata):
    #     print(count)
    #     print(data)
    #     gt_boxes = data['gts']
    #     print(gt_boxes)
    #     pred_boxes = data['preds']
    #     tp, fp, fn = precision_calc(gt_boxes, pred_boxes)
    #     ftp.append(tp)
    #     ffp.append(fp)
    #     ffn.append(fn)

    for i in range(len(preds)):
        gt_boxes = gts[i]
        print("gt_boxes in video{} is len {}".format(i, len(gt_boxes)))
        print(gt_boxes[0][:3])
        pred_boxes = preds[i]
        print(pred_boxes[:15])
        print("pred_boxes in video{} is len {}".format(i, len(pred_boxes)))
        tp, fp, fn = precision_calc(gt_boxes, pred_boxes)
        ftp.append(tp)
        ffp.append(fp)
        ffn.append(fn)

    tp = np.sum(ftp)
    fp = np.sum(ffp)
    fn = np.sum(ffn)
    precision = tp / (tp + fp + 1e-6)
    recall =  tp / (tp + fn +1e-6)
    f1_score = 2*(precision*recall)/(precision+recall+1e-6)
    print(f'TP: {tp}, FP: {fp}, FN: {fn}, PRECISION: {precision:.4f}, RECALL: {recall:.4f}, F1 SCORE: {f1_score}')

In [35]:
def get_net(fold_number, train_on_checkpoint):
    # config = get_efficientdet_config('tf_efficientdet_d5')
    # print("Enter get_net, TRAIN_ON_CHECKPOINT:", train_on_checkpoint)
    config = get_efficientdet_config('tf_efficientdet_d5')

    net = EfficientDet(config, pretrained_backbone=False)

    if train_on_checkpoint:
        # print(outdir)
        checkpointfile = os.path.join(outdir, f'effdet5-models/fold{fold_number}/last-checkpoint.bin')
    else:
        checkpoint = torch.load(os.path.join(PATH, 'nfl-models/efficientdet_d5-ef44aea8.pth'))
        net.load_state_dict(checkpoint)
        checkpointfile = ''
    print("checkpointfile:", checkpointfile)
    # checkpoint = torch.load(os.path.join(PATH, 'nfl-models/efficientdet_d5-ef44aea8.pth'))
    # checkpoint = torch.load(os.path.join(PATH, 'nfl-models/efficientdet_d4-5b370b7a.pth'))
    # net.load_state_dict(checkpoint)
    config.num_classes = 2
    config.image_size = 512
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    return DetBenchTrain(net, config), checkpointfile

# net, checkpointfile = get_net()

In [36]:
# run_training()
run_training_kfold(num_fold=5, train_or_predict = TRAIN)

Fold: 1
TRAIN_ON_CHECKPOINT: True
selecting test image ids...
image total number: 44448
video list length: 12
selecting 8060 images for fold 0
WE ARE GOING TO CALCULATE CV SCORES FOR EACH VIDEOS!!!!
loading model name:  NFL/res/efficientDet-res/ED5-512/org-epoch20-aug3/effdet5-models/fold0/best-checkpoint-045epoch.bin
Fold: 2
Transfer to cv score data....
Pred Frame num:  2637
Gt Frame num:  2006
pred 0 video processed done, len preds 465:
pred 1 video processed done, len preds 1072:
pred 2 video processed done, len preds 540:
pred 3 video processed done, len preds 177:
pred 4 video processed done, len preds 383:
gt 0 video processed done, len preds 76:
gt 1 video processed done, len preds 19:
gt 2 video processed done, len preds 4:
gt 3 video processed done, len preds 62:
gt 4 video processed done, len preds 19:
gt 5 video processed done, len preds 4:
gt 6 video processed done, len preds 54:
gt 7 video processed done, len preds 92:
gt 8 video processed done, len preds 74:
gt 9 video p

In [37]:
# print(gts)

In [38]:
if not TRAIN:
    print("Calculating cv scores!!")
    cal_cvscore()

Calculating cv scores!!
gt_boxes in video0 is len 76
[[103, tensor(131.5556), tensor(258.8000), tensor(147.2000), tensor(267.6000)], [103, tensor(188.4444), tensor(142.4000), tensor(209.0667), tensor(153.2000)], [103, tensor(210.4889), tensor(119.6000), tensor(226.1333), tensor(128.8000)]]
[[[440, 300.46188, 274.1194, 308.27188, 286.67383]], [[448, 289.10715, 275.25616, 297.90555, 293.18146]], [[449, 291.15408, 276.40753, 299.4592, 293.5833], [449, 299.3325, 285.54956, 305.62302, 293.66235]], [[450, 289.16653, 276.67752, 297.9338, 294.15665], [450, 300.111, 285.03403, 306.54904, 294.02066]], [[451, 289.34167, 276.16528, 298.05408, 293.84802], [451, 300.25223, 284.88904, 306.9608, 294.2168]], [[452, 299.32935, 284.06546, 306.1618, 294.11008], [452, 289.89648, 277.66656, 298.23322, 294.22247]], [[41, 222.14024, 211.70863, 230.51152, 229.62093]], [[42, 224.18207, 216.90019, 230.89047, 229.73042]], [[43, 223.27008, 218.63528, 229.75018, 230.53311]], [[44, 220.6662, 219.92197, 227.13757, 23

TypeError: ignored

In [None]:
# clearing working dir
# be careful when running this code on local environment!
# !rm -rf *
# !mv * /tmp/train_images

# import shutil
# shutil.rmtree(out_dir)  