This notebook trains a detector for two object classes:
- class1: helmet without impact
- class2: helmet with impact

The object detection part is based on the [EfficientDet notebook](https://www.kaggle.com/shonenkov/training-efficientdet) for the [global wheat detection competition](https://www.kaggle.com/c/global-wheat-detection) by [shonenkov](https://www.kaggle.com/shonenkov), which uses the GitHub repository [efficientdet-pytorch](https://github.com/rwightman/efficientdet-pytorch) by [@rwightman](https://www.kaggle.com/rwightman).

The inference part can be found [here](https://www.kaggle.com/its7171/2class-object-detection-inference/).

In [1]:
# Runtime switch: True when running inside a Kaggle kernel, False when running
# on Google Colab with the data stored on Google Drive.
Kaggle = False
# NOTE: `!Kaggle` is IPython shell-escape syntax (it runs a shell command named
# "Kaggle" and captures its output), not boolean negation. Use `not` so that
# Colab is a real bool and always the logical complement of Kaggle.
Colab = not Kaggle

In [2]:
import os, sys
from pathlib import Path

# On Colab: mount Google Drive and switch the working directory to the Drive
# root, so that the relative "NFL/..." paths used below resolve against it.
if Colab:
    from google.colab import drive
    drive.mount('/content/drive')

    path = "/content/drive/My Drive"
    os.chdir(path)
    # Result is discarded (not the last expression of the cell); this only
    # raises if the Drive root is not accessible.
    os.listdir(path)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
if Kaggle:
    !pip install ../input/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz ../input/nfl-lib/pkgs.tgz
else:
    !pip install NFL/nfl-lib/timm-0.1.26-py3-none-any.whl
    !tar xfz NFL/nfl-lib/pkgs.tgz    
    !pip install albumentations==0.4.6
# for pytorch1.6
cmd = "sed -i -e 's/ \/ / \/\/ /' timm-efficientdet-pytorch/effdet/bench.py"
!$cmd



In [4]:
import sys
sys.path.insert(0, "timm-efficientdet-pytorch")
sys.path.insert(0, "omegaconf")

import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GroupKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob
import pandas as pd
from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain
from effdet.efficientdet import HeadNet
from tqdm import tqdm

# Global seed used to make the run repeatable.
SEED = 42


def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and current CUDA
    device), exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic,
    non-benchmarking mode.

    Args:
        seed: integer seed applied to all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
        seeder(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True


seed_everything(SEED)

# Data Preparation

In [5]:
# Resolve the dataset location (BASEPATH), the directory holding the extracted
# frames (TRAIN_IMGPATH) and the output directory (outdir) for this runtime.
if Kaggle:
    BASEPATH = "../input/nfl-impact-detection"
    outdir = '.'
    TRAIN_IMGPATH = outdir
else:
    PATH = 'NFL/'
    BASEPATH = PATH + 'Data'
    TRAIN_IMGPATH = BASEPATH
    # Alternative backbones used in other runs:
    # MODELNAME = "ED5-512"
    # MODELNAME = "ED4-512"
    MODELNAME = "ED6-512"
    VERSION = '{}'.format(MODELNAME)
    # Fixed experiment tag (a date-based tag was used in earlier runs).
    timestampStr = 'org-epoch20-aug3-gkfold'
    # <PATH>/res/efficientDet-res/<VERSION>/<timestampStr>, kept as a plain str.
    outdir = os.path.join(PATH, 'res', 'efficientDet-res', VERSION, timestampStr)
    # Creates every missing level in one call and is a no-op when the
    # directory already exists (safe on re-run).
    os.makedirs(outdir, exist_ok=True)

In [6]:
# video_labels = pd.read_csv(os.path.join(BASEPATH, 'train_labels.csv')).fillna(0)
# video_labels_with_impact = video_labels[video_labels['impact'] > 0]
# for row in tqdm(video_labels_with_impact[['video','frame','label']].values):
#     frames = np.array([-4,-3,-2,-1,1,2,3,4])+row[1]
#     video_labels.loc[(video_labels['video'] == row[0]) 
#                                  & (video_labels['frame'].isin(frames))
#                                  & (video_labels['label'] == row[2]), 'impact'] = 1
# video_labels['image_name'] = video_labels['video'].str.replace('.mp4', '') + '_' + video_labels['frame'].astype(str) + '.png'
# video_labels = video_labels[video_labels.groupby('image_name')['impact'].transform("sum") > 0].reset_index(drop=True)
# video_labels['impact'] = video_labels['impact'].astype(int)+1
# video_labels['x'] = video_labels['left']
# video_labels['y'] = video_labels['top']
# video_labels['w'] = video_labels['width']
# video_labels['h'] = video_labels['height']
# video_labels.head()

import re

# Pre-built per-box label table; the first CSV column is used as the index.
video_labels = pd.read_csv(os.path.join(BASEPATH, 'video_labels.csv'), index_col=0)

# VID keeps the first two tokens of the video file name when split on "_" or
# ".", e.g. "57583_000082_Endzone.mp4" -> "57583_000082".
video_labels['VID'] = [
    '_'.join(re.split('_|\\.', video_file)[:2])
    for video_file in video_labels['video']
]
display(video_labels.head())
print(video_labels.shape)

Unnamed: 0,gameKey,playID,view,video,frame,label,left,width,top,height,impact,impactType,confidence,visibility,image_name,x,y,w,h,VID
0,57583,82,Endzone,57583_000082_Endzone.mp4,34,V73,655,21,331,15,1,0,0.0,0.0,57583_000082_Endzone_34.png,655,331,21,15,57583_000082
1,57583,82,Endzone,57583_000082_Endzone.mp4,34,H99,583,21,312,30,2,0,0.0,0.0,57583_000082_Endzone_34.png,583,312,21,30,57583_000082
2,57583,82,Endzone,57583_000082_Endzone.mp4,34,V15,1069,22,301,20,1,0,0.0,0.0,57583_000082_Endzone_34.png,1069,301,22,20,57583_000082
3,57583,82,Endzone,57583_000082_Endzone.mp4,34,H97,402,21,313,29,1,0,0.0,0.0,57583_000082_Endzone_34.png,402,313,21,29,57583_000082
4,57583,82,Endzone,57583_000082_Endzone.mp4,34,V72,445,21,328,16,1,0,0.0,0.0,57583_000082_Endzone_34.png,445,328,21,16,57583_000082


(197838, 20)


In [7]:
# display(video_labels.head())
# print(video_labels.shape)
# video_labels.to_csv(os.path.join(BASEPATH,'video_labels.csv'), index=False)

In [8]:
# np.random.seed(0)
# video_names = np.random.permutation(video_labels.video.unique())
# valid_video_len = int(len(video_names)*0.2)
# video_valid = video_names[:valid_video_len]
# video_train = video_names[valid_video_len:]
# images_valid = video_labels[ video_labels.video.isin(video_valid)].image_name.unique()
# images_train = video_labels[~video_labels.video.isin(video_valid)].image_name.unique()


## Stratified K-Fold (disabled, kept for reference)

In [9]:
# skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# df_folds = video_labels[['image_name']].copy()
# df_folds.loc[:, 'bbox_count'] = 1
# df_folds = df_folds.groupby('image_name').count()
# df_folds.loc[:, 'video'] = video_labels[['image_name', 'video']].groupby('image_name').min()['video']
# df_folds.loc[:, 'stratify_group'] = np.char.add(
#     df_folds['video'].values.astype(str),
#     df_folds['bbox_count'].apply(lambda x: f'_{x // 20}').values.astype(str),
# )

# df_folds.loc[:, 'fold'] = 0
# for fold_number, (train_index, val_index) in enumerate(skf.split(X=df_folds.index, y=df_folds['stratify_group'])):
#     df_folds.loc[df_folds.iloc[val_index].index, 'fold'] = fold_number

## Group K-Fold

In [10]:
# Group K-Fold: all images that share a VID land in the same fold, so frames
# from one VID never appear on both the training and the validation side.
gkf = GroupKFold(n_splits=5)

# One row per image: number of boxes, source video and the grouping key.
df_folds = video_labels[['image_name']].copy()
df_folds.loc[:, 'bbox_count'] = 1
df_folds = df_folds.groupby('image_name').count()
df_folds.loc[:, 'video'] = video_labels[['image_name', 'video']].groupby('image_name').min()['video']
# Column name kept as 'stratify_group' for compatibility with later cells;
# here it holds the GroupKFold group (VID), not a stratification label.
df_folds.loc[:, 'stratify_group'] = video_labels[['image_name', 'VID']].groupby('image_name').min()['VID']
df_folds.loc[:, 'fold'] = 0
for fold_number, (train_index, val_index) in enumerate(gkf.split(X=df_folds.index, groups=df_folds['stratify_group'])):
    df_folds.loc[df_folds.iloc[val_index].index, 'fold'] = fold_number
# display() returns None; wrapping it in print() emitted a stray "None".
display(df_folds)

Unnamed: 0_level_0,bbox_count,video,stratify_group,fold
image_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
57583_000082_Endzone_106.png,20,57583_000082_Endzone.mp4,57583_000082,1
57583_000082_Endzone_107.png,19,57583_000082_Endzone.mp4,57583_000082,1
57583_000082_Endzone_108.png,19,57583_000082_Endzone.mp4,57583_000082,1
57583_000082_Endzone_109.png,20,57583_000082_Endzone.mp4,57583_000082,1
57583_000082_Endzone_110.png,20,57583_000082_Endzone.mp4,57583_000082,1
...,...,...,...,...
58107_004362_Sideline_68.png,21,58107_004362_Sideline.mp4,58107_004362,0
58107_004362_Sideline_69.png,21,58107_004362_Sideline.mp4,58107_004362,0
58107_004362_Sideline_70.png,21,58107_004362_Sideline.mp4,58107_004362,0
58107_004362_Sideline_71.png,21,58107_004362_Sideline.mp4,58107_004362,0


None


In [11]:
# Number of images assigned to each of the five validation folds.
for i in range(5):
    print((df_folds['fold'].values == i).sum())

2006
2004
2006
2003
1998


In [12]:
# Lookup table VID -> fold. Rows are collected in a list and the frame is
# built once: DataFrame.append inside a loop is quadratic and no longer exists
# in pandas 2.x.
fold_records = []
for vid in df_folds.stratify_group.unique():
    fold = df_folds.fold[df_folds.stratify_group == vid].unique()
    print(vid, fold)
    fold_records.append({'VID': vid, 'fold': fold[0]})
video_fold_dic = pd.DataFrame(fold_records, columns=['VID', 'fold'])

display(video_fold_dic.head())

57583_000082 [1]
57584_000336 [2]
57584_002674 [2]
57586_000540 [4]
57586_001934 [2]
57586_004152 [1]
57594_000923 [0]
57596_002686 [1]
57597_000658 [0]
57597_001242 [3]
57676_003572 [3]
57679_003316 [4]
57680_002206 [3]
57680_003470 [0]
57682_002630 [3]
57684_001985 [1]
57686_002546 [1]
57700_001264 [1]
57775_000933 [1]
57778_004244 [0]
57781_000252 [4]
57782_000600 [1]
57783_003374 [2]
57784_001741 [2]
57785_002026 [2]
57786_003085 [1]
57787_003413 [4]
57788_000781 [2]
57790_002792 [4]
57790_002839 [0]
57904_001367 [3]
57905_002404 [1]
57906_000718 [3]
57907_003615 [0]
57910_001164 [4]
57911_000147 [1]
57911_002492 [4]
57912_001325 [2]
57913_000218 [4]
57915_003093 [2]
57992_000301 [4]
57992_000350 [3]
57993_000475 [0]
57995_000109 [0]
57997_003691 [3]
57998_002181 [0]
58000_001306 [2]
58005_001254 [3]
58005_001612 [2]
58048_000086 [3]
58093_001923 [2]
58094_000423 [4]
58094_002819 [3]
58095_004022 [1]
58098_001193 [0]
58102_002798 [4]
58103_003494 [0]
58104_000352 [3]
58106_002918 [

Unnamed: 0,VID,fold
0,57583_000082,1
1,57584_000336,2
2,57584_002674,2
3,57586_000540,4
4,57586_001934,2


In [13]:
def mk_images(video_name, video_labels, video_dir, out_dir, only_with_impact=True):
    """Extract frames of one video and write them as PNG files.

    Frames are numbered from 1 in read order. Each written file is named after
    the video with ".mp4" replaced by "_<frame>.png" and placed in ``out_dir``.

    Args:
        video_name: file name of the video inside ``video_dir``.
        video_labels: label table with at least ``video``, ``frame`` and
            ``impact`` columns.
        video_dir: directory containing the video.
        out_dir: directory receiving the PNG files (must already exist).
        only_with_impact: when True, only frames that contain at least one
            box with ``impact > 1.0`` are written; otherwise every frame is.
    """
    video_path = f"{video_dir}/{video_name}"
    video_name = os.path.basename(video_path)

    if only_with_impact:
        # Look up the impact frames once instead of querying the whole label
        # table for every decoded frame.
        boxes_all = video_labels.query("video == @video_name")
        impact_boxes = boxes_all[boxes_all.impact > 1.0]
        impact_frames = set(impact_boxes['frame'])
        print(video_path, impact_boxes.shape[0])
    else:
        impact_frames = None
        print(video_path)

    vidcap = cv2.VideoCapture(video_path)
    try:
        frame = 0
        while True:
            it_worked, img = vidcap.read()
            if not it_worked:
                break
            frame += 1
            if impact_frames is not None and frame not in impact_frames:
                continue
            # Substitute the extension on the file name only, so an out_dir
            # that happens to contain ".mp4" is left intact.
            image_name = video_name.replace('.mp4', f'_{frame}.png')
            cv2.imwrite(f'{out_dir}/{image_name}', img)
    finally:
        # Always free the decoder handle, even if reading/writing fails.
        vidcap.release()

In [14]:
# Source video directory and destination directory for the extracted frames.
# video_dir = '/kaggle/input/nfl-impact-detection/train'
video_dir = os.path.join(BASEPATH, 'train')
out_dir = os.path.join(TRAIN_IMGPATH, 'train_images')
uniq_video = video_labels['video'].unique()
print(out_dir)

# Frame extraction itself is left disabled in this run:
# !mkdir -p $out_dir
# for video_name in uniq_video:
#     mk_images(video_name, video_labels, video_dir, out_dir)

NFL/Data/train_images


## Albumentations

In [15]:
def get_train_transforms():
    """Build the training augmentation pipeline.

    Order: horizontal flip, one geometric transform (random sized crop or
    shift/scale/rotate), one colour transform, JPEG compression, resize to
    512x512, one blur, cutout and finally conversion to a tensor. Boxes are
    expected in pascal_voc format with their classes in the ``labels`` field.

    Returns:
        An ``A.Compose`` callable taking ``image``, ``bboxes`` and ``labels``.
    """
    geometric = A.OneOf(
        [
            A.RandomSizedCrop(min_max_height=(500, 720), height=720, width=720, p=1.0),
            A.ShiftScaleRotate(
                shift_limit=0.1, scale_limit=0.1, rotate_limit=10,
                interpolation=1, border_mode=4,
                value=None, mask_value=None, always_apply=False, p=1.0,
            ),
        ],
        p=0.5,
    )
    colour = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                 val_shift_limit=0.2, p=0.9),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2, p=0.9),
        ],
        p=0.4,
    )
    blur = A.OneOf(
        [
            A.Blur(blur_limit=3, p=1.0),
            A.MedianBlur(blur_limit=3, p=1.0),
        ],
        p=0.1,
    )
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )

    pipeline = [
        A.HorizontalFlip(p=0.5),
        geometric,
        colour,
        A.JpegCompression(quality_lower=85, quality_upper=95, p=0.2),
        A.Resize(height=512, width=512, p=1),
        blur,
        A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.3),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the validation pipeline: resize to 512x512, then convert to tensor.

    Boxes are expected in pascal_voc format with classes in ``labels``.

    Returns:
        An ``A.Compose`` callable taking ``image``, ``bboxes`` and ``labels``.
    """
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    pipeline = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline, p=1.0, bbox_params=box_settings)

## Dataset

In [16]:
# Directory from which DatasetRetriever reads the frame images.
TRAIN_ROOT_PATH = os.path.join(TRAIN_IMGPATH, 'train_images')

class DatasetRetriever(Dataset):
    """Dataset yielding ``(image, target, image_id)`` for helmet detection.

    ``target`` is a dict with ``boxes``, ``labels`` (taken from the ``impact``
    column) and ``image_id``. In training mode (``test=False``) a sample is
    randomly a plain image, a 4-tile cutmix mosaic or a 2-image mixup.

    Args:
        marking: label table with ``image_name``, ``x``, ``y``, ``w``, ``h``
            and ``impact`` columns.
        image_ids: array of image file names (must expose ``.shape``).
        transforms: optional callable invoked with ``image``, ``bboxes`` and
            ``labels`` keyword arguments, returning a dict with the same keys.
        test: when True, cutmix/mixup are disabled.
    """

    def __init__(self, marking, image_ids, transforms=None, test=False):
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]

        # ~50% plain image; of the remainder ~67% cutmix and ~33% mixup.
        if self.test or random.random() > 0.5:
            image, boxes, labels = self.load_image_and_boxes(index)
        elif random.random() > 0.33:
            image, boxes, labels = self.load_cutmix_image_and_boxes(index)
        else:
            image, boxes, labels = self.load_mixup_image_and_boxes(index)

        target = {}
        target['boxes'] = boxes
        target['labels'] = torch.tensor(labels)
        target['image_id'] = torch.tensor([index])

        if self.transforms:
            # Re-draw the augmentation (up to 10 times) until at least one box
            # survives. NOTE(review): if every attempt fails, the untransformed
            # numpy image/boxes are returned as-is.
            for i in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]  #yxyx: be warning
                    # The transform may drop boxes; take the labels it returns
                    # so that boxes and classes stay aligned one-to-one.
                    target['labels'] = torch.tensor(np.asarray(sample['labels']))
                    break
        return image, target, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]

    def load_image_and_boxes(self, index):
        """Load one image as float32 RGB in [0, 1] with its boxes and labels.

        Returns:
            ``(image, boxes, labels)`` where boxes are ``[x1, y1, x2, y2]``
            computed from the table's x/y/w/h columns.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index]
        image_path = f'{TRAIN_ROOT_PATH}/{image_id}'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        records = self.marking[self.marking['image_name'] == image_id]
        boxes = records[['x', 'y', 'w', 'h']].values
        # Convert (x, y, w, h) to (x1, y1, x2, y2) in place.
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        labels = records['impact'].values
        return image, boxes, labels

    def load_mixup_image_and_boxes(self, index):
        """Average the indexed image with a random one and merge their boxes."""
        image, boxes, labels = self.load_image_and_boxes(index)
        r_image, r_boxes, r_labels = self.load_image_and_boxes(random.randint(0, self.image_ids.shape[0] - 1))
        return (image+r_image)/2, np.vstack((boxes, r_boxes)).astype(np.int32), np.concatenate((labels, r_labels))

    def load_cutmix_image_and_boxes(self, index, imsize=720):
        """
        Build an ``imsize`` x ``imsize`` mosaic from the indexed image and
        three random images, split at a random centre point.

        This implementation of cutmix author:  https://www.kaggle.com/nvnnghia
        Refactoring and adaptation: https://www.kaggle.com/shonenkov

        Only the ``[0:imsize, 0:imsize]`` window of each source image is
        sampled; boxes are clipped to the tile their image was pasted into, so
        boxes lying in parts of a source image that were not pasted are
        discarded.
        """
        w, h = imsize, imsize
        s = imsize // 2

        xc, yc = [int(random.uniform(imsize * 0.25, imsize * 0.75)) for _ in range(2)]  # center x, y
        indexes = [index] + [random.randint(0, self.image_ids.shape[0] - 1) for _ in range(3)]

        result_image = np.full((imsize, imsize, 3), 1, dtype=np.float32)
        result_boxes = []
        # np.int was removed from NumPy (1.24); use an explicit integer dtype.
        result_labels = np.array([], dtype=np.int64)

        for i, index in enumerate(indexes):
            image, boxes, labels = self.load_image_and_boxes(index)
            if i == 0:  # top left
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b]
            padw = x1a - x1b
            padh = y1a - y1b

            # Shift boxes into mosaic coordinates and clip them to this tile.
            # Boxes outside the pasted region collapse to zero area and are
            # filtered out below.
            boxes[:, [0, 2]] = np.clip(boxes[:, [0, 2]] + padw, x1a, x2a)
            boxes[:, [1, 3]] = np.clip(boxes[:, [1, 3]] + padh, y1a, y2a)

            result_boxes.append(boxes)
            result_labels = np.concatenate((result_labels, labels))

        result_boxes = np.concatenate(result_boxes, 0)
        np.clip(result_boxes[:, 0:], 0, 2 * s, out=result_boxes[:, 0:])
        result_boxes = result_boxes.astype(np.int32)
        # Keep only boxes with a strictly positive area.
        index_to_use = np.where((result_boxes[:,2]-result_boxes[:,0])*(result_boxes[:,3]-result_boxes[:,1]) > 0)
        result_boxes = result_boxes[index_to_use]
        result_labels = result_labels[index_to_use]

        return result_image, result_boxes, result_labels

In [17]:
# train_dataset = DatasetRetriever(
#     # image_ids=images_train,
#     image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
#     marking=video_labels,
#     transforms=get_train_transforms(),
#     test=False,
# )

# validation_dataset = DatasetRetriever(
#     # image_ids=images_valid,
#     image_ids=df_folds[df_folds['fold'] == fold_number].index.values,
#     marking=video_labels,
#     transforms=get_valid_transforms(),
#     test=True,
# )

## Fitter

In [18]:
class AverageMeter(object):
    """Tracks the latest value and the running weighted mean of a metric.

    Attributes (all reset to 0 by ``reset``):
        val: most recent value passed to ``update``.
        sum: weighted sum of all values seen.
        count: total weight seen.
        avg: ``sum / count``.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all accumulated statistics."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` with weight ``n`` and refresh the running mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [19]:
import warnings

warnings.filterwarnings("ignore")

class Fitter:
    """Training/validation loop with checkpointing and file logging.

    Args:
        model: training wrapper whose forward takes ``(images, boxes, labels)``
            and returns a 3-tuple whose first element is the loss; the
            underlying network is expected at ``model.model``.
        device: device the batches are moved to (str or ``torch.device``).
        config: object providing ``folder``, ``lr``, ``n_epochs``, ``verbose``,
            ``verbose_step``, ``step_scheduler``, ``validation_scheduler``,
            ``SchedulerClass`` and ``scheduler_params``.
        checkpointfile: checkpoint path loaded by ``fit`` when
            ``train_on_checkpoint`` is true.
        train_on_checkpoint: whether ``fit`` resumes from ``checkpointfile``.
    """

    def __init__(self, model, device, config, checkpointfile, train_on_checkpoint):
        self.config = config
        self.epoch = 0

        self.base_dir = f'./{config.folder}'
        if not os.path.exists(self.base_dir):
            os.makedirs(self.base_dir)

        self.log_path = f'{self.base_dir}/log.txt'
        self.best_summary_loss = 10**5

        self.model = model
        self.device = device

        # NOTE(review): a per-parameter weight-decay grouping used to be built
        # here but was never handed to the optimizer. The dead computation is
        # removed; all parameters keep AdamW's default settings, so optimizer
        # state in existing checkpoints remains loadable.
        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=config.lr)
        self.scheduler = config.SchedulerClass(self.optimizer, **config.scheduler_params)
        self.log(f'Fitter prepared. Device is {self.device}')
        self.checkpointfile = checkpointfile
        self.train_on_checkpoint = train_on_checkpoint

    def fit(self, train_loader, validation_loader):
        """Run ``config.n_epochs`` epochs of training followed by validation.

        After each training epoch ``last-checkpoint.bin`` is written; whenever
        the validation loss improves, a ``best-checkpoint-XXXepoch.bin`` file
        is written and all but the three newest best checkpoints are removed.
        """
        if self.train_on_checkpoint:
            self.load(self.checkpointfile)
            print("Loading from pretrained-model: {}".format(self.checkpointfile))
        for _ in range(self.config.n_epochs):
            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch(train_loader)

            self.log(f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation(validation_loader)

            self.log(f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint-{str(self.epoch).zfill(3)}epoch.bin')
                # Zero-padded epoch numbers sort chronologically; keep the last 3.
                for path in sorted(glob(f'{self.base_dir}/best-checkpoint-*epoch.bin'))[:-3]:
                    os.remove(path)

            if self.config.validation_scheduler:
                self.scheduler.step(metrics=summary_loss.avg)

            self.epoch += 1

    def validation(self, val_loader):
        """Evaluate on ``val_loader`` without gradients; return the loss meter."""
        self.model.eval()
        summary_loss = AverageMeter()
        t = time.time()
        for step, (images, targets, image_ids) in enumerate(val_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(
                        f'Val Step {step}/{len(val_loader)}, ' + \
                        f'summary_loss: {summary_loss.avg:.5f}, ' + \
                        f'time: {(time.time() - t):.5f}', end='\r'
                    )
            with torch.no_grad():
                images = torch.stack(images)
                batch_size = images.shape[0]
                images = images.to(self.device).float()
                boxes = [target['boxes'].to(self.device).float() for target in targets]
                labels = [target['labels'].to(self.device).float() for target in targets]

                loss, _, _ = self.model(images, boxes, labels)
                summary_loss.update(loss.detach().item(), batch_size)

        return summary_loss

    def train_one_epoch(self, train_loader):
        """Run one optimisation pass over ``train_loader``; return the loss meter."""
        self.model.train()
        summary_loss = AverageMeter()
        t = time.time()
        for step, (images, targets, image_ids) in enumerate(train_loader):
            if self.config.verbose:
                if step % self.config.verbose_step == 0:
                    print(
                        f'Train Step {step}/{len(train_loader)}, ' + \
                        f'summary_loss: {summary_loss.avg:.5f}, ' + \
                        f'time: {(time.time() - t):.5f}', end='\r'
                    )

            images = torch.stack(images)
            images = images.to(self.device).float()
            batch_size = images.shape[0]
            boxes = [target['boxes'].to(self.device).float() for target in targets]
            labels = [target['labels'].to(self.device).float() for target in targets]

            self.optimizer.zero_grad()

            loss, _, _ = self.model(images, boxes, labels)

            loss.backward()

            summary_loss.update(loss.detach().item(), batch_size)

            self.optimizer.step()

            if self.config.step_scheduler:
                self.scheduler.step()

        return summary_loss

    def save(self, path):
        """Write model, optimizer and scheduler state plus progress to ``path``.

        Side effect: the model is switched to eval mode.
        """
        self.model.eval()
        torch.save({
            'model_state_dict': self.model.model.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_summary_loss': self.best_summary_loss,
            'epoch': self.epoch,
        }, path)

    def load(self, path):
        """Restore the state written by ``save``; training resumes at epoch + 1."""
        # map_location lets a checkpoint saved on another device (e.g. GPU)
        # be restored onto the device this Fitter is running on.
        checkpoint = torch.load(path, map_location=self.device)
        self.model.model.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_summary_loss = checkpoint['best_summary_loss']
        self.epoch = checkpoint['epoch'] + 1

    def log(self, message):
        """Append ``message`` to the log file; also print it when verbose."""
        if self.config.verbose:
            print(message)
        with open(self.log_path, 'a+') as logger:
            logger.write(f'{message}\n')

In [20]:
class TrainGlobalConfig:
    """Hyper-parameters and runtime options consumed by Fitter and the loaders."""

    # Data loading
    num_workers = 4
    batch_size = 4

    # Optimisation
    n_epochs = 6
    lr = 0.0002

    # Output directory for checkpoints and log.txt
    folder = os.path.join(outdir, 'effdet6-models')

    # Progress printing
    verbose = True
    verbose_step = 1

    # Scheduler is stepped once per epoch on the validation loss,
    # not once per optimisation step.
    step_scheduler = False
    validation_scheduler = True
    SchedulerClass = torch.optim.lr_scheduler.ReduceLROnPlateau
    scheduler_params = {
        'mode': 'min',
        'factor': 0.5,
        'patience': 1,
        'verbose': False,
        'threshold': 0.0001,
        'threshold_mode': 'abs',
        'cooldown': 0,
        'min_lr': 1e-8,
        'eps': 1e-08,
    }

In [21]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    ``[(img0, tgt0, id0), (img1, tgt1, id1)]`` becomes
    ``((img0, img1), (tgt0, tgt1), (id0, id1))``.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# def run_training():
#     if Kaggle:
#         device = torch.device('cuda:0')
#     else:
#         device = ('cuda' if torch.cuda.is_available() else 'cpu')
#     net.to(device)

#     train_loader = torch.utils.data.DataLoader(
#         train_dataset,
#         batch_size=TrainGlobalConfig.batch_size,
#         sampler=RandomSampler(train_dataset),
#         pin_memory=False,
#         drop_last=True,
#         num_workers=TrainGlobalConfig.num_workers,
#         collate_fn=collate_fn,
#     )
#     val_loader = torch.utils.data.DataLoader(
#         validation_dataset, 
#         batch_size=TrainGlobalConfig.batch_size,
#         num_workers=TrainGlobalConfig.num_workers,
#         shuffle=False,
#         sampler=SequentialSampler(validation_dataset),
#         pin_memory=False,
#         collate_fn=collate_fn,
#     )

#     fitter = Fitter(model=net, device=device, config=TrainGlobalConfig)
#     fitter.fit(train_loader, val_loader)

In [22]:
def run_training_kfold(num_fold, skip_folds=(0, 1, 3), train_on_checkpoint=True):
    """Train one model per cross-validation fold.

    For every fold that is not skipped, the images whose ``fold`` differs from
    the fold number form the training set and the remaining ones the
    validation set. ``TrainGlobalConfig.folder`` is updated to a per-fold
    output directory before each Fitter is created.

    Args:
        num_fold: number of folds to iterate over (0 .. num_fold - 1).
        skip_folds: fold numbers to skip. The default reproduces the previously
            hard-coded behaviour (folds 0, 1 and 3 skipped).
        train_on_checkpoint: when True, each fold resumes from its
            ``last-checkpoint.bin``; when False, it starts from the pretrained
            weights loaded by ``get_net``.
    """
    if Kaggle:
        device = torch.device('cuda:0')
    else:
        device = ('cuda' if torch.cuda.is_available() else 'cpu')

    for fold_number in range(num_fold):
        print('Fold: {}'.format(fold_number + 1))
        if fold_number in skip_folds:
            continue
        print("TRAIN_ON_CHECKPOINT:", train_on_checkpoint)

        train_dataset = DatasetRetriever(
            image_ids=df_folds[df_folds['fold'] != fold_number].index.values,
            marking=video_labels,
            transforms=get_train_transforms(),
            test=False,
        )
        validation_dataset = DatasetRetriever(
            image_ids=df_folds[df_folds['fold'] == fold_number].index.values,
            marking=video_labels,
            transforms=get_valid_transforms(),
            test=True,
        )

        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=TrainGlobalConfig.batch_size,
            sampler=RandomSampler(train_dataset),
            pin_memory=False,
            drop_last=True,
            num_workers=TrainGlobalConfig.num_workers,
            collate_fn=collate_fn,
        )
        val_loader = torch.utils.data.DataLoader(
            validation_dataset,
            batch_size=TrainGlobalConfig.batch_size,
            num_workers=TrainGlobalConfig.num_workers,
            shuffle=False,
            sampler=SequentialSampler(validation_dataset),
            pin_memory=False,
            collate_fn=collate_fn,
        )

        net, checkpoint_file = get_net(fold_number, train_on_checkpoint)
        net.to(device)
        # Per-fold output directory (checkpoints and log.txt).
        TrainGlobalConfig.folder = os.path.join(outdir, f'effdet6-models/fold{fold_number}')
        fitter = Fitter(
            model=net,
            device=device,
            config=TrainGlobalConfig,
            checkpointfile=checkpoint_file,
            train_on_checkpoint=train_on_checkpoint,
        )
        fitter.fit(train_loader, val_loader)

In [23]:
def get_net(fold_number, train_on_checkpoint):
    """Create the EfficientDet-D6 training bench with a 2-class head.

    Args:
        fold_number: fold index, used to locate that fold's last checkpoint.
        train_on_checkpoint: when True, no weights are loaded here and the path
            of the fold's ``last-checkpoint.bin`` is returned for the caller to
            load; when False, the pretrained D6 weights are loaded and an empty
            path is returned.

    Returns:
        ``(DetBenchTrain, checkpointfile)``.
    """
    config = get_efficientdet_config('tf_efficientdet_d6')
    net = EfficientDet(config, pretrained_backbone=False)

    checkpointfile = ''
    if not train_on_checkpoint:
        # Pretrained weights must be loaded before the head is replaced below.
        # (Other runs used efficientdet_d5-ef44aea8.pth / efficientdet_d4-5b370b7a.pth.)
        pretrained_weights = torch.load(os.path.join(PATH, 'nfl-models/efficientdet_d6-51cb0132.pth'))
        net.load_state_dict(pretrained_weights)
    else:
        checkpointfile = os.path.join(outdir, f'effdet6-models/fold{fold_number}/last-checkpoint.bin')
    print("checkpointfile:", checkpointfile)

    # Swap in a fresh classification head for the two helmet classes.
    config.num_classes = 2
    config.image_size = 512
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))

    bench = DetBenchTrain(net, config)
    return bench, checkpointfile

# net, checkpointfile = get_net()

In [24]:
# run_training()
# Iterate over the 5 folds; which folds are actually trained is decided inside
# run_training_kfold.
run_training_kfold(num_fold=5)

Fold: 1
Fold: 2
Fold: 3
TRAIN_ON_CHECKPOINT: True
checkpointfile: NFL/res/efficientDet-res/ED6-512/org-epoch20-aug3-gkfold/effdet6-models/fold2/last-checkpoint.bin
Fitter prepared. Device is cuda
Loading from pretrained-model: NFL/res/efficientDet-res/ED6-512/org-epoch20-aug3-gkfold/effdet6-models/fold2/last-checkpoint.bin

2020-12-20T23:53:00.672244
LR: 5e-05
[RESULT]: Train. Epoch: 17, summary_loss: 0.40716, time: 1348.67126
[RESULT]: Val. Epoch: 17, summary_loss: 0.40704, time: 95.99240

2020-12-21T00:17:09.143415
LR: 2.5e-05
[RESULT]: Train. Epoch: 18, summary_loss: 0.39526, time: 1352.59059
[RESULT]: Val. Epoch: 18, summary_loss: 0.40439, time: 93.80027

2020-12-21T00:41:18.440923
LR: 2.5e-05
[RESULT]: Train. Epoch: 19, summary_loss: 0.38558, time: 1328.18512
[RESULT]: Val. Epoch: 19, summary_loss: 0.40910, time: 94.77549

2020-12-21T01:05:04.241688
LR: 1.25e-05
[RESULT]: Train. Epoch: 20, summary_loss: 0.38122, time: 1336.46257
[RESULT]: Val. Epoch: 20, summary_loss: 0.41520, tim

In [25]:
# clearing working dir
# be careful when running this code on local environment!
# !rm -rf *
# !mv * /tmp/train_images

# import shutil
# shutil.rmtree(out_dir)  