In [None]:
! nvidia-smi

In [None]:
%cd ../input/effdetapex/apex
!pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./ 
%cd ../

In [None]:
!pip install --no-deps '/kaggle/input/efficientdetpytorch/efficientdet-pytorch/timm-0.1.30-py3-none-any.whl' > /dev/null
!pip install --no-deps '/kaggle/input/pycocotools/pycocotools-2.0-cp37-cp37m-linux_x86_64.whl' > /dev/null
!pip install --no-deps '/kaggle/input/efficientdetpytorch/efficientdet-pytorch/omegaconf-2.0.0-py3-none-any.whl' > /dev/null

## Dependencies and imports

In [None]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2,3"
import json
import zipfile
# import gluoncv as gcv
import torch
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import numba
import ast
from numba import jit
from typing import List, Union, Tuple
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler

from glob import glob
import logging
import math
import copy
from tqdm import tqdm
from timm.utils import *
from timm.optim import create_optimizer
from timm.scheduler import create_scheduler
from timm.data.distributed_sampler import OrderedDistributedSampler
from tqdm import tqdm

import sys
sys.path.insert(0, '/kaggle/input/efficientdetpytorch/efficientdet-pytorch')
sys.path.insert(0, '/kaggle/input/weightedboxesfusion')

from effdet import get_efficientdet_config, EfficientDet, DetBenchTrain,DetBenchPredict
from effdet.efficientdet import HeadNet
import ensemble_boxes
from ensemble_boxes import *
import gc
import re
import matplotlib.pyplot as plt
from itertools import product
try:
    from apex import amp
    from apex.parallel import DistributedDataParallel as DDP
    from apex.parallel import convert_syncbn_model
    has_apex = True
except ImportError:
    from torch.nn.parallel import DistributedDataParallel as DDP
    has_apex = False

# Global constants shared by the cells below: RNG seed, image geometry, class count and dataset paths.
SEED = 42  # passed to seed_everything() right after that function is defined
AREA_SMALL = 56 * 56  # pixel-area constant; not referenced in the visible cells — TODO confirm usage
IMG_SIZE = 1024  # side length the dataset classes resize every loaded image to
RESIZE_IMG_SIZE = 1024  # side length produced by the albumentations Resize step and assumed by the TTA/WBF code
NUM_CLASSES = 1  # number of classes the detection head is rebuilt with in get_net()
# ImageNet channel statistics rescaled from the 0-1 range to 0-255; not referenced in the visible cells.
IMAGENET_DEFAULT_MEAN = [x * 255 for x in (0.485, 0.456, 0.406)]
IMAGENET_DEFAULT_STD = [x * 255 for x in (0.229, 0.224, 0.225)]
# Kaggle input locations of the competition data.
TRAIN_IMAGE_PATH = '/kaggle/input/global-wheat-detection/train'
TEST_IMAGE_PATH = '/kaggle/input/global-wheat-detection/test'
TRAIN_CSV_PATH = '/kaggle/input/global-wheat-detection/train.csv'

def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and every CUDA device),
    exports the seed as ``PYTHONHASHSEED`` and configures cuDNN for
    deterministic execution.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed the non-current GPUs so DataParallel runs are covered.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm per run, which can
    # pick different kernels between runs; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

## GlobalConfig

In [None]:
class GlobalConfig:
    """Static hyper-parameters and paths for pseudo-label training and TTA inference.

    The attributes are plain class-level constants; the visible code reads them
    through the class itself (get_net) or through the `config` object handed to Fitter.
    """
    # --- data loading / optimisation ---
    num_workers = 2  # presumably the DataLoader worker count; the loaders are built outside the visible cells
    pseudo_train_batch_size = 2  # Fitter divides `nbs` by this to derive the gradient-accumulation count
    test_batch_size = 1  # presumably the inference DataLoader batch size — TODO confirm
    n_epochs = 5  # number of epochs Fitter.fit() iterates over (an earlier setting was 40)
    lr = 1e-4  # base learning rate handed to AdamW
    warmup_epoch = 0  # epochs of linear LR warm-up in Fitter.fit(); 0 disables the warm-up branch
    checkpoint = False  # when True, Fitter.__init__ loads '<folder>/last-checkpoint.bin'
    weight_decay = 4e-5  # Fitter divides this by `lr` before building the AdamW parameter groups
    eps = 1e-3  # AdamW epsilon
    nbs = 64  # nominal batch size used for gradient accumulation
    folder = '/kaggle/working/effdet5-1024-pseudo'  # directory Fitter creates for its log and checkpoints
    
    # multi-gpu training
    # (Fitter wraps the model in torch.nn.DataParallel when this is True and apex is available)
    distributed = False
    
    verbose = True  # log the timestamp and current LR at the start of each epoch
    verbose_step = 1  # not referenced in the visible cells — TODO confirm usage
 
    step_scheduler = True  # call scheduler.step() after each training epoch in Fitter.fit()
    validation_scheduler = False  # call scheduler.step() after each validation pass in Fitter.fit()

    # Fitter instantiates this with T_max and eta_min keyword arguments.
    SchedulerClass = torch.optim.lr_scheduler.CosineAnnealingLR
    
    # get_net() falls back to this checkpoint when the path it is given is not a file.
    checkpoint_path = '/kaggle/input/efficientded/effdet5-1024-last.bin'
    config_name = 'tf_efficientdet_d5'  # presumably passed to get_net() as config_name — verify against caller
    submission_path = '/kaggle/working/submission.csv'  # presumably where the submission CSV is written — TODO confirm
    final_checkpoint_path='/kaggle/working/effdet5-1024-pseudo/best-checkpoint.bin'  # same location Fitter saves its best checkpoint to
    
    # TTA / weighted-boxes-fusion thresholds; presumably for the pseudo-labelling pass
    # (the call sites are outside the visible cells — TODO confirm).
    tta_score_thr = 0.05
    tta_wbf_iou_thr = 0.34
    tta_wbf_skip_box_thr =0.42
    
    
    # Same thresholds, presumably for the final prediction pass — TODO confirm.
    final_tta_score_thr = 0.05
    final_tta_wbf_iou_thr = 0.34
    final_tta_wbf_skip_box_thr =0.48
    

## Data Augmentations

In [None]:
class BaseWheatTTA:
    """Interface of a test-time augmentation paired with its inverse box mapping.

    Subclasses implement the image transform for a single image and for a
    batch, plus the mapping of predicted boxes back to the original frame.

    author: @shonenkov
    """
    # Side length of the (square) images the box arithmetic assumes.
    image_size = IMG_SIZE

    def augment(self, image):
        """Transform one image; must be overridden."""
        raise NotImplementedError()

    def batch_augment(self, images):
        """Transform a batch of images; must be overridden."""
        raise NotImplementedError()

    def deaugment_boxes(self, boxes):
        """Map boxes predicted on the augmented image back; must be overridden."""
        raise NotImplementedError()

class TTAHorizontalFlip(BaseWheatTTA):
    """Flip TTA that mirrors box columns 1 and 3 on the way back.

    author: @shonenkov
    """

    def augment(self, image):
        """Flip a single image along dimension 1."""
        return image.flip(1)

    def batch_augment(self, images):
        """Flip a batch of images along dimension 2."""
        return images.flip(2)

    def deaugment_boxes(self, boxes):
        """Undo the flip on predicted boxes.

        Args:
            boxes: array of shape (N, 4) with two corner points per row
                (a NumPy array is assumed, as in TTARotate90).

        Returns:
            A new array; the input is left untouched so callers can reuse it.
        """
        # Work on a copy instead of writing into the caller's array,
        # matching TTARotate90.deaugment_boxes.
        res_boxes = boxes.copy()
        # Mirroring swaps the min and max coordinate, hence the [3, 1] order.
        res_boxes[:, [1,3]] = self.image_size - boxes[:, [3,1]]
        return res_boxes

class TTAVerticalFlip(BaseWheatTTA):
    """Flip TTA that mirrors box columns 0 and 2 on the way back.

    author: @shonenkov
    """

    def augment(self, image):
        """Flip a single image along dimension 2."""
        return image.flip(2)

    def batch_augment(self, images):
        """Flip a batch of images along dimension 3."""
        return images.flip(3)

    def deaugment_boxes(self, boxes):
        """Undo the flip on predicted boxes.

        Args:
            boxes: array of shape (N, 4) with two corner points per row
                (a NumPy array is assumed, as in TTARotate90).

        Returns:
            A new array; the input is left untouched so callers can reuse it.
        """
        # Work on a copy instead of writing into the caller's array,
        # matching TTARotate90.deaugment_boxes.
        res_boxes = boxes.copy()
        # Mirroring swaps the min and max coordinate, hence the [2, 0] order.
        res_boxes[:, [0,2]] = self.image_size - boxes[:, [2,0]]
        return res_boxes
    
class TTARotate90(BaseWheatTTA):
    """Quarter-turn rotation TTA with the matching inverse box mapping.

    author: @shonenkov
    """

    def augment(self, image):
        """Rotate one image by 90 degrees in the plane of dimensions (1, 2)."""
        return torch.rot90(image, k=1, dims=(1, 2))

    def batch_augment(self, images):
        """Rotate a batch by 90 degrees in the plane of dimensions (2, 3)."""
        return torch.rot90(images, k=1, dims=(2, 3))

    def deaugment_boxes(self, boxes):
        """Return a copy of `boxes` mapped back to the un-rotated frame."""
        size = self.image_size
        restored = boxes.copy()
        # Columns 0/2 come from the mirrored columns 1/3 of the input ...
        restored[:, 0] = size - boxes[:, 1]
        restored[:, 2] = size - boxes[:, 3]
        # ... and columns 1/3 are the input columns 2/0 (note the swap).
        restored[:, 1] = boxes[:, 2]
        restored[:, 3] = boxes[:, 0]
        return restored

class TTACompose(BaseWheatTTA):
    """Chain of TTA transforms applied in order and undone in reverse order.

    author: @shonenkov
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def augment(self, image):
        """Apply every transform's `augment` to a single image, in order."""
        augmented = image
        for step in self.transforms:
            augmented = step.augment(augmented)
        return augmented

    def batch_augment(self, images):
        """Apply every transform's `batch_augment` to a batch, in order."""
        augmented = images
        for step in self.transforms:
            augmented = step.batch_augment(augmented)
        return augmented

    def prepare_boxes(self, boxes):
        """Return a copy of `boxes` with each coordinate pair sorted as (min, max).

        De-augmentation can leave the two corner coordinates of an axis in
        swapped order; this restores min in columns 0/1 and max in columns 2/3.
        """
        first_axis = boxes[:, [0, 2]]
        second_axis = boxes[:, [1, 3]]
        ordered = boxes.copy()
        ordered[:, 0] = first_axis.min(axis=1)
        ordered[:, 2] = first_axis.max(axis=1)
        ordered[:, 1] = second_axis.min(axis=1)
        ordered[:, 3] = second_axis.max(axis=1)
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo all transforms (last applied first) and normalise corner order."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)
    

def get_train_transforms():
    """Build the albumentations pipeline used for training samples.

    The pipeline crops, jitters colour, flips, resizes, cuts out patches and
    converts to a tensor; boxes are handled in 'pascal_voc' format with their
    labels supplied through the 'labels' field.
    """
    colour_jitter = A.OneOf(
        [
            A.HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2,
                                 val_shift_limit=0.2, p=0.9),
            A.RandomBrightnessContrast(brightness_limit=0.2,
                                       contrast_limit=0.2, p=0.9),
        ],
        p=0.9,
    )
    pipeline = [
        A.RandomSizedCrop(min_max_height=(800, 800), height=IMG_SIZE, width=IMG_SIZE, p=0.5),
        colour_jitter,
        A.ToGray(p=0.01),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.5),
        A.Resize(height=RESIZE_IMG_SIZE, width=RESIZE_IMG_SIZE, p=1),
        A.Cutout(num_holes=8, max_h_size=64, max_w_size=64, fill_value=0, p=0.5),
        ToTensorV2(p=1.0),
    ]
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    return A.Compose(pipeline, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the deterministic pipeline for validation: resize, then to tensor.

    No bbox parameters are configured for this pipeline.
    """
    pipeline = [
        A.Resize(height=RESIZE_IMG_SIZE, width=RESIZE_IMG_SIZE, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(pipeline, p=1.0)

def collate_fn(batch):
    """Transpose a list of per-sample tuples into a tuple of per-field tuples."""
    fields = zip(*batch)
    return tuple(fields)

def format_prediction_string(boxes, scores):
    """Render detections as one space-separated string.

    Each detection contributes "<score:.4f> <b0> <b1> <b2> <b3>", where
    b0..b3 are the first four entries of the box; boxes and scores are paired
    positionally and the shorter sequence decides the count.
    """
    template = "{0:.4f} {1} {2} {3} {4}"
    return " ".join(
        template.format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    )

## Dataset

In [None]:
class DatasetRetriever(Dataset):
    """Dataset yielding (image, target, image_id) for training, validation and pseudo-label data.

    Depending on the flags, a sample is a plain image, a 4-image cutmix mosaic
    or a 2-image mixup. Box annotations are read from the `marking` DataFrame
    (columns 'image_id', 'x', 'y', 'w', 'h').
    """

    def __init__(self, marking, image_ids, transforms=None, test=False, pseudo_label=False):
        """
        Args:
            marking: DataFrame with one row per box ('image_id', 'x', 'y', 'w', 'h').
            image_ids: array of image ids; `.shape[0]` is used as the dataset length.
            transforms: optional callable invoked with image/bboxes/labels keyword arguments.
            test: when True, never use cutmix (plain image loading only).
            pseudo_label: when True, read images from TEST_IMAGE_PATH and always use mixup.
        """
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.pseudo_label = pseudo_label
        self.test = test

    def __getitem__(self, index: int):
        """Load sample `index` and return (image, target dict, image_id)."""
        image_id = self.image_ids[index]
        # Drawn unconditionally, so the RNG stream advances even when the value is unused.
        p_ratio = random.random()
        if self.pseudo_label:
            image,boxes = self.load_mixup_image_and_boxes(index)
        else:
            # Plain image for test mode or with probability ~0.5; cutmix otherwise.
            if self.test or p_ratio >0.5 :
                image, boxes = self.load_image_and_boxes(index)
            else:
                image, boxes = self.load_cutmix_image_and_boxes(index)

        # there is only one class
        labels = torch.ones((boxes.shape[0],), dtype=torch.int64)
        
        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['img_scale'] = torch.tensor([1.])
        target['image_id'] = torch.tensor([index])
        target['img_size'] = torch.tensor([(IMG_SIZE, IMG_SIZE)])

        if self.transforms:
            # Retry the (random) transform up to 10 times until at least one box survives cleaning.
            # NOTE(review): if all 10 attempts fail, the untransformed image and the original
            # boxes (still in x1, y1, x2, y2 order) are returned — confirm this is intended.
            for i in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': target['boxes'],
                    'labels': labels
                })
                sample['bboxes'] = self.boxes_clean(sample['bboxes'])
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    target['boxes'] = torch.stack(tuple(map(torch.tensor, zip(*sample['bboxes'])))).permute(1, 0)
                    # Swap column pairs: (x1, y1, x2, y2) -> (y1, x1, y2, x2). Anything consuming
                    # target['boxes'] afterwards must expect this yxyx order.
                    target['boxes'][:,[0,1,2,3]] = target['boxes'][:,[1,0,3,2]]
                    break

        return image, target, image_id

    def __len__(self) -> int:
        """Number of image ids in the dataset."""
        return self.image_ids.shape[0]

    def load_image_and_boxes(self, index):
        """Read one image as float32 RGB in [0, 1] plus its boxes as int32 (x1, y1, x2, y2)."""
        image_id = self.image_ids[index]
        # Pseudo-labelled samples live in the test folder, everything else in the train folder.
        if self.pseudo_label:
            image = cv2.imread(f'{TEST_IMAGE_PATH}/{image_id}.jpg', cv2.IMREAD_COLOR)
        else:
            image = cv2.imread(f'{TRAIN_IMAGE_PATH}/{image_id}.jpg', cv2.IMREAD_COLOR)
        # Force a square IMG_SIZE x IMG_SIZE image; the boxes are not rescaled here.
        if image.shape[0]!=IMG_SIZE or image.shape[1]!=IMG_SIZE:
            image = cv2.resize(image,(IMG_SIZE,IMG_SIZE))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        records = self.marking[self.marking['image_id'] == image_id]
        boxes = records[['x', 'y', 'w', 'h']].values
        # Convert (x, y, w, h) to corner format (x1, y1, x2, y2).
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        return image, boxes.astype(np.int32)

    def load_cutmix_image_and_boxes(self, index, imsize=IMG_SIZE):
        """ 
        Build a mosaic from the image at `index` and three randomly chosen images.

        A random centre point splits the canvas into four quadrants; each quadrant is
        filled with a crop of one source image and that image's boxes are shifted by the
        same offset, clipped to the canvas, and dropped when their area becomes zero.

        This implementation of cutmix author:  https://www.kaggle.com/nvnnghia 
        Refactoring and adaptation: https://www.kaggle.com/shonenkov
        """
        w, h = imsize, imsize
        s = imsize // 2
    
        xc, yc = [int(random.uniform(imsize * 0.25, imsize * 0.75)) for _ in range(2)]  # center x, y
        # First entry is the requested image; the other three are sampled uniformly (repeats possible).
        indexes = [index] + [random.randint(0, self.image_ids.shape[0] - 1) for _ in range(3)]

        # Canvas pre-filled with ones; every pixel is overwritten by one of the four crops below.
        result_image = np.full((imsize, imsize, 3), 1, dtype=np.float32)
        result_boxes = []

        for i, index in enumerate(indexes):
            image, boxes = self.load_image_and_boxes(index)
            # "a" coordinates address the mosaic canvas, "b" coordinates the source image.
            if i == 0:
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc  # xmin, ymin, xmax, ymax (large image)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h  # xmin, ymin, xmax, ymax (small image)
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, max(xc, w), min(y2a - y1a, h)
            elif i == 3:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)
            result_image[y1a:y2a, x1a:x2a] = image[y1b:y2b, x1b:x2b]
            # Offset that maps source-image coordinates onto the canvas.
            padw = x1a - x1b
            padh = y1a - y1b

            boxes[:, 0] += padw
            boxes[:, 1] += padh
            boxes[:, 2] += padw
            boxes[:, 3] += padh

            result_boxes.append(boxes)
        if len(result_boxes):
            result_boxes = np.concatenate(result_boxes, 0)
            # Clip all coordinates to the canvas in place.
            np.clip(result_boxes[:, 0:], 0, 2 * s, out=result_boxes[:, 0:])
        result_boxes = result_boxes.astype(np.int32)
        # Keep only boxes with positive area after clipping.
        result_boxes = result_boxes[np.where((result_boxes[:,2]-result_boxes[:,0])*(result_boxes[:,3]-result_boxes[:,1]) > 0)]
        return result_image, result_boxes
    
 
    def load_mixup_image_and_boxes(self, index):
        """Average the image at `index` with one random image and stack both box sets."""
        image, boxes = self.load_image_and_boxes(index)
        r_image, r_boxes = self.load_image_and_boxes(random.randint(0, self.image_ids.shape[0] - 1))
        return (image+r_image)/2, np.vstack((boxes, r_boxes)).astype(np.int32)
    
    def boxes_clean(self,list_boxes):
        """Return the boxes that pass the size and aspect-ratio filters.

        A box (x1, y1, x2, y2, ...) is kept when width and height both exceed 10,
        its area exceeds 300 and neither side is 10 or more times the other.
        """
        new_list_boxes = []
        for box in list_boxes:
            box_w = box[2]-box[0]
            box_h = box[3]-box[1]
            box_area = (box[2]-box[0])*(box[3]-box[1])
            # The 1e-16 term guards against division by zero for degenerate boxes.
            box_wh_ratio = (box[2]-box[0])/(box[3]-box[1]+1e-16)
            box_hw_ratio = (box[3]-box[1])/(box[2]-box[0]+1e-16)
            if box_w>10.0 and box_h>10.0 and box_area> 300.0 and box_wh_ratio<10.0 and box_hw_ratio<10.0:
                new_list_boxes.append(box)
        return new_list_boxes

    
class DatasetRetriever_test(Dataset):
    """Inference dataset yielding (image, image_id, target) from TEST_IMAGE_PATH.

    `image_ids` are used verbatim as file names (no extension is appended).
    """

    def __init__(self, image_ids, transforms=None):
        super().__init__()
        self.transforms = transforms
        self.image_ids = image_ids

    def __getitem__(self, index: int):
        """Load image `index` as float32 RGB in [0, 1] and apply the optional transforms."""
        image_id = self.image_ids[index]
        image = cv2.imread(f'{TEST_IMAGE_PATH}/{image_id}', cv2.IMREAD_COLOR)

        # Bring every image to the square IMG_SIZE resolution.
        height, width = image.shape[0], image.shape[1]
        if not (height == IMG_SIZE and width == IMG_SIZE):
            image = cv2.resize(image, (IMG_SIZE, IMG_SIZE))

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        if self.transforms:
            image = self.transforms(image=image)['image']

        target = {'img_scale': torch.tensor([1.])}
        return image, image_id, target

    def __len__(self) -> int:
        """Number of test images."""
        return len(self.image_ids)

## Detection Function

In [None]:
def get_net(checkpoint_path, config_name, predict=True):
    """Build an EfficientDet, load its weights and wrap it in a bench.

    Args:
        checkpoint_path: path of the checkpoint to load. When it is not an
            existing file, GlobalConfig.checkpoint_path is loaded instead.
        config_name: name passed to get_efficientdet_config.
        predict: wrap the network in DetBenchPredict when True, otherwise in
            DetBenchTrain.

    Returns:
        The wrapped network. Its weights are on the CPU; callers move it to
        the GPU themselves.
    """
    config = get_efficientdet_config(config_name)
    net = EfficientDet(config, pretrained_backbone=False)

    # Replace the classification head to match this task's class count.
    config.num_classes = NUM_CLASSES
    config.image_size = RESIZE_IMG_SIZE
    net.class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))

    weights_path = checkpoint_path if os.path.isfile(checkpoint_path) else GlobalConfig.checkpoint_path
    # map_location='cpu' lets a checkpoint saved from a GPU load on any host
    # and avoids staging a second copy of the weights in GPU memory.
    checkpoint = torch.load(weights_path, map_location='cpu')
    net.load_state_dict(checkpoint['model_state_dict'])

    # The checkpoint dict can be large; release it before wrapping the model.
    del checkpoint
    gc.collect()

    bench_cls = DetBenchPredict if predict else DetBenchTrain
    return bench_cls(net, config)


def run_wbf(predictions, image_index, image_size, iou_thr, skip_box_thr, weights=None):
    """Fuse the boxes predicted for one image by several models/TTA passes.

    Args:
        predictions: one entry per model/TTA pass; each entry is indexable by
            image and yields a dict with array-valued 'boxes' and 'scores'.
        image_index: index of the image inside each entry.
        image_size: side length used to normalise boxes to [0, 1] for the
            fusion and to scale the fused boxes back.
        iou_thr: IoU threshold forwarded to weighted_boxes_fusion.
        skip_box_thr: score threshold forwarded to weighted_boxes_fusion.
        weights: optional per-entry weights forwarded to weighted_boxes_fusion
            (None keeps the library default).

    Returns:
        (boxes, scores, labels) with boxes scaled back to pixel units.
    """
    scale = image_size - 1
    boxes = [(prediction[image_index]['boxes'] / scale).tolist() for prediction in predictions]
    scores = [prediction[image_index]['scores'].tolist() for prediction in predictions]
    # Single-class problem: every detection gets label 1.
    labels = [np.ones(prediction[image_index]['scores'].shape[0]).astype(int).tolist() for prediction in predictions]
    # Forward the caller's weights; they were previously replaced by None.
    boxes, scores, labels = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(
        boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * scale
    return boxes, scores, labels

def TTA_Compose():
    """Return the 8 TTACompose pipelines covering every on/off combination.

    Each of the three transforms (first flip, second flip, 90-degree rotation)
    is either included or skipped; the last combination is the empty pipeline.
    """
    options = (
        [TTAHorizontalFlip(), None],
        [TTAVerticalFlip(), None],
        [TTARotate90(), None],
    )
    pipelines = []
    for combination in product(*options):
        enabled = [transform for transform in combination if transform]
        pipelines.append(TTACompose(enabled))
    return pipelines

def make_tta_predictions(images,targets,net,score_threshold):
    """Run the detector on every TTA variant of a batch.

    Args:
        images: sequence of image tensors; stacked into one CUDA float batch.
        targets: per-image dicts providing 'img_scale'.
        net: detection bench; switched to eval mode and moved to the GPU.
        score_threshold: detections scoring at or below this are discarded.

    Returns:
        A list with one entry per TTA pipeline; each entry is a list with one
        {'boxes', 'scores'} dict per image, boxes already mapped back to the
        un-augmented frame as corner coordinates.
    """
    with torch.no_grad():
        images = torch.stack(images).float().cuda()
        img_scale = torch.tensor([target['img_scale'].cuda() for target in targets])
        img_size = torch.tensor([(RESIZE_IMG_SIZE, RESIZE_IMG_SIZE) for _ in targets]).cuda()
        tta_pipelines = TTA_Compose()
        net = net.eval()
        net = net.cuda()

        predictions = []
        for tta in tta_pipelines:
            det = net(tta.batch_augment(images.clone()),
                      img_scales=img_scale,
                      img_size=img_size)
            per_image = []
            for i in range(images.shape[0]):
                raw = det[i].detach().cpu().numpy()
                scores = raw[:, 4]
                keep = np.where(scores > score_threshold)[0]
                # Fancy indexing copies, so the in-place edits below do not touch `raw`.
                boxes = raw[:, :4][keep]
                # Columns 2/3 hold sizes; add the origin to get the far corner.
                boxes[:, 2] = boxes[:, 2] + boxes[:, 0]
                boxes[:, 3] = boxes[:, 3] + boxes[:, 1]
                boxes = tta.deaugment_boxes(boxes.copy())
                per_image.append({
                    'boxes': boxes,
                    'scores': scores[keep],
                })
            predictions.append(per_image)
    return predictions



def detect_images(data_loader, checkpoint_path, config_name, score_threshold, iou_thr, skip_box_thr):
    """Predict boxes for every image of a loader using TTA and weighted boxes fusion.

    Args:
        data_loader: iterable yielding (images, image_ids, targets) batches.
        checkpoint_path, config_name: forwarded to get_net.
        score_threshold: per-pass score cut-off for make_tta_predictions.
        iou_thr, skip_box_thr: forwarded to run_wbf.

    Returns:
        (all_image_ids, all_boxes, all_scores, results, pseudo_labels_df) where
        boxes are int32 (x, y, w, h), `results` holds submission rows and
        `pseudo_labels_df` has one row per predicted box.
    """
    net = get_net(checkpoint_path, config_name, predict=True)
    results, all_boxes, all_scores, all_image_ids, pseudo_labels = [], [], [], [], []

    for images, image_ids, targets in data_loader:
        predictions = make_tta_predictions(images, targets, net, score_threshold)
        for i, _ in enumerate(images):
            boxes, scores, labels = run_wbf(predictions, i, RESIZE_IMG_SIZE, iou_thr, skip_box_thr)
            # Rescale factor between network and output resolution (currently 1.0).
            rescaled = boxes * float(1024/1024)
            boxes = rescaled.round().astype(np.int32).clip(min=0, max=RESIZE_IMG_SIZE-1)

            image_id = image_ids[i]
            # Drop the 4-character file extension (e.g. '.jpg').
            image_stem = image_id[:-4]

            # Corner format -> (x, y, w, h).
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            pseudo_labels.extend(
                {
                    'image_id': image_stem,
                    'source': 'Libo',
                    'width': RESIZE_IMG_SIZE,
                    'height': RESIZE_IMG_SIZE,
                    'x': box[0],
                    'y': box[1],
                    'w': box[2],
                    'h': box[3],
                }
                for box in boxes
            )

            results.append({
                'image_id': image_stem,
                'PredictionString': format_prediction_string(boxes, scores),
            })
            all_boxes.append(boxes)
            all_scores.append(scores)
            all_image_ids.append(image_id)

    pseudo_labels_df = pd.DataFrame(
        pseudo_labels,
        columns=['image_id', 'width', 'height', 'source', 'x', 'y', 'w', 'h'],
    )
    return all_image_ids, all_boxes, all_scores, results, pseudo_labels_df
   
    

## Visualization Function

In [None]:
def draw_rect_with_score(img, bboxes, scores,color=None):
    """Return a copy of `img` with each box outlined and labelled with its score.

    Args:
        img: image array; it is copied, never drawn on directly.
        bboxes: array whose first four columns are the corner coordinates.
        scores: one score per box, rendered with two decimals at the first corner.
        color: rectangle colour passed to cv2.rectangle.
    """
    canvas = img.copy()
    corners = bboxes[:, :4].reshape(-1, 4)
    for bbox, score in zip(corners, scores):
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        # Line thickness scales with the longer image side.
        thickness = int(max(canvas.shape[:2]) / 200)
        canvas = cv2.rectangle(canvas.copy(), top_left, bottom_right, color, thickness)
        cv2.putText(canvas, '%.2f'%(score), top_left, cv2.FONT_HERSHEY_SIMPLEX,1, (255,255,255), 2, cv2.LINE_AA)
    return canvas

def draw_test_image(all_image_ids, all_boxes, all_scores, nums):
    """Plot the first `nums` test images with their predicted boxes and scores.

    Args:
        all_image_ids: file names relative to TEST_IMAGE_PATH.
        all_boxes: per-image arrays of (x, y, w, h) boxes; they are not modified.
        all_scores: per-image score sequences.
        nums: number of images to draw, one subplot row each.
    """
    # squeeze=False keeps `ax` two-dimensional, so indexing also works for nums == 1.
    fig, ax = plt.subplots(nums, 1, figsize=(120, 60), squeeze=False)
    for i in range(nums):
        # OpenCV loads BGR; reverse the channel axis to get RGB for matplotlib.
        im0 = cv2.imread(f'{TEST_IMAGE_PATH}/{all_image_ids[i]}')[:,:,::-1]
        # Convert on a copy: the caller's (x, y, w, h) boxes must stay intact,
        # otherwise a second call would add the origin twice.
        box0 = np.array(all_boxes[i])
        box0[:,2] = box0[:,2]+box0[:,0]
        box0[:,3] = box0[:,1]+box0[:,3]
        box0 = box0.clip(min=0,max=RESIZE_IMG_SIZE-1)
        score0 = np.array(all_scores[i])
        img = draw_rect_with_score(im0, box0, score0, color=(255,0,0))
        ax[i, 0].imshow(img)

## Split Data and Create Dataloader

In [None]:
def format_train_csv(path):
    """Read the training CSV and expand its 'bbox' string into numeric columns.

    Each 'bbox' value looks like "[x, y, w, h]"; the brackets are stripped and
    the four comma-separated numbers become the columns 'x', 'y', 'w', 'h'.
    The original 'bbox' column is removed from the returned frame.
    """
    marking = pd.read_csv(path)
    parsed = [np.fromstring(text[1:-1], sep=',') for text in marking['bbox']]
    coords = np.stack(parsed)
    for position, column in enumerate(('x', 'y', 'w', 'h')):
        marking[column] = coords[:, position]
    return marking.drop(columns=['bbox'])

def df_pre(marking,ispseudo=False):
    """Filter out implausible boxes from an annotation frame.

    The helper columns 'wh_ratio', 'hw_ratio' and 'area' are added to the
    frame that is passed in (and 'w'/'h' are cast to float32) before the
    filtered result is returned.

    Args:
        marking: DataFrame with at least the columns 'w' and 'h'.
        ispseudo: when False, additionally remove the hand-picked bad rows of
            the original training CSV (by index label and by exact area) and
            boxes with an area of 154200 or more.

    Returns:
        The filtered DataFrame: area > 300, w > 10, h > 10 and both aspect
        ratios below 10.
    """
    marking['w'] = marking['w'].astype(np.float32)
    marking['h'] = marking['h'].astype(np.float32)
    # The 1e-16 term guards against division by zero for degenerate boxes.
    marking['wh_ratio'] = marking['w'] / (marking['h']+1e-16)
    marking['hw_ratio'] = marking['h'] / (marking['w']+1e-16)
    marking['area'] = marking['w'] * marking['h']
    if not ispseudo:
        # Index labels of known-bad annotations in the original training CSV.
        bad_boxes = [3687,117344,173,113947,52868,2159,2169,121633,121634,147504,118211,52727,147552]
        marking = marking[~marking.index.isin(bad_boxes)]
        # Exact areas of known-erroneous boxes.
        error_bbox = [100648.0, 145360.0, 149744.0, 119790.0, 106743.0]
        marking = marking[~marking['area'].isin(error_bbox)]
        marking = marking[marking['area']<154200.0]
    marking = marking[marking['area']>300.0]
    marking = marking[marking['w']>10.0]
    marking = marking[marking['h']>10.0]
    marking = marking[marking['wh_ratio']<10]
    marking = marking[marking['hw_ratio']<10]
    if not ispseudo:
        # errors='ignore': some of these labels may be absent or already
        # filtered out above (e.g. on a subset of the CSV); that must not raise.
        marking = marking.drop([4775,111973,113007,115239,40664,46030], errors='ignore')
    return marking

def dataset_split(train_df,pseudo_df,sk_n,fold_n,pre=True):
    """Create the train, validation and pseudo-label datasets for one fold.

    Images are assigned to `sk_n` stratified folds, stratified by the image
    source combined with the box count bucketed in steps of 15. Fold `fold_n`
    becomes the validation set and the remaining folds the training set.

    Args:
        train_df: annotation frame with 'image_id', 'source' and box columns.
        pseudo_df: pseudo-label annotation frame; used as-is.
        sk_n: number of stratified folds.
        fold_n: index of the validation fold.
        pre: when True, `train_df` is first cleaned with df_pre.

    Returns:
        (train_dataset, validation_dataset, pseudo_dataset)
    """
    train_df_pre = df_pre(train_df, False) if pre else train_df
    # The pseudo labels are never pre-filtered, whatever `pre` says.
    pseudo_df_pre = pseudo_df

    # One row per image: box count, source and the derived stratification key.
    df_folds = train_df_pre[['image_id']].copy()
    df_folds.loc[:, 'bbox_count'] = 1
    df_folds = df_folds.groupby('image_id').count()
    df_folds.loc[:, 'source'] = train_df_pre[['image_id', 'source']].groupby('image_id').min()['source']
    source_part = df_folds['source'].values.astype(str)
    count_part = df_folds['bbox_count'].apply(lambda x: f'_{x // 15}').values.astype(str)
    df_folds.loc[:, 'stratify_group'] = np.char.add(source_part, count_part)
    df_folds.loc[:, 'fold'] = 0

    splitter = StratifiedKFold(n_splits=sk_n, shuffle=True, random_state=42)
    fold_iter = splitter.split(X=df_folds.index, y=df_folds['stratify_group'])
    for fold_number, (_, val_index) in enumerate(fold_iter):
        df_folds.loc[df_folds.iloc[val_index].index, 'fold'] = fold_number

    is_validation = df_folds['fold'] == fold_n
    train_ids = df_folds[df_folds['fold'] != fold_n].index.values
    validation_ids = df_folds[is_validation].index.values

    train_dataset = DatasetRetriever(
        image_ids=train_ids,
        marking=train_df_pre,
        transforms=get_train_transforms(),
        test=False,
        pseudo_label=False,
    )
    validation_dataset = DatasetRetriever(
        image_ids=validation_ids,
        marking=train_df_pre,
        transforms=get_valid_transforms(),
        test=True,
        pseudo_label=False,
    )
    pseudo_dataset = DatasetRetriever(
        image_ids=pseudo_df_pre['image_id'].unique(),
        marking=pseudo_df_pre,
        transforms=get_train_transforms(),
        test=False,
        pseudo_label=True,
    )
    return train_dataset, validation_dataset, pseudo_dataset


# for i in range(10,20):
#     image, target, image_id = train_dataset_all[i]
#     boxes = target['boxes'].cpu().numpy().astype(np.int32)

#     numpy_image = image.permute(1,2,0).cpu().numpy()

#     fig, ax = plt.subplots(1, 1, figsize=(12, 6))

#     for box in boxes:
#         cv2.rectangle(numpy_image, (box[1], box[0]), (box[3],  box[2]), (0, 1, 0), 2)
    
#     ax.set_axis_off()
#     ax.imshow(numpy_image);

## Pseudo Label Training

In [None]:
class AverageMeter(object):
    """Running weighted average that also remembers the latest value.

    Attributes:
        val: most recent value passed to `update`.
        sum: weighted sum of all values seen since the last reset.
        count: total weight seen since the last reset.
        avg: `sum / count`.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record `val` with weight `n` and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [None]:
class ModelEMA:
    """Exponential moving average of a model's state_dict.

    Adapted from https://github.com/rwightman/pytorch-image-models. A deep copy
    of the model is kept in eval mode with gradients disabled, and every
    floating-point entry of its state_dict (parameters and buffers) is blended
    towards the live model on each `update` call. The effective decay ramps up
    from 0 towards `decay` with the number of updates, which helps early epochs.

    Pass `device='cpu'` to keep the averaged copy off the GPU. The point at
    which this object is created relative to GPU placement and parallel
    wrappers matters, because the copy is taken at construction time.
    """

    def __init__(self, model, decay=0.9999, device=''):
        # Averaged copy of the model (kept in eval mode).
        self.ema = copy.deepcopy(model)
        self.ema.eval()
        # Number of EMA updates performed so far.
        self.updates = 0

        def ramped_decay(x):
            # Exponential ramp: ~0 for the first updates, approaching `decay` later.
            return decay * (1 - math.exp(-x / 2000))

        self.decay = ramped_decay
        # Optional device for the averaged copy (empty string: leave as copied).
        self.device = device
        if device:
            self.ema.to(device)
        for param in self.ema.parameters():
            param.requires_grad_(False)

    def update(self, model):
        """Blend the averaged weights towards `model`'s current weights."""
        with torch.no_grad():
            self.updates += 1
            d = self.decay(self.updates)

            live_state = model.state_dict()
            for key, averaged in self.ema.state_dict().items():
                # Integer buffers (e.g. counters) are not averaged.
                if not averaged.dtype.is_floating_point:
                    continue
                averaged *= d
                averaged += (1. - d) * live_state[key].detach()

    def update_attr(self, model):
        """Copy the model's public instance attributes onto the averaged copy."""
        excluded = ("process_group", "reducer")
        for name, value in model.__dict__.items():
            if name.startswith('_') or name in excluded:
                continue
            setattr(self.ema, name, value)

In [None]:
import warnings

warnings.filterwarnings("ignore")

class Fitter:
    
    def __init__(self, model, config, train_loader, val_loader):
        """Prepare model, optimizer, scheduler, mixed precision and EMA for training.

        Args:
            model: training bench whose `.model` attribute is the underlying network.
            config: configuration object (e.g. GlobalConfig); it is only read,
                never modified, so it can be reused for another Fitter.
            train_loader: DataLoader iterated by train_one_epoch.
            val_loader: DataLoader iterated by validation.
        """
        self.config = config
        self.epoch = 0

        self.base_dir = f'{config.folder}'
        os.makedirs(self.base_dir, exist_ok=True)

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.log_path = f'{self.base_dir}/log.txt'
        self.best_summary_loss = 10**5

        self.device = torch.device('cuda')
        self.model = model.to(self.device)

        self.nbs = self.config.nbs  # nominal batch size
        # Number of batches to accumulate to approximate the nominal batch size.
        self.accumulate = max(round(self.nbs / self.config.pseudo_train_batch_size), 1)

        self.nb = len(self.train_loader)
        # Length (in iterations) of the accumulation ramp used by train_one_epoch; at least 1000.
        self.n_burn = max(self.config.warmup_epoch*self.nb, 1e3)

        # The configured decay is divided by the learning rate (presumably to
        # cancel the lr factor AdamW applies to its decoupled decay). This is
        # kept in a local variable: writing it back to the shared config would
        # leave weight_decay at 0 for every Fitter created afterwards.
        scaled_weight_decay = self.config.weight_decay / self.config.lr
        parameters = self.add_weight_decay(self.model, scaled_weight_decay)
        # Every parameter group carries its own weight_decay, so the optimizer default is 0.
        self.optimizer = torch.optim.AdamW(parameters, self.config.lr, weight_decay=0., eps=self.config.eps)
        self.scheduler = config.SchedulerClass(self.optimizer, T_max=(self.config.n_epochs-self.config.warmup_epoch), eta_min=self.config.lr*1e-3)

        if has_apex:
            print('Using apex')
            self.model.model, self.optimizer = amp.initialize(self.model.model, self.optimizer, opt_level="O1", verbosity=0)
            # NOTE(review): DataParallel is only enabled when apex is available — confirm this nesting is intended.
            if self.config.distributed:
                print('Using DataParallel ,GPU nums:',torch.cuda.device_count())
                self.model.model = torch.nn.DataParallel(self.model.model)

        if self.config.checkpoint:
            self.load(f'{self.config.folder}/last-checkpoint.bin')
        # Created last so the EMA copy reflects the loaded / wrapped network.
        self.ema = ModelEMA(self.model.model)
        self.log(f'Fitter prepared. Device is {self.device}')

        self.do_scheduler = True
    
    def add_weight_decay(self, model, weight_decay=1e-5, skip_list=()):
        """Split trainable parameters into optimizer groups with and without weight decay.

        One-dimensional parameters, parameters whose name ends in ".bias" and
        parameters named in `skip_list` get no decay; frozen parameters are
        left out entirely.

        Returns:
            Two parameter-group dicts: the no-decay group first, then the
            group using `weight_decay`.
        """
        regularized, exempt = [], []
        for name, param in model.named_parameters():
            if not param.requires_grad:
                continue  # frozen weights
            is_exempt = len(param.shape) == 1 or name.endswith(".bias") or name in skip_list
            bucket = exempt if is_exempt else regularized
            bucket.append(param)
        no_decay_group = {'params': exempt, 'weight_decay': 0.}
        decay_group = {'params': regularized, 'weight_decay': weight_decay}
        return [no_decay_group, decay_group]

    def fit(self):
        for epoch in range(self.epoch, self.config.n_epochs):
            if epoch < self.config.warmup_epoch:
                lr_scale = min(1., float(epoch+1) / float(self.config.warmup_epoch))
                for pg in self.optimizer.param_groups:
                    pg['lr'] = lr_scale * self.config.lr
                self.do_scheduler = False
            else: 
                self.do_scheduler = True

            if self.config.verbose:
                lr = self.optimizer.param_groups[0]['lr']
                timestamp = datetime.utcnow().isoformat()
                self.log(f'\n{timestamp}\nLR: {lr}')

            t = time.time()
            summary_loss = self.train_one_epoch()
            
            if self.config.step_scheduler and self.do_scheduler:
                self.scheduler.step()

            self.log(f'[RESULT]: Train. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            self.save(f'{self.base_dir}/last-checkpoint.bin')

            t = time.time()
            summary_loss = self.validation()

            self.log(f'[RESULT]: Val. Epoch: {self.epoch}, summary_loss: {summary_loss.avg:.5f}, time: {(time.time() - t):.5f}')
            if summary_loss.avg < self.best_summary_loss:
                self.best_summary_loss = summary_loss.avg
                self.model.eval()
                self.save(f'{self.base_dir}/best-checkpoint.bin')

            if self.config.validation_scheduler and self.do_scheduler:
                self.scheduler.step()
            self.ema.update_attr(self.model.model)

            self.epoch += 1



    def validation(self):
        """Evaluate the model on the validation loader without gradients.

        Returns:
            An AverageMeter holding the batch-size-weighted mean of the
            validation loss.
        """
        self.model.eval()
        summary_loss = AverageMeter()
        started = time.time()
        val_loader = tqdm(self.val_loader, total=len(self.val_loader), desc="Validating")
        for step, (images, targets, image_ids) in enumerate(val_loader):
            with torch.no_grad():
                images = torch.stack(images)
                batch_size = images.shape[0]
                images = images.to(self.device).float()
                boxes = [target['boxes'].to(self.device).float() for target in targets]
                labels = [target['labels'].to(self.device).float() for target in targets]
                img_scale = torch.tensor([target['img_scale'].to(self.device) for target in targets])
                img_size = torch.tensor([(IMG_SIZE, IMG_SIZE) for _ in targets]).to(self.device).float()

                # Target dict in the layout the training bench is called with.
                bench_target = {
                    'bbox': boxes,
                    'cls': labels,
                    'img_scale': img_scale,
                    'img_size': img_size,
                }

                outputs = self.model(images, bench_target)
                loss = outputs['loss']

                summary_loss.update(loss.detach().item(), batch_size)
                progress = (
                    f'Valid Step {step}/{len(val_loader)}, '
                    f'summary_loss: {summary_loss.avg:.5f}, '
                    f'time: {(time.time() - started):.5f}'
                )
                val_loader.set_description(progress)
        return summary_loss

    def train_one_epoch(self):
        """Train for one pass over ``self.train_loader`` and return the loss meter.

        For every batch the loss is back-propagated through apex
        ``amp.scale_loss``. The optimizer only steps (and the EMA copy is only
        updated) when the global iteration index is a multiple of
        ``self.accumulate``. During the first ``self.n_burn`` global
        iterations ``self.accumulate`` is re-derived each step by linearly
        interpolating from 1 up to ``self.nbs / pseudo_train_batch_size``,
        rounded and floored at 1.

        Returns:
            AverageMeter: meter whose ``avg`` is the batch-size-weighted mean
            training loss of this epoch.
        """
        self.model.train()
        loss_meter = AverageMeter()
        started_at = time.time()
        progress = tqdm(self.train_loader, total=len(self.train_loader), desc="Training")
        for step, (images, targets, image_ids) in enumerate(progress):
            # Iteration counter that keeps growing across epochs
            # (self.nb is presumably the number of batches per epoch -- confirm).
            global_iter = step + self.nb * self.epoch
            if global_iter <= self.n_burn:
                full_accumulate = self.nbs / self.config.pseudo_train_batch_size
                ramped = np.interp(global_iter, [0, self.n_burn], [1, full_accumulate])
                self.accumulate = max(1, ramped.round())

            batch = torch.stack(images).to(self.device).float()
            batch_size = batch.shape[0]

            # Targets in the dict layout the model is called with.
            target_res = {
                'bbox': [target['boxes'].to(self.device).float() for target in targets],
                'cls': [target['labels'].to(self.device).float() for target in targets],
                'img_scale': torch.tensor([target['img_scale'] for target in targets]).to(self.device).float(),
                'img_size': torch.tensor([(IMG_SIZE, IMG_SIZE) for _ in targets]).to(self.device).float(),
            }

            loss = self.model(batch, target_res)['loss']

            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()

            loss_meter.update(loss.detach().item(), batch_size)

            # Gradients keep accumulating until the next multiple of
            # self.accumulate, then one optimizer step is applied.
            if global_iter % self.accumulate == 0:
                self.optimizer.step()
                self.optimizer.zero_grad()
                self.ema.update(self.model.model)

            progress.set_description(
                f'Train Step {step}/{len(progress)}, '
                f'Learning rate {self.optimizer.param_groups[0]["lr"]}, '
                f'summary_loss: {loss_meter.avg:.5f}, '
                f'time: {(time.time() - started_at):.5f}'
            )

        return loss_meter
    
    def save(self, path):
        """Write a training checkpoint to ``path``.

        The checkpoint is a dict with the keys ``model_state_dict``,
        ``optimizer_state_dict``, ``scheduler_state_dict``,
        ``best_summary_loss`` and ``epoch``. When ``self.config.distributed``
        is set the weights are taken from ``self.model.model.module``,
        otherwise from ``self.model.model``.

        Side effect: ``self.model`` is switched to eval mode and is not
        switched back here.
        """
        self.model.eval()
        network = self.model.model.module if self.config.distributed else self.model.model
        checkpoint = {
            'model_state_dict': network.state_dict(),
            'optimizer_state_dict': self.optimizer.state_dict(),
            'scheduler_state_dict': self.scheduler.state_dict(),
            'best_summary_loss': self.best_summary_loss,
            'epoch': self.epoch,
        }
        torch.save(checkpoint, path)
        print('Saved epoch',self.epoch)

    def load(self, path):
        """Restore training state from the checkpoint file at ``path``.

        Expects the dict layout written by ``save``: model, optimizer and
        scheduler state dicts plus ``best_summary_loss`` and ``epoch``. When
        ``self.config.distributed`` is set the weights are loaded into
        ``self.model.model.module``, otherwise into ``self.model.model``.
        ``self.epoch`` is set to the stored epoch plus one, so training
        continues with the epoch after the saved one.
        """
        checkpoint = torch.load(path)
        network = self.model.model.module if self.config.distributed else self.model.model
        network.load_state_dict(checkpoint['model_state_dict'])
        self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        self.scheduler.load_state_dict(checkpoint['scheduler_state_dict'])
        self.best_summary_loss = checkpoint['best_summary_loss']
        self.epoch = checkpoint['epoch'] + 1
        
    def log(self, message):
        """Record ``message`` in the log file, echoing it to stdout when verbose.

        The message is always appended, followed by a newline, to the file at
        ``self.log_path``; it is printed as well only when
        ``self.config.verbose`` is truthy.
        """
        if self.config.verbose:
            print(message)
        # 'a+' appends, creating the file on first use.
        with open(self.log_path, 'a+') as log_file:
            log_file.write(f'{message}\n')

In [None]:
def run_training(train_loader,val_loader,checkpoint_path):
    """Build the training network from ``checkpoint_path`` and fit it.

    The network comes from ``get_net`` (called with ``predict=False`` and the
    config name in ``GlobalConfig``) and is handed to a ``Fitter`` together
    with both loaders; ``Fitter.fit`` then runs the pseudo-label training.
    Nothing is returned.
    """
    fitter = Fitter(
        model=get_net(checkpoint_path, GlobalConfig.config_name, predict=False),
        config=GlobalConfig,
        train_loader=train_loader,
        val_loader=val_loader,
    )
    fitter.fit()

In [None]:
# Test-set loader used to generate pseudo labels; the final prediction cell
# reuses the same loader. File names are sorted and shuffling is disabled so
# that predictions (and the submission rows built from them) come out in the
# same order on every run.
test_dataset = DatasetRetriever_test(
    image_ids=sorted(os.listdir(TEST_IMAGE_PATH)),
    transforms=get_valid_transforms()
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=GlobalConfig.test_batch_size,
    shuffle=False,
    num_workers=GlobalConfig.num_workers,
    pin_memory=False,
    drop_last=False,
    collate_fn=collate_fn
)

# First detection pass with the initial checkpoint; pseudo_df feeds the
# pseudo-label training data built further down.
all_image_ids,all_boxes,all_scores,results, pseudo_df = detect_images(test_dataloader,GlobalConfig.checkpoint_path, 
                                                                             GlobalConfig.config_name, 
                                                                             GlobalConfig.tta_score_thr, GlobalConfig.tta_wbf_iou_thr, 
                                                                             GlobalConfig.tta_wbf_skip_box_thr)

draw_test_image(all_image_ids, all_boxes, all_scores, 10)
# Release cached GPU memory before the training cell runs.
torch.cuda.empty_cache()

In [None]:
# Preview the first rows of pseudo_df, the fifth value returned by detect_images above.
pseudo_df.head()

In [None]:
# Labelled training data. To debug on a small subset, slice it first,
# e.g. train_df = train_df[:500].
train_df = format_train_csv(TRAIN_CSV_PATH)

# dataset_split also turns pseudo_df into its own dataset
# (the 5, 0, True arguments are presumably n_folds, fold index and a flag -- confirm).
train_dataset, validation_dataset, pseudo_dataset = dataset_split(train_df, pseudo_df, 5, 0, True)

# Train on the labelled split plus the pseudo-labelled test images;
# validation uses labelled data only.
train_dataset_all = train_dataset + pseudo_dataset

train_loader = DataLoader(
    train_dataset_all,
    batch_size=GlobalConfig.pseudo_train_batch_size,
    sampler=RandomSampler(train_dataset_all),
    num_workers=GlobalConfig.num_workers,
    pin_memory=False,
    drop_last=False,
    collate_fn=collate_fn,
)

val_loader = DataLoader(
    validation_dataset,
    batch_size=GlobalConfig.pseudo_train_batch_size,
    sampler=SequentialSampler(validation_dataset),
    shuffle=False,
    num_workers=GlobalConfig.num_workers,
    pin_memory=False,
    collate_fn=collate_fn,
)

# Pseudo-label training only runs when more than 10 test images are present.
if len(os.listdir(TEST_IMAGE_PATH)) > 10:
    run_training(train_loader, val_loader, GlobalConfig.checkpoint_path)
torch.cuda.empty_cache()

## Final Prediction

In [None]:
# Second detection pass over the same test loader, this time with the final
# checkpoint and the final_* score / WBF thresholds from GlobalConfig.
(
    final_image_ids,
    final_boxes,
    final_scores,
    final_results,
    final_pseudo_df,
) = detect_images(
    test_dataloader,
    GlobalConfig.final_checkpoint_path,
    GlobalConfig.config_name,
    GlobalConfig.final_tta_score_thr,
    GlobalConfig.final_tta_wbf_iou_thr,
    GlobalConfig.final_tta_wbf_skip_box_thr,
)

draw_test_image(final_image_ids, final_boxes, final_scores, 10)

In [None]:
# train_df = format_train_csv(TRAIN_CSV_PATH)
# # train_df = train_df[:500]
# train_dataset, validation_dataset,pseudo_dataset = dataset_split(train_df,final_pseudo_df,5,0,True)
# #create dataloader
# train_dataset_all = train_dataset + pseudo_dataset
# # validation_dataset_all = validation_dataset+pseudo_dataset
# train_loader = torch.utils.data.DataLoader(
#                train_dataset_all,
#                batch_size=GlobalConfig.pseudo_train_batch_size,
#                sampler=RandomSampler(train_dataset_all),
#                pin_memory=False,
#                drop_last=False,
#                num_workers=GlobalConfig.num_workers,
#                collate_fn=collate_fn)

# val_loader = torch.utils.data.DataLoader(
#              validation_dataset, 
#              batch_size=GlobalConfig.pseudo_train_batch_size,
#              num_workers=GlobalConfig.num_workers,
#              shuffle=False,
#              sampler = SequentialSampler(validation_dataset),
#              pin_memory=False,
#              collate_fn=collate_fn)
# # pseudo label training
# if len(os.listdir(TEST_IMAGE_PATH)) > 10:
#     run_training(train_loader,val_loader,GlobalConfig.final_checkpoint_path)

In [None]:
# final_image_ids,final_boxes,final_scores,final_results, final_pseudo_df = detect_images(test_dataloader,GlobalConfig.final_checkpoint_path, 
#                                                                                         GlobalConfig.config_name, 
#                                                                                         GlobalConfig.final_tta_score_thr, 
#                                                                                         GlobalConfig.final_tta_wbf_iou_thr, 
#                                                                                         GlobalConfig.final_tta_wbf_skip_box_thr)

# draw_test_image(final_image_ids,final_boxes,final_scores, 10)

In [None]:
# Turn the final detection results into the two-column submission table,
# write it to GlobalConfig.submission_path and preview the first rows.
final_results = pd.DataFrame(
    data=final_results,
    columns=['image_id', 'PredictionString'],
)
final_results.to_csv(path_or_buf=GlobalConfig.submission_path, index=False)
final_results.head()

## OOF