In [9]:
!pip install timm
!pip install omegaconf
!pip install pycocotools
!pip install effdet

Collecting effdet
  Downloading effdet-0.2.4-py3-none-any.whl (111 kB)
[K     |████████████████████████████████| 111 kB 1.2 MB/s eta 0:00:01
Installing collected packages: effdet
Successfully installed effdet-0.2.4


In [4]:
import sys
import torch
import os
from datetime import datetime
import time
import random
import cv2
import pandas as pd
import numpy as np
import albumentations as A
import matplotlib.pyplot as plt
from albumentations.pytorch.transforms import ToTensorV2
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from glob import glob

SEED = 42

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)  # covers every visible GPU, not only the current one
    torch.backends.cudnn.deterministic = True
    # benchmark mode lets cuDNN auto-tune and pick different kernels from run
    # to run, which defeats the deterministic flag above; it must stay off.
    torch.backends.cudnn.benchmark = False
seed_everything(SEED)

In [8]:
# Load the box annotations and rescale the top-left corner and the box size
# by 512 in one vectorised step.
# NOTE(review): 512 is presumably the side length of the resized images and the
# CSV values presumably normalised to [0, 1] - confirm. x_max / y_max / x_mid /
# y_mid are left unscaled.
marking = pd.read_csv('../input/label-train-df/train_df.csv')
pixel_columns = ['x_min', 'y_min', 'w', 'h']
marking[pixel_columns] = marking[pixel_columns] * 512
marking.head()

Unnamed: 0.1,Unnamed: 0,image_id,class_id,x_min,y_min,x_max,y_max,x_mid,y_mid,w,h,area
0,0,9a5094b2563a1ef3ff50dc5c7ff71345,0,258.953846,156.712329,0.624519,0.413527,0.565144,0.359803,60.8,55.013699,0.01276
1,1,9a5094b2563a1ef3ff50dc5c7ff71345,11,440.369231,378.958904,0.901442,0.85274,0.880769,0.796447,21.169231,57.643836,0.004655
2,2,9a5094b2563a1ef3ff50dc5c7ff71345,10,440.369231,378.958904,0.901442,0.85274,0.880769,0.796447,21.169231,57.643836,0.004655
3,3,9a5094b2563a1ef3ff50dc5c7ff71345,3,170.010269,296.840179,0.797436,0.769549,0.564744,0.674658,238.276932,97.168976,0.088322
4,4,051132a778e61a86eb147c7c6f564dfe,11,353.777778,80.355556,0.782986,0.209722,0.736979,0.183333,47.111111,27.022222,0.004856


In [13]:
from sklearn.model_selection import StratifiedKFold

# Assign every annotation row to one of five folds, stratified on class_id.
# NOTE(review): the split is made per box row, not per image, so boxes of the
# same image_id end up in different folds (see the head() output below) -
# confirm this is intended before using `fold` for validation.
folds = marking.copy()
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for fold_number, (_, holdout_idx) in enumerate(skf.split(folds, folds.class_id)):
    # Only the held-out rows of each split receive that split's fold number.
    folds.loc[holdout_idx, 'fold'] = int(fold_number)
# The column is created as float by the partial assignments above.
folds['fold'] = folds['fold'].astype(int)

folds.head()

Unnamed: 0.1,Unnamed: 0,image_id,class_id,x_min,y_min,x_max,y_max,x_mid,y_mid,w,h,area,fold
0,0,9a5094b2563a1ef3ff50dc5c7ff71345,0,258.953846,156.712329,0.624519,0.413527,0.565144,0.359803,60.8,55.013699,0.01276,0
1,1,9a5094b2563a1ef3ff50dc5c7ff71345,11,440.369231,378.958904,0.901442,0.85274,0.880769,0.796447,21.169231,57.643836,0.004655,1
2,2,9a5094b2563a1ef3ff50dc5c7ff71345,10,440.369231,378.958904,0.901442,0.85274,0.880769,0.796447,21.169231,57.643836,0.004655,0
3,3,9a5094b2563a1ef3ff50dc5c7ff71345,3,170.010269,296.840179,0.797436,0.769549,0.564744,0.674658,238.276932,97.168976,0.088322,4
4,4,051132a778e61a86eb147c7c6f564dfe,11,353.777778,80.355556,0.782986,0.209722,0.736979,0.183333,47.111111,27.022222,0.004856,0


In [14]:
def get_train_transforms():
    """Build the albumentations pipeline used for training samples.

    With probability 0.9 exactly one colour jitter (hue/saturation/value or
    brightness/contrast) is applied, followed by a horizontal flip (p=0.5),
    a resize to 512x512 and conversion to a tensor. Boxes are handled in
    ``pascal_voc`` format with their classes passed through the ``labels`` field.
    """
    colour_jitter = A.OneOf(
        [
            A.HueSaturationValue(
                hue_shift_limit=0.2,
                sat_shift_limit=0.2,
                val_shift_limit=0.2,
                p=0.9,
            ),
            A.RandomBrightnessContrast(
                brightness_limit=0.2,
                contrast_limit=0.2,
                p=0.9,
            ),
        ],
        p=0.9,
    )
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    steps = [
        colour_jitter,
        A.HorizontalFlip(p=0.5),
        A.Resize(height=512, width=512, p=1),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

def get_valid_transforms():
    """Build the albumentations pipeline used for validation samples.

    Only resizes to 512x512 and converts to a tensor; no random augmentation.
    Boxes are handled in ``pascal_voc`` format with their classes passed
    through the ``labels`` field.
    """
    box_settings = A.BboxParams(
        format='pascal_voc',
        min_area=0,
        min_visibility=0,
        label_fields=['labels'],
    )
    steps = [
        A.Resize(height=512, width=512, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return A.Compose(steps, p=1.0, bbox_params=box_settings)

In [None]:
TRAIN_ROOT_PATH = '../input/vinbigdata-512-image-dataset/vinbigdata/train'

class DatasetRetriever(Dataset):
    """Detection dataset that serves either a single image or a 4-image mosaic.

    Args:
        marking: DataFrame of box annotations. The columns read here are
            ``image_id``, ``class_id``, ``x_min``, ``y_min``, ``w`` and ``h``
            (top-left corner plus size, in pixels).
        image_ids: indexable collection of image ids; one dataset item per id.
        transforms: optional albumentations-style callable taking ``image``,
            ``bboxes`` and ``labels`` keyword arguments.
        test: when true, mosaics are never produced.
    """

    def __init__(self, marking, image_ids, transforms=None, test=False):
        super().__init__()

        self.image_ids = image_ids
        self.marking = marking
        self.transforms = transforms
        self.test = test

    def __getitem__(self, index: int):
        """Return ``(image, target, image_id)`` for the item at ``index``.

        ``target`` holds ``boxes``, ``labels`` and ``image_id`` (the index as a
        tensor). When transforms are set and produce at least one box, ``boxes``
        is an (N, 4) float tensor in (y_min, x_min, y_max, x_max) order and
        ``labels`` a tensor aligned with it; otherwise both stay as the numpy
        arrays returned by the loader, with boxes in (x_min, y_min, x_max, y_max).
        """
        image_id = self.image_ids[index]

        # Outside test mode roughly half of the samples are mosaics.
        if self.test or random.random() > 0.5:
            image, boxes, labels = self.load_image_and_boxes(index)
        else:
            image, boxes, labels = self.load_mosaic(index)

        target = {}
        target['boxes'] = boxes  # format x_min, y_min, x_max, y_max
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])

        if self.transforms:
            # Random transforms may drop every box; retry a few times and keep
            # the first result that still contains at least one box.
            for _ in range(10):
                sample = self.transforms(**{
                    'image': image,
                    'bboxes': boxes,
                    'labels': labels
                })
                if len(sample['bboxes']) > 0:
                    image = sample['image']
                    xyxy = torch.as_tensor(np.asarray(sample['bboxes'], dtype=np.float32))
                    # Reorder to (y_min, x_min, y_max, x_max).
                    # NOTE(review): presumably required by the effdet training
                    # code that consumes this dataset - confirm.
                    target['boxes'] = xyxy[:, [1, 0, 3, 2]]
                    # Take the labels from the transformed sample so they stay
                    # aligned with the boxes that survived the transform.
                    target['labels'] = torch.as_tensor(np.asarray(sample['labels']))
                    break

        return image, target, image_id

    def __len__(self) -> int:
        """Number of images served by the dataset."""
        return len(self.image_ids)

    def load_image_and_boxes(self, index):
        """Load one image and its annotations.

        Returns:
            ``(image, boxes, labels)`` where ``image`` is float32 RGB scaled to
            [0, 1], ``boxes`` is an (N, 4) float32 array in
            (x_min, y_min, x_max, y_max) order and ``labels`` holds the
            ``class_id`` of every box.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        image_id = self.image_ids[index]
        image_path = f'{TRAIN_ROOT_PATH}/{image_id}.jpg'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        records = self.marking[self.marking['image_id'] == image_id]
        labels = records['class_id'].values
        # astype() copies, so the frame itself is never modified below.
        boxes = records[['x_min', 'y_min', 'w', 'h']].values.astype(np.float32)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]  # x_max = x_min + w
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]  # y_max = y_min + h
        return image, boxes, labels  # format x_min, y_min, x_max, y_max

    def load_mosaic(self, index):
        """Combine the image at ``index`` with three random ones into a mosaic.

        The four images are pasted around a random centre on a 2s x 2s canvas
        (s = 512) and the central s x s window is cut out. Boxes are moved with
        their tile, clipped to the visible part of the tile and to the window,
        and boxes thinner than one pixel are dropped.

        Returns:
            ``(image, boxes, labels)`` in the same format as
            ``load_image_and_boxes``. If no box survives, the plain image at
            ``index`` is returned instead so callers always get at least the
            original annotations.
        """
        s = 512
        # mosaic center x, y on the 2s x 2s canvas
        xc, yc = [int(random.uniform(s * 0.5, s * 1.5)) for _ in range(2)]
        # 3 additional image indices
        indices = [index] + [random.randint(0, len(self.image_ids) - 1) for _ in range(3)]

        img4 = None
        boxes4, labels4 = [], []
        for i, tile_index in enumerate(indices):
            img, boxes, labels = self.load_image_and_boxes(tile_index)
            h, w = img.shape[:2]

            # (x1a, y1a, x2a, y2a): destination window on the canvas
            # (x1b, y1b, x2b, y2b): matching source window inside the tile image
            if i == 0:  # top left
                # Float canvas: the images are float32 in [0, 1], so an integer
                # canvas would truncate every pixel to 0.
                img4 = np.full((s * 2, s * 2, img.shape[2]), 114 / 255.0, dtype=np.float32)
                x1a, y1a, x2a, y2a = max(xc - w, 0), max(yc - h, 0), xc, yc
                x1b, y1b, x2b, y2b = w - (x2a - x1a), h - (y2a - y1a), w, h
            elif i == 1:  # top right
                x1a, y1a, x2a, y2a = xc, max(yc - h, 0), min(xc + w, s * 2), yc
                x1b, y1b, x2b, y2b = 0, h - (y2a - y1a), min(w, x2a - x1a), h
            elif i == 2:  # bottom left
                x1a, y1a, x2a, y2a = max(xc - w, 0), yc, xc, min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = w - (x2a - x1a), 0, w, min(y2a - y1a, h)
            else:  # bottom right
                x1a, y1a, x2a, y2a = xc, yc, min(xc + w, s * 2), min(s * 2, yc + h)
                x1b, y1b, x2b, y2b = 0, 0, min(w, x2a - x1a), min(y2a - y1a, h)

            img4[y1a:y2a, x1a:x2a] = img[y1b:y2b, x1b:x2b]  # img4[ymin:ymax, xmin:xmax]
            padw = x1a - x1b
            padh = y1a - y1b

            if len(boxes) > 0:
                shifted = np.array(boxes, dtype=np.float32)
                shifted[:, [0, 2]] += padw
                shifted[:, [1, 3]] += padh
                # Keep only the part of each box that lies on this tile's
                # visible window, so boxes never spill onto a neighbouring tile.
                shifted[:, [0, 2]] = np.clip(shifted[:, [0, 2]], x1a, x2a)
                shifted[:, [1, 3]] = np.clip(shifted[:, [1, 3]], y1a, y2a)
                boxes4.append(shifted)
                labels4.append(np.asarray(labels))

        # Centre crop back to s x s. This replaces the affine step of the
        # original draft, whose helper is not defined in this notebook.
        offset = s // 2
        img4 = np.ascontiguousarray(img4[offset:offset + s, offset:offset + s])

        if boxes4:
            boxes4 = np.concatenate(boxes4, 0)
            labels4 = np.concatenate(labels4, 0)
            boxes4 -= offset
            np.clip(boxes4, 0, s, out=boxes4)
            # Prune boxes that were cropped away (or reduced to a sliver).
            keep = ((boxes4[:, 2] - boxes4[:, 0]) >= 1) & ((boxes4[:, 3] - boxes4[:, 1]) >= 1)
            boxes4, labels4 = boxes4[keep], labels4[keep]

        if len(boxes4) == 0:
            return self.load_image_and_boxes(index)

        return img4, boxes4, labels4

[]
