In [None]:
from shutil import copyfile
copyfile(src="../input/augmented-rsdd/engine.py", dst="../working/engine.py")
copyfile(src="../input/augmented-rsdd/coco_eval.py", dst="../working/coco_eval.py")
copyfile(src="../input/augmented-rsdd/coco_utils.py", dst="../working/coco_utils.py")
copyfile(src="../input/augmented-rsdd/group_by_aspect_ratio.py", dst="../working/group_by_aspect_ratio.py")
copyfile(src="../input/augmented-rsdd/presets.py", dst="../working/presets.py")
copyfile(src="../input/augmented-rsdd/train.py", dst="../working/train.py")
copyfile(src="../input/augmented-rsdd/transforms.py", dst="../working/transforms.py")
copyfile(src="../input/augmented-rsdd/utils.py", dst="../working/utils.py")

# reinstall to fix "module 'torch.optim.lr_scheduler'
#  has no attribute 'LinearLR'"
!pip uninstall -y torch torchvision torchaudio torchtext
!pip install torch torchvision pycocotools 

import json
import os
from pathlib import Path
import glob
from itertools import product
import numpy as np
import torch
from torch import nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch.nn import functional
from PIL import Image, ImageEnhance
import pandas as pd
import torchvision
import torchvision.transforms as T
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from engine import train_one_epoch, evaluate
import utils
from matplotlib import pyplot as plt
from tqdm import tqdm
import shutil
from skimage.util import random_noise
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score

In [None]:
class RSDDataset(torch.utils.data.Dataset):
    """Detection dataset yielding ``(image_tensor, target_dict)`` pairs.

    Args:
        dataset_dir: ``Path`` whose ``rtsd-frames`` sub-directory holds the
            images.
        sec_dir: ``Path`` holding the two extra images ``2.3.5.jpg`` and
            ``3_33.jpg``.
        imgs_info: DataFrame with one row per image and the columns
            ``file_name``, ``bbox``, ``width``, ``height``, ``category_id``,
            ``is_sign``, ``area`` and ``iscrowd``.
        dataset_type: accepted for call-site compatibility; not used.
    """

    def __init__(self, dataset_dir, sec_dir, imgs_info, dataset_type):
        self.dataset_dir = dataset_dir.joinpath('rtsd-frames')

        # These two files live in the secondary directory, not in the frames.
        extra_files = ('2.3.5.jpg', '3_33.jpg')

        def resolve(file_name):
            base_dir = sec_dir if file_name in extra_files else self.dataset_dir
            return str(base_dir.joinpath(file_name))

        self.imgs = imgs_info['file_name'].apply(resolve).tolist()
        self.bboxes = imgs_info['bbox'].tolist()
        self.images_w = imgs_info['width'].tolist()
        self.images_h = imgs_info['height'].tolist()
        self.y = imgs_info['category_id'].tolist()
        self.y2 = imgs_info['is_sign'].tolist()
        self.area = imgs_info['area'].tolist()
        self.iscrowd = imgs_info['iscrowd'].tolist()

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and build its target dictionary."""
        img = Image.open(self.imgs[idx]).convert('RGB')

        # Width/height are converted here but are not part of the target.
        w = torch.as_tensor(self.images_w[idx], dtype=torch.int16)
        h = torch.as_tensor(self.images_h[idx], dtype=torch.int16)

        target = {
            'image_id': torch.tensor([idx]),
            'boxes': torch.as_tensor(self.bboxes[idx], dtype=torch.float32),
            # detector labels come from `is_sign`; the class ids go to 'y'
            'labels': torch.as_tensor(self.y2[idx], dtype=torch.int64),
            'iscrowd': torch.as_tensor(self.iscrowd[idx], dtype=torch.int64),
            'area': torch.as_tensor(self.area[idx], dtype=torch.int64),
            'y': torch.tensor(self.y[idx]),
        }
        return T.ToTensor()(img), target

    def __len__(self):
        """Number of images in the dataset."""
        return len(self.imgs)
    

def get_df_from_json_path(json_path: Path) -> pd.DataFrame:
    """Load an annotation JSON file into a one-row-per-image DataFrame.

    The file must contain the keys ``images``, ``annotations`` and
    ``categories``. Annotations are joined with their category and their
    image, then grouped per image so that every per-box column holds a list.

    Columns are selected by name rather than by position, so the result does
    not depend on the key order inside the JSON records.

    Args:
        json_path: path of the annotation file.

    Returns:
        DataFrame with the columns ``width``, ``height``, ``file_name``,
        ``category_id``, ``area``, ``bbox``, ``iscrowd`` and ``name``.
        ``bbox`` entries are converted from ``[x, y, w, h]`` to
        ``[x1, y1, x2, y2]``.
    """
    with open(str(json_path)) as json_file:
        jsn = json.load(json_file)
    df_imgs = pd.DataFrame(jsn['images'])
    df_anns = pd.DataFrame(jsn['annotations'])
    df_cats = pd.DataFrame(jsn['categories'])

    annotated = df_anns.merge(
        df_cats, left_on='category_id', right_on='id', how='left'
    )
    merged = df_imgs.merge(
        annotated, left_on='id', right_on='image_id', how='left'
    )

    group_keys = ['width', 'height', 'file_name']
    per_box_columns = ['category_id', 'area', 'bbox', 'iscrowd', 'name']
    grouped = (
        merged[group_keys + per_box_columns]
        .groupby(group_keys)
        .agg(list)
        .reset_index()
    )

    # [x, y, w, h] -> [x1, y1, x2, y2]
    grouped['bbox'] = grouped['bbox'].apply(
        lambda boxes: [
            [b[0], b[1], b[0] + b[2], b[1] + b[3]] for b in boxes
        ]
    )
    return grouped

In [None]:
# Locations of the two input datasets, resolved relative to the parent of
# the current working directory.
DATASET_DIR = (Path.cwd()
               .parents[0]
               .joinpath('input')
               .joinpath('rtsd-dataset'))

SECOND_DATASET_DIR = (
    Path.cwd().parents[0]
    .joinpath('input').joinpath('augmented-rsdd')
)

train_df = get_df_from_json_path(DATASET_DIR.joinpath('train_anno.json'))
test_df = get_df_from_json_path(DATASET_DIR.joinpath('val_anno.json'))
df = pd.concat([train_df, test_df])

# add samples to classes with only one instance
row = {
    'width': 800, 'height': 400,
    'file_name': '2.3.5.jpg', 'category_id': [141],
    'area': [(745-675)*(210-135)], 'bbox': [[675, 135, 745, 210]],
    'iscrowd': [0], 'name': ['2_3_5']
}
row2 = {
    'width': 2974, 'height': 1576,
    'file_name': '3_33.jpg', 'category_id': [76],
    'area': [(2595-1795)*(905-95)], 'bbox': [[1795, 95, 2595, 905]],
    'iscrowd': [0], 'name': ['3_33']
}
# `DataFrame.append` no longer exists in pandas >= 2.0; one `pd.concat`
# appends both rows and renumbers the index exactly as
# `append(..., ignore_index=True)` did.
df = pd.concat([df, pd.DataFrame([row, row2])], ignore_index=True)

# add target for detector: every annotated box is labelled 1 (sign)
df['is_sign'] = df['category_id'].apply(lambda x: [1]*len(x))

# the original train/val files are pooled and re-split 90/10 at random
train_df, test_df = train_test_split(df, test_size=0.1)

train_dataset = RSDDataset(DATASET_DIR, SECOND_DATASET_DIR, train_df, 'train')
test_dataset = RSDDataset(DATASET_DIR, SECOND_DATASET_DIR, test_df, 'test')

**DETECTOR**

In [None]:
# detector train
# Fine-tune a Faster R-CNN (MobileNetV3-Large FPN backbone) as a one-class
# "sign" detector and save its weights to 'detector_weights.pth'.
model = torchvision.models.detection.fasterrcnn_mobilenet_v3_large_fpn(
    pretrained=True
)
num_classes = 2  # 1 class (sign) + background
in_features = model.roi_heads.box_predictor.cls_score.in_features
# Replace the box predictor with a fresh one sized for `num_classes`.
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# NOTE(review): this freezes the *newly created* predictor, so its randomly
# initialised weights are never updated while the rest of the model trains.
# Fine-tuning usually does the opposite — confirm this is intended.
for p in model.roi_heads.box_predictor.parameters():
    p.requires_grad = False

device = (
    torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
)

data_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=10, shuffle=True, num_workers=2,
    collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
    test_dataset, batch_size=4, shuffle=True, num_workers=2,
    collate_fn=utils.collate_fn)

model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.00015, weight_decay=0)
# Learning rate is multiplied by 0.1 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer, step_size=3, gamma=0.1
)

num_epochs = 5
for epoch in range(num_epochs):
    # `train_one_epoch` / `evaluate` come from the copied `engine.py`.
    metric_logger = train_one_epoch(
        model, optimizer, data_loader, device, epoch, print_freq=1000
    )
    evaluate(model, data_loader_test, device=device)
    lr_scheduler.step()

torch.save(model.state_dict(), 'detector_weights.pth')

In [None]:
def get_iou(bb1: list, bb2: list) -> float:
    """Intersection-over-union of two boxes given as ``[x1, y1, x2, y2]``.

    Both boxes must have strictly positive width and height, otherwise an
    ``AssertionError`` is raised. Boxes that do not overlap yield ``0.0``.
    """
    for box in (bb1, bb2):
        assert box[0] < box[2]
        assert box[1] < box[3]

    # Signed extent of the overlap along each axis; negative means a gap.
    overlap_w = min(bb1[2], bb2[2]) - max(bb1[0], bb2[0])
    overlap_h = min(bb1[3], bb2[3]) - max(bb1[1], bb2[1])
    if overlap_w < 0 or overlap_h < 0:
        return 0.0

    shared = overlap_w * overlap_h
    area_first = (bb1[2] - bb1[0]) * (bb1[3] - bb1[1])
    area_second = (bb2[2] - bb2[0]) * (bb2[3] - bb2[1])

    iou = shared / float(area_first + area_second - shared)
    assert iou >= 0.0
    assert iou <= 1.0
    return iou


def calculate_iou_and_get_boxes(gt_boxes: list, pred_boxes: list) -> tuple:
    """Greedily match predictions to ground truth and collect unmatched ones.

    Boxes are ``[x1, y1, x2, y2]`` lists. Ground-truth boxes are processed
    in order; each takes the not-yet-used prediction with the highest
    non-zero IoU.

    Args:
        gt_boxes: ground-truth boxes of one image.
        pred_boxes: predicted boxes of the same image.

    Returns:
        ``(ious, zero_class_boxes)`` where ``ious[i]`` is the IoU of the
        prediction assigned to ``gt_boxes[i]`` (``0`` when none is left) and
        ``zero_class_boxes`` are the predictions that touch no ground-truth
        box at all (used as "no sign" samples for the classifier).
    """
    def touches(gt, pred):
        # True unless the two rectangles are separated along x or y.
        return not (
            gt[2] < pred[0] or
            gt[0] > pred[2] or
            gt[1] > pred[3] or
            gt[3] < pred[1]
        )

    overlap = [[touches(gt, pred) for pred in pred_boxes] for gt in gt_boxes]

    # Predictions without any part of a sign. Plain Python keeps this correct
    # for empty `gt_boxes` / `pred_boxes` as well.
    zero_class_boxes = [
        pred for j, pred in enumerate(pred_boxes)
        if not any(row[j] for row in overlap)
    ]

    used_pred_indices = set()
    ious = []
    for gt, row in zip(gt_boxes, overlap):
        # (iou, prediction index) pairs, best first; IoU is only computed
        # for boxes that actually touch.
        candidates = sorted(
            (
                (get_iou(gt, pred_boxes[j]) if hit else 0, j)
                for j, hit in enumerate(row)
            ),
            key=lambda pair: pair[0],
            reverse=True,
        )
        best_iou = 0
        for iou, pred_index in candidates:
            if iou == 0:
                break
            if pred_index not in used_pred_indices:
                # Record the prediction's own index, not its position in the
                # sorted row, so it cannot be handed out twice.
                used_pred_indices.add(pred_index)
                best_iou = iou
                break
        # Stays 0 when every overlapping prediction was already taken.
        ious.append(best_iou)
    return ious, zero_class_boxes


# get:
# 1. detector iou (mean over test images of the per-image mean IoU);
# 2. zero class crops from test dataset for classifier learning
os.makedirs('foldder', exist_ok=True)  # idempotent, unlike `!mkdir`
per_image_ious = []
to_pil = T.ToPILImage()
model.eval()
for i in tqdm(range(len(test_dataset))):
    x, gt_info = test_dataset[i]
    x = torch.unsqueeze(x, 0).to(device)
    # inference only: do not build an autograd graph
    with torch.no_grad():
        pred = model(x)
    pred_boxes = pred[0]['boxes'].tolist()
    gt_boxes = gt_info['boxes'].tolist()
    boxes_iou, zero_boxes = calculate_iou_and_get_boxes(gt_boxes, pred_boxes)
    for j, box in enumerate(zero_boxes):
        x1, y1, x2, y2 = map(int, box)
        # `x` is (1, C, H, W): rows are indexed by y, columns by x.
        crop = x[0, :, y1:y2, x1:x2]
        if crop.numel() == 0:
            # box collapsed to zero width/height after integer truncation
            continue
        filename = (
            './foldder/156_' + str(i).rjust(5, '0') + f'_{j}' + '.jpg'
        )
        to_pil(crop.cpu()).save(
            filename, "JPEG", quality=100, optimize=True, progressive=True
        )
    if boxes_iou:
        # images without ground-truth boxes contribute nothing to the mean
        per_image_ious.append(sum(boxes_iou) / len(boxes_iou))

common_iou = (
    sum(per_image_ious) / len(per_image_ious) if per_image_ious else 0.0
)

# make archive for easy download
shutil.make_archive('zipped', 'zip', './foldder')

print(common_iou)

In [None]:
# get gt crops with labels from train dataset for classifier learning
os.makedirs('foldder', exist_ok=True)  # idempotent, unlike `!mkdir`
to_pil = T.ToPILImage()  # defined here so the cell does not rely on a previous one
for i in tqdm(range(len(train_dataset))):
    x, gt_info = train_dataset[i]
    labels = gt_info['y']
    gt_boxes = gt_info['boxes'].tolist()
    for j, box in enumerate(gt_boxes):
        x1, y1, x2, y2 = map(int, box)
        # `x` is (C, H, W): rows are indexed by y, columns by x.
        crop = x[:, y1:y2, x1:x2]
        if crop.numel() == 0:
            # box collapsed to zero width/height after integer truncation
            continue
        # file name starts with the class id so it can be parsed back later
        file_name = (
            f'./foldder/{labels[j]}_' +
            str(i).rjust(5, '0') + f'_{j}' + '.jpg'
        )
        to_pil(crop).save(
            file_name, "JPEG", quality=100,
            optimize=True, progressive=True
        )

shutil.make_archive('zipped', 'zip', './foldder')

**CLASSIFIER**

In [None]:
# "No sign" crops: every file gets target 0.
zero_class_dataset_dir = SECOND_DATASET_DIR.joinpath('zipped-2')
zero_files = glob.glob(str(zero_class_dataset_dir)+'/*')
zero_files = list(map(lambda x: (x, 0), zero_files))

# Sign crops: the target is the integer prefix of the file name.
class_dataset_dir = SECOND_DATASET_DIR.joinpath('zipped-3')
files = glob.glob(str(class_dataset_dir)+'/*')
files = [
    (x, int(os.path.basename(x).split('_')[0])) for x in files
]
files.extend(zero_files)
df = pd.DataFrame(files, columns=['image', 'target'])

# stratified train test split: first 90% of every class goes to train.
# Classes are disjoint, so no bookkeeping of already-used rows is needed;
# the parts are concatenated once instead of inside the loop.
train_parts = []
test_parts = []
for _, class_rows in df.groupby('target', sort=False):
    split_at = int(len(class_rows) * 0.9)
    train_parts.append(class_rows.iloc[:split_at])
    test_parts.append(class_rows.iloc[split_at:])
train_df = pd.concat(train_parts) if train_parts else df.iloc[:0]
test_df = pd.concat(test_parts) if test_parts else df.iloc[:0]

In [None]:
def assign_augs(df: pd.DataFrame, suffs: list) -> pd.DataFrame:
    """Expand rows of under-represented targets into one row per suffix.

    Targets with fewer than 500 images have every image path replaced by one
    path per entry of ``suffs`` (``'.jpg'`` is substituted by the suffix).
    Rows of the remaining targets are passed through unchanged and placed
    after the expanded rows. The returned frame has a fresh ``0..n-1`` index.
    """
    images_per_target = df.groupby('target').count()['image']
    is_rare = images_per_target < 500
    rare_targets = images_per_target[is_rare].index.values
    common_targets = images_per_target[~is_rare].index.values

    rare_rows = df[df['target'].isin(rare_targets)].copy()
    common_rows = df[df['target'].isin(common_targets)].copy()

    # one list of suffixed paths per row, then one row per list element
    rare_rows['image'] = rare_rows['image'].apply(
        lambda path: [path.replace('.jpg', suffix) for suffix in suffs]
    )
    expanded = rare_rows.explode('image').reset_index(drop=True)
    return pd.concat([expanded, common_rows]).reset_index(drop=True)
    
# Every suffix encodes three level indices (0-6), i.e. 7**3 combinations.
indices = [str(level) for level in range(7)]
suffixes = [
    f'___{first}___{second}___{third}.jpg'
    for first, second, third in product(indices, repeat=3)
]

# assign augs to classes with number of instances less than 500
train_df = assign_augs(train_df, suffixes)
test_df = assign_augs(test_df, suffixes)

In [None]:
def augs(filename: str) -> torch.Tensor:
    """Load an image as a tensor, applying the augmentation in its name.

    A plain path is opened and converted as is. A path of the form
    ``<stem>___<b>___<n>___<r>.jpg`` refers to ``<stem>.jpg``; ``b``, ``n``
    and ``r`` index the brightness, gaussian-noise variance and rotation
    tables below, applied in that order.
    """
    to_tensor = T.ToTensor()
    if '___' not in filename:
        return to_tensor(Image.open(filename))

    brightness_levels = [1, 0.3, 0.5, 0.7, 1.3, 1.5, 1.7]
    noise_variances = [0, 0.001, 0.005, 0.01, 0.015, 0.02, 0.03]
    rotation_angles = [0, 10, 6, 3, -3, -6, -10]

    orig_img_path, b, n, r = filename.split('___')
    # the last field still carries the '.jpg' extension
    b_idx, n_idx, r_idx = int(b), int(n), int(r[:-4])

    source = Image.open(orig_img_path + '.jpg')
    brightened = ImageEnhance.Brightness(source).enhance(
        brightness_levels[b_idx]
    )
    noisy = torch.tensor(random_noise(
        to_tensor(brightened), mode='gaussian', mean=0,
        var=noise_variances[n_idx], clip=True
    ))
    return T.functional.rotate(noisy, rotation_angles[r_idx])

def balance_classes(df: pd.DataFrame, n_per_class: int = 300) -> pd.DataFrame:
    """Randomly down-sample every target to at most ``n_per_class`` rows.

    Args:
        df: frame with a ``target`` column; its index labels are used to
            drop rows, so they should be unique.
        n_per_class: maximum number of rows kept per target (default 300).

    Returns:
        A new frame in which each target has ``min(count, n_per_class)``
        rows. Targets that already have ``n_per_class`` rows or fewer are
        kept whole instead of raising on a negative sample size.
    """
    to_drop = []
    for target in df['target'].unique():
        class_rows = df[df['target'] == target]
        surplus = len(class_rows) - n_per_class
        if surplus > 0:
            # choose the surplus rows at random and mark them for removal
            to_drop.extend(class_rows.sample(surplus).index.tolist())
    return df.drop(to_drop)

# remain 300 instances for each class
train_df, test_df = (
    balance_classes(split) for split in (train_df, test_df)
)

# apply assigned augs: the 'image' column now holds tensors, not paths
for split in (train_df, test_df):
    split['image'] = split['image'].apply(augs)

In [None]:
class ClassifierDataset(torch.utils.data.Dataset):
    """Classification dataset over already-loaded image tensors.

    Args:
        imgs_info: DataFrame with an ``image`` column (image tensors) and a
            ``target`` column (class ids).
    """

    def __init__(self, imgs_info):
        self.imgs, self.y = (
            imgs_info[column].tolist() for column in ('image', 'target')
        )

    def __getitem__(self, idx):
        """Return sample ``idx`` resized to 299x299 together with its target."""
        # Resizing is done on a PIL image, then converted back to a tensor.
        resized = T.ToPILImage()(self.imgs[idx]).resize((299, 299))
        return T.ToTensor()(resized).float(), torch.tensor(self.y[idx])

    def __len__(self):
        """Number of samples."""
        return len(self.imgs)


def train(
    n_epochs, train_data, val_data, model,
    loss_func, optimizer, dvc, bs, num_classes, scheduler
):
    """Train ``model`` for ``n_epochs`` and print per-epoch metrics.

    Args:
        n_epochs: number of passes over ``train_data``.
        train_data: dataset yielding ``(image, target)`` pairs.
        val_data: dataset used for the validation F1 after every epoch.
        model: network to optimise; if its output has a ``logits``
            attribute that is used, otherwise the output itself.
        loss_func: callable ``(predictions, targets) -> loss``.
        optimizer: optimizer stepped once per batch.
        dvc: device the batches are moved to.
        bs: batch size for both loaders (incomplete last batches are
            dropped).
        num_classes: size of the prediction's last dimension.
        scheduler: stepped once per epoch with the validation F1.
    """
    # A DataLoader reshuffles on every iteration, so it is built once.
    train_loader = DataLoader(
        dataset=train_data,
        batch_size=bs,
        shuffle=True,
        drop_last=True
    )
    val_loader = DataLoader(
        dataset=val_data,
        batch_size=bs,
        shuffle=True,
        drop_last=True
    )

    for epoch in range(n_epochs):
        # compute_f1 leaves the model in eval mode, so switch back each epoch
        model.train()

        loss_accum = 0.0
        train_f1_accum = 0
        n_steps = 0
        for batch in tqdm(train_loader):
            data = batch[0].to(dvc)
            trg = batch[1].to(dvc)
            output = model(data)
            pred = getattr(output, 'logits', output)
            loss = loss_func(pred.view(-1, num_classes), trg.view(-1))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # .item() detaches the value; summing the tensor itself would
            # keep every step's autograd graph alive for the whole epoch
            loss_accum += loss.item()
            train_f1_accum += f1_score(
                trg.view(-1).cpu(),
                torch.max(pred, -1)[1].view(-1).cpu(),
                average='macro'
            )
            n_steps += 1

        # guard against an empty loader (fewer samples than one batch)
        steps = max(n_steps, 1)
        ave_loss = loss_accum / steps
        train_f1 = train_f1_accum / steps
        val_f1 = compute_f1(model, val_loader, dvc)
        scheduler.step(val_f1)

        print(f'Ave loss: {ave_loss}, Train f1: {train_f1}, Val f1: {val_f1}')

        
def compute_f1(model, loader, dvc):
    """Average micro-F1 of ``model`` over the batches of ``loader``.

    The model is switched to eval mode (and left there). Predictions are
    the arg-max over the last dimension of the model output. Gradients are
    disabled during the pass, since nothing is back-propagated.

    Args:
        model: network returning class scores for a batch.
        loader: iterable of ``(data, ground_truth)`` batches.
        dvc: device the batches are moved to.

    Returns:
        Mean of the per-batch micro-F1 scores; ``0.0`` for an empty loader.
    """
    model.eval()

    f1_accum = 0
    n_batches = 0
    with torch.no_grad():
        for batch in tqdm(loader):
            data = batch[0].to(dvc)
            ground_truth = batch[1].to(dvc)
            pred = model(data)
            f1_accum += f1_score(
                ground_truth.view(-1).cpu(),
                torch.max(pred, -1)[1].view(-1).cpu(),
                average='micro'
            )
            n_batches += 1
    return f1_accum / max(n_batches, 1)

In [None]:
# Classifier: Inception v3 with every layer frozen except a new 156-way
# final layer, trained on the prepared crops and saved to
# 'classifier_weights.pth'.
# NOTE(review): `pretrained=` here and `verbose=` on ReduceLROnPlateau are
# flagged as deprecated in recent torchvision/torch releases — confirm they
# are still accepted by the versions installed at the top of the notebook.
classifier = torchvision.models.inception_v3(pretrained=True)
for param in classifier.parameters():
    param.requires_grad = False
# Replace the final fully connected layer; only it is trained.
classifier.fc = nn.Linear(2048, 156)
for param in classifier.fc.parameters():
    param.requires_grad = True

num_epochs = 10
train_set = ClassifierDataset(train_df)
val_set = ClassifierDataset(test_df)
criterion = nn.CrossEntropyLoss()
# All parameters are passed in; the frozen ones never receive gradients.
optim = torch.optim.Adam(classifier.parameters(), 0.003)
device = (
    torch.device('cuda') if
    torch.cuda.is_available() else
    torch.device('cpu')
)
classifier.to(device)
batch_size = 128
n_classes = 156  # 155 sign + no sign
# Halve the learning rate as soon as the validation F1 (mode='max') fails to
# improve by more than `threshold` for one epoch (patience=0).
lr_scheduler = ReduceLROnPlateau(
    optim, patience=0, mode='max', factor=0.5,
    verbose=True, threshold=0.01
)
train(num_epochs, train_set, val_set, classifier,
      criterion, optim, device, batch_size, n_classes, lr_scheduler)

torch.save(classifier.state_dict(), 'classifier_weights.pth')