# Change Log
**Version 1:** added EDA, augmentations, Faster-RCNN, icevision training mode + pytorch lightning, only positive examples<br>
**Version 3:** added folds training, fixed class map object

=================================================================

This kernel is meant to be run with a GPU accelerator enabled.

In [None]:
!pip install icevision[all]
!pip install matplotlib==3.1.3
!pip install tqdm==4.45.0

In [None]:
import os
import random
import cv2
import warnings
import torch
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import pytorch_lightning as pl
from pytorch_lightning.loggers import CSVLogger

from sklearn.model_selection import GroupKFold
from icevision.all import *

warnings.filterwarnings('ignore')

In [None]:
# Reproducibility and run mode
SEED = 2021    # handed to seed_everything()
DEBUG = False  # True -> train_df / test_df are cut to their first 3000 rows

# Image geometry, in pixels
IMG_DIM, RESIZE_DIM, PRESIZE = 512, 384, 512  # box rescale target / transform size / aug presize

# Data loading
BATCH_SIZE, NUM_WORKERS = 48, 4

# Cross-validation: number of GroupKFold splits and the folds that actually get trained
N_FOLDS = 5
FOLDS_IDS = [0]

# Optimisation (AdamW learning rate / weight decay, maximum number of epochs)
LR, WDECAY = 1e-5, 1e-4
NUM_EPOCHS = 60

def seed_everything(seed):
    """Seed every random number generator used here and make cuDNN deterministic.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    and exports ``PYTHONHASHSEED`` for child processes.

    Args:
        seed: integer seed shared by all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels between
    # runs, which defeats the deterministic setting above.
    torch.backends.cudnn.benchmark = False

seed_everything(SEED)

# Brief EDA

In [None]:
# Dataset root; the train/test annotation tables live directly inside it.
source = Path("../input/vinbigdata-512-png")

train_df, test_df = (
    pd.read_csv(source / csv_name) for csv_name in ('train.csv', 'test.csv')
)

train_df.head()

In [None]:
test_df.head()

In [None]:
# Report how many rows each annotation table holds.
n_train_rows, n_test_rows = len(train_df), len(test_df)
print(f"Number of samples in train df: {n_train_rows}")
print(f"Number of samples in test df: {n_test_rows}")

In [None]:
# Replace every NaN in the frame with 0, then store the four box-corner
# columns as int32.
bbox_columns = ['x_min', 'x_max', 'y_min', 'y_max']
train_df = train_df.fillna(0).astype(dict.fromkeys(bbox_columns, 'int32'))

In [None]:
# Bar chart of annotation rows per class name, with a second y-axis that
# expresses the same bar heights as a percentage of all rows.
ncount = len(train_df)

plt.figure(figsize=(12,8))
# Bars are ordered from the most to the least frequent class.
ax = sns.countplot(x="class_name", data=train_df, order = train_df["class_name"].value_counts().index)
plt.title('Distribution of class names')

ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')

# Second y-axis sharing the same x-axis; it carries the percentage scale.
ax2=ax.twinx()

# Swap sides: percentage ticks/label go to the left, raw counts to the right.
ax2.yaxis.tick_left()
ax.yaxis.tick_right()

ax.yaxis.set_label_position('right')
ax2.yaxis.set_label_position('left')

ax2.set_ylabel('Frequency [%]')

# Write each bar's share of all rows just above the bar.
for p in ax.patches:
    x=p.get_bbox().get_points()[:,0]
    y=p.get_bbox().get_points()[1,1]
    ax.annotate('{:.1f}%'.format(100.*y/ncount), (x.mean(), y), 
            ha='center', va='bottom')

# 11 evenly spaced count ticks, to line up with the 0..100 step-10 percentage ticks below.
ax.yaxis.set_major_locator(ticker.LinearLocator(11))

# Both axes span the full range so that ncount rows corresponds to 100 %.
ax2.set_ylim(0,100)
ax.set_ylim(0,ncount)

ax2.yaxis.set_major_locator(ticker.MultipleLocator(10))

# Turn off the grid of the percentage axis.
ax2.grid(None);

In [None]:
train_df['image_id'].value_counts()[:10]

In [None]:
train_df['image_id'].value_counts().mean()

In [None]:
def recalculate_coordinate(original_size, new_size, original_coordinate):
    """Map a coordinate from an axis of length ``original_size`` to one of length ``new_size``.

    The result is truncated towards zero by ``int()``.
    """
    shrink_factor = original_size / new_size
    return int(original_coordinate / shrink_factor)

# Rescale every box corner from the original image size to IMG_DIM:
# x-coordinates scale with the image width, y-coordinates with its height.
for size_col, coord_col in (('width', 'x_min'),
                            ('height', 'y_min'),
                            ('width', 'x_max'),
                            ('height', 'y_max')):
    train_df[f'{coord_col}_new'] = train_df.apply(
        lambda row: recalculate_coordinate(row[size_col], IMG_DIM, row[coord_col]),
        axis=1,
    )

In [None]:
train_df.loc[train_df['image_id'] == '051132a778e61a86eb147c7c6f564dfe']

A good tutorial on visualising images, bounding boxes and their augmentations:

[Using Albumentations to augment bounding boxes for object detection tasks](https://albumentations.ai/docs/examples/example_bboxes/)

In [None]:
def plot_original_images(img_id: str = '',
                         df: pd.DataFrame = None,
                         img_dir: str = "../input/vinbigdata-512-png/train"):
    """Show one image with its bounding boxes and class names drawn on top.

    Args:
        img_id: image identifier; the file read is ``<img_dir>/<img_id>.png``.
        df: annotation table with the columns ``image_id``, ``class_name``,
            ``x_min_new``, ``y_min_new``, ``x_max_new`` and ``y_max_new``.
        img_dir: directory that holds the PNG files.

    Raises:
        ValueError: if ``df`` is not provided.
        FileNotFoundError: if the image file cannot be read.
    """
    if df is None:
        raise ValueError("df with the bounding box annotations is required")

    image_path = f'{img_dir}/{img_id}.png'
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)

    annotations = df.loc[df['image_id'] == img_id]
    bboxes = annotations[['x_min_new', 'y_min_new', 'x_max_new', 'y_max_new']].values
    class_names = annotations['class_name'].values

    _, ax = plt.subplots(1, 1, figsize=(16, 10))
    for box, class_name in zip(bboxes, class_names):
        # Plain Python ints: OpenCV drawing functions expect integer points.
        x_min, y_min, x_max, y_max = (int(value) for value in box)
        # The image is RGB at this point, so (0, 0, 255) is drawn as blue.
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), (0, 0, 255), 1)

        ((text_width, text_height), _) = cv2.getTextSize(class_name, cv2.FONT_HERSHEY_SIMPLEX, 0.35, 1)
        # Filled label background sitting on top of the box's upper-left corner.
        cv2.rectangle(image, (x_min, y_min - int(1.3 * text_height)), (x_min + text_width, y_min), (0, 0, 255), -1)
        cv2.putText(
            image,
            text=class_name,
            org=(x_min, y_min - int(0.3 * text_height)),
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=0.35,
            color=(255, 255, 255),
            lineType=cv2.LINE_AA,
        )

    ax.set_axis_off()
    # The image is float32 in 0..255; imshow expects floats in 0..1.
    ax.imshow(image / 255)

In [None]:
# Plot the five images that have the most annotation rows.
most_annotated_ids = train_df['image_id'].value_counts().index[:5]
for idx in most_annotated_ids:
    plot_original_images(idx, train_df)

# Group k-Fold

In [None]:
# Drop all rows of class_id 14 and renumber the index from 0.
# NOTE(review): class 14 is presumably the "no finding" class (the change log
# says only positive examples are used) - confirm against the data.
has_finding = train_df['class_id'] != 14
train_df = train_df.loc[has_finding].reset_index(drop=True)

In [None]:
if DEBUG:
    # Debug runs keep only the first 3000 rows of each table.
    train_df, test_df = train_df.iloc[:3000], test_df.iloc[:3000]

In [None]:
# Assign every annotation row to a validation fold. Grouping by image_id keeps
# all boxes of one image in the same fold.
gkf = GroupKFold(n_splits=N_FOLDS)
train_df['fold'] = -1
fold_col = train_df.columns.get_loc('fold')
for fold, (_, val_idx) in enumerate(gkf.split(train_df, groups=train_df['image_id'].values)):
    # split() yields positional row indices, so write through iloc; label-based
    # .loc would only be correct while the index happens to be 0..n-1.
    train_df.iloc[val_idx, fold_col] = fold
train_df.head()

# Dataset class, augmentations visualization

In [None]:
class VinBigData(parsers.Parser, parsers.FilepathMixin, parsers.LabelsMixin, parsers.BBoxesMixin):
    """Empty placeholder combining the parser mixins; only used for generate_template()."""


# NOTE(review): presumably prints the method stubs a concrete parser has to
# implement - confirm against the icevision documentation.
VinBigData.generate_template()

In [None]:
class VinBigData(parsers.FasterRCNN, parsers.FilepathMixin, parsers.SizeMixin):
    """Parser that exposes the rows of an annotation DataFrame to icevision.

    Each row of ``df`` describes a single bounding box; the methods below
    extract the image id, file path, image size, label and box from one row.
    """

    def __init__(self, df, source):
        # df: annotation table; source: directory containing "<image_id>.png".
        self.source = source
        self.df = df

    def __len__(self):
        return self.df.shape[0]

    def __iter__(self):
        # Rows are handed out as namedtuples, hence the attribute access below.
        for row in self.df.itertuples():
            yield row

    def imageid(self, o) -> Hashable:
        return o.image_id

    def filepath(self, o) -> Union[str, Path]:
        file_name = f"{o.image_id}.png"
        return self.source / file_name

    def image_width_height(self, o) -> Tuple[int, int]:
        # The size is read from the image file, not from the DataFrame.
        image_path = self.filepath(o)
        return get_image_size(image_path)

    def labels(self, o) -> List[int]:
        # NOTE(review): class_id is passed through unchanged, so class 0 is used
        # as a label. torchvision detection models reserve label 0 for
        # background - confirm that class 0 boxes are really being learned.
        return [o.class_id]

    def bboxes(self, o) -> List[BBox]:
        # Uses the rescaled ("*_new") corner columns.
        corners = (o.x_min_new, o.y_min_new, o.x_max_new, o.y_max_new)
        return [BBox.from_xyxy(*corners)]

In [None]:
# class_id -> class_name lookup, ordered by class_id. Ids and names are taken
# from the same rows, so the pairing does not rely on two independent
# unique() calls returning their values in matching order.
_class_pairs = (
    train_df[['class_id', 'class_name']]
    .drop_duplicates()
    .sort_values('class_id')
)
IDX_TO_CLASS = dict(zip(_class_pairs['class_id'], _class_pairs['class_name']))
IDX_TO_CLASS

In [None]:
ClassMap(list(IDX_TO_CLASS.values()), background=None)

In [None]:
# Class names in class_id order (IDX_TO_CLASS is sorted by id).
# NOTE(review): background=None presumably means no extra "background" entry is
# inserted, so index 0 is a real class - confirm against the icevision ClassMap
# docs and against how the detection model treats label 0.
class_map = ClassMap(list(IDX_TO_CLASS.values()), background=None)

# Quick check of the parser on the first 20 annotation rows only; parse() is
# called without an explicit splitter here.
parser = VinBigData(train_df[:20], source / "train")
train_rs, valid_rs = parser.parse()

In [None]:
train_rs[:1]

In [None]:
valid_rs[:1]

In [None]:
# no augmentations
show_records(train_rs[:3], ncols=3, class_map=class_map, figsize=(16, 8))

In [None]:
# imagenet normalization params are used
# (NOTE(review): this relies on the defaults of tfms.A.Normalize() - confirm.)
# Training: icevision's aug_tfms preset (presize PRESIZE, output size RESIZE_DIM) followed by normalization.
train_tfms = tfms.A.Adapter([*tfms.A.aug_tfms(size=RESIZE_DIM, presize=PRESIZE), tfms.A.Normalize()])
# Validation: only resize-and-pad to RESIZE_DIM followed by normalization.
valid_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(RESIZE_DIM), tfms.A.Normalize()])

In [None]:
train_ds = Dataset(train_rs, train_tfms)
valid_ds = Dataset(valid_rs, valid_tfms)

In [None]:
samples = [train_ds[2] for _ in range(6)]
show_samples(samples, ncols=3, class_map=class_map, display_label=True)

# Faster-RCNN

In [None]:
class PL_Model(faster_rcnn.lightning.ModelAdapter):
    """Faster-RCNN Lightning adapter trained with AdamW and ReduceLROnPlateau."""

    def configure_optimizers(self):
        """Return ``([optimizer], [scheduler_config])`` for the Lightning trainer.

        The learning rate is multiplied by 0.1 once ``val_loss`` has not
        improved for 10 epochs. A CosineAnnealingLR schedule
        (T_max=12, eta_min=0.01) was the alternative tried here.
        """
        adamw = torch.optim.AdamW(self.parameters(), lr=LR, weight_decay=WDECAY)
        plateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
            adamw,
            mode='min',
            factor=0.1,
            patience=10,
        )
        scheduler_config = {
            "scheduler": plateau,
            "interval": 'epoch',
            'monitor': 'val_loss',
        }
        return [adamw], [scheduler_config]

In [None]:
# Train one Faster-RCNN model per fold listed in FOLDS_IDS.
# The parser now covers the full annotation table (not just a 20-row sample).
parser = VinBigData(train_df, source / "train")

for n_fold in FOLDS_IDS:
    # preparing folds
    # Unique image ids of all rows outside / inside the current validation fold.
    train_fold = tuple(np.unique(train_df.loc[lambda train_df: train_df["fold"] != n_fold]['image_id'].values))
    val_fold = tuple(np.unique(train_df.loc[lambda train_df: train_df["fold"] == n_fold]['image_id'].values))

    # The splitter receives the two id groups in [train, validation] order.
    presplits = [train_fold, val_fold]
    data_splitter = FixedSplitter(presplits)
    train_rs, valid_rs = parser.parse(data_splitter)

    # dataset classes based on defined group split
    train_ds = Dataset(train_rs, train_tfms)
    valid_ds = Dataset(valid_rs, valid_tfms)
    
    # corresponding dataloaders
    train_dl = faster_rcnn.train_dl(train_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=True)
    valid_dl = faster_rcnn.valid_dl(valid_ds, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, shuffle=False)

    # A fresh metric, backbone and model are built for every fold.
    metrics = [COCOMetric(metric_type=COCOMetricType.bbox)]
    backbone = backbones.resnet_fpn.resnext50_32x4d(pretrained=True)
    # The number of model outputs equals the number of entries in class_map.
    model = faster_rcnn.model(backbone=backbone, num_classes=len(class_map))
    
    # fast ai
#     learn = faster_rcnn.fastai.learner(dls=[train_dl, valid_dl], model=model, metrics=metrics)
#     learn.fine_tune(NUM_EPOCHS, lr=LR)

    # pytorch lightning
    pl_model = PL_Model(model, metrics=metrics)
    
    # NOTE(review): this patience (10) equals the ReduceLROnPlateau patience set
    # in PL_Model, so training may stop before a reduced learning rate can have
    # any effect - confirm this is intended.
    early_stopping = pl.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    # Keep only the single checkpoint with the lowest val_loss for this fold.
    # NOTE(review): `filepath`/`prefix` here and `gpus`, `distributed_backend`,
    # `weights_summary` below belong to the pytorch-lightning release this
    # notebook was written against; the install is not pinned, so confirm they
    # are still accepted.
    model_checkpoint = pl.callbacks.ModelCheckpoint(filepath = './',
                                                    verbose = True, 
                                                    save_top_k=1,
                                                    monitor = 'val_loss',
                                                    mode = 'min',
                                                    prefix = f'frcnn-best-model-fold{n_fold}'
                                                   )
    
    # Metrics are written as CSV under ./frcnn-best-model-fold<n_fold>/.
    csv_logger = CSVLogger("./", name=f'frcnn-best-model-fold{n_fold}')
    
    # Single GPU, gradients accumulated over 2 batches, gradient clipping at 0.5,
    # validation after every epoch, no sanity-check validation before training.
    trainer = pl.Trainer(gpus = 1,
                         logger=[csv_logger],
                         log_every_n_steps=50,
                         callbacks=[early_stopping, model_checkpoint],
                         check_val_every_n_epoch=1,
                         accumulate_grad_batches=2,
                         distributed_backend='dp',
                         gradient_clip_val=0.5,
                         max_epochs=NUM_EPOCHS,
                         num_sanity_val_steps=0,
                         profiler=False,
                         weights_summary=None)

    trainer.fit(pl_model, train_dl, valid_dl)


In [None]:
faster_rcnn.show_results(pl_model, valid_ds, class_map=class_map)

In [None]:
# Load the metrics written by the CSV logger of the most recently trained fold.
# The path comes from the logger itself: a hard-coded ".../version_0" would
# point at stale results once a re-run makes the logger create a new version
# directory.
logs = pd.read_csv(os.path.join(csv_logger.log_dir, "metrics.csv"))
logs.tail()