This inference kernel is based on my previous training kernel, [VinBigData: EDA, Faster-RCNN, icevision [training]](https://www.kaggle.com/mariazorkaltseva/vinbigdata-eda-faster-rcnn-icevision-training).

In [None]:
!pip install icevision[all]
!pip install matplotlib==3.1.3
!pip install tqdm==4.45.0

In [None]:
import os
import random
import cv2
import warnings
import torch
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import pytorch_lightning as pl

from tqdm import tqdm_notebook as tqdm
from sklearn.model_selection import GroupKFold
from icevision.all import *

warnings.filterwarnings('ignore')

In [None]:
# Notebook-wide configuration.
SEED = 2021  # passed to seed_everything() below
DEBUG = False  # when True, only the first 20 submission rows are kept
# NOTE(review): the next seven constants are not referenced in the visible
# cells; presumably they were carried over from the training notebook.
IMG_DIM = 512
BATCH_SIZE = 16
NUM_WORKERS = 4
N_FOLDS = 5
FOLDS_IDS = [0]
LR = 1e-4
NUM_EPOCHS = 20
# First CUDA device when one is available, otherwise the CPU. Used as
# map_location when the checkpoint is loaded.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def seed_everything(seed):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds every GPU rather than only the current one; silently ignored
    # when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # Benchmark mode auto-tunes the convolution algorithm per input shape and
    # may pick a different one on each run, so it must be off for the
    # deterministic flag above to give reproducible results.
    torch.backends.cudnn.benchmark = False

# Seed once, before any data is loaded or the model is built.
seed_everything(SEED)

In [None]:
# Root directory of the competition data used by this notebook.
source = Path("../input/vinbigdata-512-png")

# Read the three CSV tables that live directly under the data root.
train_df, test_df, submission_df = (
    pd.read_csv(source / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

# Last expression of the cell: shows the first rows of the sample submission.
submission_df.head()

In [None]:
# In debug mode, work on the first 20 submission rows only.
if DEBUG:
    submission_df = submission_df.head(20)

In [None]:
# Throwaway stub: it only declares the base classes so that
# generate_template() can be called on it. The class is redefined with real
# methods in the next cell.
class VinBigDataFilepathParser(parsers.Parser, parsers.FilepathMixin):
    pass

# NOTE(review): generate_template() presumably reports the methods a concrete
# parser with these mixins has to implement — confirm against the icevision docs.
VinBigDataFilepathParser.generate_template()

In [None]:
class VinBigDataFilepathParser(parsers.Parser, parsers.FilepathMixin):
    """Parser that maps dataframe rows to image files.

    Every row must expose an ``image_id`` attribute; the matching image is
    resolved as ``<source>/<image_id>.png``.
    """

    def __init__(self, df, source):
        # `source` is the directory with the images, `df` the table of rows.
        self.source = source
        self.df = df

    def __len__(self):
        """Return the number of rows that will be parsed."""
        return len(self.df)

    def __iter__(self):
        """Yield the dataframe rows one by one as named tuples."""
        for row in self.df.itertuples():
            yield row

    def imageid(self, o) -> Hashable:
        """Return the identifier of the image described by row ``o``."""
        return o.image_id

    def filepath(self, o) -> Union[str, Path]:
        """Return the path of the PNG file that belongs to row ``o``."""
        filename = f"{o.image_id}.png"
        return self.source / filename

    def image_width_height(self, o) -> Tuple[int, int]:
        """Return the image size as reported by ``get_image_size`` for the file."""
        image_path = self.filepath(o)
        return get_image_size(image_path)

In [None]:
# Build one record per submission row, pointing at the images in <source>/test.
parser = VinBigDataFilepathParser(submission_df, source / "test")
# NOTE(review): no data_splitter is passed, so parse() presumably falls back to
# its default split and returns more than one list of records — confirm.
records = parser.parse(data_splitter=None, idmap=None, autofix=True, show_pbar=True, cache_filepath=None)
# Inference needs every image, so the first two record lists are joined again.
test_rs = records[0] + records[1]

In [None]:
# imagenet normalization params are used (tfms.A.Normalize() is called with
# its defaults)

presize = 512  # not referenced in the visible cells
size = 384  # side length passed to resize_and_pad; reused later to rescale predicted boxes

# Inference-time pipeline: resize/pad to `size`, then normalize.
infer_tfms = tfms.A.Adapter([*tfms.A.resize_and_pad(size), tfms.A.Normalize()])

In [None]:
# Dataset over all test records with the inference transforms attached.
infer_ds = Dataset(test_rs, infer_tfms)

In [None]:
# Drop the rows labelled with class_id 14 (presumably the "no finding" label
# — verify against the dataset description) and renumber the index.
has_finding = train_df.class_id != 14
train_df = train_df[has_finding].reset_index(drop=True)

# Pair ids with names in order of first appearance, then sort by id.
# This relies on class_id and class_name being in one-to-one correspondence.
class_ids = list(train_df['class_id'].unique())
class_names = list(train_df['class_name'].unique())
IDX_TO_CLASS = dict(sorted(zip(class_ids, class_names)))

# Class names ordered by class id, with no background entry.
class_map = ClassMap(list(IDX_TO_CLASS.values()), background=None)

In [None]:
# Rebuild the network architecture; pretrained=False because all weights are
# taken from the checkpoint loaded below.
backbone = backbones.resnet_fpn.resnext50_32x4d(pretrained=False)
model = faster_rcnn.model(backbone=backbone, num_classes=len(class_map))

checkpoint_path = '../input/vinbigdata-eda-faster-rcnn-icevision-training/frcnn-best-model-fold0-epoch=58.ckpt'
# DEVICE is already a torch.device, so it can be passed as map_location directly.
checkpoint = torch.load(checkpoint_path, map_location=DEVICE)

# Keys in the checkpoint may carry a 'model.' prefix (presumably added by the
# training wrapper — verify); strip it so the names match the bare model.
prefix = 'model.'
new_state_dict = OrderedDict(
    (key[len(prefix):] if key.startswith(prefix) else key, value)
    for key, value in checkpoint['state_dict'].items()
)

# strict=False tolerates key mismatches without raising, so print the result:
# a wrong checkpoint would otherwise leave layers randomly initialised unnoticed.
load_result = model.load_state_dict(new_state_dict, strict=False)
print(load_result)

# Move the model to the selected device and switch to inference mode.
# Without this the model stays on the CPU even when a GPU is available.
model = model.to(DEVICE).eval()

In [None]:
# Run inference over the whole test set. batch_size=1 means one image per
# batch (the BATCH_SIZE constant is not used here). shuffle=False matters:
# the submission cell matches preds to test_rs by position.
infer_dl = faster_rcnn.infer_dl(infer_ds, batch_size=1, shuffle=False)
# Only the predictions are kept; the first return value is discarded.
# NOTE(review): detection_threshold=0.5 presumably drops detections scoring
# below 0.5 — confirm against the icevision docs.
_, preds = faster_rcnn.predict_dl(model=model, infer_dl=infer_dl, detection_threshold=0.5)

In [None]:
# prepare submission file

# Column position of the prediction strings. Writing through a single
# .iloc[row, col] call updates submission_df itself; the former chained
# `.iloc[row]['PredictionString'] = ...` could assign to a temporary copy.
pred_col = submission_df.columns.get_loc('PredictionString')

# Original (width, height) of every test image, built once instead of
# filtering test_df for each prediction. drop_duplicates keeps the first row
# per image_id, matching the previous `.values[0]` lookup.
original_sizes = {
    row.image_id: (row.width, row.height)
    for row in test_df.drop_duplicates('image_id').itertuples()
}

# Wrapping preds (not the enumerate object) lets tqdm show the total.
for idx, pred in enumerate(tqdm(preds)):

    # Images without detections keep the PredictionString that the sample
    # submission already contains.
    if len(pred['labels']) == 0:
        continue

    # The record's imageid is used as the row position in submission_df.
    image_idx = dict(test_rs[idx])['imageid']
    image_id = submission_df['image_id'].iloc[image_idx]
    original_width, original_height = original_sizes[image_id]

    # Boxes are predicted in the size x size inference frame; scale them back
    # to the original resolution.
    # NOTE(review): assumes resize_and_pad added no padding, i.e. the input
    # PNGs are square — confirm.
    x_scale = original_width / size
    y_scale = original_height / size

    parts = []
    for label, score, bbox in zip(pred['labels'], pred['scores'], pred['bboxes']):
        bbox = vars(bbox)
        parts.extend([
            label,
            np.round(score, 2),
            int(bbox['xmin'] * x_scale),
            int(bbox['ymin'] * y_scale),
            int(bbox['xmax'] * x_scale),
            int(bbox['ymax'] * y_scale),
        ])

    # One space-separated "label score xmin ymin xmax ymax" group per box.
    submission_df.iloc[image_idx, pred_col] = ' '.join(map(str, parts))

In [None]:
# Preview the first rows after the prediction strings have been filled in.
submission_df.head()

In [None]:
# Write the submission to the working directory, without the index column.
submission_df.to_csv('submission.csv', index=False)