In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import time

from sklearn.model_selection import GroupKFold

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN

from torch.utils.data import DataLoader, Dataset

from matplotlib import pyplot as plt

In [None]:
# Root folder of the dataset; the image folders and the annotation CSV are
# all addressed relative to it.
DATA_DIR = '../input/vinbigdata-512-image-dataset/vinbigdata'
train_dir = f'{DATA_DIR}/train'
test_dir = f'{DATA_DIR}/test'
train_df = pd.read_csv(f'{DATA_DIR}/train.csv')

In [None]:
# Peek at the first rows of the annotation table as loaded from the CSV.
train_df.head()

In [None]:
# Keep only rows whose class_id is not 14 and renumber the index from zero
# (class 14 is presumably the "no finding" label -- TODO confirm).
train_df = train_df.loc[train_df['class_id'].ne(14)].reset_index(drop=True)
train_df.head()

In [None]:
# Full path of the PNG belonging to each annotation row.
train_df['image_path'] = train_dir + '/' + train_df['image_id'] + '.png'
train_df.head()

## GROUP KFOLD

In [None]:
# Split the annotation rows into five folds, grouping by image_id so that all
# boxes of one image end up in the same fold.
gkf = GroupKFold(n_splits=5)
train_df['fold'] = -1
_fold_splits = gkf.split(train_df, groups=train_df['image_id'].tolist())
for fold, (train_idx, val_idx) in enumerate(_fold_splits):
    # Only the held-out rows of each split receive that split's fold number.
    train_df.loc[val_idx, 'fold'] = fold
train_df.head()

In [None]:
# Number of distinct images that landed in each fold.
train_df.groupby('fold')['image_id'].nunique().reset_index()

In [None]:
IMG_SIZE = 512
# Rescale every box corner from the pixel grid described by the width/height
# columns to an IMG_SIZE x IMG_SIZE grid. The loop order keeps the new columns
# in the order xmin, ymin, xmax, ymax.
for _bound in ('min', 'max'):
    for _axis, _extent in (('x', 'width'), ('y', 'height')):
        train_df[f'{_axis}{_bound}'] = (train_df[f'{_axis}_{_bound}'] / train_df[_extent]) * IMG_SIZE

In [None]:
# Every rescaled coordinate must fit inside the IMG_SIZE x IMG_SIZE image.
# The comparison has to happen element-wise *before* reducing with .all():
# calling .all() first collapses the column to a single bool, and a bool is
# always <= IMG_SIZE, which would make the check vacuous.
for _col in ('xmin', 'ymin', 'xmax', 'ymax'):
    assert (train_df[_col] <= IMG_SIZE).all(), f"{_col} has values larger than IMG_SIZE={IMG_SIZE}"

In [None]:
# Show every annotation row recorded for one specific image id.
train_df[train_df['image_id'] == '9a5094b2563a1ef3ff50dc5c7ff71345']

In [None]:
# class_id -> class_name lookup built from the distinct (id, name) pairs.
class_dict = dict(set(zip(train_df['class_id'], train_df['class_name'])))

# Index 0 is a placeholder for the background class; the real class names
# follow in ascending class_id order.
classes = ['_'] + [class_dict[class_id] for class_id in sorted(class_dict)]
classes

In [None]:
class VBDDataset(Dataset):
    """Object-detection dataset yielding ``(image, target, image_id)`` triples.

    Each item corresponds to one unique ``image_id`` of ``dataframe``; all rows
    sharing that id contribute one bounding box each.

    Args:
        dataframe: table with the columns ``image_id``, ``class_id``, ``xmin``,
            ``ymin``, ``xmax`` and ``ymax``.
        image_dir: directory containing ``<image_id>.png`` files.
        transforms: optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` and returning a
            mapping with the same three keys (e.g. an albumentations Compose).

    The returned ``target`` dict always holds tensors: ``boxes`` (N, 4) float32,
    ``labels`` (N,) int64 shifted by +1 so that 0 is left for background,
    ``image_id`` (1,), ``area`` (N,) float32 and ``iscrowd`` (N,) int64.
    Without ``transforms`` the image is returned as an HxWx3 float32 numpy array
    scaled to [0, 1]; with ``transforms`` it is whatever the pipeline returns.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    @staticmethod
    def _build_target(boxes, labels, idx):
        """Pack boxes and labels into the tensor dict used as detection target.

        ``boxes`` may be any (N, 4)-like sequence (including an empty one) and
        ``labels`` any length-N sequence. Area and iscrowd are derived from the
        boxes passed in, so they always agree with them.
        """
        # reshape keeps the (N, 4) / (N,) layout even for zero or one box.
        boxes = torch.as_tensor(np.asarray(boxes, dtype=np.float32).reshape(-1, 4))
        labels = torch.as_tensor(np.asarray(labels, dtype=np.int64).reshape(-1))
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['image_id'] = torch.tensor([idx])
        target['area'] = area
        # suppose all instances are not crowd
        target['iscrowd'] = torch.zeros((labels.shape[0],), dtype=torch.int64)
        return target

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        records = self.df[self.df['image_id'] == image_id]

        image_path = f'{self.image_dir}/{image_id}.png'
        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        boxes = records[['xmin', 'ymin', 'xmax', 'ymax']].values
        # all the labels are shifted by 1 to accommodate background
        labels = records['class_id'].values + 1

        if self.transforms:
            sample = self.transforms(image=image, bboxes=boxes, labels=labels.tolist())
            image = sample['image']
            # The pipeline may move or drop boxes, so take boxes AND labels
            # from its output to keep them aligned.
            boxes = sample['bboxes']
            labels = sample['labels']

        target = self._build_target(boxes, labels, idx)
        return image, target, image_id

    def __len__(self):
        return self.image_ids.shape[0]

In [None]:
# Quick smoke test: build the dataset without transforms and fetch its first item.
dt = VBDDataset(train_df, train_dir)
dt[0]

In [None]:
# Albumentations
def get_train_transform():
    """Build the augmentation pipeline applied to training samples.

    Returns:
        An albumentations ``Compose`` expecting ``image``, ``bboxes``
        (pascal_voc format) and ``labels``, and ending with a tensor conversion.
    """
    return A.Compose([
        # The probability is passed by keyword on purpose: in albumentations
        # releases whose first positional parameter is `always_apply`, a
        # positional 0.5 is truthy and the flip would be applied to every sample.
        A.Flip(p=0.5),
        A.ShiftScaleRotate(rotate_limit=10, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=1.0),
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

def get_valid_transform():
    """Build the pipeline applied to validation samples.

    Only the tensor conversion is performed. Random geometric or photometric
    augmentations are deliberately left out so that validation results are
    deterministic and measured on unaltered images.

    Returns:
        An albumentations ``Compose`` expecting ``image``, ``bboxes``
        (pascal_voc format) and ``labels``.
    """
    return A.Compose([
        ToTensorV2(p=1.0)
    ], bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})

In [None]:
# Start from torchvision's Faster R-CNN (ResNet-50 + FPN) with pre-trained weights.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
num_classes = 15  # 14 classes + background

# get number of input features for the classifier, read from the existing
# head so the replacement accepts the same feature width
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one that predicts num_classes classes
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# A Class for keeping track of average
class Averager:
    """Running mean of the values passed to `send` since the last `reset`."""

    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Record one more observation."""
        self.iterations += 1
        self.current_total += value

    @property
    def value(self):
        """Mean of the recorded observations, or 0 when there are none."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Forget every observation recorded so far."""
        self.iterations = 0.0
        self.current_total = 0.0

## Testing Sample

In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field.

    Keeps the samples as separate objects instead of stacking them, so items
    with differing numbers of boxes can share a batch.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Datasets for the sanity check below; both are built from the complete
# annotation table and differ only in the transform pipeline.
train_dataset = VBDDataset(train_df, train_dir, get_train_transform())
valid_dataset = VBDDataset(train_df, train_dir, get_valid_transform())

# Settings shared by both loaders.
_loader_kwargs = dict(shuffle=False, num_workers=4, collate_fn=collate_fn)

train_data_loader = DataLoader(train_dataset, batch_size=16, **_loader_kwargs)
valid_data_loader = DataLoader(valid_dataset, batch_size=8, **_loader_kwargs)

In [None]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Pull a single batch and move every tensor to the selected device.
images, targets, image_ids = next(iter(train_data_loader))
images = [image.to(device) for image in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

# Third sample of the batch: integer pixel boxes and a channel-last image.
boxes = targets[2]['boxes'].cpu().numpy().astype(np.int32)
sample = images[2].permute(1, 2, 0).cpu().numpy()

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

for box in boxes:
    # Draw each box directly onto the image array.
    cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (220, 0, 0), 3)

ax.set_axis_off()
ax.imshow(sample)

## Training

In [None]:
def get_dataloaders(df, trn_idx, val_idx, batch_size=16, val_batch_size=8,
                    num_workers=4, shuffle_train=True):
    """Build the training and validation loaders for one fold.

    Args:
        df: annotation table accepted by ``VBDDataset``.
        trn_idx: index labels of ``df`` used for training.
        val_idx: index labels of ``df`` used for validation.
        batch_size: images per training batch.
        val_batch_size: images per validation batch.
        num_workers: worker processes used by each loader.
        shuffle_train: reshuffle the training images every epoch so the
            optimizer does not see the same batches in the same order each
            time; validation is never shuffled.

    Returns:
        ``(train_data_loader, valid_data_loader)``.
    """
    train_ = df.loc[trn_idx, :].reset_index(drop=True)
    valid_ = df.loc[val_idx, :].reset_index(drop=True)

    train_dataset = VBDDataset(train_, train_dir, get_train_transform())
    valid_dataset = VBDDataset(valid_, train_dir, get_valid_transform())

    # The notebook-level collate_fn is reused for both loaders rather than
    # re-defining an identical function here.
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=shuffle_train,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=val_batch_size,
        shuffle=False,
        num_workers=num_workers,
        collate_fn=collate_fn
    )

    return train_data_loader, valid_data_loader



def train_model(model, dataloader, device, epochs, optimizer, lr_scheduler, fold):
    """Train ``model`` and checkpoint the epoch with the lowest mean training loss.

    Args:
        model: detection model that returns a dict of losses when called as
            ``model(images, targets)`` in training mode.
        dataloader: yields ``(images, targets, image_ids)`` batches.
        device: device the batch tensors are moved to.
        epochs: number of passes over ``dataloader``.
        optimizer: optimizer stepping the model parameters.
        lr_scheduler: optional scheduler stepped once per epoch, or ``None``.
        fold: fold number, used only in the checkpoint file name
            ``fasterrcnn_model_{fold}.pt``.

    Returns:
        list of float: the summed loss of every iteration, in order.
    """
    best_loss = 1e10
    loss_hist = Averager()
    itr = 1
    all_losses = []

    for epoch in range(epochs):
        # Make sure the model is in training mode: an earlier cell may have
        # called model.eval(), in which case the forward pass would not
        # return the loss dict used below.
        model.train()
        loss_hist.reset()

        for images, targets, image_ids in dataloader:

            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)

            # Total loss is the plain sum of all individual loss terms.
            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()

            loss_hist.send(loss_value)
            all_losses.append(loss_value)

            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

            if itr % 50 == 0:
                print(f"Iteration #{itr} loss: {loss_value}")

            itr += 1

        # saving the model based on training loss for now. - later can be moved to validation
        if loss_hist.value < best_loss:
            best_loss = loss_hist.value
            torch.save(model.state_dict(), f'fasterrcnn_model_{fold}.pt')

        # update the learning rate
        if lr_scheduler is not None:
            lr_scheduler.step()

        print(f"Epoch #{epoch} loss: {loss_hist.value}\n")

    return all_losses
        
        
def validate_model(model, dataloader, device):
    """Compute the mean loss of ``model`` over ``dataloader`` without training.

    Args:
        model: detection model that returns a dict of losses when called as
            ``model(images, targets)`` in training mode.
        dataloader: yields ``(images, targets, image_ids)`` batches.
        device: device the batch tensors are moved to.

    Returns:
        float: mean of the per-batch summed losses (0 for an empty loader).
    """
    print("\n Starting Validation ... ")
    loss_hist = Averager()
    itr = 1

    # The loss dict is only produced in training mode, so switch to it for the
    # duration of the loop and restore the previous mode afterwards.
    # NOTE(review): any layer that behaves differently in training mode
    # (dropout, non-frozen batch norm) is therefore active here as well.
    was_training = model.training
    model.train()
    try:
        # No gradients are needed for validation; skipping them saves memory.
        with torch.no_grad():
            for images, targets, image_ids in dataloader:

                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                loss_dict = model(images, targets)

                losses = sum(loss for loss in loss_dict.values())
                loss_value = losses.item()

                loss_hist.send(loss_value)

                if itr % 50 == 0:
                    print(f"Iteration #{itr} loss: {loss_value}")

                itr += 1
    finally:
        model.train(was_training)

    print(f"\nFinal loss: {loss_hist.value}")
    return loss_hist.value


    

In [None]:
def run_fold(fold):
    """Train on every fold except ``fold``, plot the loss curve, then validate on ``fold``.

    Uses the notebook-level ``train_df``, ``model``, ``device``, ``epochs``,
    ``optimizer`` and ``lr_scheduler``.
    """
    print(f"Starting fold {fold}")
    start = time.time()

    # Rows of the held-out fold validate; all remaining rows train.
    held_out = train_df['fold'] == fold
    val_idx = train_df[held_out].index
    trn_idx = train_df[~held_out].index

    trainloader, valloader = get_dataloaders(train_df, trn_idx, val_idx)

    loss_hist = train_model(model, trainloader, device, epochs, optimizer, lr_scheduler, fold)

    # plot training loss
    plt.figure(figsize=(8,5))
    plt.plot(loss_hist)
    plt.title("Training Loss Statistic", size=17)
    plt.xlabel("Iteration", size=15)
    plt.ylabel("Loss Value", size=15)
    plt.show()

    validate_model(model, valloader, device)

    elapsed = round(time.time()-start, 2)
    print(f"Completed Fold {fold} in {elapsed} seconds")


In [None]:
model.to(device)

# Only parameters that still require gradients are handed to the optimizer.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

# SGD with momentum and weight decay
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# No learning-rate schedule for now; train_model skips the scheduler step when this is None.
lr_scheduler = None

# Number of passes over the training data per fold.
epochs = 20

# Number of folds that will actually be run.
num_folds = 1

In [None]:
# Train and validate the first num_folds folds, one after the other.
for fold in range(num_folds):
    run_fold(fold)

## Visualize Model

In [None]:
# Number of batches in the notebook-level validation loader.
len(valid_data_loader)

In [None]:
# Show which device the tensors are being moved to.
device

In [None]:
# train_model writes its checkpoints as 'fasterrcnn_model_{fold}.pt' into the
# working directory, so that is where they are read back from.
# NOTE(review): point model_dir elsewhere if the checkpoints were moved.
model_dir = '.'
# Load the checkpoint of the last fold that was trained in this run instead of
# a fixed fold number, which only exists when five folds have been run.
checkpoint_path = os.path.join(model_dir, f'fasterrcnn_model_{num_folds - 1}.pt')

model0 = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
model0.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
# map_location lets a checkpoint saved on GPU be loaded on a CPU-only machine.
model0.load_state_dict(torch.load(checkpoint_path, map_location=device))
model0.to(device)

In [None]:
def _draw_labelled_box(image, box, text, text_origin, text_color, box_color, thickness):
    """Draw one rectangle plus a text tag onto ``image`` in place."""
    # Plain Python ints keep OpenCV's argument parsing happy regardless of the
    # numpy integer type the coordinates arrive in.
    x_min, y_min, x_max, y_max = (int(v) for v in box)
    origin = (int(text_origin[0]), int(text_origin[1]))
    cv2.putText(image, text, origin, cv2.FONT_HERSHEY_DUPLEX, 1.0, text_color, 1)
    cv2.rectangle(image, (x_min, y_min), (x_max, y_max), box_color, thickness)


def show_output(model, num_imgs, score_threshold=0.9, sample_idx=1):
    """Plot ground truth and predictions for one image out of each of the first batches.

    Iterates over the notebook-level ``valid_data_loader`` and, for each of the
    first ``num_imgs`` batches, draws on the image at position ``sample_idx``:
    its ground-truth boxes (thin) and the model's predictions for that same
    image whose score exceeds ``score_threshold`` (thick). Labels and boxes are
    also printed.

    Args:
        model: detection model; it is switched to eval mode.
        num_imgs: number of batches (one plotted image each) to show.
        score_threshold: predictions with a score at or below this are ignored.
        sample_idx: position inside each batch of the image to plot.
    """
    cpu_device = torch.device("cpu")
    model.eval()

    img_cnt = 0
    for images, targets, image_ids in iter(valid_data_loader):
        if img_cnt == num_imgs:
            break
        img_cnt += 1

        print(f"----------{img_cnt}-----------")

        if sample_idx >= len(images):
            # A short final batch may not contain the requested position.
            print(f"batch has only {len(images)} image(s); skipping")
            continue

        images = list(img.to(device) for img in images)

        # Ground truth of the plotted image, kept under its own names so the
        # prediction handling below cannot overwrite it.
        gt_boxes = targets[sample_idx]["boxes"].cpu().numpy().astype(np.int32).reshape(-1, 4)
        gt_labels = targets[sample_idx]["labels"].cpu().numpy().astype(np.int32).reshape(-1)
        # OpenCV draws in place and needs a contiguous channel-last array.
        sample = np.ascontiguousarray(images[sample_idx].permute(1, 2, 0).cpu().numpy())

        for gt_label, gt_box in zip(gt_labels, gt_boxes):
            print(gt_label, gt_box)

        # Inference only: no gradients required.
        with torch.no_grad():
            outputs = model(images)
        # Only the predictions belonging to the plotted image are used.
        prediction = {k: v.to(cpu_device) for k, v in outputs[sample_idx].items()}

        # outputs_class_boxes maps each predicted label to its kept boxes
        outputs_class_boxes = {}
        for box, label, score in zip(prediction["boxes"], prediction["labels"], prediction["scores"]):
            if not score.item() > score_threshold:
                continue
            # Use the plain int as dict key; a tensor key would never match an
            # existing entry.
            label = label.item()
            if label in outputs_class_boxes:
                print(f"adding more boxes for label {label}")
            outputs_class_boxes.setdefault(label, []).append(box.numpy().astype(np.int32))

        for clas in sorted(outputs_class_boxes.keys()):
            print(clas, outputs_class_boxes[clas])

        fig, ax = plt.subplots(1, 1, figsize=(16, 8))

        # Ground truth: thin boxes, tag just below the top-left corner.
        for box, clas in zip(gt_boxes, gt_labels):
            _draw_labelled_box(sample, box, f"{clas}", (box[0], box[1] + 20),
                               (127, 0, 127), (220, 0, 0), 1)

        # Predictions: thicker boxes, tag at the bottom-left corner.
        for clas in outputs_class_boxes.keys():
            for box in outputs_class_boxes[clas]:
                _draw_labelled_box(sample, box, f"{clas}", (box[0], box[3]),
                                   (0, 0, 255), (0, 255, 127), 2)

        ax.set_axis_off()
        ax.imshow(sample)

        for _ in range(3):
          print()



In [None]:
# Visualise 5 validation batches with the loaded checkpoint, keeping predictions scored above 0.95.
show_output(model0, 5, 0.95)

In [None]:
# Take the first validation batch and move it to the selected device.
images, targets, image_ids = next(iter(valid_data_loader))

images = list(img.to(device) for img in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

# Ground truth of the second image in the batch.
boxes = targets[1]['boxes'].cpu().numpy().astype(np.int32)
sample = images[1].permute(1,2,0).cpu().numpy()
clss = targets[1]['labels'].cpu().numpy().astype(np.int32)

model.eval()
cpu_device = torch.device("cpu")

# Inference only: run without gradient tracking so no autograd graph is kept.
with torch.no_grad():
    outputs = model(images)
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# Draw every ground-truth box together with its class name.
for box, clas in zip(boxes, clss):
    cv2.putText(sample, f"{classes[clas]}", (box[0], box[1]), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
    cv2.rectangle(sample,
                  (box[0], box[1]),
                  (box[2], box[3]),
                  (220, 0, 0), 1)

ax.set_axis_off()
ax.imshow(sample)

In [None]:
# Show which device the tensors are being moved to.
device

## Acknowledgements
This notebook is heavily inspired by https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train/notebook.

**If this kernel helps you in any way, please upvote it.**