# 1. Import

In [1]:
import os

import torch
import torchvision
import torch.utils.data
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

import cv2
from PIL import Image
import matplotlib.pyplot as plt

from modules import utils
from modules import transforms as T
from modules.engine import train_one_epoch, evaluate

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# !pip install cython
# !pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

In [3]:
# !git clone https://github.com/pytorch/vision.git

# !cp vision/references/detection/utils.py .
# !cp vision/references/detection/transforms.py .
# !cp vision/references/detection/coco_eval.py .
# !cp vision/references/detection/engine.py .
# !cp vision/references/detection/coco_utils.py .

In [4]:
import config

# 2. Fine-tuning

## a. Modifying the model to add a different backbone

In [5]:
# Value handed to FasterRCNN(min_size=...) below; the model's transform uses it
# for its Resize step.
min_size = config.min_size

In [6]:
# Class lookup from the project config: infer() indexes it with integer label ids
# and make_color_map() reads its values as class names.
class_map = config.class_map
# One model output class per class_map entry.
# NOTE(review): torchvision detection models count background as a class —
# confirm class_map already includes it.
num_classes = len(class_map)

In [7]:
# Use the `.features` part of an ImageNet-pretrained VGG16 as the detector backbone.
backbone = torchvision.models.vgg16(
                    weights="VGG16_Weights.IMAGENET1K_V1").features
# Channel count of the backbone's output feature map, exposed as an attribute.
# NOTE(review): presumably required by FasterRCNN and equal to VGG16's last conv
# width — confirm against the torchvision docs.
backbone.out_channels = 512

In [8]:
# Anchor settings come from the project config.
anchor_size = config.anchor_size
anchor_ratio = config.anchor_ratio

# Each setting is wrapped in a one-element tuple, i.e. a single group of sizes and
# a single group of aspect ratios (presumably one group per backbone feature map —
# TODO confirm).
anchor_generator = AnchorGenerator(sizes=(anchor_size,),
                                   aspect_ratios=(anchor_ratio,))

# For reference, the fallback used when no generator is passed to FasterRCNN:
# if rpn_anchor_generator is None:
#     rpn_anchor_generator = _default_anchorgen()

# def _default_anchorgen():
#     anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
#     aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
#     return AnchorGenerator(anchor_sizes, aspect_ratios)            

In [9]:
# Explicit RoI pooler built with the same arguments as the fallback quoted below.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"],
                                                output_size=7,
                                                sampling_ratio=2)

# For reference, the fallback used when no pooler is passed to FasterRCNN:
# if box_roi_pool is None:
#     box_roi_pool = MultiScaleRoIAlign(featmap_names=["0", "1", "2", "3"], output_size=7, sampling_ratio=2)

In [10]:
# Assemble Faster R-CNN around the VGG16 backbone with the custom anchor generator.
# The RoI pooler defined in the previous cell is passed explicitly instead of
# None, so that object is no longer dead code; its arguments match the fallback
# FasterRCNN would otherwise build, so the resulting model is unchanged.
model = FasterRCNN(backbone=backbone,
                   num_classes=num_classes,
                   min_size=min_size,
                   rpn_anchor_generator=anchor_generator,
                   box_roi_pool=roi_pooler)

In [11]:
# Show the assembled model's module tree as the cell output.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(600,), max_size=1333, mode='bilinear')
  )
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(in

In [12]:
# from torchviz import make_dot

# model.eval()
# x = [torch.rand(3, 300, 400)]
# predictions = model(x)

# make_dot(predictions[0]['boxes'], params=dict(model.named_parameters()))

## b. Dataset Class

In [13]:
# Dataset root directory; CustomDataset reads the "images" and "labels"
# subfolders inside it.
root = 'dataset'

In [14]:
def get_transform(train):
    """Build the (image, target) transform pipeline for one dataset split.

    Args:
        train: When truthy, a random horizontal flip with probability 0.5 is
            appended after the conversion steps.

    Returns:
        A ``T.Compose`` chaining ``T.PILToTensor``, ``T.ConvertImageDtype`` to
        ``torch.float`` and, for training only, ``T.RandomHorizontalFlip``.
    """
    steps = [T.PILToTensor(), T.ConvertImageDtype(torch.float)]
    if train:
        # Horizontal flipping follows the Fast R-CNN paper.
        steps += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(steps)

In [15]:
class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset read from ``<root>/images`` and ``<root>/labels``.

    Images and label files are paired by position after sorting both directory
    listings. Each non-blank line of a label file describes one object as
    ``xmin, ymin, xmax, ymax, cls`` (comma-separated integers).

    Args:
        root: Directory containing the ``images`` and ``labels`` folders.
        transforms: Optional callable ``(img, target) -> (img, target)``.

    Raises:
        ValueError: If the two folders hold a different number of entries,
            because positional pairing would then attach labels to the wrong
            images.
    """

    def __init__(self, root, transforms=None):
        self.root = root
        self.transforms = transforms

        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.labels = sorted(os.listdir(os.path.join(root, "labels")))
        if len(self.imgs) != len(self.labels):
            raise ValueError(
                f"images/labels count mismatch under {root!r}: "
                f"{len(self.imgs)} images vs {len(self.labels)} labels")

    @staticmethod
    def _parse_label_lines(lines):
        """Turn label-file lines into ``(boxes, labels)`` tensors.

        Blank lines are skipped and whitespace around the commas is ignored.
        ``boxes`` is always float32 of shape (N, 4), also when N == 0, so the
        column indexing done by callers works for images without objects.
        ``labels`` is int64 of shape (N,).

        Raises:
            ValueError: If a non-blank line does not hold exactly five integers.
        """
        boxes = []
        labels = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            xmin, ymin, xmax, ymax, cls = (int(v) for v in line.split(','))
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(cls)

        # reshape keeps the (N, 4) layout even for an empty list of boxes.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)
        return boxes, labels

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` holds the keys ``boxes``, ``labels``, ``image_id``, ``area``
        and ``iscrowd``; ``transforms`` (if set) is applied to the pair last.
        """
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        label_path = os.path.join(self.root, "labels", self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        with open(label_path) as file:
            boxes, labels = self._parse_label_lines(file)

        num_objs = boxes.shape[0]

        # NOTE(review): newer torchvision reference engines expect image_id as a
        # plain int; the AssertionError recorded in the training cell output may
        # stem from this tensor form — confirm against modules/engine.py.
        image_id = torch.tensor([idx])
        # Box area = height * width.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        # No object is marked as a crowd.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": image_id,
            "area": area,
            "iscrowd": iscrowd,
        }

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples, i.e. entries in the images folder."""
        return len(self.imgs)

In [16]:
# Split ratios and batch sizes come from the project config.
train_ratio = config.train_ratio
val_ratio = config.val_ratio
train_val_batch = config.train_val_batch
# NOTE(review): test_batch is read here but not used in any visible cell.
test_batch = config.test_batch

# Two views of the same files: one with the training transform, one without it.
train_dataset = CustomDataset(root, get_transform(train=True))
val_test_dataset = CustomDataset(root, get_transform(train=False))

# Seed before drawing the permutation below so the split is repeatable.
torch.manual_seed(1)

# Subset sizes; the test subset takes whatever remains after train and val.
train_num = round(train_ratio*len(train_dataset))
val_num = round(val_ratio*len(train_dataset))
indices = torch.randperm(len(train_dataset)).tolist()

# All three subsets slice the same permutation, so they do not overlap.
train_dataset = torch.utils.data.Subset(train_dataset, indices[:train_num])
val_dataset = torch.utils.data.Subset(val_test_dataset, indices[train_num:train_num+val_num])
test_dataset = torch.utils.data.Subset(val_test_dataset, indices[train_num+val_num:])

# Both loaders batch with utils.collate_fn; only the training loader shuffles.
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_val_batch,
                                          shuffle=True, num_workers=4,
                                          collate_fn=utils.collate_fn)

val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=train_val_batch,
                                               shuffle=False, num_workers=4,
                                               collate_fn=utils.collate_fn)

# Show the three subset sizes as the cell output.
len(train_dataset), len(val_dataset), len(test_dataset)

(550, 157, 79)

## c. Training

In [17]:
# True when the config selects training. Cells guarded by `if train_mode:` train
# and save the model; cells guarded by `if not train_mode:` run inference.
train_mode = config.mode=='train'

In [18]:
if train_mode:
    # Train on the GPU when one is available, otherwise on the CPU.
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)

    # construct an optimizer - SGD follow Faster R-CNN paper
    # Only parameters that require gradients are handed to the optimizer.
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

    # and a learning rate scheduler which decreases the learning rate by
    # 10x every 3 epochs
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [19]:
if train_mode:
    # Number of passes over the training loader, taken from the project config.
    num_epochs = config.num_epochs

    for epoch in range(num_epochs):
        # train for one epoch, printing every 10 iterations
        train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=10)
        
        # update the learning rate
        lr_scheduler.step()
        
        # evaluate on the validation dataset (currently disabled)
        # NOTE(review): the recorded cell output ends with "AssertionError: Results
        # do not correspond to current coco set", presumably raised by this call
        # before it was commented out — confirm and fix before re-enabling.
        # evaluate(model, val_data_loader, device=device)

Epoch: [0]  [  0/550]  eta: 0:39:51  lr: 0.000014  loss: 2.5637 (2.5637)  loss_classifier: 1.3563 (1.3563)  loss_box_reg: 0.0008 (0.0008)  loss_objectness: 0.6944 (0.6944)  loss_rpn_box_reg: 0.5122 (0.5122)  time: 4.3491  data: 0.7145  max mem: 1064
Epoch: [0]  [ 10/550]  eta: 0:04:56  lr: 0.000105  loss: 2.7970 (3.3905)  loss_classifier: 1.3420 (1.3417)  loss_box_reg: 0.0045 (0.0107)  loss_objectness: 0.6954 (0.7009)  loss_rpn_box_reg: 0.7607 (1.3373)  time: 0.5482  data: 0.1073  max mem: 1459
Epoch: [0]  [ 20/550]  eta: 0:03:24  lr: 0.000196  loss: 3.6558 (3.6484)  loss_classifier: 1.3238 (1.2953)  loss_box_reg: 0.0101 (0.0106)  loss_objectness: 0.6992 (0.7010)  loss_rpn_box_reg: 1.8417 (1.6414)  time: 0.1872  data: 0.0612  max mem: 1757
Epoch: [0]  [ 30/550]  eta: 0:02:44  lr: 0.000287  loss: 3.2579 (3.3482)  loss_classifier: 1.1844 (1.2251)  loss_box_reg: 0.0047 (0.0103)  loss_objectness: 0.6928 (0.6946)  loss_rpn_box_reg: 1.4664 (1.4182)  time: 0.1876  data: 0.0556  max mem: 1757


  ar = np.asanyarray(ar)


AssertionError: Results do not correspond to current coco set

In [None]:
if train_mode:
    # Create the output folder if needed so the listing and the save below do
    # not fail on a fresh checkout.
    os.makedirs('weights', exist_ok=True)
    # Number the checkpoint by how many entries the folder already holds
    # (the first save into an empty folder becomes exp_0.pt).
    file_num = len(os.listdir('weights'))
    # The whole model object is saved, not only its state_dict.
    torch.save(model, f'weights/exp_{file_num}.pt')

## d. Testing

In [None]:
from seaborn import color_palette


def make_color_map():
    '''
        Map every distinct class name to an RGB colour tuple.

        Class names are the values of the module-level ``class_map``, taken in
        sorted order. Colours come from the "Paired" palette, one per name, with
        each channel scaled by 255 and truncated to an int.
    '''
    class_names = sorted(set(class_map.values()))
    palette = color_palette("Paired", len(class_names))

    color_by_name = {}
    for name, rgb in zip(class_names, palette):
        color_by_name[name] = tuple(int(255*c) for c in rgb)
    return color_by_name

# plot_result() looks colours up in this module-level map; it is only built
# outside training mode.
if not train_mode:   
    color_map = make_color_map()

In [None]:
def nms(boxes, scores, iou_thresh=0.5):
    """Thin wrapper that forwards to ``torchvision.ops.nms``.

    Args:
        boxes: Candidate boxes, passed through unchanged.
        scores: One score per box, passed through unchanged.
        iou_thresh: IoU threshold handed to ``torchvision.ops.nms``.

    Returns:
        Whatever ``torchvision.ops.nms`` returns (used by ``infer`` as the
        indices of the boxes to keep).
    """
    kept = torchvision.ops.nms(boxes, scores, iou_thresh)
    return kept

In [None]:
def plot_result(image, boxes, cls, scr):
    """Draw one detection onto ``image``: a box plus a "<class> <score>" caption.

    Args:
        image: Array handed to the OpenCV drawing calls.
        boxes: Tensor with the four box coordinates (x1, y1, x2, y2).
        cls: Class name; also the key into the module-level ``color_map``.
        scr: Score, already formatted as a string.
    """
    box_color = color_map[cls]

    left, top, right, bottom = boxes.cpu().numpy().astype("int")
    cv2.rectangle(image, (left, top), (right, bottom), box_color, 2)

    caption = ' '.join([cls, scr])
    # Same text drawn twice: a thick pass in the class colour, then a thin white
    # pass on top of it.
    for text_color, thickness in ((box_color, 2), ((255,255,255), 1)):
        cv2.putText(image, caption, (left, top-10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.7, text_color, thickness,
                    cv2.LINE_AA)

In [None]:
def infer(model, img, iou_thresh=0.1, out_path='tmp.tif'):
    """Run the detector on one image, draw the kept detections, save and show them.

    Args:
        model: Detection model; it is moved to the selected device, switched to
            eval mode and called with a one-element list holding ``img``.
        img: Image tensor — assumed (3, H, W) float in [0, 1], as produced by
            the dataset transform — TODO confirm.
        iou_thresh: IoU threshold for the extra NMS pass over all predicted
            boxes (labels are not taken into account). Defaults to the
            previously hard-coded 0.1.
        out_path: File the annotated image is written to. Defaults to the
            previously hard-coded 'tmp.tif'.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # Model and input must live on the same device.
    model.to(device)
    # put the model in evaluation mode
    model.eval()
    with torch.no_grad():
        pred = model([img.to(device)])

    # CHW float -> HWC uint8. order='C' forces a contiguous buffer: the
    # transposed view is not contiguous, and the OpenCV drawing calls in
    # plot_result() need to write into the array in place.
    image = img.cpu().numpy().transpose(1, 2, 0)
    image = (image*255).astype('uint8', order='C')

    boxes = pred[0]['boxes']
    scores = pred[0]['scores']
    labels = pred[0]['labels']
    keep = nms(boxes, scores, iou_thresh)

    for i in keep:
        cls = class_map[int(labels[i])]
        scr = str(round(float(scores[i]), 2))
        plot_result(image, boxes[i], cls, scr)

    # `image` is RGB: reverse the channels for OpenCV's BGR file writer and
    # show the RGB array directly with matplotlib.
    cv2.imwrite(out_path, image[:,:,::-1])

    plt.figure(figsize=(20,20))
    plt.imshow(image)

In [None]:
if not train_mode:
    # pick one image from the test set
    # (the target returned alongside it is discarded)
    img, _ = test_dataset[0]

In [None]:
if not train_mode:
    # load model from weight
    # NOTE(review): these files hold whole pickled model objects (see the
    # torch.save call in the training section), so only load checkpoints from a
    # trusted source.
    # NOTE(review): the save cell writes names of the form exp_<n>.pt; confirm
    # that 'weights/exp.pt' actually exists.
    model1 = torch.load('weights/exp.pt')
    model2 = torch.load('weights/exp_1.pt')
    # model2 = torch.load('weights/exp_2.pt')

In [None]:
if not train_mode:
    # Settings noted for model1 (presumably those it was trained with — TODO confirm):
    # AnchorGenerator(sizes=((64, 128, 256, 512),),
    #                                aspect_ratios=((0.5, 1.0, 2.0),))
    # min_size = 600
    infer(model1, img)

In [None]:
if not train_mode:
    # Settings noted for model2 (presumably those it was trained with — TODO confirm):
    # AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
    #                                aspect_ratios=((0.5, 1.0, 2.0),))
    # min_size = 1200
    infer(model2, img)

In [None]:
# if not train_mode:
#     # AnchorGenerator(sizes=((8, 16, 32, 64, 128, 256, 512),),
#     #                                aspect_ratios=((0.5, 1.0, 2.0),))
#     # min_size = 1200
#     infer(model3, img)