# 1. Import

In [1]:
import os

import torch
import torchvision
import torch.utils.data
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

import cv2
from PIL import Image
import matplotlib.pyplot as plt

from modules import utils
from modules import transforms as T
from modules.engine import train_one_epoch, evaluate

import config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# !pip install cython
# !pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'

# !git clone https://github.com/pytorch/vision.git

# !cp vision/references/detection/utils.py .
# !cp vision/references/detection/transforms.py .
# !cp vision/references/detection/coco_eval.py .
# !cp vision/references/detection/engine.py .
# !cp vision/references/detection/coco_utils.py .

# 2. Fine-tuning

## a. Modifying the model to add a different backbone

In [3]:
# Backbone: the convolutional feature extractor of an ImageNet-pretrained VGG16.
backbone = torchvision.models.vgg16(weights="VGG16_Weights.IMAGENET1K_V1").features
# The detector needs to know how many channels the backbone emits
# (torchvision documents `out_channels` as a required backbone attribute).
backbone.out_channels = 512

# Everything tunable comes from the project config module.
class_map = config.class_map
num_classes = len(class_map)
min_size, max_size = config.min_size, config.max_size
anchor_size, anchor_ratio = config.anchor_size, config.anchor_ratio

# Single feature map -> one tuple of sizes and one tuple of aspect ratios.
anchor_generator = AnchorGenerator(
    sizes=(anchor_size,),
    aspect_ratios=(anchor_ratio,),
)

model = FasterRCNN(
    backbone,
    num_classes,
    min_size=min_size,
    max_size=max_size,
    rpn_anchor_generator=anchor_generator,
)

# Bare expression so the notebook displays the assembled architecture.
model

FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(600,), max_size=800, mode='bilinear')
  )
  (backbone): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inp

## b. Dataset Class

In [4]:
def get_transform(train):
    """Assemble the preprocessing pipeline applied to each (image, target) pair.

    Args:
        train: when truthy, a random horizontal flip with probability 0.5 is
            appended after the conversion steps.

    Returns:
        A ``T.Compose`` wrapping the selected transforms, in order.
    """
    # PIL -> tensor and casting to float happen in every mode.
    pipeline = [T.PILToTensor(), T.ConvertImageDtype(torch.float)]
    if train:
        # Follow Fast R-CNN paper
        pipeline += [T.RandomHorizontalFlip(0.5)]
    return T.Compose(pipeline)


class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset pairing images with plain-text box label files.

    Images are listed from ``<root>/images`` and label files from ``label_root``.
    Both listings are sorted and paired by position.
    NOTE(review): the pairing is purely positional; it is only correct when both
    directories contain matching files that sort identically - confirm.

    Each non-blank label line is ``xmin, ymin, width, height, cls`` (integers,
    comma-separated). Boxes are converted to ``[xmin, ymin, xmax, ymax]``.

    Args:
        root: split directory containing an ``images`` sub-directory.
        transforms: optional callable ``(img, target) -> (img, target)``.
        label_root: directory holding the label files. When ``None`` (default)
            it is derived from ``root`` by swapping the image-data path prefix
            for the corrected-labels prefix, as before.
    """

    # Prefix substitution used to locate labels when ``label_root`` is not given.
    _IMAGE_PREFIX = '/hdd/thaihq/qnet_search/ori_data'
    _LABEL_PREFIX = '/hdd/nguyenlc/ai_training/faster_rcnn/qnet_search/correct_labels'

    def __init__(self, root, transforms=None, label_root=None):
        self.root = root
        if label_root is None:
            label_root = root.replace(self._IMAGE_PREFIX, self._LABEL_PREFIX)
        self.label_root = label_root
        self.transforms = transforms

        self.imgs = sorted(os.listdir(os.path.join(root, "images")))
        self.labels = sorted(os.listdir(self.label_root))

    def __getitem__(self, idx):
        """Return ``(img, target)`` for sample ``idx``.

        ``target`` holds ``boxes`` (float32, N x 4), ``labels`` (int64, N),
        ``image_id``, ``area``, ``iscrowd`` and ``image_name``.
        """
        img_path = os.path.join(self.root, "images", self.imgs[idx])
        label_path = os.path.join(self.label_root, self.labels[idx])

        img = Image.open(img_path).convert("RGB")
        with open(label_path) as file:
            # Blank lines (e.g. a trailing newline pair) carry no object.
            rows = [line.strip() for line in file if line.strip()]

        boxes = []
        labels = []
        for row in rows:
            # int() tolerates surrounding whitespace, so splitting on ',' accepts
            # both "a, b" and "a,b".
            xmin, ymin, width, height, cls = [int(value) for value in row.split(',')]
            boxes.append([xmin, ymin, xmin + width, ymin + height])
            # NOTE(review): torchvision detection models reserve label 0 for
            # background - confirm that `cls - 1` never maps a real class to 0.
            labels.append(cls - 1)

        num_objs = len(boxes)
        # reshape keeps an (0, 4) shape for images without objects; a flat empty
        # tensor would break the column indexing below.
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        image_id = torch.tensor([idx])
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        target = {}
        target["boxes"] = boxes
        target["labels"] = labels
        target["image_id"] = image_id
        target["area"] = area
        target["iscrowd"] = iscrowd
        # File name up to its first dot; used to name prediction files.
        target["image_name"] = os.path.basename(img_path).split('.')[0]

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self):
        """Number of samples, taken from the image listing."""
        return len(self.imgs)

In [5]:
# train_ratio = config.train_ratio
# val_ratio = config.val_ratio
# train_val_batch = config.train_val_batch
# test_batch = config.test_batch

# train_dataset = CustomDataset(root, get_transform(train=True))
# val_test_dataset = CustomDataset(root, get_transform(train=False))

# torch.manual_seed(1)

# train_num = round(train_ratio*len(train_dataset))
# val_num = round(val_ratio*len(train_dataset))
# indices = torch.randperm(len(train_dataset)).tolist()

# train_dataset = torch.utils.data.Subset(train_dataset, indices[:train_num])
# val_dataset = torch.utils.data.Subset(val_test_dataset, indices[train_num:train_num+val_num])
# test_dataset = torch.utils.data.Subset(val_test_dataset, indices[train_num+val_num:])

# train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_val_batch,
#                                           shuffle=True, num_workers=4,
#                                           collate_fn=utils.collate_fn)

# val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=train_val_batch,
#                                                shuffle=False, num_workers=4,
#                                                collate_fn=utils.collate_fn)

# len(train_dataset), len(val_dataset), len(test_dataset)

In [6]:
# Batch sizes are configured separately for training and for val/test.
train_batch = config.train_batch
val_test_batch = config.val_test_batch

# One dataset per pre-split directory; only the training split gets augmentation.
train_dataset = CustomDataset(
    '/hdd/thaihq/qnet_search/ori_data/train',
    get_transform(train=True),
)
val_dataset = CustomDataset(
    '/hdd/thaihq/qnet_search/ori_data/val',
    get_transform(train=False),
)
test_dataset = CustomDataset(
    '/hdd/thaihq/qnet_search/ori_data/test',
    get_transform(train=False),
)

# Only the training loader shuffles; all three use the project's collate function.
train_data_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=train_batch,
    shuffle=True,
    num_workers=4,
    collate_fn=utils.collate_fn,
)
val_data_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=val_test_batch,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=val_test_batch,
    shuffle=False,
    num_workers=4,
    collate_fn=utils.collate_fn,
)

# Displayed as the cell output: sample counts per split.
len(train_dataset), len(val_dataset), len(test_dataset)

(11214, 2804, 14018)

## c. Training

In [7]:
# True when the config selects training; the cells under "Testing" only run when this is False.
train_mode = config.mode=='train'

def create_folder(root, train_mode):
    """Create the next numbered run directory under ``root``.

    The immediate sub-directories of ``root`` whose names parse as integers are
    treated as existing runs; the new directory is named ``max + 1`` (``1`` when
    there are none or ``root`` does not exist yet).

    Args:
        root: parent directory of the numbered run folders.
        train_mode: when truthy, a ``weights`` sub-directory is created as well.

    Returns:
        ``(log_path, weight_path)``; ``weight_path`` is ``None`` unless
        ``train_mode`` is truthy.

    Raises:
        OSError: if the new directory cannot be created (e.g. it already exists).
    """
    # Only the top level is needed; os.walk yields nothing for a missing root.
    _, subdirs, _ = next(os.walk(root), (root, [], []))

    max_model = 0
    for name in subdirs:
        try:
            max_model = max(max_model, int(name))
        except ValueError:
            # Non-numeric directory names are not run folders.
            continue

    log_path = os.path.join(root, str(max_model + 1))
    os.makedirs(log_path)

    weight_path = None
    if train_mode:
        weight_path = os.path.join(log_path, 'weights')
        os.makedirs(weight_path)

    return log_path, weight_path

if train_mode:
    # Use the GPU when one is visible, otherwise fall back to the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # SGD (as in the Faster R-CNN paper) over the trainable parameters only.
    params = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = torch.optim.SGD(
        params,
        lr=config.learning_rate,
        momentum=0.9,
        weight_decay=0.0005,
    )

    # Step schedule: the learning rate is multiplied by 0.1 every 3 epochs.
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [8]:
if train_mode:
    from early_stopping import EarlyStopping

    stopper = EarlyStopping(config.patience)

    log_folder, weight_folder = create_folder('training', train_mode)

    # These paths are the same for every epoch.
    log_path = os.path.join(log_folder, 'log.txt')
    last_weight_path = os.path.join(weight_folder, 'last.pt')
    best_weight_path = os.path.join(weight_folder, 'best.pt')

    num_epochs = config.num_epochs

    # Lowest epoch loss seen so far; best.pt is refreshed whenever it improves,
    # so the file always exists after the first epoch and really holds the best
    # weights (not whatever the model looked like when training stopped).
    best_loss = float('inf')

    logs = []
    for epoch in range(num_epochs):
        # train for one epoch, printing every 100 iterations
        log = train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=100)  # val_data_loader for debugging

        stopper.log(log_path, str(log))

        torch.save(model, last_weight_path)

        # str(log) is split on double spaces; the second field is expected to
        # look like "loss: <a> (<b>)" and <b> is used as the epoch loss.
        # NOTE(review): presumably <a> is the smoothed median and <b> the running
        # average of the total loss - confirm against modules.engine.
        # loss_1 = float(str(log).split('  ')[1].split(' ')[1])
        loss_2 = float(str(log).split('  ')[1].split(' ')[2][1:-1])
        loss = loss_2  # loss_1 for debugging

        if loss < best_loss:
            best_loss = loss
            torch.save(model, best_weight_path)

        stop = stopper(epoch, loss)
        if stop:
            break

        # update the learning rate
        lr_scheduler.step()

        # evaluate on the validation dataset
        # evaluate(model, val_data_loader, device=device)

Epoch: [0]  [   0/1402]  eta: 1:11:39  lr: 0.000002  loss: 2.9816 (2.9816)  loss_classifier: 2.1161 (2.1161)  loss_box_reg: 0.0069 (0.0069)  loss_objectness: 0.6911 (0.6911)  loss_rpn_box_reg: 0.1675 (0.1675)  time: 3.0665  data: 1.1960  max mem: 5281
Epoch: [0]  [ 100/1402]  eta: 0:07:38  lr: 0.000102  loss: 2.5163 (2.8316)  loss_classifier: 1.6472 (1.9411)  loss_box_reg: 0.0030 (0.0028)  loss_objectness: 0.6851 (0.6888)  loss_rpn_box_reg: 0.1910 (0.1990)  time: 0.3180  data: 0.0118  max mem: 5831
Epoch: [0]  [ 200/1402]  eta: 0:06:47  lr: 0.000202  loss: 0.5663 (2.0064)  loss_classifier: 0.1768 (1.2327)  loss_box_reg: 0.0170 (0.0086)  loss_objectness: 0.2346 (0.5927)  loss_rpn_box_reg: 0.1287 (0.1724)  time: 0.3231  data: 0.0118  max mem: 5831
Epoch: [0]  [ 300/1402]  eta: 0:06:10  lr: 0.000302  loss: 0.4110 (1.4781)  loss_classifier: 0.1465 (0.8727)  loss_box_reg: 0.0284 (0.0123)  loss_objectness: 0.1207 (0.4424)  loss_rpn_box_reg: 0.0921 (0.1506)  time: 0.3269  data: 0.0125  max me

KeyboardInterrupt: 

In [None]:
def plot(log_folder):
    """Plot the per-epoch losses recorded in ``<log_folder>/log.txt``.

    Every non-blank log line is split on double spaces; from the second field the
    second token is plotted as ``loss_1`` and the third token (with its first and
    last characters stripped) as ``loss_2``. The minimum of ``loss_2`` is marked.
    The figure is saved to ``<log_folder>/loss.png`` and shown.

    Does nothing when the log file is missing or contains no entries.

    Args:
        log_folder: run directory containing ``log.txt``.
    """
    path = os.path.join(log_folder, 'log.txt')
    if not os.path.exists(path):
        return

    # Context manager so the handle is released even if parsing fails.
    with open(path, 'r') as file:
        lines = [line for line in file if line.strip()]

    # Nothing logged yet: min() below would fail on an empty sequence.
    if not lines:
        return

    loss_fields = [line.split('  ')[1].split(' ') for line in lines]
    loss_1 = [float(tokens[1]) for tokens in loss_fields]
    loss_2 = [float(tokens[2][1:-1]) for tokens in loss_fields]

    plt.figure(figsize=(15, 10), tight_layout=True)
    plt.plot(range(len(loss_1)), loss_1, label='loss_1')
    plt.plot(range(len(loss_2)), loss_2, label='loss_2')

    # Highlight the first epoch that reached the lowest loss_2.
    min_loss = min(loss_2)
    min_index = loss_2.index(min_loss)
    plt.plot(min_index, min_loss, '*', label='best value')

    plt.title('Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Value')
    plt.legend()

    save_path = os.path.join(log_folder, 'loss.png')
    plt.savefig(save_path)

    plt.show()

# Plot the loss curve of the run trained above; uncomment the override below to
# plot an earlier run folder instead.
if train_mode:
    # log_folder = 'training/1'
    plot(log_folder)

## d. Testing

In [None]:
# train_mode = False

In [None]:
def nms(boxes, scores, iou_thresh=0.5):
    """Thin wrapper around ``torchvision.ops.nms``.

    Args:
        boxes: candidate boxes, forwarded unchanged.
        scores: one score per box, forwarded unchanged.
        iou_thresh: IoU threshold forwarded to ``torchvision.ops.nms``
            (defaults to 0.5).

    Returns:
        Whatever ``torchvision.ops.nms`` returns (per the torchvision docs, the
        indices of the boxes that were kept).
    """
    return torchvision.ops.nms(boxes, scores, iou_thresh)

def plot_result(image, boxes, cls, scr):
    """Draw a single detection onto ``image``.

    Args:
        image: array handed to the OpenCV drawing calls.
        boxes: tensor holding one box as four values (x1, y1, x2, y2).
        cls: class name; also the key used to look up the colour in ``color_map``.
        scr: score, already formatted as a string.
    """
    colour = color_map[cls]

    # Corner coordinates as integers for OpenCV.
    left, top, right, bottom = boxes.cpu().numpy().astype("int")
    cv2.rectangle(image, (left, top), (right, bottom), colour, 1)

    # Caption "<class> <score>" placed 10 px above the top-left corner.
    caption = ' '.join([cls, scr])
    cv2.putText(image, caption, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX,
                0.5, colour, 1, cv2.LINE_AA)
    # cv2.putText(image, ' '.join([cls, scr]), (x1, y1-10),
    #             cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255,255,255), 1,
    #             cv2.LINE_AA)


from seaborn import color_palette

def make_color_map():
    """Assign one colour to every class name in ``class_map``.

    Returns:
        dict mapping each class name (sorted) to a tuple of three integers
        obtained by scaling the "Paired" palette entries by 255 and truncating.
    """
    class_names = sorted(set(class_map.keys()))
    palette = color_palette("Paired", len(class_names))

    # Palette entries are float triples; convert each channel to an int.
    colours = [tuple(int(255 * channel) for channel in rgb) for rgb in palette]

    return dict(zip(class_names, colours))

# plot_result looks colours up in this module-level dict; it is only built in test mode.
if not train_mode:   
    color_map = make_color_map()

In [None]:
def infer(model, img, iou_thresh=0.1):
    """Run the detector on one image and display the annotated result.

    Args:
        model: detection model; called with a list holding the image and expected
            to return a list of dicts with ``boxes``, ``scores`` and ``labels``.
        img: image tensor laid out as (channels, height, width).
            NOTE(review): presumably float values in [0, 1] (it is scaled by 255
            before casting to uint8) - confirm against the dataset transforms.
        iou_thresh: IoU threshold for the extra NMS pass (default 0.1, the value
            previously hard-coded here).
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    # The input is sent to `device`, so the model has to live there as well.
    model.to(device)
    # put the model in evaluation mode
    model.eval()
    with torch.no_grad():
        pred = model([img.to(device)])[0]

    # CHW -> HWC uint8. The transposed view is not C-contiguous and that layout
    # survives the arithmetic; order='C' forces a contiguous buffer, which the
    # in-place OpenCV drawing calls in plot_result need.
    image = (img.cpu().numpy().transpose(1, 2, 0) * 255).astype('uint8', order='C')

    boxes = pred['boxes']
    scores = pred['scores']
    labels = pred['labels']
    keep = nms(boxes, scores, iou_thresh)

    # Label index -> class name, following the key order of class_map.
    class_names = list(class_map.keys())
    for i in keep:
        cls = class_names[int(labels[i])]
        scr = str(round(float(scores[i]), 2))
        plot_result(image, boxes[i], cls, scr)

    # To save with OpenCV instead, reverse the channel order first:
    # cv2.imwrite('tmp.tif', image[:, :, ::-1])

    plt.figure(figsize=(20,20))
    plt.imshow(image)

In [None]:
# `img` is reused by the single-image inference cells that follow.
if not train_mode:
    # pick one image from the test set
    img, target = test_dataset[0]
    print(target['image_name'])

In [None]:
# Visual check of run 1 on the image picked above.
# NOTE(review): torch.load unpickles a whole model object here - only load
# checkpoint files you trust.
if not train_mode:
    model_1 = torch.load('training/1/weights/best.pt')
    infer(model_1, img)

In [None]:
# Visual check of run 2 on the same image, for side-by-side comparison.
# NOTE(review): torch.load unpickles a whole model object here - only load
# checkpoint files you trust.
if not train_mode:
    model_2 = torch.load('training/2/weights/best.pt')
    infer(model_2, img)

In [None]:
def test(model):
    """Run the detector over ``test_dataset`` and write one prediction file per image.

    A new numbered folder is created under ``predictions``. For every test image a
    ``<image_name>.txt`` file is written with one detection per line as
    ``<class_name> <score> <x> <y> <w> <h>`` (score rounded to 2 decimals, box as
    integer top-left corner plus width and height). Lines are newline-separated
    with no trailing newline; an image without detections gets an empty file.

    Args:
        model: detection model; called with a list holding one image tensor and
            expected to return a list of dicts with ``boxes``, ``scores`` and
            ``labels``.
    """
    from tqdm import tqdm

    path, _ = create_folder('predictions', train_mode)

    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    # Inputs are sent to `device`, so the model has to live there as well.
    model.to(device)
    model.eval()

    # Label index -> class name, following the key order of class_map.
    class_names = list(class_map.keys())

    with torch.no_grad():
        for idx in tqdm(range(len(test_dataset))):
            img, target = test_dataset[idx]

            pred = model([img.to(device)])[0]

            rows = []
            for label, score, box in zip(pred['labels'], pred['scores'], pred['boxes']):
                class_name = class_names[int(label)]
                score = round(float(score), 2)

                x, y, x2, y2 = box.cpu().numpy().astype("int")
                w = x2 - x
                h = y2 - y

                rows.append(' '.join(str(v) for v in [class_name, score, x, y, w, h]))

            img_name = target['image_name']
            save_path = os.path.join(path, f'{img_name}.txt')
            # Context manager guarantees the file is flushed and closed per image.
            with open(save_path, "w") as file:
                file.write('\n'.join(rows))

In [None]:
# Write prediction files for run 1 (each call to test() creates a new numbered
# folder under 'predictions').
if not train_mode:
    model_1 = torch.load('training/1/weights/best.pt')
    test(model_1)

In [None]:
# Write prediction files for run 2 (each call to test() creates a new numbered
# folder under 'predictions').
if not train_mode:
    model_2 = torch.load('training/2/weights/best.pt')
    test(model_2)