In [15]:
import os
import sys
import torch as t
from model import FasterRCNN
from data.dataset import DOTADataset
from utils import convertor as at
from utils.trainer import FasterRCNNTrainer
from utils.evaluation import eval_detection_voc

from tqdm.notebook import tqdm
import numpy as np
from PIL import Image
import glob
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torch.utils.data import sampler, random_split
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torchnet.meter import AverageValueMeter
import torchvision
%matplotlib inline

In [11]:
device = t.device("cuda" if t.cuda.is_available() else "cpu")
faster_rcnn = FasterRCNN(n_fg_class=16)
trainer = FasterRCNNTrainer(faster_rcnn).to(device)

In [12]:
mean_nums = [0.485, 0.456, 0.406]
std_nums = [0.229, 0.224, 0.225]
train_transforms = transforms.Compose([
        transforms.RandomResizedCrop(size=(800, 800)),
        transforms.RandomRotation(degrees=10),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ColorJitter(brightness=0.15, contrast=0.15),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums)
])

validation_transforms = transforms.Compose([
        transforms.Resize((800, 800)),
        transforms.CenterCrop((800, 800)),
        transforms.ToTensor(),
        transforms.Normalize(mean_nums, std_nums)
        ])

In [13]:
label_dict = {'small-vehicle': 0, 
              'large-vehicle': 1, 
              'ship': 2, 
              'harbor': 3, 
              'bridge': 4, 
              'plane': 5, 
              'basketball-court': 6, 
              'tennis-court': 7, 
              'swimming-pool': 8, 
              'soccer-ball-field': 9, 
              'ground-track-field': 10, 
              'storage-tank': 11, 
              'baseball-diamond': 12, 
              'helicopter': 13, 
              'roundabout': 14, 
              'container-crane': 15}


In [None]:
train_dataset = DOTADataset('train_images/', 'train_y/', train_transforms)
val_dataset = DOTADataset('val_images/images/', 'val_y/', validation_transforms)

In [17]:
def train(train_dataset, val_dataset, trainer, num_epochs=6):
    for epoch in range(num_epochs):
        trainer.reset_meters()
        print(f'Epoch: {epoch+1}/{num_epochs}')
        dataloader = DataLoader(train_dataset, \
                            batch_size=1, \
                            shuffle=True, \
                            num_workers=1)
        
        trainer.faster_rcnn.train() 
        
        current_loss = 0.0
        num_img = 0
        for img, labels in dataloader:
            img = img.cuda().float()
            for label_data in labels:
                x1, y1, x2, y2, x3, y3, x4, y4, label, _ = label_data
                y_min = np.min([float(y1[0]), float(y2[0]), float(y3[0]), float(y4[0])])
                y_max = np.max([float(y1[0]), float(y2[0]), float(y3[0]), float(y4[0])])
                x_min = np.min([float(x1[0]), float(x2[0]), float(x3[0]), float(x4[0])])
                x_max = np.max([float(x1[0]), float(x2[0]), float(x3[0]), float(x4[0])])
                label = [label]
                bbox_ =  np.array([[y_min, x_min, y_max, x_max]])
                bbox_ = np.array([np.stack(bbox_).astype(np.float32)])
                bbox = t.FloatTensor(bbox_)
                
                losses = trainer.train_step(img, bbox, label, 1.)
                current_loss += losses.total_loss 
                num_img += 1
        
        epoch_loss = current_loss / num_img

        print(f'Epoch loss: {loss:.2f}'.format(loss=epoch_loss))

        pred_bboxes, pred_labels, pred_scores = list(), list(), list()
        gt_bboxes, gt_labels, gt_difficults = list(), list(), list()
        dataloader = DataLoader(val_dataset, \
                            batch_size=1, \
                            shuffle=True, \
                            num_workers=1)
        
        for ii, (imgs,  labels) in enumerate(dataloader):
            gt_labels_ = []
            gt_bboxes_ = []
            for label_data in labels:
                x1, y1, x2, y2, x3, y3, x4, y4, label, _ = label_data
                y_min = np.min([float(y1[0]), float(y2[0]), float(y3[0]), float(y4[0])])
                y_max = np.max([float(y1[0]), float(y2[0]), float(y3[0]), float(y4[0])])
                x_min = np.min([float(x1[0]), float(x2[0]), float(x3[0]), float(x4[0])])
                x_max = np.max([float(x1[0]), float(x2[0]), float(x3[0]), float(x4[0])])
                gt_labels_.append(label)
                bbox_ =  np.array([[y_min, x_min, y_max, x_max]])
                bbox_ = np.array([np.stack(bbox_).astype(np.float32)])
                gt_bboxes_.append(t.FloatTensor(bbox_))
            
            imgs = imgs.to(device)
            
            pred_bboxes_, pred_labels_, pred_scores_ = trainer.faster_rcnn.predict(imgs, [(800, 800)])
            gt_bboxes += gt_bboxes_
            gt_labels += gt_labels_
            pred_bboxes += pred_bboxes_
            pred_labels += pred_labels_
            pred_scores += pred_scores_
        gt_difficults = None
        
        result = eval_detection_voc(
            pred_bboxes, pred_labels, pred_scores,
            gt_bboxes, gt_labels, gt_difficults,
            use_07_metric=True)
        print('mAP: {map:.2f}'.format(map=result['map']))

            

In [18]:
train(train_dataset, val_dataset, trainer)

Epoch:1/6
Epoch loss: 11.42
mAP:0.68
Epoch:2/6
Epoch loss: 8.97
mAP:0.71
Epoch:3/6
Epoch loss: 6.34
mAP:0.72
Epoch:4/6
Epoch loss: 5.98
mAP:0.74
Epoch:5/6
Epoch loss: 5.91
mAP:0.73
Epoch:6/6
Epoch loss: 5.92
mAP:0.73
