In [1]:
import numpy as np
import torchvision
import time
import os
import copy
import pdb
import time
import argparse

import sys
import cv2

from utils_rcnn.eval_tool import eval_detection_voc

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, models, transforms

from dataloader import VESSELBboxDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer
from PIL import Image, ImageDraw

import glob
%matplotlib inline
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import voc_eval

import skimage.io
import skimage.transform
import skimage.color
import skimage

# Guard on the installed PyTorch release. Only the second dot-separated
# component of the version string is compared, so any "<major>.4.<patch>"
# release passes this check.
assert torch.__version__.split('.')[1] == '4'


# Report whether torch.cuda.is_available() sees a usable CUDA device.
print('CUDA available: {}'.format(torch.cuda.is_available()))

CUDA available: True


In [2]:
def print_inline(line):
    """Write `line` to stdout prefixed with a carriage return, then flush.

    No newline is appended, so successive calls start again at the beginning
    of the same output line (handy for progress counters).

    Args:
        line: Value to show; it is formatted into the output string.
    """
    out = sys.stdout
    out.write('\r{}'.format(line))
    out.flush()

In [3]:
use_gpu = True

# The checkpoint is loaded as one serialized object. torch.load unpickles its
# input, so only point this at a checkpoint file from a trusted source.
retinanet = torch.load('./checkpoint/ckpt.pt')
retinanet = retinanet.cuda() if use_gpu else retinanet
retinanet.eval()

# Collect every JPEG found under <data_dir>/JPEGImages.
data_dir = "/media/nasir/Drive1/datasets/SAR/SAR-Ship-Dataset"
paths = glob.glob(f'{data_dir}/JPEGImages/*.jpg')

In [4]:
def str2int(a):
    """Convert every element of the iterable `a` to `int` and return a list."""
    return [int(x) for x in a]

def extract_boxes(fname):
    """Read one box per line from the annotation text file `fname`.

    Each non-blank line is split on whitespace and its last four fields are
    converted to integers; any fields before those are ignored. Blank lines
    (including trailing ones at the end of the file) are skipped.

    Args:
        fname: Path of the annotation file to read.

    Returns:
        A list holding one list of ints per non-blank line; empty when the
        file contains no non-blank lines.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If one of the selected fields is not an integer literal.
    """
    boxes = []
    # The context manager closes the file; no explicit close() is required.
    with open(fname) as f:
        for raw_line in f:
            # split() with no argument tolerates tabs, repeated spaces and
            # surrounding whitespace, and yields [] for a blank line.
            fields = raw_line.split()
            if not fields:
                continue
            boxes.append(str2int(fields[-4:]))
    return boxes

In [5]:
# Preprocessing pipeline applied to each image before it reaches the network:
# convert to a tensor, then normalise every channel with a fixed mean / std.
# The constants match the ImageNet statistics quoted in the torchvision docs.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

def transform_resize(image, annots, scale=1):
    """Rescale an image and its box annotations by the same factor.

    Args:
        image: Array whose `.shape` unpacks into exactly three values
            (rows, columns, channels).
        annots: Box annotations; converted with `np.array` and multiplied
            element-wise by `scale`.
        scale: Multiplicative resize factor (default 1).

    Returns:
        A tuple `(x, annots_scaled)`. `x` is the resized image passed through
        the module-level `transform` with a leading batch axis added;
        `annots_scaled` is the scaled annotation array cast to `int`.
    """
    height, width, _ = image.shape
    target_shape = (int(round(height * scale)), int(round(width * scale)))
    resized = skimage.transform.resize(image, target_shape)

    # Normalise, then add the batch dimension the network expects up front.
    batch = transform(resized).unsqueeze(0)
    scaled_boxes = (np.array(annots) * scale).astype(int)
    return batch, scaled_boxes

In [6]:
def save_results(paths, scale=0.6, score_threshold=0.5):
    """Save a ground-truth vs. detection comparison figure for every image.

    For each path the image is loaded as RGB, its annotation file is read
    (same path with 'JPEGImages' -> 'ground-truth' and '.jpg' -> '.txt'),
    both are rescaled with `transform_resize`, and the module-level
    `retinanet` is run on the result. A two-panel figure (ground truth on the
    left, detections on the right) is written to the image path with
    'JPEGImages' replaced by 'results'; the target directory is created when
    missing.

    Args:
        paths: Sequence of image file paths.
        scale: Resize factor handed to `transform_resize` (default 0.6, the
            value that used to be hard-coded).
        score_threshold: Only detections whose score is strictly greater than
            this value are drawn (default 0.5, the former hard-coded value).
    """
    for index, path in enumerate(paths):
        img = np.array(Image.open(path).convert('RGB'))
        gt_path = path.replace('JPEGImages', 'ground-truth').replace('.jpg', '.txt')
        gt_boxes = extract_boxes(gt_path)

        x, gt_boxes = transform_resize(img, gt_boxes, scale)

        # Only the forward pass needs gradient tracking switched off.
        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(x.cuda().float())

        # Bring the outputs to host memory before handing them to numpy and
        # matplotlib; neither can consume CUDA tensors directly.
        scores_np = scores.detach().cpu().numpy()
        boxes_np = transformed_anchors.detach().cpu().numpy()
        keep = np.where(scores_np > score_threshold)[0]

        # First channel of the normalised input serves as the grey background.
        background = x[0, 0, :, :].numpy()
        fig, axarr = plt.subplots(1, 2)
        for ax in axarr:
            ax.imshow(background, cmap='gray')
            ax.axis('off')

        # NOTE(review): ground-truth rows are drawn as (x, y, width, height)
        # while detections are drawn as (x1, y1, x2, y2) -- confirm that this
        # matches the annotation file format.
        for box in gt_boxes:
            rect = patches.Rectangle((box[0], box[1]), box[2], box[3],
                                     linewidth=2, edgecolor='g', facecolor='none')
            axarr[0].add_patch(rect)

        for box in boxes_np[keep]:
            rect = patches.Rectangle((box[0], box[1]), box[2] - box[0], box[3] - box[1],
                                     linewidth=2, edgecolor='r', facecolor='none')
            axarr[1].add_patch(rect)

        print_inline(f"{index}/{len(paths)}, {path}")
        axarr[0].title.set_text('Ground Truth')
        axarr[1].title.set_text('Detected')

        out_path = path.replace('JPEGImages', 'results')
        out_dir = os.path.dirname(out_path)
        if out_dir:
            # savefig does not create missing directories itself.
            os.makedirs(out_dir, exist_ok=True)
        fig.savefig(out_path)
        # Release the figure so memory does not grow across the loop.
        plt.close(fig)



In [7]:
def evaluate_voc(dataset, model, threshold=0.5):
    """Run `model` over every item of `dataset` and score it with `eval_detection_voc`.

    Args:
        dataset: Indexable, sized collection whose items are mappings with an
            'img' tensor and an 'annot' tensor. Columns 0-3 of 'annot' are
            used as ground-truth box coordinates and column 4 as the label.
        model: Detector that is called on a single-image CUDA batch and
            returns `(scores, labels, boxes)` tensors.
        threshold: Currently unused; kept so existing calls that pass it keep
            working. No score filtering is applied before evaluation.

    Returns:
        The value returned by `eval_detection_voc`, called with
        `use_07_metric=True` and no difficulty flags.
    """
    print_inline('\n\n')
    model.eval()

    pred_bboxes, pred_labels, pred_scores = [], [], []
    gt_bboxes, gt_labels = [], []
    total = len(dataset)

    with torch.no_grad():
        for index in range(total):
            data = dataset[index]

            # run network on a batch of one image
            scores, labels, boxes = model(data['img'].cuda().float().unsqueeze(dim=0))

            annot = data['annot']
            gt_bboxes.append(annot[:, :4].numpy())
            gt_labels.append(annot[:, 4].numpy())

            # Convert whole tensors at once: this avoids a Python-level loop
            # and keeps the (N, 4) box shape even when there are no detections.
            pred_bboxes.append(boxes.cpu().numpy())
            pred_labels.append(labels.cpu().numpy())
            pred_scores.append(scores.cpu().numpy())

            print_inline('{}/{}'.format(index, total))

    # Difficulty flags are not available, so None is passed for them.
    result = eval_detection_voc(
        pred_bboxes, pred_labels, pred_scores,
        gt_bboxes, gt_labels, None,
        use_07_metric=True)

    return result

In [8]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=0.6 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=0.6)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.41465131528578736 


In [29]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=0.85 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=0.85)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.6703695705028253 


In [30]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.783440169198 


In [31]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1.10 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1.10)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.7414039903619322 


In [38]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1.23 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1.23)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.7460285268594192 


In [39]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1.25 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1.25)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.7526550331308853 


In [50]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1.35 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1.35)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.7373150255258241 


In [61]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=1.48 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=1.48)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.698996258589648 


In [62]:
# Evaluate the loaded model on the 'test' split with the dataset built at
# scale=2 (presumably an image resize factor -- confirm in
# dataloader.VESSELBboxDataset), then print the 'map' entry of the result.
dataset_val = VESSELBboxDataset(split='test', scale=2)
eval_result = evaluate_voc(dataset_val, retinanet)
print_inline(f'\n\n map: ----->    {eval_result["map"]} \n')



3809/3810

 map: ----->    0.5565673555605288 
