## Imports and routines

In [7]:
import sys
import os
import importlib
import shutil
import numpy as np
import threading
# import keras
import tqdm
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

import selectivesearch
importlib.reload(selectivesearch)
from selectivesearch import get_selective_search_regions

import pickle
import PIL

import utils
importlib.reload(utils)
from utils import *

import pascal_voc_reader

import bbox_transform
importlib.reload(bbox_transform)
from bbox_transform import *

import torch
import torchvision
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.init as weight_init
import torch.nn.functional as F

import rcnn_utils
importlib.reload(rcnn_utils)
from rcnn_utils import *
import data_utils
importlib.reload(data_utils)
from data_utils import *
from nms.nms_wrapper import nms

from IPython.core.debugger import Tracer
from IPython.display import clear_output

%matplotlib inline

import voc_eval; importlib.reload(voc_eval)
from voc_eval import voc_eval

In [8]:
def unnormalize_deltas(deltas, dataset):
    ''' Undo the target normalization applied to the regression deltas.
    Args:
        deltas: A ndarray with size (batch_size, num_fg_classes * 4)
        dataset: The current dataset. Its `targets_mean` and `targets_std`
            attributes supply the per-class mean and std.
    Returns:
        The deltas scaled by the stds and shifted by the means.
    '''
    # Row 0 of both tables belongs to the background class, so drop it and
    # flatten what remains so it lines up with the columns of `deltas`.
    scale = dataset.targets_std[1:].reshape(-1)
    shift = dataset.targets_mean[1:].reshape(-1)
    rescaled = deltas * scale
    return rescaled + shift


def test_image(image, rois, dataset,
               top_class_only=True,
               class_detection_thresh=0.05,
               nms_thresh=0.3):
    ''' Test a single image on the net.

    Uses the notebook-level global `model` for the forward pass.

    Args:
        image: A preprocessed image or precomputed features of
            the image. As ndarray.
        rois: RoIs for the image. Ndarray: (image_index, x1, y1, x2, y2)
        dataset: The currently used dataset. Supplies the target mean/std
            used to unnormalize the deltas and `im_size` used for clipping.
        top_class_only: Whether to use only top class for each roi,
            or any class over a certain threshold.
        class_detection_thresh: If the score for this class is
            above class_detection_thresh, it's considered detected
            in the roi.
        nms_thresh: Threshold forwarded to non-maximum suppression.
    Returns:
        The per-class list of detections produced by `nms_boxes`.
    '''
    image_var = np_to_var(image.astype(np.float32))
    rois_var = np_to_var(rois.astype(np.int32))

    # Run the img through the network: out[0] are the class scores,
    # out[1] the predicted box deltas.
    out = model(image_var, rois_var)

    # Predicted deltas, mapped back to the unnormalized target space.
    deltas = out[1].data.cpu().numpy()
    deltas = unnormalize_deltas(deltas, dataset)

    # Transform rois using predicted deltas (column 0 is the image index).
    boxes = rois[:, 1:]
    bboxes_inv_transformed = bbox_transform_inv(boxes, deltas)

    # NOTE(review): exp() presumably turns log-softmax outputs into
    # probabilities -- confirm against the model's final layer.
    scores = np.exp(out[0].data.cpu().numpy())

    # Clip rois to image size.
    bboxes_inv_transformed = clip_boxes(bboxes_inv_transformed,
                                        dataset.im_size)

    return nms_boxes(bboxes_inv_transformed, scores,
                     top_class_only=top_class_only,
                     class_detection_thresh=class_detection_thresh,
                     nms_thresh=nms_thresh)


def nms_boxes(boxes, scores, num_classes=21,
              top_class_only=False,
              class_detection_thresh=0.05,
              nms_thresh=0.3):
    ''' Select per-class detections and apply non-maximum suppression.
    Args:
        boxes: Ndarray of predicted boxes. Columns
            [(class_id - 1) * 4, (class_id - 1) * 4 + 4) hold the box of
            foreground class `class_id` for each row.
        scores: Ndarray of per-class scores, indexed as
            scores[row, class_id]. Column 0 is the background class.
        num_classes: Number of classes including the background.
        top_class_only: If True a row counts as a detection of a class
            only when that class has the highest score in the row.
            Otherwise any class whose score exceeds
            class_detection_thresh counts.
        class_detection_thresh: Score threshold used when
            top_class_only is False.
        nms_thresh: Threshold passed to `nms`.
    Returns:
        A list of length num_classes. Entry 0 (background) stays an
        empty list; every other entry is a float32 ndarray with one row
        (box columns followed by the score) per kept detection.
    '''
    all_boxes = [[] for _ in range(num_classes)]

    # The winning class per row does not depend on class_id, so compute
    # it once instead of once per class.
    if top_class_only and num_classes > 1:
        top_classes = np.argmax(scores, axis=1)
    else:
        top_classes = None

    # skip class 0, because it's the background class
    for class_id in range(1, num_classes):
        if top_class_only:
            detection_criterion = (top_classes == class_id)
        else:
            detection_criterion = (
                scores[:, class_id] > class_detection_thresh)
        class_detected_indexes = np.where(detection_criterion)[0]

        cls_scores = scores[class_detected_indexes, class_id]
        class_box_start = (class_id - 1) * 4
        class_box_end = class_box_start + 4
        cls_boxes = boxes[class_detected_indexes,
                          class_box_start:class_box_end]

        cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
            .astype(np.float32, copy=False)

        # NMS is only meaningful with at least two candidates.
        if len(cls_dets) > 1:
            keep = nms(cls_dets, nms_thresh, force_cpu=True)
            cls_dets = cls_dets[keep, :]
        all_boxes[class_id] = cls_dets
    return all_boxes


def get_display_boxes(all_boxes):
    ''' Flatten per-class detections into parallel box / class arrays.
    Args:
        all_boxes: A sequence indexed by class_id whose entries are
            iterables of boxes detected for that class.
    Returns:
        A tuple (boxes, classes) of ndarrays: every box found in
        all_boxes, and for each box the class_id it was listed under.
    '''
    labelled = [(det, label)
                for label, dets in enumerate(all_boxes)
                for det in dets]
    flat_boxes = [det for det, _ in labelled]
    flat_labels = [label for _, label in labelled]
    return np.asarray(flat_boxes), np.asarray(flat_labels)


def display_detections(rois, classes, dataset, show_gt_boxes=True):
    ''' Draw detected rois on the image at position dataset.index - 1.
    Args:
        rois: Detected RoIs as ndarray of (x, y, w, h)
        classes: Class labels of each RoI as ndarray of class_id.
        dataset: The dataset the image came from. The image (and its
            ground truth boxes) at `dataset.index - 1` is displayed.
        show_gt_boxes: Also pass the ground truth boxes to the display
            routine when True.
    '''
    # Attach the class id as an extra trailing column of every roi.
    labelled_rois = np.concatenate((rois, classes[:, None]), axis=1)

    # The dataset index is one past the image we want to show.
    prev_index = dataset.index - 1
    truth_boxes = dataset.gt_boxes[prev_index] if show_gt_boxes else None

    pixels = dataset.images[prev_index]
    picture = PIL.Image.fromarray(pixels.astype('uint8'))

    display_image_regions(picture, labelled_rois, truth_boxes,
                          class_id_to_name=dataset.class_id_to_name)

## Load data

In [3]:
# Image size passed to the data loader.
image_size = (500, 500)
# Shared data container; the train/valid sets below are both built from it.
all_data = RCNN_All_Data(image_size)
# dataset.unload_set('valid')
train = RCNN_Set(all_data, 'train')
valid = RCNN_Set(all_data, 'valid')

Parsing annotation files
[Errno 2] No such file or directory: './data/VOC2012/ImageSets/Main/test.txt'


In [266]:
# Choose the split that the testing / evaluation cells below operate on.
dataset = train

## Setup the network

In [5]:
# Init the model
# Drop any `model` left over from an earlier run of this cell; a missing
# name (first run) is fine and deliberately ignored.
try:
    del model
except NameError:
    pass

model = Fast_RCNN_model(dropout_p=0.1).cuda()
# Training mode off -- we only run inference in this notebook.
model.train(mode=False)
# NOTE(review): the initialised weights are presumably overwritten by the
# checkpoint loaded right after -- confirm load_weights covers every layer.
fast_rcnn_weights_init(model)
load_weights(model, 'intermediate/voc/weights-regression-300.pth')

## Test a single image 

In [360]:
# Presumably rewinds the dataset's batch cursor so next_batch starts
# from the first image -- TODO confirm in RCNN_Set.
dataset.reset_index()

In [None]:
%%debug
# Fetch the next single-image batch from the dataset.
images, rois, targets = dataset.next_batch(
    images_per_batch=1,
    roi_batch_size=64,
    use_features=True,
    loop_over=False)

# Detect with the per-class score threshold (not top-class-only), then NMS.
all_boxes = test_image(images, rois, dataset, top_class_only=False,
                       class_detection_thresh=0.05, nms_thresh=0.3)
# Flatten the per-class lists into parallel box / class arrays for display.
display_boxes, display_classes = get_display_boxes(all_boxes)
if len(display_boxes) == 0:
    print('Nothing detected for this image.')
else:
    # display_detections documents its rois as (x, y, w, h), hence the
    # conversion of the detected boxes before drawing.
    display_detections(
        RCNN_Set.transform_regions_width_height(display_boxes),
        display_classes,
        dataset, show_gt_boxes=True)

##### Evaluate a single image AP

In [None]:
gt_boxes = # load from file
for all classes
    for each detection d
        compute IoU of detection d with every gt box of this class
        ovmax, jmax = the max IoU and the index of the gt box attaining it
        
        
        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

##  Test all images

In [263]:
def test(dataset):
    ''' Run detection over the dataset, one image per batch.
    Args:
        dataset: The dataset to pull batches from. Its `sample_count`
            and `num_classes` size the result.
    Returns:
        A nested list indexed as all_boxes[class_id][image_index] holding
        the detections returned by `test_image` for that class and image.
        Slots of images that were never reached stay empty lists.
    '''
    images_per_batch = 1
    roi_batch_size = 64
    n_batches = dataset.sample_count // images_per_batch

    # Pre-allocate one empty slot per (class, image) pair.
    all_boxes = [[[] for _ in range(n_batches)]
                 for _ in range(dataset.num_classes)]

    for image_index in tqdm.tqdm_notebook(range(n_batches)):
        images, rois, targets = dataset.next_batch(
            images_per_batch=images_per_batch,
            roi_batch_size=roi_batch_size,
            use_features=True, loop_over=False)

        # An empty component means the dataset ran out of samples.
        if not (len(images) and len(rois) and len(targets)):
            break

        # Forward pass + per-class NMS for this image.
        detections = test_image(images, rois, dataset, top_class_only=False,
                                class_detection_thresh=0.05, nms_thresh=0.3)

        for class_id, class_dets in enumerate(detections):
            all_boxes[class_id][image_index] = class_dets

    return all_boxes

In [267]:
# Reset before the full pass (presumably rewinds the batch cursor --
# TODO confirm in RCNN_Set).
dataset.reset_index()

In [268]:
# Detections for the whole split, indexed all_boxes[class_id][image_index].
all_boxes = test(dataset)




In [208]:
# Persist the raw detections so evaluation can be re-run without
# repeating the forward passes.
output_dir = 'intermediate/voc/'
det_file = os.path.join(output_dir, 'detections_no_reg.pkl')
with open(det_file, 'wb') as f:
    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)


## Evaluate the results

In [258]:
# Class names ordered by class id, so positions line up with the first
# axis of all_boxes.
_classes = [dataset.class_id_to_name[index] 
            for index in range(dataset.num_classes)]
# Root of the VOC devkit tree used for results and annotations.
_devkit_path = 'data/VOCdevkit/'

In [269]:
# Filenames of the evaluated images: apply the dataset's sample mask, then
# pick the split's indexes.
# NOTE(review): `train_indexes` is used regardless of which split `dataset`
# points at -- confirm this is right when evaluating `valid`.
dataset_filenames = (np.array(dataset.filenames)
                       [dataset.enough_samples_mask]
                       [dataset.train_indexes])

# Strip the directory prefix and extension, leaving bare image ids.
names_only = [name.replace('./data/VOC2012/JPEGImages/',''
                          ).replace('.jpg', '')
              for name in dataset_filenames]
len(names_only)

12871

In [278]:
# Write the evaluated image ids, one per line, as a custom image-set file;
# its name is later handed to _do_python_eval.
imageset_filename = 'slav_train.txt'
save_dir = 'data/VOCdevkit/VOC2012/ImageSets/Main/'
fullpath = os.path.join(save_dir, imageset_filename)
with open(fullpath, 'w') as f:
    for line in names_only:
        f.write('{:s}\n'.format(line))

In [279]:
# Dump the detections as one results text file per foreground class.
_write_voc_results_file(_classes, names_only, all_boxes, _devkit_path)

Writing bg VOC results file
Writing person VOC results file




Writing bottle VOC results file
Writing motorbike VOC results file
Writing sheep VOC results file
Writing car VOC results file
Writing bus VOC results file
Writing dog VOC results file
Writing horse VOC results file
Writing train VOC results file
Writing cow VOC results file
Writing aeroplane VOC results file
Writing bird VOC results file
Writing pottedplant VOC results file
Writing bicycle VOC results file
Writing cat VOC results file
Writing chair VOC results file
Writing boat VOC results file
Writing sofa VOC results file
Writing diningtable VOC results file
Writing tvmonitor VOC results file


In [371]:
# Score the written results files against the annotations of the custom
# image set and print per-class AP plus the mean AP.
_do_python_eval(_devkit_path, _classes, imageset_filename=imageset_filename)

AP for person = 0.0755
AP for bottle = 0.0184
AP for motorbike = 0.1016
AP for sheep = 0.0458
AP for car = 0.0461
AP for bus = 0.0990
AP for dog = 0.1293
AP for horse = 0.0751
AP for train = 0.1066
AP for cow = 0.0529
AP for aeroplane = 0.0629
AP for bird = 0.0832
AP for pottedplant = 0.0658
AP for bicycle = 0.0639
AP for cat = 0.2040
AP for chair = 0.0323
AP for boat = 0.0339
AP for sofa = 0.0866
AP for diningtable = 0.0457
AP for tvmonitor = 0.0961
Mean AP = 0.0762
~~~~~~~~
Results:
0.075
0.018
0.102
0.046
0.046
0.099
0.129
0.075
0.107
0.053
0.063
0.083
0.066
0.064
0.204
0.032
0.034
0.087
0.046
0.096
0.076
~~~~~~~~

--------------------------------------------------------------
Results computed with the **unofficial** Python eval code.
Results should be very close to the official MATLAB eval code.
Recompute with `./tools/reval.py --matlab ...` for your paper.
-- Thanks, The Management
--------------------------------------------------------------


In [217]:
def _write_voc_results_file(classes, filenames, all_boxes, _devkit_path):
    ''' Write one detection results text file per foreground class.
    Args:
        classes: Class names ordered by class id. The 'bg' entry is skipped.
        filenames: Image ids, ordered like the second axis of all_boxes.
        all_boxes: Detections indexed as all_boxes[class_id][image_index].
            Each entry is either an empty list or a 2-D ndarray whose
            first four columns are the box and whose last column is
            the score.
        _devkit_path: Devkit root handed to
            _get_voc_results_file_template to build the output path.
    '''
    for cls_ind, cls in enumerate(classes):
        if cls == 'bg':
            continue
        print('Writing {} VOC results file'.format(cls))
        filename = _get_voc_results_file_template(_devkit_path).format(cls)
        with open(filename, 'wt') as f:
            for im_ind, index in enumerate(filenames):
                dets = all_boxes[cls_ind][im_ind]
                # Entries are either the [] placeholder or an ndarray.
                # `dets == []` on an ndarray is an elementwise comparison
                # (an error on recent NumPy), so test the length instead.
                if len(dets) == 0:
                    continue
                # the VOCdevkit expects 1-based indices
                for det in dets:
                    f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
                            format(index, det[-1],
                                   det[0] + 1, det[1] + 1,
                                   det[2] + 1, det[3] + 1))


In [222]:
def _get_voc_results_file_template(_devkit_path):
    ''' Build the path template of the per-class results file.
    Args:
        _devkit_path: Root directory of the VOC devkit.
    Returns:
        <_devkit_path>/results/VOC2012/Main/_det_trainval_{:s}.txt, where
        the remaining `{:s}` placeholder is meant for the class name.
    '''
    template_name = '_det_' + 'trainval' + '_{:s}.txt'
    path_parts = (_devkit_path, 'results', 'VOC2012', 'Main', template_name)
    return os.path.join(*path_parts)


In [277]:
def _do_python_eval(_devkit_path, _classes, output_dir = 'output', 
                   imageset_filename='slav_train.txt'):
    ''' Evaluate the written results files and report per-class AP.

    For every class except 'bg' this runs `voc_eval` on the class results
    file, prints the AP and pickles the recall / precision / AP to
    <output_dir>/<class>_pr.pkl. Finally prints the mean AP and a summary.

    Args:
        _devkit_path: Root directory of the VOC devkit.
        _classes: Class names; the 'bg' entry is skipped.
        output_dir: Directory for the per-class pickles. Created
            (including missing parents) if it does not exist.
        imageset_filename: Name of the image-set file inside
            <_devkit_path>/VOC2012/ImageSets/Main/ listing the image ids
            to evaluate.
    '''
    annopath = os.path.join(
        _devkit_path,
        'VOC2012',
        'Annotations',
        '{:s}.xml')
    imagesetfile = os.path.join(
        _devkit_path,
        'VOC2012',
        'ImageSets',
        'Main',
        imageset_filename)
    cachedir = os.path.join(_devkit_path, 'annotations_cache')
    aps = []
    # The PASCAL VOC metric changed in 2010
    use_07_metric = False

    # exist_ok avoids the check-then-create race and also handles
    # nested output directories.
    os.makedirs(output_dir, exist_ok=True)

    for cls in _classes:
        if cls == 'bg':
            continue
        filename = _get_voc_results_file_template(_devkit_path).format(cls)
        rec, prec, ap = voc_eval(
            filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps.append(ap)
        print('AP for {} = {:.4f}'.format(cls, ap))
        with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
            pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)

    mean_ap = np.mean(aps)
    print('Mean AP = {:.4f}'.format(mean_ap))
    print('~~~~~~~~')
    print('Results:')
    for ap in aps:
        print('{:.3f}'.format(ap))
    print('{:.3f}'.format(mean_ap))
    print('~~~~~~~~')
    print('')
    print('--------------------------------------------------------------')
    print('Results computed with the **unofficial** Python eval code.')
    print('Results should be very close to the official MATLAB eval code.')
    print('Recompute with `./tools/reval.py --matlab ...` for your paper.')
    print('-- Thanks, The Management')
    print('--------------------------------------------------------------')


In [258]:
# TODO check how we perform with and without regression targets