In [1]:
%matplotlib inline
import numpy as np
from scipy.misc import imread
import pickle
import matplotlib.pyplot as plt
from scipy.misc import imread, imresize
import tensorflow as tf
from keras.preprocessing import image
from keras.backend.tensorflow_backend import set_session
from ssd import SSD300
from keras.applications.imagenet_utils import preprocess_input
from ssd_utils import BBoxUtility
import matplotlib.pyplot as plt
from SSD_tester import calc_detection_prec_rec, calc_detection_ap
from SSD_RGBD import RGBD_SSD300
from depth_preprocess import hole_filling
import cv2

# Let TensorFlow grow its GPU memory footprint on demand instead of reserving
# the whole device up front, so the notebook can share a GPU with other jobs.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(
        allow_growth=True
    )
)
sess = tf.Session(config=config)
# Register the session with Keras. Without this call Keras lazily creates its
# own session and the allow_growth option above never reaches the model.
set_session(sess)



Using TensorFlow backend.


In [2]:
# Object categories, in label order: the plotting cell looks a detection's
# name up as NYU_CLASSES[label - 1].
NYU_CLASSES = [
    'bathtub', 'bed', 'bookshelf', 'box', 'chair',
    'counter', 'desk', 'door', 'dresser', 'garbage_bin',
    'lamp', 'monitor', 'night_stand', 'pillow', 'sink',
    'sofa', 'table', 'tv', 'toilet',
]
# One slot more than the listed categories (presumably the background class
# used by SSD -- confirm against RGBD_SSD300).
NUM_CLASSES = len(NYU_CLASSES) + 1

# Network input shapes, channels last.
rgb_input_shape = (300, 300, 3)
depth_input_shape = (300, 300, 1)

In [3]:
# Build the RGB-D SSD300 network from the two input shapes, then restore the
# trained checkpoint into it.
weights_path = '/data/jun/checkpoints/SUNRGBD/RGBD/v2-4/weights-v2-4.33-2.43.hdf5'
model = RGBD_SSD300(
    rgb_input_shape,
    depth_input_shape,
    num_classes=NUM_CLASSES,
)
model.load_weights(weights_path)

In [4]:
# Ground-truth annotations, keyed by image path relative to the dataset root.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
# The context managers make sure the file handles are closed after loading.
with open('../pkls/SUNRGBD/RGB_v8.pkl', 'rb') as rgb_file:
    rgb_gt = pickle.load(rgb_file)
with open('../pkls/SUNRGBD/Depth_v8.pkl', 'rb') as depth_file:
    depth_gt = pickle.load(depth_file)

rgb_keys = sorted(rgb_gt.keys())
depth_keys = sorted(depth_gt.keys())
# RGB and depth images are paired purely by their position in the sorted key
# lists, so a length mismatch would silently misalign every pair after it.
assert len(rgb_keys) == len(depth_keys), (
    'RGB and depth annotation files list a different number of images')

# First 90% of the sorted keys is the training split, the rest is validation.
num_train = int(round(0.9 * len(rgb_keys)))
rgb_train_keys = rgb_keys[:num_train]
rgb_val_keys = rgb_keys[num_train:]
depth_train_keys = depth_keys[:num_train]
depth_val_keys = depth_keys[num_train:]
num_val = len(rgb_val_keys)

In [5]:
path_prefix = '/data/jun/dataset/'
rgb_inputs = []
depth_inputs = []
# Original-resolution RGB images, kept so the visualisation cell has something
# to draw the detections on (this list was previously never filled).
images = []

for rgb_key, depth_key in zip(rgb_val_keys, depth_val_keys):
    rgb_img_path = path_prefix + rgb_key
    depth_img_path = path_prefix + depth_key

    rgb_raw = imread(rgb_img_path)
    images.append(rgb_raw)

    # The cast to float32 *before* imresize is deliberate and kept as-is:
    # the resized values must match what the network saw during training.
    rgb_img = rgb_raw.astype('float32')
    depth_img = imread(depth_img_path, mode='L').astype('float32')
    rgb_img = imresize(rgb_img, (300, 300)).astype('float32')
    depth_img = imresize(depth_img, (300, 300)).astype('float32')

    # Depth: scale to [0, 1], square-root compress, stretch to integer levels,
    # fill holes, then turn the map into a smoothed edge image.
    depth_max = np.max(depth_img)
    if depth_max > 0:
        # Skip the division for an all-zero depth map to avoid 0/0 -> NaN.
        depth_img = depth_img / depth_max
    depth_img = np.sqrt(depth_img)
    depth_img = np.array(depth_img*256, dtype=int)
    depth_img = hole_filling(depth_img)
    depth_img = np.uint8(depth_img)
    depth_img = cv2.Canny(depth_img, 70, 110)
    depth_img = cv2.bilateralFilter(depth_img, d=5, sigmaColor=5, sigmaSpace=2)
    # Add the trailing channel axis expected by depth_input_shape.
    depth_img = np.expand_dims(depth_img, axis=2)

    # RGB: per-image standardisation (zero mean, unit standard deviation).
    std = np.std(rgb_img)
    rgb_img -= np.mean(rgb_img)
    rgb_img /= std

    rgb_inputs.append(rgb_img.copy())
    depth_inputs.append(depth_img.copy())

# The model takes two inputs, in this order: RGB batch, depth batch.
inputs = [np.array(rgb_inputs), np.array(depth_inputs)]

`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  from ipykernel import kernelapp as app
`imread` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``imageio.imread`` instead.
  app.launch_new_instance()
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.
`imresize` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0.
Use ``skimage.transform.resize`` instead.


In [6]:
# Prior (default) boxes used by BBoxUtility to decode the network output.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
# The context manager makes sure the file handle is closed after loading.
with open('../pkls/prior_boxes_ssd300.pkl', 'rb') as priors_file:
    priors = pickle.load(priors_file)
bbox_util = BBoxUtility(NUM_CLASSES, priors)

# Run the network over the whole validation set.
preds = model.predict(inputs, batch_size=16, verbose=1)



In [7]:
# Decode the raw predictions into one detection table per image. Later cells
# read each row as [label, confidence, xmin, ymin, xmax, ymax].
confidence_threshold = 0.6
results = bbox_util.detection_out(
    preds,
    confidence_threshold=confidence_threshold,
)

In [8]:
# Per-image ground truth for the evaluator. Images hold different numbers of
# boxes, so each container is an explicit 1-D object array with one entry per
# image; calling np.array() on a ragged list is deprecated/rejected by newer
# NumPy and would silently build a dense array if all counts happened to match.
num_gt_images = len(rgb_val_keys)
gt_bboxes = np.empty(num_gt_images, dtype=object)
gt_labels = np.empty(num_gt_images, dtype=object)
gt_scores = np.empty(num_gt_images, dtype=object)
for gt_idx, key in enumerate(rgb_val_keys):
    annotation = rgb_gt[key]
    # Columns 0-3 are the box coordinates; columns 4+ are searched for the
    # entries equal to 1, whose column offset is taken as the class label.
    # NOTE(review): assumes exactly one such entry per row, otherwise labels
    # and boxes get out of step -- confirm against the annotation format.
    label_cols = np.where(annotation[:, 4:] == 1)[1]
    gt_bboxes[gt_idx] = annotation[:, :4]
    gt_labels[gt_idx] = label_cols.reshape(len(label_cols), 1)
    gt_scores[gt_idx] = np.ones((len(label_cols), 1))

In [9]:
# Split every per-image detection table into labels, scores and boxes for the
# evaluator. Each container is an explicit 1-D object array with one entry per
# image, because the number of detections differs from image to image.
num_pred_images = len(results)
pred_labels = np.empty(num_pred_images, dtype=object)
pred_scores = np.empty(num_pred_images, dtype=object)
pred_bboxes = np.empty(num_pred_images, dtype=object)
for pred_idx, result in enumerate(results):
    if len(result) != 0:
        nm = len(result[:, 1])
        # Column 0 is the 1-based class label; shift it to 0-based so it
        # lines up with the ground-truth labels.
        pred_labels[pred_idx] = (result[:, 0]-1).reshape(nm, 1)
        pred_scores[pred_idx] = result[:, 1:2].reshape(nm, 1)
        pred_bboxes[pred_idx] = result[:, 2:].reshape(nm, 4)
    else:
        # No detections: keep empty placeholders with the same column counts
        # as the non-empty case (boxes have 4 coordinates, not 1).
        pred_labels[pred_idx] = np.array([]).reshape(0, 1)
        pred_scores[pred_idx] = np.array([]).reshape(0, 1)
        pred_bboxes[pred_idx] = np.array([]).reshape(0, 4)

In [10]:
# Sanity check: one ground-truth entry per validation image.
np.shape(gt_labels)

(966,)

In [11]:
# Per-class precision/recall; a detection is matched to a ground-truth box
# using the IoU threshold below.
iou_thresh = 0.3
prec, rec = calc_detection_prec_rec(
    pred_labels, pred_scores, pred_bboxes,
    gt_bboxes, gt_labels,
    iou_thresh=iou_thresh,
)

In [12]:
# Per-class average precision and its mean. nanmean skips NaN entries
# (presumably classes absent from the validation split -- confirm).
ap = calc_detection_ap(prec, rec, use_07_metric=True)
mean_ap = np.nanmean(ap)
{'ap': ap, 'map': mean_ap}

{'ap': array([ 0.27272727,  0.4239952 ,  0.43528139,  0.12702366,  0.61683616,
         0.27986443,  0.23825798,  0.20377804,  0.17467532,  0.29707609,
         0.21165067,  0.1384535 ,  0.        ,  0.23712647,  0.34781145,
         0.51544657,  0.42311764,  0.16942149,  0.7701049 ]),
 'map': 0.30961306395317917}

In [None]:
# Draw each image with its detections. Box coordinates in `results` are
# fractions of the image size, so they are scaled by the image's own
# width/height before plotting.

# Only draw detections at or above this confidence (the old comment said 0.6
# while the code used 0.5; `results` was already decoded with a 0.6 threshold).
vis_conf_threshold = 0.6
# Colour table indexed by class label; built once instead of once per image.
class_colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist()

for img_idx, img in enumerate(images):
    detections = results[img_idx]
    if len(detections) == 0:
        continue

    # Rows are [label, confidence, xmin, ymin, xmax, ymax].
    confident = detections[detections[:, 1] >= vis_conf_threshold]

    plt.imshow(img / 255.)
    currentAxis = plt.gca()
    img_height, img_width = img.shape[0], img.shape[1]

    # Distinct loop names: the inner loop no longer reuses the image index.
    for det_label, score, det_xmin, det_ymin, det_xmax, det_ymax in confident:
        xmin = int(round(det_xmin * img_width))
        ymin = int(round(det_ymin * img_height))
        xmax = int(round(det_xmax * img_width))
        ymax = int(round(det_ymax * img_height))
        label = int(det_label)
        # Labels are 1-based; NYU_CLASSES is 0-based.
        label_name = NYU_CLASSES[label - 1]
        display_txt = '{:0.2f}, {}'.format(score, label_name)
        coords = (xmin, ymin), xmax-xmin, ymax-ymin
        color = class_colors[label]
        currentAxis.add_patch(plt.Rectangle(*coords, fill=False, edgecolor=color, linewidth=2))
        currentAxis.text(xmin, ymin, display_txt, bbox={'facecolor':color, 'alpha':0.5})

    plt.show()

In [None]:
#calc_map()
# Collect the raw ground-truth annotation of every validation image.
# `val_keys` / `gt` are not defined anywhere in this notebook; the RGB split
# and annotations are the names the earlier cells actually create.
y_true = np.empty(len(rgb_val_keys), dtype=object)
for y_idx, key in enumerate(rgb_val_keys):
    # One entry per image; images hold different numbers of boxes, hence the
    # explicit object array instead of np.array() on a ragged list.
    y_true[y_idx] = rgb_gt[key]
print(y_true.shape)

In [None]:
# NOTE(review): this cell looks like a leftover from an RGB-only version of
# the notebook and will not run as-is on a fresh kernel:
#   * `val_keys` is never defined in the cells above (they define
#     `rgb_val_keys` / `depth_val_keys`).
#   * `model` was built by RGBD_SSD300 from an RGB *and* a depth input shape,
#     but only an RGB batch is passed to predict() here -- confirm the
#     intended model/inputs before running.
#   * The RGB preprocessing here (keras preprocess_input) differs from the
#     per-image standardisation used in the evaluation cell.
# It also rebinds `inputs`, `images`, `preds` and `results` from earlier cells.
inputs = []
images = []
for key in val_keys:
    img_path = path_prefix + key
    # Network input: image resized to 300x300 as a float array.
    img = image.load_img(img_path, target_size=(300, 300))
    img = image.img_to_array(img)
    # Keep the original-resolution image for plotting.
    images.append(imread(img_path))
    inputs.append(img.copy())
inputs = preprocess_input(np.array(inputs))
preds = model.predict(inputs, batch_size=1, verbose=1)
# Decoded with detection_out's default confidence threshold.
results = bbox_util.detection_out(preds)

In [None]:
#calc_map(y_true, results)
# Show the decoded detections of the first image as a quick sanity check.
first_result = results[0]
print(first_result)