# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/installation.md) before you start.

# Imports

In [2]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
sys.path.append("..")
import tarfile
import tensorflow as tf
import zipfile
from object_detection.eval_util import evaluate_detection_results_pascal_voc

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
%matplotlib inline
%load_ext autoreload
%autoreload 2
# The sys.path.append("..") call above is needed since the notebook is stored in the object_detection folder.

from utils import label_map_util

from utils import visualization_utils as vis_util

## Object detection imports
Here are the imports from the object detection module.

In [3]:
from utils import label_map_util

from utils import visualization_utils as vis_util
def get_annotations(image_path):
    """Load the annotation that belongs to an image.

    The annotation file is looked up at
    ``<parent of the image's directory>/Annotations/<image id>.xml``, where the
    image id is the image file name without its extension.

    Args:
        image_path: Path to an image file, e.g. ``.../JPEGImages/1023.jpg``.

    Returns:
        Whatever ``xml_to_dict`` returns for the annotation path.
        NOTE(review): ``xml_to_dict`` is not defined in the visible cells;
        presumably it arrives via ``from create_dataset import *`` -- confirm.
    """
    # splitext copes with any extension length ('.jpg', '.jpeg', ...), whereas
    # slicing off the last four characters only works for 3-letter extensions.
    img_id = os.path.splitext(os.path.basename(image_path))[0]
    dataset_root = os.path.dirname(os.path.dirname(image_path))
    annotation_path = os.path.join(
        dataset_root, 'Annotations', '{}.xml'.format(img_id)
    )
    return xml_to_dict(annotation_path)
from utils.kitti import show_groundtruth, create_results_list
from utils.kitti import visualize_predictions
import glob

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [5]:
# Which exported model to evaluate (alternatives kept for reference).
#MODEL_NAME = 'kitti_frozen'
# atrous_train_check
# Path to the frozen detection graph. This is the actual model that is used for the object detection.
#PATH_TO_CKPT = 'kitti_mobilenet_frozen/frozen_inference_graph.pb'
PATH_TO_CKPT = 'atrous_train_check_frozen/frozen_inference_graph.pb'
# Label map file; its entries are used to attach the correct label to each box.
PATH_TO_LABELS = os.path.join('data', 'kitti_map.pbtxt')

# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 9

## Load a (frozen) Tensorflow model into memory.

In [6]:
# Read the serialized GraphDef from PATH_TO_CKPT and import it, with no name
# prefix, into a dedicated tf.Graph.
detection_graph = tf.Graph()
with detection_graph.as_default():
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')


## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`. Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [7]:
# Load the label map, then derive `categories` and `category_index` from it.
PATH_TO_LABELS = os.path.join('data', 'kitti_map.pbtxt')
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Helper code

In [8]:
def load_image_into_numpy_array(image):
    """Convert an image object into a ``(height, width, 3)`` uint8 array.

    ``image`` must expose ``size`` as ``(width, height)`` and ``getdata()``
    yielding one 3-value pixel per position, in row-major order.
    """
    width, height = image.size
    pixels = np.array(image.getdata())
    return pixels.reshape((height, width, 3)).astype(np.uint8)

In [9]:
# Only the first line of each id file is used; it is split on commas.
# readline() + strip() keeps the trailing newline out of the last id and does
# not raise IndexError on an empty file (which readlines()[0] would).
with open('kitti_data/train.txt') as f:
    train_ids = f.readline().strip().split(',')
with open('kitti_data/valid.txt') as f:
    valid_ids = f.readline().strip().split(',')

In [10]:
len(train_ids)

6732

In [11]:
len(valid_ids)

749

# Detection

In [12]:
PATH_TO_TEST_IMAGES_DIR = 'voc_kitti_valid/VOC2012/JPEGImages/'
p = 'voc_kitti_valid/VOC2012/JPEGImages/1023.jpg'
TEST_IMAGE_PATHS = [ p]
FIGSIZE = (20, 20)

In [13]:
import glob
def glob_base(pat):
    """Return the base names (directory part removed) of all paths matching ``pat``."""
    return [os.path.basename(match) for match in glob.glob(pat)]

In [14]:
glob_base('voc_kitti/VOC2012/JPEGImages/*.jpg')[0]

'5393.jpg'

In [15]:
from create_dataset import *

### Check that valid files don't overlap with train files

In [16]:
# Re-derive both id lists from the image files matched on disk.
valid_ids = glob_base(VOC_VALID_DIR + '/VOC2012/JPEGImages/*.jpg')
train_ids = glob_base(VOC_TRAIN_DIR + '/VOC2012/JPEGImages/*.jpg')

# A plain set intersection needs no pandas (which no earlier cell imports
# explicitly) and lets the failure message name the offending files.
overlapping_ids = set(valid_ids) & set(train_ids)
assert not overlapping_ids, (
    'images present in both train and valid: {}'.format(sorted(overlapping_ids)[:10])
)

0

In [19]:
test_dir = 'voc_kitti_valid/VOC2012/JPEGImages/'
test_image_paths = [os.path.join(test_dir, x) for x in valid_ids]

In [20]:
len(test_image_paths)

749

In [29]:
train_labs= glob.glob('kitti_data/training/label_2/*.txt')
test_labs = glob.glob('kitti_data/valid/label_2/*.txt')

In [None]:
len(test_image_paths)

In [31]:
#perf.to_frame('ssd_map').round(3).to_csv('~/Desktop/ssd_mobilenet_mAP_by_category.csv')

In [None]:
!cat ssd_mobilenet_mAP_by_category.csv

In [35]:
#pd.DataFrame(img_scores).T.to_msgpack('ssd_mobilenet_img_scores.mp')

In [30]:
%%time
# Run create_results_list (imported from utils.kitti) over every path in
# test_image_paths inside one session; `res` is what the evaluation cells
# below consume. The recorded run took roughly 8 minutes of wall time.
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        res = create_results_list(test_image_paths, sess, detection_graph)

CPU times: user 5min 2s, sys: 27.6 s, total: 5min 30s
Wall time: 8min 9s


In [32]:
import pandas as pd
perf = pd.Series(evaluate_detection_results_pascal_voc(res, categories))

  num_images_correctly_detected_per_class / num_gt_imgs_per_class)


In [36]:
perf

PerformanceByCategory/mAP@0.5IOU/car               0.959948
PerformanceByCategory/mAP@0.5IOU/cyclist           0.846211
PerformanceByCategory/mAP@0.5IOU/dontcare          0.339320
PerformanceByCategory/mAP@0.5IOU/misc              0.844625
PerformanceByCategory/mAP@0.5IOU/pedestrian        0.792805
PerformanceByCategory/mAP@0.5IOU/person_sitting    0.670089
PerformanceByCategory/mAP@0.5IOU/tram              0.940657
PerformanceByCategory/mAP@0.5IOU/truck             0.943405
PerformanceByCategory/mAP@0.5IOU/van               0.936856
Precision/mAP@0.5IOU                               0.808213
dtype: float64

### Make nice performance table

In [39]:
def clean_idx(perf):
    """Shorten the index labels of an evaluation-result Series.

    Labels that start with ``'PerformanceByCategory/mAP@0.5IOU/'`` lose that
    prefix (leaving just the category name) and the last label is renamed to
    ``'Total'``.  Labels without the prefix are left alone, so running the
    function again on its own output is a no-op rather than blanking every
    label.

    Args:
        perf: pandas Series whose index holds the metric names; the overall
            metric is expected to be the last entry.

    Returns:
        The same Series object, with its index replaced in place.
    """
    category_prefix = 'PerformanceByCategory/mAP@0.5IOU/'
    if len(perf.index) == 0:
        # Nothing to rename; avoids indexing the last element of an empty list.
        return perf
    labels = [
        label[len(category_prefix):]
        if isinstance(label, str) and label.startswith(category_prefix)
        else label
        for label in perf.index
    ]
    labels[-1] = 'Total'
    perf.index = labels
    return perf

In [40]:
perf = clean_idx(perf)

In [41]:
perf.to_frame('rcnn_mAP').round(3).to_csv('~/Desktop/faster_rcnn_mAP_by_category.csv')

In [44]:
ssd_map = pd.read_csv('ssd_mobilenet_mAP_by_category.csv', index_col=0)

In [46]:
ssd_map.assign(faster_rcnn=perf).round(3).to_csv('both_models_map_by_category.csv')

In [None]:
def get_dict_slice(res, slc_obj):
    """Apply the same index or slice to every value of ``res``.

    Returns a new dict with the keys of ``res``, each mapped to
    ``res[key][slc_obj]``.
    """
    return {key: values[slc_obj] for key, values in res.items()}
        
        

In [None]:
%%capture
# Score every image on its own: entry i of each list in `res` is sliced out
# (as a one-element slice) and evaluated separately, keyed by its image id.
# %%capture hides whatever the evaluation emits while this cell runs.
# NOTE(review): `res['image_id'][:-1]` leaves the last image without a score --
# confirm that this is intended.
img_scores = {image_id: evaluate_detection_results_pascal_voc(
    get_dict_slice(res, slice(i, i+1)), categories)
              for i, image_id in enumerate((res['image_id'][:-1]))}
                                           
                                           

In [None]:
cut = len('PerformanceByCategory/mAP@0.5IOU/')

In [None]:
perf

In [None]:
OVERALL_PERF_KEY = 'Precision/mAP@0.5IOU'



In [None]:
import pickle

In [None]:
# Persist the raw detection results. The context manager guarantees the file
# is flushed and closed, even if pickling fails part-way.
with open('mobile_net_valid_results_dct.pkl', 'wb') as results_file:
    pickle.dump(res, results_file)

In [None]:
#re2 = pickle.load(open('valid_results_dct.pkl', 'rb'))

In [None]:
# `re2` only exists if the commented-out pickle.load cell has been run, so
# guard the delete to avoid a NameError on a fresh kernel.
if 're2' in globals():
    del re2

In [None]:
categories

In [None]:
[idx for idx, i in enumerate(res['groundtruth_classes']) if 4 in i][:3]

In [None]:
res['image_id'][0]

In [None]:
#Image(test_image_paths[0]

In [None]:
import pandas as pd
pd.Series(evaluate_detection_results_pascal_voc(res, categories))

In [None]:
len(test_image_paths)

In [None]:
320/ 7491

In [None]:
image = Image.open(image_path)
plt.figure(figsize=FIGSIZE)
plt.imshow(image)

In [None]:
from kitti_constants import name_to_id

In [51]:
from object_detection.utils.visualization_utils import visualize_boxes_and_labels_on_image_array

In [52]:
from utils.kitti import get_boxes_scores_classes

In [None]:
plt.imshow(plt.imread('worst_prediction.jpg'))

In [None]:
img_scores = pd.read_msgpack('i')

In [48]:
def get_img_scores(image_path):
    """Return the per-image evaluation scores recorded for ``image_path``.

    The image id is the file name without its extension; it is looked up in
    the module-level ``img_scores`` mapping, which must already be defined.

    Args:
        image_path: Path to an image file, e.g. ``.../JPEGImages/1023.jpg``.

    Returns:
        pandas Series of the scores rounded to 2 decimals, NaN entries dropped.

    Raises:
        KeyError: if ``img_scores`` has no entry for the image id.
    """
    # splitext copes with any extension length, unlike slicing off the last
    # four characters.
    imageid = os.path.splitext(os.path.basename(image_path))[0]
    return pd.Series(img_scores[imageid]).round(2).dropna()
        

In [53]:
%%time
%precision 4
# Pick one validation image at random, time a single inference pass, then show
# the model's detections, the ground-truth boxes and the raw image as three
# separate figures.
# NOTE(review): this cell needs `img_scores` (read by get_img_scores) to exist;
# the recorded output below shows it failing with NameError when it does not.
# NOTE(review): `detect_objects` is not defined or explicitly imported in the
# visible cells; presumably it arrives via `from create_dataset import *` -- confirm.
import time
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # No seed is set, so a different image is chosen on every run.
        image_path = np.random.choice(test_image_paths)
       
        #image_path = test_image_paths[650]
        image = Image.open(image_path)
        image_np = load_image_into_numpy_array(image)
        # Time only the get_boxes_scores_classes call.
        start = time.time()
        boxes, scores, classes, num_detections = get_boxes_scores_classes(image_np, sess, detection_graph)
        print('inference time: {} seconds'.format(
            np.round(time.time() -  start, 2)))
        print ('scores\n{}'.format(get_img_scores(image_path)))
        # Second pass over the same image; its result is what gets plotted as 'Model'.
        image_process = detect_objects(image_np, sess, detection_graph)
        # [x['name'] for x in data['object']]
        plt.figure(figsize=FIGSIZE)
        plt.imshow(image_process)
        plt.title('Model', fontsize=16)
        #plt.imsave(image_process, 'worst_prediction labs.jpg')
        plt.figure(figsize=FIGSIZE)
        truth_img = show_groundtruth(image_path)
        plt.imshow(truth_img)
        plt.title('Human Labels', fontsize=16)
        plt.figure(figsize=FIGSIZE)
        # Re-read the file from disk for the unannotated view.
        plt.imshow(load_image_into_numpy_array(Image.open(image_path)))
        plt.title('Raw Image')
        #plt.savefig('worst_prediction labs.jpg')

inference time: 1.94 seconds


NameError: name 'img_scores' is not defined

In [None]:
img_scores['3189']

In [None]:
label_paths = glob.glob(os.path.join('kitti_data', '*','label_2', '*.txt'))

In [None]:

#label_paths

In [None]:
for path in label_paths:
    strip_leading_zeroes(path)

In [None]:
from_kitti = {'image': {'path': 'kitti_data/training/image_2/2456.jpg', 'height': 370, 'segmented_path': None, 'id': '2456', 'width': 1224}, 'detections': [{'top': 136.7, 'left': 275.0, 'right': 353.9, 'bottom': 299.54, 'label': 'person'}]}

In [None]:
from create_pascal_tf_record import kitti_dict_to_tf_example

In [None]:
kitti_dict_to_tf_example(from_kitti)

In [None]:
#create_records()

### Kitti Port

In [None]:
import glob
images = glob.glob('/Users/shleifer/kitti_trunc/training/image_2/*')


In [None]:
labels = glob.glob('/Users/shleifer/kitti_trunc/training/label_2/*')

In [None]:
dets = glob.glob('/Users/shleifer/kitti_trunc/training/det_2/*')

In [None]:
orig_image_paths = glob.glob('/Users/shleifer/kitti_data/training/image_2/*.png')

In [None]:
from create_dataset import convert_to_jpg_and_save
%load_ext autoreload
%autoreload 2

In [None]:
def convert_to_jpg_and_save(png_path):
    """Save an RGB ``.jpg`` copy of an image next to the original file.

    Note: this definition shadows the ``convert_to_jpg_and_save`` imported
    from ``create_dataset`` in the preceding cell.

    Args:
        png_path: Path of the source image (``.png`` files in this notebook).

    Returns:
        Path of the written file: ``png_path`` with its extension replaced by
        ``.jpg``.
    """
    # splitext replaces whatever extension is present instead of assuming it
    # is exactly four characters long.
    new_path = '{}.jpg'.format(os.path.splitext(png_path)[0])
    # The context manager releases the source file handle once the pixel data
    # has been converted into the new RGB image.
    with Image.open(png_path) as im:
        rgb_im = im.convert('RGB')
    rgb_im.save(new_path)
    return new_path
Image.open(convert_to_jpg_and_save(orig_image_paths[0]))

In [None]:
import shutil

In [None]:
import pandas as pd
pd.value_counts(list(map(lambda x: len(os.path.basename(x)),images)))

In [None]:
Image.open(images[0])

In [None]:
root_dir = '/Users/shleifer/voc_kitti/VOC2012/'
img_dir = os.path.join(root_dir, 'JPEGImages')
ann_dir = os.path.join(root_dir, 'Annotations')
set_dir = os.path.join(root_dir, 'ImageSets', 'Main')

In [None]:
labels = glob.glob('/Users/shleifer/kitti_trunc/training/label_2/*')

In [None]:
xml_p = glob.glob('/Users/shleifer/voc_kitti/VOC2012/Annotations/1023.xml')[0]

In [None]:
voc_annot = tf.gfile.GFile(xml_p, 'r').read()

In [None]:
%load_ext autoreload
%autoreload 2
from create_pascal_tf_record import dict_to_tf_example

In [None]:
import sys; sys.path.append('..')

In [None]:
from object_detection.create_pascal_tf_record import dict_to_tf_example

In [None]:
dataset_directory =  '/Users/shleifer/voc_kitti/'
image_subdirectory='JPEGImages'

In [None]:
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
import hashlib
import io
import logging
import os

from lxml import etree
import PIL.Image
import tensorflow as tf


# Globals read by create_records: the output writer, the label lookup and the
# annotation directory.
# NOTE(review): the absolute /Users/... paths tie this cell to one machine.
writer = tf.python_io.TFRecordWriter('data/train.tfrecord')
label_map_dict = label_map_util.get_label_map_dict('data/my_map.pbtxt')
annotations_dir = '/Users/shleifer/voc_kitti/VOC2012/Annotations/'

# File listing the example ids to convert (assigned once; same value as before).
examples_path = '/Users/shleifer/voc_kitti/VOC2012/ImageSets/Main/trainval.txt'

#examples_list

def create_records(examples_path):
    """Convert a slice of the listed annotations and write them to ``writer``.

    Reads the example ids from ``examples_path``, parses
    ``<annotations_dir>/<id>.xml`` for entries 1..10 of that list, converts
    each parsed annotation with ``dict_to_tf_example`` and writes the
    serialized result.

    Relies on the module-level ``annotations_dir``, ``dataset_directory``,
    ``label_map_dict`` and ``writer``.  The writer is closed when the loop
    finishes or raises, so a new writer must be created before calling this
    function again.

    Args:
        examples_path: Path of the text file listing the example ids.
    """
    examples_list = dataset_util.read_examples_list(examples_path)
    try:
        # NOTE(review): [1:11] skips the first id and caps the run at 10
        # examples -- this looks like a debugging subset; confirm before
        # generating a full dataset.
        for example in examples_list[1:11]:
            path = os.path.join(annotations_dir, example + '.xml')
            with tf.gfile.GFile(path, 'r') as fid:
                xml_str = fid.read()
            xml = etree.fromstring(xml_str)
            data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
            tf_example = dict_to_tf_example(data,
                                            dataset_directory,
                                            label_map_dict,
                                            False)
            writer.write(tf_example.SerializeToString())
    finally:
        # Close even when an example fails, so records already written are
        # flushed and the file handle is not leaked.
        writer.close()

In [None]:
create_records(examples_path)

In [None]:
data['folder']

In [None]:
os.path.join('/Users/shleifer/voc_kitti/VOC2012/JPEGImages/')

In [None]:
data['filename']

In [None]:
f = 'data/train.tfrecord'
# Parse and print only the first record of the file, then pull out a few
# individual features from it.
for serialized_example in tf.python_io.tf_record_iterator(f):
    example = tf.train.Example()
    example.ParseFromString(serialized_example)
    print(example)

    # Read data in specified format
    # NOTE(review): the "ids" / "values" keys and the float_list type used for
    # the label are not established anywhere else in this notebook -- confirm
    # them against what dict_to_tf_example actually writes.
    label = example.features.feature["image/object/class/label"].float_list.value
    ids = example.features.feature["ids"].int64_list.value
    values = example.features.feature["values"].float_list.value
    print(label, ids, values)
    # Stop after the first record; the break sits last so the feature reads
    # above are actually executed.
    break

In [None]:
example.features.feature.keys()

In [None]:
Image.open('/Users/shleifer/voc_kitti/VOC2012/JPEGImages/5308.jpg')

In [None]:
ls data/train.tfrecord

In [None]:
#examples_list

In [None]:
label_map_dict

In [None]:
data['object']

In [None]:
#full_path

In [None]:
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()

In [None]:
import io
encoded_jpg

In [None]:
from cv2 import imread

In [None]:
import io

In [None]:
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)

In [None]:
data['folder']

In [None]:
/Users/shleifer/voc_kitti/VOC2012/JPEGImages/

In [None]:
data

In [None]:
dict_to_tf_example()

In [None]:
examples_path = '~/sh'

In [None]:
ls data

In [None]:
!cat {labels[0]}

In [None]:
# list image sets
all_files = !ls {set_dir}
image_sets = sorted(list(set([filename.replace('.txt', '').strip().split('_')[0] for filename in all_files])))
image_sets

In [None]:
ann_dir