In [14]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Compare (major, minor) numerically. A plain string comparison orders
# '1.10.0' before '1.4.0' and would wrongly reject newer releases.
if tuple(int(part) for part in tf.__version__.split('.')[:2]) < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')


In [15]:
%matplotlib inline

In [16]:
# label_map_util is used by the label-loading cell further down, so it has to be
# imported for the notebook to run on a fresh kernel. It is imported from the same
# package path as utils_ops.
from object_detection.utils import label_map_util

# from utils import visualization_utils as vis_util

In [17]:
# Serialized frozen graph file that the graph-loading cell parses into a tf.GraphDef.
PATH_TO_CKPT = 'output/frozen_inference_graph.pb'

In [18]:
# Label map file handed to label_map_util.load_labelmap.
PATH_TO_LABELS = 'data/object-detection.pbtxt'

In [19]:
# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 1

In [20]:
# Build a dedicated graph and populate it from the frozen GraphDef on disk.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the raw protobuf bytes first; the file is not needed after that.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def = tf.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' imports the nodes without a name prefix.
    tf.import_graph_def(od_graph_def, name='')

In [21]:
# Read the label map, convert it to a category list capped at NUM_CLASSES,
# then build the category index from that list.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

In [22]:
def load_image_into_numpy_array(image):
    """Convert a PIL image into a (height, width, 3) uint8 numpy array.

    Images that are not already in 'RGB' mode (grayscale, palette, RGBA, ...)
    are converted to RGB first; without that step the 3-channel reshape below
    fails because the pixel data does not hold three values per pixel.

    Args:
        image: a PIL image (anything exposing `mode`, `size`, `getdata()` and
            `convert()`).

    Returns:
        numpy.ndarray of dtype uint8 with shape (height, width, 3).
    """
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # PIL reports size as (width, height); the array is laid out rows-first.
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)

In [78]:
PATH_TO_TEST_IMAGES_DIR = 'test_images'
# Every entry of the directory is used, in the order os.listdir returns them.
TEST_IMAGE_PATHS = [
    os.path.join(PATH_TO_TEST_IMAGES_DIR, entry)
    for entry in os.listdir(PATH_TO_TEST_IMAGES_DIR)
]
#TEST_IMAGE_PATHS = ["test_images/frame779.jpg"]
print(TEST_IMAGE_PATHS)

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

['test_images/frame779.jpg', 'test_images/frame1249.jpg', 'test_images/frame1496.jpg', 'test_images/frame1251.jpg', 'test_images/frame1038.jpg', 'test_images/frame1485.jpg', 'test_images/frame757.jpg', 'test_images/frame1483.jpg']


In [47]:
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return its outputs.

    A new tf.Session is opened on `graph` for every call. The function looks up
    whichever of the known output tensors exist in the graph, feeds `image`
    with a leading batch dimension added, and takes batch index 0 of each
    result.

    Args:
        image: image array; it is fed to 'image_tensor:0' after
            `np.expand_dims(image, 0)`, and `image.shape[0]` / `image.shape[1]`
            are passed to the mask reframing helper when masks are present.
        graph: tf.Graph that contains an 'image_tensor:0' tensor.

    Returns:
        dict with 'num_detections' (int), 'detection_classes' (uint8 array),
        'detection_boxes', 'detection_scores' and, only if the graph has a
        'detection_masks:0' tensor, 'detection_masks'.

    Raises:
        KeyError: if the graph lacks one of the four non-mask output tensors,
            since those keys are indexed unconditionally after the run.
    """
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            # Only tensors that actually exist in the graph are fetched.
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                # Keep only the first real_num_detection boxes/masks.
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                # Binarize the reframed masks at 0.5.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict

In [91]:
def cut_head(image_path):
    """Crop an image to its first detection box and return the crop with its score.

    The image is run through `detection_graph`; the detection at index 0
    (printed as the "highest score" — presumably detections come back sorted by
    score, confirm against the model) supplies the box and the score.

    Args:
        image_path: path of an image file that PIL can open.

    Returns:
        tuple `(crop, score)` where `crop` is a numpy array of the cropped
        region and `score` is the score of detection 0.
    """
    # The context manager releases the file handle even if inference fails.
    with Image.open(image_path) as image:
        im_width, im_height = image.size
        image_np = load_image_into_numpy_array(image)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np, detection_graph)

        score = output_dict['detection_scores'][0]
        print("highest score = {}".format(score))

        # The box is unpacked as (ymin, xmin, ymax, xmax) fractions of the image
        # size and scaled to pixel coordinates.
        ymin, xmin, ymax, xmax = output_dict['detection_boxes'][0]
        left, right = int(xmin * im_width), int(xmax * im_width)
        top, bottom = int(ymin * im_height), int(ymax * im_height)

        # PIL's crop box order is (left, upper, right, lower).
        area = (left, top, right, bottom)
        print("area = {}".format(area))

        # Materialize the crop as an array while the file is still open.
        cropped = image.crop(area)
        return (np.array(cropped), score)

In [93]:
# cut_head returns (cropped_array, score), in that order.
for image_path in TEST_IMAGE_PATHS:
    img_n_arr, score = cut_head(image_path)

highest score = 0.20356473326683044
area = (336, 183, 506, 357)
highest score = 0.27769407629966736
area = (327, 196, 392, 262)
highest score = 0.29602181911468506
area = (410, 197, 482, 268)
highest score = 0.3388868272304535
area = (327, 198, 388, 260)
highest score = 0.8475614190101624
area = (489, 204, 554, 264)
highest score = 0.7066797614097595
area = (484, 195, 552, 268)
highest score = 0.3371533751487732
area = (154, 205, 219, 262)
highest score = 0.8011513352394104
area = (495, 194, 563, 268)
