# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [1]:
import numpy as np
import pandas as pd
import os
import cv2
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

# Fail fast at import time when the installed TensorFlow is older than 1.9.0.
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')


## Env setup

In [2]:
# This is needed to display the images.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [3]:
from utils import label_map_util
from utils import visualization_utils as vis_util

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [4]:
# Which model archive to use. Alternatives from the model zoo are kept commented out;
# uncomment exactly one MODEL_NAME line to switch models.
#MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
#MODEL_NAME = 'faster_rcnn_resnet101_kitti_2018_01_28'
#MODEL_NAME = 'mask_rcnn_inception_v2_coco_2018_01_28'
MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
# Archive file name and the base URL it can be fetched from.
MODEL_FILE = MODEL_NAME + '.tar.gz'
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Path to the frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# Path to the label map file that is used to attach the correct label to each box.
PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

## Download Model

In [5]:
# The download step is disabled: the archive named by MODEL_FILE is expected to
# already be present in the working directory.
#opener = urllib.request.URLopener()
#opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Extract only the frozen inference graph from the archive. The `with` block
# guarantees the archive handle is closed even if extraction fails.
# NOTE(review): member paths come from the archive itself; only extract archives
# from a trusted source.
with tarfile.open(MODEL_FILE) as tar_file:
  for member in tar_file.getmembers():
    if 'frozen_inference_graph.pb' in os.path.basename(member.name):
      tar_file.extract(member, os.getcwd())

## Load a (frozen) Tensorflow model into memory.

In [6]:
# Build a dedicated graph and import the serialized GraphDef stored at
# PATH_TO_FROZEN_GRAPH into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' is passed so that the inference code can look tensors up by their
    # bare names (e.g. 'image_tensor:0').
    tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [7]:
# Build the category index from the label map file at PATH_TO_LABELS; it is passed
# to the visualization helper when boxes are drawn.
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

## Helper code

In [8]:
def load_image_into_numpy_array(image):
  """Convert a PIL-style image into a writable (height, width, 3) uint8 array.

  Images that are not already in 'RGB' mode (grayscale, RGBA, palette, ...) are
  converted first, so the result always has exactly three channels instead of
  failing on the reshape. The pixels are copied into a fresh array, which keeps
  the result writable for helpers that draw on it in place.

  Args:
    image: object exposing `mode`, `convert(mode)` and array conversion,
      e.g. a `PIL.Image.Image`.

  Returns:
    numpy.ndarray of dtype uint8 with shape (height, width, 3).
  """
  rgb_image = image if image.mode == 'RGB' else image.convert('RGB')
  # np.array (not np.asarray) forces a copy, so the array never aliases
  # read-only image memory.
  return np.array(rgb_image, dtype=np.uint8)

# Detection

In [9]:
# For the sake of simplicity the default test set is three images (range(1, 4)):
# image1.jpg
# image2.jpg
# image3.jpg
# If you want to test the code with your own images, just add their paths to TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(1, 4) ]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

In [10]:
def get_fixation_frames(TRIAL):
    """Return the midpoint frame index of every fixation recorded for a trial.

    Reads `<TRIAL>/fixations.csv` and, for each row, computes the floor of the
    mean of its `start_frame_index` and `end_frame_index` columns.

    Args:
        TRIAL: name/path of the trial directory (converted with `str`).

    Returns:
        list with one midpoint frame index per CSV row, in row order
        (an empty list when the CSV has no data rows).
    """
    fixations_path = os.path.join(str(TRIAL), 'fixations.csv')
    df = pd.read_csv(fixations_path)

    # Vectorized midpoint; tolist() yields plain Python numbers, which keeps
    # later membership tests and string formatting cheap and predictable.
    midpoints = (df['start_frame_index'] + df['end_frame_index']) // 2
    return midpoints.tolist()

In [11]:
# Module-level tallies shared by the inference and plotting code:
# per-region fixation counters, the fixations.csv row indices that landed in
# each region, and the rows collected for the per-trial output table.
bumper_fixations, windshield_fixations, display_fixations = 0, 0, 0
car_fixations = 0
fixation_df_indices = {'car': [], 'display': [], 'windshield': [], 'bumper': []}
output_data = []

def init_fixation_counts():
    """Reset every module-level fixation tally to its empty state.

    `fixation_df_indices` is declared global here as well; without that the
    assignment only creates a local and the module-level dict keeps the
    indices accumulated by previous trials.
    """
    global bumper_fixations, windshield_fixations, display_fixations, car_fixations
    global fixation_df_indices, output_data

    bumper_fixations, windshield_fixations, display_fixations = 0, 0, 0
    car_fixations = 0
    fixation_df_indices = {'car': [], 'display': [], 'windshield': [], 'bumper': []}
    output_data = []

In [12]:
def run_inference_for_single_image(image, graph, x_fix, y_fix, fix_index, display_type, df):
  """Run the detector on one frame and classify a fixation against every detected car.

  A new tf.Session is opened on `graph` for each call. Whichever of the
  detection output tensors exist in the graph are fetched while `image` (with a
  batch dimension added) is fed to 'image_tensor:0'.

  For each detection with class id 3 and score > 0.65 (handled as a car here;
  presumably the COCO label ids -- confirm against the label map), display /
  windshield / bumper polygons are derived from the bounding box and the point
  (x_fix, y_fix) is tested against them. Detections with class id 1 and
  score > 0.7 (handled as people) are only printed.

  Side effects:
    * increments the module-level counters car_fixations, display_fixations,
      windshield_fixations and bumper_fixations;
    * appends `fix_index` to the matching lists in fixation_df_indices;
    * appends one row to output_data per car detection (row `fix_index` of
      `df` plus a region label), so a frame with several cars yields several
      rows and a frame without cars yields none;
    * prints diagnostics.

  Args:
    image: array whose shape unpacks as (height, width, depth).
    graph: tf.Graph holding the imported detection model.
    x_fix: x coordinate of the fixation, compared against box coordinates that
      were scaled by the image width.
    y_fix: y coordinate of the fixation, compared against box coordinates that
      were scaled by the image height.
    fix_index: positional row index into `df` (used with `df.iloc`).
    display_type: when its string form contains 'manual', the display region is
      replaced by a degenerate polygon at (-1, -1).
    df: DataFrame whose row `fix_index` is copied into the output row.

  Returns:
    dict with 'num_detections' (int), 'detection_classes' (uint8 array),
    'detection_boxes', 'detection_scores' and, if the graph provides it,
    'detection_masks'; the batch dimension is removed from every entry.
  """
  global bumper_fixations, windshield_fixations, display_fixations, car_fixations, fixation_df_indices, output_data
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      im_height, im_width, im_depth = image.shape
      
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      # Only request the outputs that this particular graph actually exposes.
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
        
        #  if (category_index.get(i)['id'] in [1, 3])
      
      # Keep only confident detections: class 3 above 0.65 and class 1 above 0.7.
      car_indices = [i for i in range(output_dict['num_detections']) if (output_dict['detection_classes'][i] == 3 and output_dict['detection_scores'][i] > 0.65)]
      people_indices = [i for i in range(output_dict['num_detections']) if (output_dict['detection_classes'][i] == 1 and output_dict['detection_scores'][i] > 0.7)]
      #print([(category_index.get(i)['id'], category_index.get(i)['name']) for i in output_dict['detection_classes']])
      
      print('car bounding boxes:')
      for i in car_indices:
        # Boxes are unpacked as (ymin, xmin, ymax, xmax) and scaled by the image size.
        bb = output_dict['detection_boxes'][i]
        ymin, xmin, ymax, xmax = bb[0], bb[1], bb[2], bb[3]
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
        print("Bounding boxes coords:")
        print(left, right, top, bottom)
        print("Scores: ")
        print(output_dict['detection_scores'][i])
        #print(output_dict['detection_classes'])
        print()
        
        image_area = im_width * im_height * 1.0
        fixation = Point(x_fix, y_fix)
        # Preliminary car polygon: the box extended upwards by 15% of its height.
        # It is only used to measure how much of the image the car covers.
        car_bb = Polygon([(left, top - (0.15 * (bottom - top))), 
                          (right, top - (0.15 * (bottom - top))), 
                          (right, bottom), (left, bottom)])
        
        car_area = car_bb.area
        closeby = False
        
        if float(car_area) / float(image_area) >= 0.25: # we care about slant of different fixation regions. car is close.
            closeby = True
            
        # Left-edge corners (odd numbers) of the display / windshield / bumper bands, top to bottom.
        coord_1 = (left, top - (0.15 * (bottom - top)))
        coord_3 = (left, top + (0.1 * (bottom - top)))
        coord_5 = (left, top + (0.25 * (bottom - top)))
        coord_7 = (left, bottom)
        
        # Right-edge corners (even numbers); the far-car case also raises the top-left corner.
        if not closeby:
            coord_1 = (left, top - (0.35 * (bottom - top)))
            coord_2 = (right, top - (0.35 * (bottom - top)))
            coord_4 = (right, top + (0.1 * (bottom - top)))
            coord_6 = (right, top + (0.25 * (bottom - top)))
            coord_8 = (right, bottom)
            
        else: # shift the right coords up more
            coord_2 = (right * 0.75, top - (0.175 * (bottom - top)))
            coord_4 = (right * 0.825, top + (0.05 * (bottom - top)))
            coord_6 = (right, top + (0.2 * (bottom - top)))
            coord_8 = (right, bottom)
            
        display_bb = Polygon([coord_1, coord_2, coord_4, coord_3])
        # For 'manual' display types the display region is swapped for a
        # degenerate polygon at (-1, -1).
        if 'manual' in str(display_type):
            display_bb = Polygon([(-1,-1), (-1,-1), (-1,-1), (-1,-1)])
        
        windshield_bb = Polygon([coord_3, coord_4, coord_6, coord_5])
        bumper_bb = Polygon([coord_5, coord_6, coord_8, coord_7])
        
        # updated car polygon after analyzing ratio & placement on screen
        car_bb = Polygon([coord_1, coord_2, coord_4, coord_6, 
                          coord_8, coord_7, coord_5, coord_3])
        
        print('Account for slant: ' + str(closeby))
        print('Fixation in car? ' + str(car_bb.contains(fixation)))
        print('Display: ' + str(display_bb.contains(fixation)))
        print('Windshield: ' + str(windshield_bb.contains(fixation)))
        print('Bumper: ' + str(bumper_bb.contains(fixation)))
        
        # Copy of the fixation's DataFrame row; a region label is appended below.
        output_row = df.iloc[[fix_index]].values.tolist()[0]
        
        if car_bb.contains(fixation):
            car_fixations += 1
            fixation_df_indices['car'].append(fix_index)
            
        # The region checks are mutually exclusive: first match wins.
        if display_bb.contains(fixation):
            display_fixations += 1
            fixation_df_indices['display'].append(fix_index)
            output_row.append('interface')
            
        elif windshield_bb.contains(fixation):
            windshield_fixations += 1
            fixation_df_indices['windshield'].append(fix_index)
            output_row.append('windshield')
            
        elif bumper_bb.contains(fixation):
            bumper_fixations += 1
            fixation_df_indices['bumper'].append(fix_index)
            output_row.append('bumper')
            
        else:
            output_row.append('other fixation')
            
        output_data.append(output_row)
      
      print()
      print('people bounding boxes:')
      # People detections are reported only; they do not affect any tally.
      for i in people_indices:
        bb = output_dict['detection_boxes'][i]
        ymin, xmin, ymax, xmax = bb[0], bb[1], bb[2], bb[3]
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width, ymin * im_height, ymax * im_height)
        print("Bounding boxes coords:")
        print(left, right, top, bottom)
        print("Scores: ")
        print(output_dict['detection_scores'][i])
        #print(output_dict['detection_classes'])
        print()
  print('-----------')
        
  return output_dict

In [13]:
'''
date, trial_num, display_type, intersection = TRIAL.split('_')
output_data = []

if ANALYZE_VIDEO: # we modify TEST_IMAGE_PATHS to be a set of video frames
    TEST_IMAGE_PATHS = []
    print('analyzing video...')
    vidObj = cv2.VideoCapture(os.path.join(str(TRIAL), VIDEO_PATH))
    count = 1
    success = 1
    fixations = get_fixation_frames(TRIAL) 
    
    while success:
        success, image = vidObj.read() 
  
        # Saves the frames with frame-count 
        if count in fixations:
            cv2.imwrite(os.path.join(str(TRIAL), 'frame{}.jpg'.format(count)), image)
            TEST_IMAGE_PATHS.append(os.path.join(str(TRIAL), 'frame{}.jpg'.format(count)))
  
        count += 1
print("Number of frames for this video: {}".format(len(TEST_IMAGE_PATHS)))
'''

'\ndate, trial_num, display_type, intersection = TRIAL.split(\'_\')\noutput_data = []\n\nif ANALYZE_VIDEO: # we modify TEST_IMAGE_PATHS to be a set of video frames\n    TEST_IMAGE_PATHS = []\n    print(\'analyzing video...\')\n    vidObj = cv2.VideoCapture(os.path.join(str(TRIAL), VIDEO_PATH))\n    count = 1\n    success = 1\n    fixations = get_fixation_frames(TRIAL) \n    \n    while success:\n        success, image = vidObj.read() \n  \n        # Saves the frames with frame-count \n        if count in fixations:\n            cv2.imwrite(os.path.join(str(TRIAL), \'frame{}.jpg\'.format(count)), image)\n            TEST_IMAGE_PATHS.append(os.path.join(str(TRIAL), \'frame{}.jpg\'.format(count)))\n  \n        count += 1\nprint("Number of frames for this video: {}".format(len(TEST_IMAGE_PATHS)))\n'

In [14]:
def run_all(TRIAL, f1, f2):
    """Extract fixation frames from a trial video, classify them, and save the results.

    Steps:
      1. Read the trial video (`worldwithoutgaze.mp4`, falling back to
         `world.mp4`) and save every frame whose running count equals a
         fixation midpoint as `frame<count>.jpg` in the trial directory.
      2. For each midpoint in `fixations[f1:f2]`, run the detector on the saved
         frame, classify the fixation, and save the annotated frame as
         `classified<frame>.jpg`.
      3. Save a stacked bar chart of the fixation counts as `fixations.png`.
      4. Save the collected rows as tab-separated `plot_data.csv`.

    Args:
        TRIAL: trial directory name with four '_'-separated parts
            (date, trial number, display type, intersection).
        f1: start of the slice applied to the list of fixation midpoints.
        f2: end of the slice applied to the list of fixation midpoints
            (may be None).

    Side effects:
        Resets and then fills the module-level fixation tallies and
        `output_data`; writes image, plot and CSV files into the trial
        directory; prints diagnostics.
    """
    VIDEO_PATH = 'worldwithoutgaze.mp4'
    ANALYZE_VIDEO = True
    FRAMES_PER_SECOND = 28.49  # divisor used to convert frame counts to seconds
    date, trial_num, display_type, intersection = TRIAL.split('_')
    global output_data
    output_data = []

    trial_dir = str(TRIAL)
    # Defined up front so the code below never hits an unbound name, whatever
    # ANALYZE_VIDEO is set to.
    fixations = get_fixation_frames(TRIAL)
    TEST_IMAGE_PATHS = []
    count = 1

    if ANALYZE_VIDEO: # dump the video frames that sit on a fixation midpoint
        print('analyzing video...')

        video_file = os.path.join(trial_dir, VIDEO_PATH)
        if not os.path.isfile(video_file):
            video_file = os.path.join(trial_dir, 'world.mp4')

        vidObj = cv2.VideoCapture(video_file)
        wanted_frames = set(fixations)  # O(1) membership test per video frame
        success = True
        try:
            while success:
                success, image = vidObj.read()

                # Only write frames that were actually decoded: the final,
                # failed read returns no image and must not reach imwrite.
                if success and count in wanted_frames:
                    frame_path = os.path.join(trial_dir, 'frame{}.jpg'.format(count))
                    cv2.imwrite(frame_path, image)
                    TEST_IMAGE_PATHS.append(frame_path)

                count += 1
        finally:
            vidObj.release()
    print('Count: ' + str(count))
    print("Number of frames for this video: {}".format(len(TEST_IMAGE_PATHS)))

    init_fixation_counts()
    fixations_path = os.path.join(trial_dir, 'fixations.csv')
    df = pd.read_csv(fixations_path)
    num_rows = df.shape[0]
    print(num_rows)
    print(fixations[f1:f2])

    for i in fixations[f1:f2]:
        # Find the first fixation row whose frame span contains this frame;
        # j keeps that row index after the loop.
        for j in range(num_rows):
            if df['start_frame_index'][j] <= i <= df['end_frame_index'][j]:
                break

        image_path = os.path.join(trial_dir, 'frame{}.jpg'.format(i))
        # The array based representation of the image is used later to draw
        # the boxes and labels on it; the file handle is closed right away.
        with Image.open(image_path) as image:
            image_np = load_image_into_numpy_array(image)

        im_height, im_width = image_np.shape[:2]

        # Normalised gaze position -> pixel coordinates (y axis is flipped).
        x_fixation = df['norm_pos_x'][j] * im_width
        y_fixation = im_height * (1.0 - df['norm_pos_y'][j])
        print(x_fixation)
        print(y_fixation)
        print()

        # Actual detection.
        output_dict = run_inference_for_single_image(image_np, detection_graph, x_fixation, y_fixation, j, display_type, df)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)

        # imsave writes the array directly, so no figure is created per frame.
        plt.imsave(os.path.join(trial_dir, 'classified{}.jpg'.format(i)), image_np)

    def find_average_fixation(arr):
        """Average duration, in seconds, of the fixations at the given df indices.

        Returns the string 'None' when `arr` is empty.
        """
        if len(arr) == 0:
            return 'None'

        total = 0
        for idx in arr:
            total += df['end_frame_index'][idx] - df['start_frame_index'][idx] + 1

        return (float(total) / FRAMES_PER_SECOND) / len(arr)

    # generate histogram of fixations on different parts of the car

    names = ['Total Car Fixations', 'AV Interface Fixations', 'Windshield fixations', 'Bumper fixations']
    # Row 0 holds the total bar heights; rows 1-3 are the stacked layers.
    fixation_counts = np.array([[display_fixations + windshield_fixations + bumper_fixations, display_fixations, windshield_fixations, bumper_fixations],
                       [display_fixations, display_fixations, 0, 0],
                       [windshield_fixations, 0, windshield_fixations, 0],
                       [bumper_fixations, 0, 0, bumper_fixations]])

    x_pos = list(range(len(names)))

    averages = [find_average_fixation(fixation_df_indices[key]) for key in fixation_df_indices.keys()]

    # One figure for the histogram.
    plt.figure(figsize=IMAGE_SIZE)

    # Label each bar at its centre (bar k is centred on x == k) and at the
    # height of the total row, without drawing a throwaway bar chart first.
    for idx in x_pos:
        avg = averages[idx]
        bar_height = fixation_counts[0][idx]
        if avg == 'None':
            plt.text(idx, bar_height, 'Average fixation \nduration: \n\nN / A\n', ha='center', va='bottom', fontsize=15)
        else:
            plt.text(idx, bar_height, 'Average fixation \nduration: \n\n%.3f sec\n' % avg, ha='center', va='bottom', fontsize=15)

    plt.bar(names, fixation_counts[1], bottom=[0,0,0,0], color=['red'])
    plt.bar(names, fixation_counts[2], bottom=fixation_counts[1], color=['blue'])
    plt.bar(names, fixation_counts[3], bottom=fixation_counts[2] + fixation_counts[1], color=['green'])

    plot_title = date[:2] + '/' + date[2:] + '\nAV Interface Type: ' + display_type.title() + '\nIntersection: ' + str(int(intersection))
    plt.title(plot_title, fontsize=24)
    plt.xlabel("\nCategory of Fixation on Car", fontsize=15)
    plt.ylabel("Frequency\n", fontsize=15)
    plt.xticks(x_pos, names, fontsize=12)
    plt.yticks(fontsize=12)
    plt.gca().set_ylim(0, display_fixations + windshield_fixations + bumper_fixations + 2)
    plt.subplots_adjust(top=0.7)

    plt.savefig(os.path.join(trial_dir, 'fixations.png'))

    # Column names for the output table: fourteen fixation columns plus the
    # region label appended to each row during inference.
    cols = ['id',
     'start_timestamp',
     'duration',
     'start_frame_index',
     'end_frame_index',
     'norm_pos_x',
     'norm_pos_y',
     'dispersion',
     'confidence',
     'method',
     'gaze_point_3d_x',
     'gaze_point_3d_y',
     'gaze_point_3d_z',
     'base_data', 'fixation_type']

    output_df = pd.DataFrame(output_data, columns=cols)
    output_df.to_csv(os.path.join(trial_dir, 'plot_data.csv'), sep='\t')

In [15]:
# Trial directory names; each has four '_'-separated parts, which run_all
# unpacks as date, trial number, display type and intersection.
trials = ['1108_1446_eyes_02', '1108_1457_symbols_03', '1108_1504_eyes_02',
    '1108_1510_manual_02', '1108_1512_symbols_02', '1108_1519_eyes_03', '1108_1534_symbols_01', 
          '1108_1540_text_03', '1108_1548_text_01', '1108_1552_eyes_01', '1108_1557_text_02']

# Per-trial slice bounds: run_all analyses fixations[f1:f2] for the trial at
# the same position in `trials`.
f1_s = [150, 150, 120,
    -37, -7, -37, -23,
    -18, -8, -5, -20]

f2_s = [160, 183, 142,
    -28, -1, -30, -15,
    -13, -4, None, -12]

# range(-11, -10) yields only -11, i.e. the first of the eleven trials.
# Widen the range to process more trials in one run.
for i in range(-11, -10):
    TRIAL = trials[i]
    f1 = f1_s[i]
    f2 = f2_s[i]
    run_all(TRIAL, f1, f2)

analyzing video...
Count: 5859
Number of frames for this video: 227
227
[3801, 3814, 3840, 3865, 3896, 3920, 3958, 3975, 4002, 4068]
483.32146721381326
271.7983266382454

car bounding boxes:

people bounding boxes:
-----------
20353.54389529453
11957.819993500572

car bounding boxes:

people bounding boxes:
-----------
-50978.425940349436
-28827.04053581023

car bounding boxes:

people bounding boxes:
-----------
-31566.49726556576
-22515.986705305204

car bounding boxes:
Bounding boxes coords:
527.8768157958984 581.8902587890625 355.57783126831055 391.45368576049805
Scores: 
0.7017226

Account for slant: False
Fixation in car? False
Display: False
Windshield: False
Bumper: False

people bounding boxes:
-----------
1074.489102383593
338.31437975524784

car bounding boxes:

people bounding boxes:
-----------
445.1639612028507
287.29090667832884

car bounding boxes:

people bounding boxes:
-----------
502.6516791321221
284.9101577564136

car bounding boxes:
Bounding boxes coords:
438.108

In [16]:
'''init_fixation_counts()
fixations_path = os.path.join(str(TRIAL), 'fixations.csv')
df = pd.read_csv(fixations_path)
num_rows = df.shape[0]

for i in fixations[-7:None]:
  for j in range(num_rows):
    fixation_start = df['start_frame_index'][j]
    fixation_end = df['end_frame_index'][j]
    
    if i in range(fixation_start, fixation_end):
      break
    
  image_path = os.path.join(str(TRIAL), 'frame{}.jpg'.format(i))
  image = Image.open(image_path)
  # the array based representation of the image will be used later in order to prepare the
  # result image with boxes and labels on it.
  image_np = load_image_into_numpy_array(image)
  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
  image_np_expanded = np.expand_dims(image_np, axis=0)
    
  im_height, im_width, im_depth = image_np.shape

  x_fixation = df['norm_pos_x'][j] * im_width
  y_fixation = im_height * (1.0 - df['norm_pos_y'][j])
  print(x_fixation)
  print(y_fixation)
  print()
  
  # Actual detection.
  output_dict = run_inference_for_single_image(image_np, detection_graph, x_fixation, y_fixation, j, display_type)
  # Visualization of the results of a detection.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
  plt.figure(figsize=IMAGE_SIZE)
  #plt.imshow(image_np)
  
  plt.imsave(os.path.join(str(TRIAL), 'classified{}.jpg'.format(i)), image_np)'''

"init_fixation_counts()\nfixations_path = os.path.join(str(TRIAL), 'fixations.csv')\ndf = pd.read_csv(fixations_path)\nnum_rows = df.shape[0]\n\nfor i in fixations[-7:None]:\n  for j in range(num_rows):\n    fixation_start = df['start_frame_index'][j]\n    fixation_end = df['end_frame_index'][j]\n    \n    if i in range(fixation_start, fixation_end):\n      break\n    \n  image_path = os.path.join(str(TRIAL), 'frame{}.jpg'.format(i))\n  image = Image.open(image_path)\n  # the array based representation of the image will be used later in order to prepare the\n  # result image with boxes and labels on it.\n  image_np = load_image_into_numpy_array(image)\n  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]\n  image_np_expanded = np.expand_dims(image_np, axis=0)\n    \n  im_height, im_width, im_depth = image_np.shape\n\n  x_fixation = df['norm_pos_x'][j] * im_width\n  y_fixation = im_height * (1.0 - df['norm_pos_y'][j])\n  print(x_fixation)\n  print(y_fix

In [17]:
'''# Takes in an array of DF indices. 
# Returns the average length - in seconds - of fixation durations at those indices.

def find_average_fixation(arr):
    total = 0
    
    for i in arr:
        length = df['end_frame_index'][i] - df['start_frame_index'][i] + 1
        total += length
        
    total_seconds = float(total) / 30.0
    
    if len(arr) > 0:
        return total_seconds / len(arr)
    
    return 'None'
    '''

"# Takes in an array of DF indices. \n# Returns the average length - in seconds - of fixation durations at those indices.\n\ndef find_average_fixation(arr):\n    total = 0\n    \n    for i in arr:\n        length = df['end_frame_index'][i] - df['start_frame_index'][i] + 1\n        total += length\n        \n    total_seconds = float(total) / 30.0\n    \n    if len(arr) > 0:\n        return total_seconds / len(arr)\n    \n    return 'None'\n    "

In [18]:
'''
# generate histogram of fixations on different parts of the car

names = ['Total Car Fixations', 'AV Interface Fixations', 'Windshield fixations', 'Bumper fixations']
fixation_counts = np.array([[display_fixations + windshield_fixations + bumper_fixations, display_fixations, windshield_fixations, bumper_fixations], 
                   [display_fixations, display_fixations, 0, 0], 
                   [windshield_fixations, 0, windshield_fixations, 0], 
                   [bumper_fixations, 0, 0, bumper_fixations]])

x_pos = [i for i, _ in enumerate(names)]

averages = []
for key in fixation_df_indices.keys():
    averages.append(find_average_fixation(fixation_df_indices[key]))

plt.clf()
fig0 = plt.bar(names, fixation_counts[0], color=['orange'])

# Clear fig0. we only needed it for the heights of the 3 fixation types.
plt.clf()

for i, rect in enumerate(fig0):
    avg = averages[i]
    if avg == 'None':
        plt.text(rect.get_x() + rect.get_width()/2.0, rect.get_height(), 'Average fixation \nduration: \n\nN / A\n', ha='center', va='bottom', fontsize=15)
        
    else:
        plt.text(rect.get_x() + rect.get_width()/2.0, rect.get_height(), 'Average fixation \nduration: \n\n%.3f sec\n' % avg, ha='center', va='bottom', fontsize=15)

fig1 = plt.bar(names, fixation_counts[1], bottom=[0,0,0,0], color=['red'])
fig2 = plt.bar(names, fixation_counts[2], bottom=fixation_counts[1], color=['blue'])
fig3 = plt.bar(names, fixation_counts[3], bottom=fixation_counts[2] + fixation_counts[1], color=['green'])

plot_title = date[:2] + '/' + date[2:] + '\nAV Interface Type: ' + display_type.title() + '\nIntersection: ' + str(int(intersection))
#plot_title = 'Fixation Frequencies\ngh\ngfh'
plt.title(plot_title, fontsize=24)
plt.xlabel("\nCategory of Fixation on Car", fontsize=15)
plt.ylabel("Frequency\n", fontsize=15)
plt.xticks(x_pos, names, fontsize=12)
plt.yticks(fontsize=12)
plt.gca().set_ylim(0, display_fixations + windshield_fixations + bumper_fixations + 2)
plt.subplots_adjust(top=0.7)

plt.savefig(os.path.join(str(TRIAL), 'fixations.png'))
'''

'\n# generate histogram of fixations on different parts of the car\n\nnames = [\'Total Car Fixations\', \'AV Interface Fixations\', \'Windshield fixations\', \'Bumper fixations\']\nfixation_counts = np.array([[display_fixations + windshield_fixations + bumper_fixations, display_fixations, windshield_fixations, bumper_fixations], \n                   [display_fixations, display_fixations, 0, 0], \n                   [windshield_fixations, 0, windshield_fixations, 0], \n                   [bumper_fixations, 0, 0, bumper_fixations]])\n\nx_pos = [i for i, _ in enumerate(names)]\n\naverages = []\nfor key in fixation_df_indices.keys():\n    averages.append(find_average_fixation(fixation_df_indices[key]))\n\nplt.clf()\nfig0 = plt.bar(names, fixation_counts[0], color=[\'orange\'])\n\n# Clear fig0. we only needed it for the heights of the 3 fixation types.\nplt.clf()\n\nfor i, rect in enumerate(fig0):\n    avg = averages[i]\n    if avg == \'None\':\n        plt.text(rect.get_x() + rect.get_widt

In [19]:
'''cols = ['id',
 'start_timestamp',
 'duration',
 'start_frame_index',
 'end_frame_index',
 'norm_pos_x',
 'norm_pos_y',
 'dispersion',
 'confidence',
 'method',
 'gaze_point_3d_x',
 'gaze_point_3d_y',
 'gaze_point_3d_z',
 'base_data', 'fixation_type']

output_df = pd.DataFrame(output_data, columns=cols)
output_df.to_csv(os.path.join(str(TRIAL), 'plot_data.csv'), sep='\t')
'''

"cols = ['id',\n 'start_timestamp',\n 'duration',\n 'start_frame_index',\n 'end_frame_index',\n 'norm_pos_x',\n 'norm_pos_y',\n 'dispersion',\n 'confidence',\n 'method',\n 'gaze_point_3d_x',\n 'gaze_point_3d_y',\n 'gaze_point_3d_z',\n 'base_data', 'fixation_type']\n\noutput_df = pd.DataFrame(output_data, columns=cols)\noutput_df.to_csv(os.path.join(str(TRIAL), 'plot_data.csv'), sep='\t')\n"

In [20]:
def fixations_by_interface(fix_type, date_prefix='1108'):
    """Plot the per-trial average fixation counts for one interface type.

    Walks the current working directory for sub-directories whose base name
    contains both `date_prefix` and `fix_type`, reads each directory's
    tab-separated `plot_data.csv` (when present) and tallies the rows labelled
    'interface', 'windshield' and 'bumper'. The counts are divided by the
    number of matching directories and drawn as a stacked bar chart that is
    saved as `<fix_type>fixations.png` in the working directory.

    Args:
        fix_type: substring identifying the interface type in directory names;
            also used in the plot title and output file name.
        date_prefix: substring every trial directory name must contain.
            Defaults to '1108', the value that used to be hard-coded.
    """
    FRAMES_PER_SECOND = 28.49  # divisor used to convert frame counts to seconds
    categories = ('interface', 'windshield', 'bumper')

    # Index 0 is the total over all categories; indices 1-3 follow `categories`.
    fixation_lengths = [0, 0, 0, 0]
    num_fixations = [0, 0, 0, 0]

    subs = [path for path, _, _ in os.walk(os.getcwd())
            if date_prefix in os.path.basename(path) and fix_type in os.path.basename(path)]

    for subdir in subs:
        print(subdir)

        f = os.path.join(subdir, 'plot_data.csv')
        if not os.path.exists(f):
            continue

        plot_data = pd.read_csv(f, sep='\t')
        seconds = (plot_data['end_frame_index'] - plot_data['start_frame_index'] + 1) / FRAMES_PER_SECOND
        for slot, label in enumerate(categories, start=1):
            matches = plot_data['fixation_type'] == label
            num_fixations[slot] += int(matches.sum())
            fixation_lengths[slot] += float(seconds[matches].sum())

    fixation_lengths[0] = sum(fixation_lengths)
    num_fixations[0] = sum(num_fixations)

    names = ['Total Car Fixations', 'AV Interface Fixations', 'Windshield fixations', 'Bumper fixations']
    # Guard the per-trial average against "no matching directory" so the call
    # still produces an (empty) plot instead of dividing by zero.
    num_trials = float(max(len(subs), 1))
    fixation_counts = np.array([[num_fixations[0], num_fixations[1], num_fixations[2], num_fixations[3]],
                       [num_fixations[1], num_fixations[1], 0, 0],
                       [num_fixations[2], 0, num_fixations[2], 0],
                       [num_fixations[3], 0, 0, num_fixations[3]]]) / num_trials

    x_pos = list(range(len(names)))

    plt.figure(figsize=(15,10))

    # Label each bar at its centre (bar k is centred on x == k) and at the
    # height of the total row, without drawing a throwaway bar chart on
    # whatever figure happened to be current.
    for idx in x_pos:
        bar_height = fixation_counts[0][idx]
        if num_fixations[idx] == 0:
            plt.text(idx, bar_height, 'Average fixation \nduration across \nall subjects, all trials: \n\nN / A\n', ha='center', va='bottom', fontsize=15)
        else:
            plt.text(idx, bar_height, 'Average fixation \nduration across \nall subjects, all trials: \n\n%.3f sec\n' % (float(fixation_lengths[idx]) / num_fixations[idx]), ha='center', va='bottom', fontsize=15)

    plt.bar(names, fixation_counts[1], bottom=[0,0,0,0], color=['red'])
    plt.bar(names, fixation_counts[2], bottom=fixation_counts[1], color=['blue'])
    plt.bar(names, fixation_counts[3], bottom=fixation_counts[2] + fixation_counts[1], color=['green'])

    plot_title = 'Interface Type: ' + fix_type.title() + '\n'
    if fix_type == 'nodisplay':
        plot_title = 'Interface Type: ' + 'No Display ' + '\n'

    plt.title(plot_title, fontsize=24)
    plt.xlabel("\nCategory of Fixation on Car", fontsize=15)
    plt.ylabel("Frequency\n", fontsize=15)
    plt.xticks(x_pos, names, fontsize=12)
    plt.yticks(fontsize=12)
    plt.gca().set_ylim(0, 7)
    plt.subplots_adjust(top=0.7)

    plt.savefig(fix_type + 'fixations.png')

In [21]:
# Produce one summary plot per interface type. Each entry is matched as a
# substring of the trial directory name, so 'symbol' also selects the
# '..._symbols_..' directories.
#types = ['text', 'symbol', 'eyes', 'nodisplay', 'manual']
types = ['text', 'symbol', 'eyes', 'manual']
for t in types:
    fixations_by_interface(t)

/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1540_text_03
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1548_text_01
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1557_text_02
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1457_symbols_03
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1534_symbols_01
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1512_symbols_02
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1504_eyes_02
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1446_eyes_02
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1552_eyes_01
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detection/1108_1519_eyes_03
/Users/shaantam/Documents/BDD/tensorflow/models/research/object_detecti