In [None]:
!pip install natsort



In [5]:
import json
import numpy as np
import matplotlib.pyplot as plt
import os

from matplotlib.patches import Rectangle
from natsort import natsorted
from tqdm import tqdm

%matplotlib inline

import tensorflow as tf
from tensorflow.keras.preprocessing.image import *

In [6]:
os.chdir('/content/drive/MyDrive/EE113DB')
print(os.getcwd())

dataDir = 'custom_datasets320'
dataType = 'val2017'

img_filenames = natsorted(os.listdir(os.path.join(dataDir, 'images', dataType)))
img_ids = [os.path.splitext(f)[0] for f in img_filenames] # get rid of '.jpg' extension

with open(os.path.join(dataDir, 'annotations', '{}bboxes.json'.format(dataType) ), 'r') as json_file:
  img_bboxes_dict = json.load(json_file)
  json_file.close()

'''
Custom JSON hierarchy:
img_id
    cat
        bbox1
        bbox2
        bbox3
        ...
    cat 
        bbox1
        bbox2
        bbox3
        ...
'''

/content/drive/MyDrive/EE113DB


'\nCustom JSON hierarchy:\nimg_id\n    cat\n        bbox1\n        bbox2\n        bbox3\n        ...\n    cat \n        bbox1\n        bbox2\n        bbox3\n        ...\n'

In [7]:
# based on https://github.com/experiencor/keras-yolo3
import numpy as np
from numpy import expand_dims
from keras.models import load_model
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from matplotlib import pyplot
from matplotlib.patches import Rectangle
 
class BoundBox:
	def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
		self.xmin = xmin
		self.ymin = ymin
		self.xmax = xmax
		self.ymax = ymax
		self.objness = objness
		self.classes = classes
		self.label = -1
		self.score = -1
 
	def get_label(self):
		if self.label == -1:
			self.label = np.argmax(self.classes)
 
		return self.label
 
	def get_score(self):
		if self.score == -1:
			self.score = self.classes[self.get_label()]
 
		return self.score
 
def _sigmoid(x):
	return 1. / (1. + np.exp(-x))
 
def decode_netout(netout, anchors, obj_thresh, net_h, net_w):
  grid_h, grid_w = netout.shape[:2]
  nb_box = 3
  netout = netout.reshape((grid_h, grid_w, nb_box, -1)) # Converts to 
  nb_class = netout.shape[-1] - 5
  boxes = []
  netout[..., :2]  = _sigmoid(netout[..., :2])
  netout[..., 4:]  = _sigmoid(netout[..., 4:])
  netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
  netout[..., 5:] *= netout[..., 5:] > obj_thresh

  for i in range(grid_h*grid_w):
    row = i / grid_w
    col = i % grid_w
    for b in range(nb_box):
      # 4th element is objectness score
      objectness = netout[int(row)][int(col)][b][4]
      if(objectness.all() <= obj_thresh): continue
      # first 4 elements are x, y, w, and h
      x, y, w, h = netout[int(row)][int(col)][b][:4]
      x = (col + x) / grid_w # center position, unit: image width
      y = (row + y) / grid_h # center position, unit: image height
      w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
      h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height
      # last elements are class probabilities
      classes = netout[int(row)][col][b][5:]
      box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes)
      boxes.append(box)
  return boxes
 
def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
	new_w, new_h = net_w, net_h
	for i in range(len(boxes)):
		x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
		y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h
		boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w)
		boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w)
		boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h)
		boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h)
 
def _interval_overlap(interval_a, interval_b):
	x1, x2 = interval_a
	x3, x4 = interval_b
	if x3 < x1:
		if x4 < x1:
			return 0
		else:
			return min(x2,x4) - x1
	else:
		if x2 < x3:
			 return 0
		else:
			return min(x2,x4) - x3
 
def bbox_iou(box1, box2):
	intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
	intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
	intersect = intersect_w * intersect_h
	w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
	w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin
	union = w1*h1 + w2*h2 - intersect
	return float(intersect) / union
 
def do_nms(boxes, nms_thresh):
	if len(boxes) > 0:
		nb_class = len(boxes[0].classes)
	else:
		return
	for c in range(nb_class):
		sorted_indices = np.argsort([-box.classes[c] for box in boxes])
		for i in range(len(sorted_indices)):
			index_i = sorted_indices[i]
			if boxes[index_i].classes[c] == 0: continue
			for j in range(i+1, len(sorted_indices)):
				index_j = sorted_indices[j]
				if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh:
					boxes[index_j].classes[c] = 0

# get all of the results above a threshold
def get_boxes(boxes, labels, thresh):
	v_boxes, v_labels, v_scores = list(), list(), list()
	# enumerate all boxes
	for box in boxes:
		# enumerate all possible labels
		for i in range(len(labels)):
			# check if the threshold for this label is high enough
			if box.classes[i] > thresh:
				v_boxes.append(box)
				v_labels.append(labels[i])
				v_scores.append(box.classes[i]*100)
				# don't break, many labels may trigger for one box
	return v_boxes, v_labels, v_scores
 
# draw all results
def draw_boxes(filename, v_boxes, v_labels, v_scores):
	# load the image
	data = pyplot.imread(filename)
	# plot the image
	pyplot.imshow(data)
	# get the context for drawing boxes
	ax = pyplot.gca()
	# plot each box
	for i in range(len(v_boxes)):
		box = v_boxes[i]
		# get coordinates
		y1, x1, y2, x2 = box.ymin, box.xmin, box.ymax, box.xmax
		# calculate width and height of the box
		width, height = x2 - x1, y2 - y1
		# create the shape
		rect = Rectangle((x1, y1), width, height, fill=False, color='white')
		# draw the box
		ax.add_patch(rect)
		# draw text and score in top left corner
		label = "%s (%.3f)" % (v_labels[i], v_scores[i])
		pyplot.text(x1, y1, label, color='white')
	# show the plot
	pyplot.show()

In [8]:
def load_img_array(image_path, target_size=(320,320)):
  image = load_img(image_path, target_size=target_size)
  image = img_to_array(image)
  
  # scale pixel values to [0, 1]
  image = image.astype('float32')
  image /= 255.0

  return image

def compute_iou_xywh(box1, box2):
  x1, y1, w1, h1 = box1
  x2, y2, w2, h2 = box2

  bbox1 = BoundBox(x1, y1, x1+w1, y1+h1)
  bbox2 = BoundBox(x2, y2, x2+w2, y2+h2)

  return bbox_iou(bbox1, bbox2)

def get_max_iou(box, box_list):
  if box_list == []:
    return 0.0,[]
  iou_arr = []
  for p_box in box_list:
    iou_arr.append(compute_iou_xywh(box, p_box))
  iou_arr = np.asarray(iou_arr)
  idx_max = np.argmax(iou_arr)

  return iou_arr[idx_max], box_list[idx_max]


In [None]:
# Run inference on dataset -> create results JSON file (if you want to use the COCO evaluation API)
# Alternatively -> create custom evaluation code to focus only on the relevant classes

model = tf.keras.models.load_model('set_yolov3-tiny.h5')
anchors = [[115, 73, 119,199, 242,238], [12, 18,  37, 49,  52,132]]
class_threshold = 0.4 # may want to lower to 0.1
input_h, input_w = 320, 320
nms_thresh = 0.25

# TODO - rerun but with 'motorcycle' instead of 'motorbike' - TYPO in experiencor REPO
labels = ["person", "bicycle", "car", "motorcycle", "aeroplane", "bus", "train", "truck",
	"boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
	"bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
	"backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
	"sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
	"tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
	"apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
	"chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
	"remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
	"book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]


relevant_cats = ['car', 'bicycle', 'motorcycle', 'bus', 'truck', 
                 'traffic light', 'stop sign', 'parking meter', 'fire hydrant']

model.summary()

In [14]:
# https://pro.arcgis.com/en/pro-app/latest/tool-reference/image-analyst/how-compute-accuracy-for-object-detection-works.htm#:~:text=Precision%E2%80%94%20Precision%20is%20the%20ratio,the%20precision%20is%2090%20percent.

# For debug, just do 2-10 images
# img_filenames = img_filenames[0:2]
# img_ids = img_ids[0:2]

# TODO - for now just computing precision/recall in a multi-class T/F sense
# Should we be evaluating the scores of the model?

# TODO - how we compute network performance MUST be mentioned in the final report

tp_dict = {cat:0 for cat in relevant_cats}
fp_dict = {cat:0 for cat in relevant_cats}
fn_dict = {cat:0 for cat in relevant_cats}
IOU_THRESH = 0.75 # Typical value used, try 0.25 (better) and 0.75 (worse)
# is meant to reward networks with better localization

for i in tqdm(range(len(img_filenames))):
  img_path = os.path.join(dataDir, 'images', dataType,  img_filenames[i])
  img = np.expand_dims(load_img_array(img_path), axis=0)
  img_id = img_ids[i]
  
  yhat = model.predict(img)
  boxes = []
  for i in range(len(yhat)):
    boxes += decode_netout(yhat[i][0], anchors[i], class_threshold, input_h, input_w)

  # correct the sizes of the bounding boxes for the shape of the image
  correct_yolo_boxes(boxes, 320, 320, 320, 320)

  # Suppress non-maximal (confidences) boxes with high overlap (IoU)
  do_nms(boxes, nms_thresh)

  # Get bounding boxes above score theshold
  v_boxes, v_labels, v_scores = get_boxes(boxes, labels, class_threshold)
  
  # TODO - only for demo & debug purposes
  # draw_boxes(img_path, v_boxes, v_labels, v_scores) 

  # Populate gt_dict and pred_dict 
  gt_dict = img_bboxes_dict[img_id]
  pred_dict = {cat:[] for cat in gt_dict.keys()}

  for i, v_box in enumerate(v_boxes):
    x = v_box.xmin
    y = v_box.ymin
    w = v_box.xmax - v_box.xmin
    h = v_box.ymax - v_box.ymin

    cat = v_labels[i]
    score = v_scores[i]
    if cat in relevant_cats:
      pred_dict[cat].append([x,y,w,h]) # .append(([x,y,w,h], score))

  # Compute tp and fn
  for cat in gt_dict.keys():
    for bbox in gt_dict[cat]:
      max_iou, p_box = get_max_iou(bbox, pred_dict[cat])
      if max_iou >= IOU_THRESH:
        tp_dict[cat] += 1
      else:
        fn_dict[cat] += 1

  # Compute fp
  for cat in relevant_cats:
    if pred_dict[cat] == []:
      continue
    for bbox in pred_dict[cat]:
      max_iou, g_box = get_max_iou(bbox, gt_dict[cat])
      if max_iou <= IOU_THRESH:
        fp_dict[cat] += 1

print("\n")
print(tp_dict)
print(fp_dict)
print(fn_dict)

# '''
# gt_dict:
# cat
#   bbox1
#   bbox2
#   bbox3
#   ...
# cat 
#   bbox1
#   bbox2
#   bbox3
#   ...


# pred_dict: for now, not including scores
# cat
#   bbox1, score1
#   bbox2, score2
#   bbox3, score3
#   ...
# cat 
#   bbox1, score1
#   bbox2, score2
#   bbox3, score3
#   ...
# '''

# precision = tp/(tp + fp)
# recall = tp/(tp + fn)
# f1 = (Precision × Recall)/[(Precision + Recall)/2]

eps = 1e-6 # avoid div by zero errors -> motorbike name mismatch

precision_dict = {cat: tp_dict[cat]/(eps + tp_dict[cat] + fp_dict[cat]) for cat in relevant_cats}
recall_dict = {cat: tp_dict[cat]/(eps + tp_dict[cat] + fn_dict[cat]) for cat in relevant_cats}
f1_dict = {cat: 2*precision_dict[cat]*recall_dict[cat]/(eps + precision_dict[cat] + recall_dict[cat]) for cat in relevant_cats}

json_filenames_to_save = ['precision.json', 'recall.json', 'f1.json', 'tp.json', 'fp.json', 'fn.json']
dicts = [precision_dict, recall_dict, f1_dict, tp_dict, fp_dict, fn_dict]

for i, filename in enumerate(json_filenames_to_save):
  with open(os.path.join('evaluation_results','IOU75', filename), 'w') as fp:
    json.dump(dicts[i], fp, sort_keys=True, indent=4)
    fp.close()



100%|██████████| 769/769 [06:54<00:00,  1.85it/s]



{'car': 14, 'bicycle': 3, 'motorcycle': 9, 'bus': 14, 'truck': 4, 'traffic light': 2, 'stop sign': 10, 'parking meter': 3, 'fire hydrant': 6}
{'car': 362, 'bicycle': 39, 'motorcycle': 63, 'bus': 87, 'truck': 50, 'traffic light': 41, 'stop sign': 27, 'parking meter': 9, 'fire hydrant': 33}
{'car': 476, 'bicycle': 150, 'motorcycle': 254, 'bus': 197, 'truck': 234, 'traffic light': 56, 'stop sign': 30, 'parking meter': 35, 'fire hydrant': 61}





In [None]:
# precision = tp/(tp + fp)
# recall = tp/(tp + fn)
# f1 = (Precision × Recall)/[(Precision + Recall)/2]

eps = 1e-6 # avoid div by zero errors -> motorbike name mismatch

precision_dict = {cat: tp_dict[cat]/(eps + tp_dict[cat] + fp_dict[cat]) for cat in relevant_cats}
recall_dict = {cat: tp_dict[cat]/(eps + tp_dict[cat] + fn_dict[cat]) for cat in relevant_cats}
f1_dict = {cat: 2*precision_dict[cat]*recall_dict[cat]/(eps + precision_dict[cat] + recall_dict[cat]) for cat in relevant_cats}

json_filenames_to_save = ['precision.json', 'recall.json', 'f1.json', 'tp.json', 'fp.json', 'fn.json']
dicts = [precision_dict, recall_dict, f1_dict, tp_dict, fp_dict, fn_dict]

for i, filename in enumerate(json_filenames_to_save):
  with open(os.path.join('evaluation_results','IOU75', filename), 'w') as fp:
    json.dump(dicts[i], fp, sort_keys=True, indent=4)
    fp.close()
