# Setup and Imports

In [1]:
# Ignore the GPU: hide all CUDA devices from this process ('-1' matches no device).
# This is set in the first cell, before TensorFlow is imported further down.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

Import all necessary packages and modules.

In [2]:
import sys
# Prepend the directory two levels up to the module search path so that the
# `src` package imported below can be resolved.
sys.path.insert(0,'../..')  # add project root to sys.path

In [3]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each cell is executed, so edits to the project sources are picked up.
%load_ext autoreload
%autoreload 2

In [4]:
# Suppress TensorFlow deprecation warnings by only logging messages of level ERROR.
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [5]:
import cv2
import time
import numpy as np

In [6]:
# imports
from src.annotated_image import AnnotatedImage, Image
from src.model import TinyYoloV3, train_test_split
from src.model_utils import yolo_eval

Using TensorFlow backend.


In [7]:
# Weights file handed to TinyYoloV3(path=...) below.
model_path =  '../../models/custom_trained_weights/model_1308_custom_jittering.h5'
# Directory prefix that is prepended to each test image key when loading images.
image_dir = "../../data/"
# set these two variables for selecting the inference mode
# NOTE(review): neither flag is referenced in the visible part of this notebook.
using_nano_camera = True
detect_on_space_keystroke = True

In [8]:
from src.preprocessing import JSONUtil
from pathlib import Path 

# Read the annotations for all images from Master.json in the data directory.
# NOTE(review): data_path holds the same value as image_dir defined earlier.
data_path = '../../data/'
annotation_data = JSONUtil.read(Path(data_path, "Master.json"))

# Train-test split: test_split and split_seed are passed to train_test_split;
# only test_data is used by the evaluation cells below.
# (presumably test_split is the test fraction and split_seed makes the split
# reproducible — confirm against src.model.train_test_split)
test_split = 0.1
split_seed = 2345
train_val_data, test_data = train_test_split(annotation_data, test_split, split_seed)

# Object Detection

Instantiate the object detector, run it on the test images, and evaluate its precision and recall against the ground-truth annotations.

In [9]:
# Build the detector from the weights file configured in model_path.
model = TinyYoloV3(path=model_path)

In [10]:
def load_and_detect(model, image_path, rescale=False):
    """Load the image stored at `image_path` and run `model` on it.

    Args:
        model: detector exposing `detect(image, show=...)` and
            `config.input_size`.
        image_path: location of the image file, passed to `Image(path=...)`.
        rescale: when true, the image is resized with the values of
            `model.config.input_size` before detection.

    Returns:
        Whatever `model.detect` returns for the image (callers in this
        notebook read its `.annotations`).
    """
    loaded = Image(path=image_path)
    if rescale:
        target_size = model.config.input_size
        loaded.resize(*target_size)

    # show=False: only the detection result is wanted, no display
    return model.detect(loaded, show=False)

In [11]:
def find_best_match(true_box, predicted_boxes, confidence_threshold):
    """Find the predicted box that overlaps `true_box` the most.

    Only predictions with the same label as `true_box` and a score that is
    not below `confidence_threshold` are considered.

    Args:
        true_box: ground-truth box providing `.label` and `.iou(other)`.
        predicted_boxes: iterable of boxes providing `.label` and `.score`.
        confidence_threshold: predictions scoring below this are skipped.

    Returns:
        Tuple `(iou, index)` with the highest IoU and the position of the
        corresponding prediction in `predicted_boxes`; `(0, None)` when no
        prediction qualifies or none overlaps. On equal IoU the earliest
        prediction wins.
    """
    top_overlap, top_index = 0, None
    for position, candidate in enumerate(predicted_boxes):
        if candidate.score < confidence_threshold:
            continue
        overlap = true_box.iou(candidate)
        same_label = true_box.label == candidate.label
        # strict comparison: a later box must beat, not merely equal, the best so far
        if same_label and overlap > top_overlap:
            top_overlap, top_index = overlap, position
    return top_overlap, top_index
            

def match_boxes(prediction, ground_truth, confidence_threshold):
    """Assign at most one predicted box to every ground-truth box.

    Each ground-truth box first claims its best prediction via
    `find_best_match`. When several ground-truth boxes claim the same
    prediction, only the claimant with the highest IoU keeps it; on equal
    IoU the ground-truth box that appears first keeps it. Every other
    claimant is reset to "unmatched" and is not re-matched to a
    second-best prediction.

    Args:
        prediction: sequence of predicted boxes.
        ground_truth: sequence of ground-truth boxes.
        confidence_threshold: forwarded to `find_best_match`.

    Returns:
        List with one `(iou, predicted_index)` tuple per ground-truth box,
        in ground-truth order; unmatched boxes are `(0, None)`.
    """
    # find best match for each ground truth box
    matches = [find_best_match(true_box, prediction, confidence_threshold)
               for true_box in ground_truth]

    # group the ground truth boxes by the predicted box they claimed
    claims = {}
    for true_id, (iou, predicted_id) in enumerate(matches):
        if predicted_id is not None:
            claims.setdefault(predicted_id, []).append((iou, true_id))

    # make sure no predicted box is assigned to multiple ground truth boxes
    for claimants in claims.values():
        if len(claimants) < 2:
            continue
        # max() returns the first maximal element, so exactly one claimant
        # survives even when several share the same IoU
        _, winner_id = max(claimants, key=lambda claim: claim[0])
        for _, true_id in claimants:
            if true_id != winner_id:
                matches[true_id] = 0, None

    return matches

In [12]:
def evaluate_image(prediction, ground_truth, confidence_threshold, iou_threshold=0.5):
    """Count correct, predicted and ground-truth boxes for one image.

    Args:
        prediction: predicted boxes providing `.score`.
        ground_truth: ground-truth boxes of the same image.
        confidence_threshold: predictions scoring below this are ignored.
        iou_threshold: a matched ground-truth box counts as correctly
            predicted only if its IoU is strictly greater than this.

    Returns:
        Tuple `(correctly_predicted_boxes, total_predicted_boxes,
        total_ground_truth_boxes)`.
    """
    # Use the same cut-off as find_best_match, which only skips boxes with
    # score < confidence_threshold. Counting with a strict '>' would leave a
    # box scoring exactly the threshold out of the total even though it can
    # still be matched as a correct prediction.
    total_predicted_boxes = sum(1 for box in prediction if box.score >= confidence_threshold)
    total_ground_truth_boxes = len(ground_truth)

    matched_boxes = match_boxes(prediction, ground_truth, confidence_threshold)
    correctly_predicted_boxes = sum(1 for iou, _ in matched_boxes if iou > iou_threshold)

    return correctly_predicted_boxes, total_predicted_boxes, total_ground_truth_boxes

In [13]:
def precision(correctly_predicted_boxes, total_predicted_boxes):
    """Return the fraction of predicted boxes that are correct.

    Yields 0.0 when there are no predicted boxes at all.
    """
    if total_predicted_boxes != 0:
        return correctly_predicted_boxes / total_predicted_boxes
    return 0.0

def recall(correctly_predicted_boxes, total_ground_truth_boxes):
    """Return the fraction of ground-truth boxes that were correctly predicted.

    Yields 0.0 when there are no ground-truth boxes at all.
    """
    if total_ground_truth_boxes != 0:
        return correctly_predicted_boxes / total_ground_truth_boxes
    return 0.0

In [14]:
def overall_evaluation(predictions, ground_truths, confidence_threshold, class_name=None,
                       iou_threshold=0.5):
    """Accumulate detection counts over all images and report precision/recall.

    Args:
        predictions: mapping from image key to an object whose `.annotations`
            holds the predicted boxes.
        ground_truths: mapping with the same keys whose `.annotations` holds
            the ground-truth boxes.
        confidence_threshold: forwarded to `evaluate_image`.
        class_name: restrict the evaluation to boxes with this label;
            `None` evaluates the boxes of all labels together.
        iou_threshold: forwarded to `evaluate_image`.

    Returns:
        The precision over all images. Recall is printed but not returned.
    """
    def of_requested_class(box):
        return box.label == class_name or class_name is None

    # running totals: [TP, TP + FP, TP + FN]
    totals = np.zeros(3, dtype=int)
    for image_key in predictions:
        predicted_boxes = list(filter(of_requested_class,
                                      predictions[image_key].annotations))
        true_boxes = list(filter(of_requested_class,
                                 ground_truths[image_key].annotations))

        per_image = evaluate_image(predicted_boxes, true_boxes,
                                   confidence_threshold, iou_threshold)
        totals += np.asarray(per_image)

    true_positives, predicted_total, ground_truth_total = totals

    p = precision(true_positives, predicted_total)
    r = recall(true_positives, ground_truth_total)

    print(f"TP        = {true_positives}")
    print(f"TP + FP   = {predicted_total}")
    print(f"TP + FN   = {ground_truth_total}")
    print(f"Precision = {p}")
    print(f"Recall    = {r}")

    return p

In [15]:
# Set the detector's own score setting to 0; the confidence thresholds are
# applied later by the evaluation functions instead.
# (presumably this makes the model return every candidate box — confirm in TinyYoloV3)
model.config.score = 0
# Run the detector once per test image; keys of test_data are appended to image_dir.
predictions = {key: load_and_detect(model, image_dir + key) for key in test_data}
# Matching ground truth per test image, built from the same annotation dictionary.
ground_truths = {key: AnnotatedImage(image_path=image_dir + key, annotation_dict=test_data) 
                for key in test_data}

In [16]:
# One precision value per confidence threshold, collected separately per class.
precisions_paprika, precisions_kiwi = [], []
# Thresholds 0.0, 0.1, ..., 1.0
confidences = [step / 10 for step in range(11)]

for confidence_threshold in confidences:
    print(f"confidence_threshold: {confidence_threshold}")

    print("Paprika:")
    p_paprika = overall_evaluation(predictions, ground_truths, confidence_threshold,
                                   class_name="Paprika", iou_threshold=0.5)
    precisions_paprika.append(p_paprika)

    print("Kiwi:")
    p_kiwi = overall_evaluation(predictions, ground_truths, confidence_threshold,
                                class_name="Kiwi", iou_threshold=0.5)
    precisions_kiwi.append(p_kiwi)

confidence_threshold: 0.0
Paprika:
TP        = 140
TP + FP   = 2580
TP + FN   = 148
Precision = 0.05426356589147287
Recall    = 0.9459459459459459
Kiwi:
TP        = 103
TP + FP   = 2580
TP + FN   = 108
Precision = 0.03992248062015504
Recall    = 0.9537037037037037
confidence_threshold: 0.1
Paprika:
TP        = 134
TP + FP   = 199
TP + FN   = 148
Precision = 0.6733668341708543
Recall    = 0.9054054054054054
Kiwi:
TP        = 90
TP + FP   = 151
TP + FN   = 108
Precision = 0.5960264900662252
Recall    = 0.8333333333333334
confidence_threshold: 0.2
Paprika:
TP        = 126
TP + FP   = 161
TP + FN   = 148
Precision = 0.782608695652174
Recall    = 0.8513513513513513
Kiwi:
TP        = 80
TP + FP   = 121
TP + FN   = 108
Precision = 0.6611570247933884
Recall    = 0.7407407407407407
confidence_threshold: 0.3
Paprika:
TP        = 113
TP + FP   = 139
TP + FN   = 148
Precision = 0.8129496402877698
Recall    = 0.7635135135135135
Kiwi:
TP        = 73
TP + FP   = 100
TP + FN   = 108
Precision = 0.73
R

In [17]:
# Same threshold sweep with the boxes of all labels evaluated together
# (class_name=None disables the label filter in overall_evaluation).
for confidence_threshold in confidences:
    print(f"confidence_threshold: {confidence_threshold}")
    overall_evaluation(predictions, ground_truths, confidence_threshold,
                       class_name=None, iou_threshold=0.5)

confidence_threshold: 0.0
TP        = 243
TP + FP   = 5160
TP + FN   = 256
Precision = 0.04709302325581395
Recall    = 0.94921875
confidence_threshold: 0.1
TP        = 224
TP + FP   = 350
TP + FN   = 256
Precision = 0.64
Recall    = 0.875
confidence_threshold: 0.2
TP        = 206
TP + FP   = 282
TP + FN   = 256
Precision = 0.7304964539007093
Recall    = 0.8046875
confidence_threshold: 0.3
TP        = 186
TP + FP   = 239
TP + FN   = 256
Precision = 0.7782426778242678
Recall    = 0.7265625
confidence_threshold: 0.4
TP        = 164
TP + FP   = 203
TP + FN   = 256
Precision = 0.8078817733990148
Recall    = 0.640625
confidence_threshold: 0.5
TP        = 147
TP + FP   = 179
TP + FN   = 256
Precision = 0.8212290502793296
Recall    = 0.57421875
confidence_threshold: 0.6
TP        = 126
TP + FP   = 153
TP + FN   = 256
Precision = 0.8235294117647058
Recall    = 0.4921875
confidence_threshold: 0.7
TP        = 106
TP + FP   = 124
TP + FN   = 256
Precision = 0.8548387096774194
Recall    = 0.4140625

In [17]:
# Per-class "average precision": the plain mean of the precision values that
# were collected for each confidence threshold.
# NOTE(review): this averages over confidence thresholds rather than over
# recall levels, so it is not the usual area-under-the-precision-recall-curve
# AP — confirm that this definition is intended before comparing with
# published numbers.
AP_paprika = np.mean(precisions_paprika)
AP_kiwi = np.mean(precisions_kiwi)
print("Average Precision:")
print(f"- Paprika: {AP_paprika}")
print(f"- Kiwi:    {AP_kiwi}")

Average Precision:
- Paprika: 0.6793552380589076
- Kiwi:    0.6364245076563104


In [18]:
# Mean of the two per-class average-precision values computed above.
mAP = np.mean([AP_paprika, AP_kiwi])
print(f"mAP: {mAP}")

mAP: 0.657889872857609
