In [1]:
import numpy as np
import cv2
import os
import re
import openpyxl

In [2]:
# Global configuration shared by every cell below.

# Load the MobileNet-SSD Caffe model; both files are looked up relative to
# the current working directory.
net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy.caffemodel")
# Human-readable labels; process_image indexes this list with the class id
# reported by the network.
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbikes", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]

# Minimum detection confidence; passed to process_image as its threshold.
conf_threshold = 0.1
# Intersection-over-union threshold read by confusion_matrix: a detection
# matches a ground-truth box only when their IoU is strictly above it.
# (The spelling "treshold" is kept because other cells reference this name.)
treshold = 0.5

# Paths to the input data and to the output folder.
path_to_ground_truth = "./TestGround/"
#path_to_images = "./TestImages/"
path_to_images = "./TestImagesSmall/"
path_to_SSD_GT_results = "./SSD_GT_Outputs/"

# Per-image scratch dictionaries mapping a class label to a flat list of
# [startX, startY, endX, endY, confidence, ...] groups; the main loop clears
# both after each image.
ssd_results_dictionary = {}
ground_truth_dictionary = {}

The function compares two rectangles by calculating the ratio of the area of their intersection to the area of their union (intersection over union, IoU).

In [3]:
def intersection_over_union(boxA, boxB):
    """Return the intersection-over-union (IoU) of two axis-aligned boxes.

    Args:
        boxA: Sequence whose first four items are ``x1, y1, x2, y2`` (the
            top-left and bottom-right corners of the first box).
        boxB: Second box, in the same layout.

    Returns:
        float: Intersection area divided by union area. ``0.0`` is returned
        when the boxes do not overlap, including boxes that only touch
        along an edge or at a corner.
    """
    # determine the (x, y)-coordinates of the intersection rectangle
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp each side at zero *before* multiplying: for disjoint boxes both
    # differences can be negative and their raw product would be positive.
    interWidth = max(xB - xA, 0)
    interHeight = max(yB - yA, 0)
    interArea = interWidth * interHeight
    if interArea == 0:
        # Same numeric type as the overlapping case below.
        return 0.0

    # areas of the two input rectangles (abs tolerates swapped corners)
    boxAArea = abs((boxA[2] - boxA[0]) * (boxA[3] - boxA[1]))
    boxBArea = abs((boxB[2] - boxB[0]) * (boxB[3] - boxB[1]))

    # union = sum of both areas minus the part counted twice
    return interArea / float(boxAArea + boxBArea - interArea)

The next function compares, object by object, the bounding boxes taken from the Ground Truth with the bounding boxes returned by the Single Shot Detector. The comparison is made using the "intersection_over_union" function. The matches are stored in a dictionary, which is then used to compute the final counts of true positives, false positives and false negatives.

In [4]:
def confusion_matrix(GTdictionary, SSDictionary, category):
    """Count true positives, false positives and false negatives for one class.

    Both dictionaries map a class label to a flat list in which every object
    occupies five consecutive items: four box coordinates followed by a
    confidence value (only the coordinates are used here).

    Args:
        GTdictionary: Ground-truth objects; must contain ``category``.
        SSDictionary: Detector objects; may lack ``category``.
        category: Class label to evaluate.

    Returns:
        tuple: ``(TP, FP, FN)`` as integers.
    """
    min_iou = treshold
    gt_values = GTdictionary[category]
    gt_count = int(len(gt_values) / 5)

    # The detector reported nothing for this class: every ground-truth
    # object counts as missed.
    if category not in SSDictionary:
        return 0, 0, gt_count

    ssd_values = SSDictionary[category]
    ssd_count = int(len(ssd_values) / 5)

    # For each ground-truth object remember the detection with the highest
    # IoU, provided that IoU is strictly above the threshold. Ground-truth
    # objects without such a detection get no entry.
    best_match = {}
    for gt_offset in range(0, len(gt_values), 5):
        gt_box = gt_values[gt_offset:gt_offset + 4]
        best_iou = min_iou
        for ssd_offset in range(0, len(ssd_values), 5):
            overlap = intersection_over_union(gt_box, ssd_values[ssd_offset:ssd_offset + 4])
            if overlap > best_iou:
                best_iou = overlap
                best_match[gt_offset] = ssd_offset

    # A detection claimed by several ground-truth objects is counted once,
    # so the number of distinct matched detections is the true-positive count.
    matched_detections = set(best_match.values())
    TP = len(matched_detections)
    # ground-truth objects left without their own detection
    FN = gt_count - TP
    # detections that matched no ground-truth object
    FP = ssd_count - len(matched_detections)
    return TP, FP, FN

The following function is a sanity check for the main function "confusion_matrix". It runs the function on a small hand-made example, and an assertion fails if the output does not match the expected values.

In [5]:
def test_confusion_matrix():
    """Sanity-check confusion_matrix on a hand-built example.

    Two ground-truth persons and two detections are supplied. Only the first
    ground-truth box overlaps a detection well enough, so with the
    module-level IoU threshold of 0.5 the expected result is one true
    positive, one false positive and one false negative.
    """
    #GIVEN
    GroundTruthDictionary = {"person":[225, 46, 273, 192,-1,1,12,100,150,-1]}
    SingleShotDetectorDictionary = {"person":[235, 56, 273, 181,0.5,281, 42, 328, 195, 0.5]}
    #WHEN
    TruePositive,FalsePositive,FalseNegative = confusion_matrix(GroundTruthDictionary, SingleShotDetectorDictionary, "person")

    #THEN
    # Each message names the value it checks so a failure points at the
    # right counter.
    assert TruePositive == 1, "TruePositive must be 1"
    assert FalsePositive == 1, "FalsePositive must be 1"
    assert FalseNegative == 1, "FalseNegative must be 1"

test_confusion_matrix()

The next function uses the SSD model to recognise objects in the image and saves the class of each object, its coordinates and its confidence in the dictionary.

In [6]:
#analyse image with SSD model and save results to dictionary
def process_image(image,dictionary,image_size,threshold):
    """Run the SSD network on one image and collect its detections.

    Args:
        image: File name of the image; it is appended to the module-level
            ``path_to_images``.
        dictionary: Mapping updated in place. For every accepted detection
            the values ``startX, startY, endX, endY, confidence`` are
            appended to the flat list stored under the detected class label.
        image_size: Two-element size handed to ``cv2.resize`` and
            ``cv2.dnn.blobFromImage`` as the network input size.
        threshold: A detection is kept only when its confidence is strictly
            greater than this value.

    Raises:
        OSError: If the image cannot be read.
    """
    image_path = path_to_images + image
    image_in_work = cv2.imread(image_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, which would otherwise surface as an opaque
    # AttributeError on .shape below.
    if image_in_work is None:
        raise OSError("Could not read image: {}".format(image_path))

    #get the image sizes
    (h, w) = image_in_work.shape[:2]

    #generate blob to work with SSD
    # scale 0.007843 (~1/127.5) with mean 127.5 maps pixel values to about [-1, 1]
    net_size = (image_size[0], image_size[1])
    blob = cv2.dnn.blobFromImage(cv2.resize(image_in_work, net_size), 0.007843, net_size, 127.5)
    net.setInput(blob)
    detections = net.forward()

    for i in range(detections.shape[2]):
        #get the confidence
        confidence = detections[0, 0, i, 2]

        if confidence > threshold:
            #get object class
            label = CLASSES[int(detections[0, 0, i, 1])]
            # box coordinates are relative to the image size; scale to pixels
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # Store built-in int/float values rather than NumPy scalars so
            # the lists behave like the ground-truth lists downstream.
            dictionary.setdefault(label, []).extend(
                [int(startX), int(startY), int(endX), int(endY), float(confidence)]
            )


The function opens a file of a certain format with Ground Truth information, reads the coordinates of the objects and writes to the dictionary.

In [7]:
def get_coordinates_from_ground_truth(file, dictionary):
    """Parse a ground-truth annotation file into ``dictionary``.

    Lines starting with ``Original label for object`` set the current class
    label (the last whitespace-separated token with double quotes removed).
    Lines starting with ``Bounding box for object`` contribute one box whose
    coordinates are read from tokens 12, 13, 15 and 16 of the line.

    Args:
        file: Path of the annotation text file.
        dictionary: Mapping updated in place. For every box the values
            ``x1, y1, x2, y2, -1`` are appended to the flat list stored under
            the current class label; ``-1`` is a placeholder confidence that
            keeps the layout identical to the detector results.

    Raises:
        ValueError: If a bounding-box line appears before any label line.
    """
    category_type = None
    # The context manager closes the file even when parsing raises.
    with open(file, "r") as groundFile:
        for line in groundFile:
            if line.startswith("Original label for object"):
                #get type of object from txt file
                category_type = re.sub('"','', line.split()[-1])
            if line.startswith("Bounding box for object"):
                if category_type is None:
                    raise ValueError(
                        "Bounding box found before any object label in {}".format(file)
                    )
                separated = line.split()
                #get Top Left, Right Bottom coordinates from file
                # keep only the digits: tokens look like "(160," or "431)"
                coordinates = [int(re.sub('[^0-9]','', separated[position]))
                               for position in (12, 13, 15, 16)]
                #as fifth element add a degenerate confidence
                coordinates.append(-1)
                #save coordinates in the right dictionary element
                dictionary.setdefault(category_type, []).extend(coordinates)

In [8]:
def write_frame_to_picture(image,source_path,save_path, dictionary,name_modificator):
    """Draw every box from ``dictionary`` on an image and save the result.

    Args:
        image: File name of the image; appended to ``source_path`` for
            reading and used as the base of the output name.
        source_path: Folder prefix the image is read from.
        save_path: Folder prefix the annotated copy is written to.
        dictionary: Mapping of class label to a flat list in which every
            object occupies five items: ``startX, startY, endX, endY,
            confidence``. A confidence of ``-1`` or lower means "no
            confidence" and only the label is drawn.
        name_modificator: Text inserted between the file's base name and its
            extension in the output name.

    Raises:
        OSError: If the source image cannot be read.
    """
    color = (0, 0, 0)
    image_processed = cv2.imread(source_path+image)
    # cv2.imread returns None for a missing or undecodable file
    if image_processed is None:
        raise OSError("Could not read image: {}".format(source_path+image))

    for x_key, values in dictionary.items():
        for inc in range(0, len(values), 5):
            startX, startY, endX, endY, confidence = values[inc:inc+5]
            if confidence > -1:
                label = "{}: {:.2f}%".format(x_key, confidence * 100)
            else:
                label = "{}".format(x_key)
            # built-in ints keep the drawing calls independent of whether the
            # coordinates arrived as NumPy scalars
            startX, startY, endX, endY = int(startX), int(startY), int(endX), int(endY)
            cv2.rectangle(image_processed, (startX, startY), (endX, endY), color, 2)
            # put the label above the box unless that would leave the image
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(image_processed, label, (startX, y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, color, 2)

    # splitext separates only the final extension, so names that contain
    # additional dots keep their full base name
    base_name, extension = os.path.splitext(image)
    cv2.imwrite(save_path+base_name+name_modificator+extension, image_processed)

In [9]:
# Position of the first cell to write in the xlsx sheet (1-based).
xlsx_row = 1
xlsx_col = 1

# Workbook that collects the evaluation results.
CM_results_workbook = openpyxl.Workbook()
worksheet = CM_results_workbook.active
worksheet.title = "Default_SSD_Evaluation"

# Header row: one title per column, starting at xlsx_col.
header_titles = ("File name", "Category", "TP", "FP", "FN")
for column_offset, title in enumerate(header_titles):
    worksheet.cell(xlsx_row, xlsx_col + column_offset, title)

# Results start on the row below the header.
xlsx_row += 1

In [10]:
# create folder to store images with detector frames.
if not os.path.exists(path_to_SSD_GT_results):
    os.makedirs(path_to_SSD_GT_results)

#get list of images to process, sorted so rows are written in a stable order
image_list = sorted(os.listdir(path_to_images))

#iterate over images
for image_file in image_list:
    #write image name to xlsx file
    worksheet.cell(xlsx_row, xlsx_col, image_file)

    process_image(image_file,ssd_results_dictionary,[300,300],conf_threshold)

    #from image name generating txt filename with ground truth information.
    #splitext removes only the final extension, so image names containing
    #additional dots still map to the matching .txt file
    image_ground_truth = os.path.splitext(image_file)[0]+".txt"
    get_coordinates_from_ground_truth(path_to_ground_truth+image_ground_truth,ground_truth_dictionary)

    #saving picture with GT frames to the new image (enable when needed)
    #write_frame_to_picture(image_file,path_to_images,path_to_SSD_GT_results, ground_truth_dictionary,"_GT")
    #saving picture with SSD frames to the new image
    write_frame_to_picture(image_file,path_to_images,path_to_SSD_GT_results, ssd_results_dictionary,"_SSD")

    #write confusion matrix results to the xlsx file, one row per GT category
    #NOTE(review): categories reported only by the SSD (absent from the ground
    #truth) get no row, so their false positives are not recorded - confirm
    #this is intended.
    for x_cat in ground_truth_dictionary:
        TruePositive,FalsePositive,FalseNegative = confusion_matrix(ground_truth_dictionary,ssd_results_dictionary,x_cat)
        #writing results to xlsx file
        worksheet.cell(xlsx_row, xlsx_col+1, x_cat)
        worksheet.cell(xlsx_row, xlsx_col+2, TruePositive)
        worksheet.cell(xlsx_row, xlsx_col+3, FalsePositive)
        worksheet.cell(xlsx_row, xlsx_col+4, FalseNegative)
        xlsx_row += 1

    #an image without ground-truth objects wrote no category row; advance
    #anyway so the next image does not overwrite this image's name cell
    if not ground_truth_dictionary:
        xlsx_row += 1

    #cleaning dictionaries in order to avoid previous image interference
    ground_truth_dictionary.clear()
    ssd_results_dictionary.clear()


#saving xlsx file
CM_results_workbook.save('New_table.xlsx')
print("finished!")

finished!
