In [17]:
import pandas as pd
import xml.etree.ElementTree as ET
import cv2
import os
import cv2
import numpy as np


In [68]:
def getPoints(path):
    tree = ET.parse(path)
    root = tree.getroot()

    points = []
    for element in root.findall("./object/bndbox"):

        xmin = int(element.find('xmin').text)
        ymin = int(element.find('ymin').text)

        xmax = int(element.find('xmax').text)
        ymax = int(element.find('ymax').text)

        points.append([xmin,ymin,xmax,ymax])
    
    return points # [x1,y1,x2,y2]

def drawRect(image,points):
    for p in points:
        image = cv2.rectangle(image,(p[0],p[1]),(p[2],p[3]),(0, 0, 255),2)
    return image

In [69]:
path = "dogs/labelimg/"
dir_list = os.listdir(path)
 
for file in dir_list:
    
    if file.endswith('.xml'):
        points = getPoints(path+file)
        
        path_jpg = path + file.split(".xml")[0] + ".jpg"
        image = cv2.imread(path_jpg)
        image  = drawRect(image, points)
        
        cv2.imshow("",image)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        
 


In [70]:
# caminhios para os ficheiros de configuracao
MODEL_FILE = "config/frozen_inference_graph.pb"
CONFIG_FILE = "config/ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt"
CLASS_FILE = "config/object_detection_classes_coco.txt"

# valor de limiar miniar para considerar que as predicoes sao de fato objetos
CONFIDENCE_THRESHOLD = 0.5

# ler os nomes das classes
with open(CLASS_FILE, 'r') as f:
    class_names = f.read().split('\n') 
    
# gerar cores aleatoriamente para cada uma das classes
COLORS = np.random.uniform(0, 255, size=(len(class_names), 3))

# carregar o modelo (neste caso o SSD)
SSDmodel = cv2.dnn.readNet(model=MODEL_FILE, config=CONFIG_FILE, framework="TensorFlow")

In [74]:
path_images = "dogs/images/"
dir_list_images = os.listdir(path_images)

tp = 0
fp = 0 
    
for img_name in dir_list_images:
    count = 0
    img = cv2.imread(path_images+img_name)
    img_height, img_width, img_channels = img.shape
    
    # normalizar com blobFromImage - 300x300 serao as dimensoes das imagens enviadas 'a rede
    blob = cv2.dnn.blobFromImage(image=img, size=(300, 300), swapRB=True)
    SSDmodel.setInput(blob)
    output = SSDmodel.forward()
    
    for detection in output[0, 0, :, :]:

        # oter o indice de confianca na detecao
        confidence = detection[2]
        
        if confidence > CONFIDENCE_THRESHOLD:

            # obter a classe
            class_id = detection[1]
            class_name = class_names[int(class_id) - 1]
            color = COLORS[int(class_id)]
            
            if class_name == 'dog':            
                            
                # obter as coordenadas e dimensoes das bounding boxes, normalizadas para coordenadas da imagem
                bbox_x = detection[3] * img_width
                bbox_y = detection[4] * img_height
                bbox_width = detection[5] * img_width
                bbox_height = detection[6] * img_height
               
                # [x1,y1,x2,y2]
                box_SSDmodel = [bbox_x , bbox_y , bbox_width , bbox_height]
                
                path_xml = path + img_name.split(".jpg")[0] + ".xml"
                
                
                temp = getPoints(path_xml)
                print(temp)
                if len(temp) > 1:
                    box_labelImg =  temp[count]
                    count += 1
                else:
                     box_labelImg =  temp[0]
                
                
                iou = get_iou(box_labelImg, box_SSDmodel)
                print(iou)
                if iou < 0.5:
                    fp += 1

                # colocar retangulos e texto a marcar os objetos identificados
                cv2.rectangle(img, (int(bbox_x), int(bbox_y)), (int(bbox_width), int(bbox_height)), color, thickness=2)
                cv2.putText(img, class_name, (int(bbox_x), int(bbox_y - 5)), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

    cv2.imshow('output', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
print(fp)

[[50, 7, 277, 440]]
0.906594389886459
[[18, 25, 482, 500]]
0.953031547883198
[[40, 16, 372, 495]]
0.8571736248133314
[[42, 8, 333, 483]]
0.8642287484669343
[[32, 66, 484, 323]]
0.9415995429891003
[[67, 104, 128, 168]]
0.29761989415938206
[[95, 19, 207, 160]]
0.7496994336035187
[[269, 303, 730, 598]]
0.9434787556376647
[[123, 45, 210, 183], [42, 45, 125, 185]]
0.07813865536584842
[[123, 45, 210, 183], [42, 45, 125, 185]]
0.047544316499723294
[[23, 57, 129, 193], [131, 9, 234, 198]]
0.9062651876042656
[[23, 57, 129, 193], [131, 9, 234, 198]]
0.8092336397798787
3


In [23]:
total_dogs = 13
fp = 0
precision = tp/(tp+fp)
print("Precision:",precision)

Precision: 1.0


In [None]:
recall = tp / (tp+fn)

In [22]:
(tp+fp)

13

In [39]:
def calc_precision_recall(image_results):
    """Calculates precision and recall from the set of images
    Args:
        img_results (dict): dictionary formatted like:
            {
                'img_id1': {'true_pos': int, 'false_pos': int, 'false_neg': int},
                'img_id2': ...
                ...
            }
    Returns:
        tuple: of floats of (precision, recall)
    """
    true_positive=0
    false_positive=0
    false_negative=0
    for img_id, res in image_results.items():
        true_positive +=res['true_positive']
        false_positive += res['false_positive']
        false_negative += res['false_negative']
        try:
            precision = true_positive/(true_positive+ false_positive)
        except ZeroDivisionError:
            precision=0.0
        try:
            recall = true_positive/(true_positive + false_negative)
        except ZeroDivisionError:
            recall=0.0
    return (precision, recall)

In [51]:
def get_iou(a, b, epsilon=1e-5):
    """ Given two boxes `a` and `b` defined as a list of four numbers:
            [x1,y1,x2,y2]
        where:
            x1,y1 represent the upper left corner
            x2,y2 represent the lower right corner
        It returns the Intersect of Union score for these two boxes.

    Args:
        a:          (list of 4 numbers) [x1,y1,x2,y2]
        b:          (list of 4 numbers) [x1,y1,x2,y2]
        epsilon:    (float) Small value to prevent division by zero

    Returns:
        (float) The Intersect of Union score.
    """
    # COORDINATES OF THE INTERSECTION BOX
    x1 = max(a[0], b[0])
    y1 = max(a[1], b[1])
    x2 = min(a[2], b[2])
    y2 = min(a[3], b[3])
    # AREA OF OVERLAP - Area where the boxes intersect
    width = (x2 - x1)
    height = (y2 - y1)
    # handle case where there is NO overlap
    if (width<0) or (height <0):
        return 0.0
    area_overlap = width * height

    # COMBINED AREA
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    area_combined = area_a + area_b - area_overlap
    
    # RATIO OF AREA OF OVERLAP OVER COMBINED AREA
    iou = area_overlap / (area_combined+epsilon)
    return iou