In [1]:
import numpy as np
import cv2
import time

In [33]:
# --- MobileNet-SSD configuration ---------------------------------------------
# Caffe network definition and trained weights, looked up in the working directory.
PROTOTXT = "MobileNetSSD_deploy.prototxt"
MODEL = "MobileNetSSD_deploy.caffemodel"

# Default input/output paths. Despite the VIDEO in the names, both point at
# still images; neither is referenced by the cells visible in this notebook.
INP_VIDEO_PATH = 'dog.jpg'
OUT_VIDEO_PATH = 'dog_detection.jpg'

# Any truthy value makes the model-loading cell select the CUDA backend/target.
GPU_SUPPORT = 0

# Class labels. SSD() indexes this list with the class id taken from each
# detection row, so the order must not change.
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus",
    "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike",
    "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]

# One random 3-component colour per class (unseeded, so it differs between runs).
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

In [34]:
# Load the Caffe MobileNet-SSD network from the files named in the config cell.
# The resulting module-level `net` is the object SSD() runs inference on.
net = cv2.dnn.readNetFromCaffe(PROTOTXT, MODEL)
if GPU_SUPPORT:
    # Opt-in only: request OpenCV's CUDA backend and CUDA target for this network.
    net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
    net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA)

In [35]:
#     ret, frame = ccaap.read()
#     if not ret:
#        break
def SSD(img_path, conf_threshold=0.5):
    """Run MobileNet-SSD on one image file and save an annotated copy.

    The image is read from ``img_path``, passed through the module-level
    ``net``, and every detection whose confidence exceeds ``conf_threshold``
    is drawn on the image as a box plus a "<class>: <confidence>%" label.
    The result is written once to ``img_path + '2.jpg'``, including when
    nothing was detected (the copy is then simply un-annotated).

    Args:
        img_path: Path of the image to analyse.
        conf_threshold: Detections with confidence at or below this value
            are ignored. Defaults to 0.5.

    Raises:
        FileNotFoundError: if ``cv2.imread`` cannot load ``img_path``.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: {}".format(img_path))
    h, w = frame.shape[:2]

    # 0.007843 (about 1/127.5) and 127.5 are the scale factor and mean handed
    # to blobFromImage; (300, 300) is the blob's spatial size.
    blob = cv2.dnn.blobFromImage(frame, 0.007843, (300, 300), 127.5)
    net.setInput(blob)
    detections = net.forward()

    # Box corners are multiplied by the image size to obtain pixel coordinates.
    to_pixels = np.array([w, h, w, h])
    for i in range(detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > conf_threshold:
            idx = int(detections[0, 0, i, 1])
            box = detections[0, 0, i, 3:7] * to_pixels

            # Plain Python ints / floats keep the drawing calls independent of
            # how a given OpenCV build converts NumPy scalars.
            startX, startY, endX, endY = (int(v) for v in box)
            color = COLORS[idx].tolist()

            label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
            cv2.rectangle(frame, (startX, startY), (endX, endY), color, 2)
            # Put the label above the box unless that would run off the top edge.
            y = startY - 15 if startY - 15 > 15 else startY + 15
            cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    # Write once, after all boxes are drawn, rather than once per detection.
    cv2.imwrite(img_path + '2' + '.jpg', frame)
        

In [36]:
# Time one full SSD() call (image read, inference, drawing and file write).
# perf_counter() is used because it is a monotonic, high-resolution clock;
# time.time() follows the wall clock and can jump while the cell is running.
st = time.perf_counter()
SSD('rupesj.jpeg')
et = time.perf_counter()
elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

Execution time: 0.23862528800964355 seconds


In [37]:


def get_output_layers(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        List of layer names, one per id reported by
        ``getUnconnectedOutLayers()``, in the same order.
    """
    layer_names = net.getLayerNames()
    # The ids are 1-based. Depending on the OpenCV version they arrive either
    # as an (N, 1) array or as a flat (N,) array; flattening accepts both,
    # whereas indexing each entry with [0] fails on the flat form.
    layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[int(layer_id) - 1] for layer_id in layer_ids]

    
    

In [38]:

def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    """Draw one labelled bounding box on ``img`` in place.

    The label is ``"<class name> <confidence to 2 decimals>"``, with the name
    looked up in the module-level ``classes`` list. The colour is chosen from
    the module-level call counter ``t``, which this function increments on
    every call, so the colour depends on how many boxes were drawn before.

    Args:
        img: Image to draw on.
        class_id: Index into ``classes``.
        confidence: Detection confidence shown in the label.
        x, y: Top-left corner of the box.
        x_plus_w, y_plus_h: Bottom-right corner of the box.
    """
    global t
    label = '{!s} {:.2f}'.format(classes[class_id], confidence)

    # Fixed colours for the first three calls; any later call reuses the
    # same colour as the second one.
    colour_by_call = {0: [255, 0, 0], 1: [0, 255, 0], 2: [0, 0, 255]}
    color = colour_by_call.get(t, [0, 255, 0])

    top_left = (x, y)
    bottom_right = (x_plus_w, y_plus_h)
    cv2.rectangle(img, top_left, bottom_right, color, 2)
    # The label is anchored 10 px up and left of the box corner.
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    t += 1



In [39]:

def detect_objects(image, image_name):
    """Detect objects in ``image`` with YOLOv3 and save an annotated copy.

    The image is resized to 400x300 and run through the network loaded from
    ``./yolov3.cfg`` / ``./yolov3.weights``. Detections that survive
    non-maximum suppression are drawn with ``draw_prediction``, reported on
    stdout, and the annotated image is written to ``image_name + '3.jpg'``.

    Side effects: rebinds the module-level ``classes`` list (read by
    ``draw_prediction``), prints the inference + NMS time and one report per
    kept detection, and writes the output image. The network and class file
    are reloaded on every call.

    Args:
        image: Image array to analyse (e.g. the result of ``cv2.imread``).
        image_name: Prefix of the output file name.

    Returns:
        ``(objects, coords, positions)``: three parallel lists holding, for
        each kept detection, the class label, the ``[x, y, w, h]`` box in
        resized-image pixels, and ``'left'``, ``'front'`` or ``'right'``
        depending on the horizontal centre of the box.

    Raises:
        ValueError: if ``image`` is None (for example a failed ``cv2.imread``).
    """
    global classes
    if image is None:
        raise ValueError("image is None; could not load " + repr(image_name))

    objects, positions, coords = [], [], []
    image = cv2.resize(image, (400, 300))

    Width = image.shape[1]
    Height = image.shape[0]
    scale = 0.00392

    with open('./yolov3.txt', 'r') as f:
        classes = [line.strip() for line in f]

    net = cv2.dnn.readNet('./yolov3.cfg', './yolov3.weights')
    blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)

    # Only the forward pass, box decoding and NMS are timed (not model loading).
    start = time.perf_counter()

    net.setInput(blob)
    outs = net.forward(get_output_layers(net))

    class_ids = []
    confidences = []
    boxes = []
    conf_threshold = 0.5
    nms_threshold = 0.4

    for out in outs:
        for detection in out:
            # Entries 0-3 are read below as the box centre and size (as
            # fractions of the image); entries 5+ are the per-class scores.
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > conf_threshold:
                center_x = int(detection[0] * Width)
                center_y = int(detection[1] * Height)
                w = int(detection[2] * Width)
                h = int(detection[3] * Height)
                x = center_x - w / 2
                y = center_y - h / 2
                class_ids.append(class_id)
                confidences.append(float(confidence))
                boxes.append([x, y, w, h])

    indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
    total_time = time.perf_counter() - start
    print(total_time)

    # Horizontal limits used to classify a box centre within the 400 px wide
    # resized image.
    left_limit, right_limit = 133, 266

    # Depending on the OpenCV version the kept indices arrive as an (N, 1)
    # array, a flat (N,) array, or an empty tuple; flattening accepts all
    # three, whereas indexing each entry with [0] fails on the flat form.
    for i in np.array(indices).flatten():
        i = int(i)
        x, y, w, h = boxes[i]
        draw_prediction(image, class_ids[i], confidences[i], round(x), round(y), round(x+w), round(y+h))
        print(classes[class_ids[i]] + ' detected with an accuracy of ' + str(confidences[i] * 100) + ' %')
        objects.append(classes[class_ids[i]])

        center_x = x + (w / 2)
        coords.append([x, y, w, h])
        if center_x < left_limit:
            print("left")
            print(center_x)
            positions.append('left')
        elif center_x > right_limit:
            positions.append('right')
            print(center_x)
            print("right")
        else:
            print("front")
            positions.append('front')

    cv2.imwrite(image_name + '3'+".jpg", image)
    return objects, coords, positions


In [40]:
# Shared state for the YOLO cells.
# `t` counts draw_prediction() calls and selects the box colour there.
t = 0
# Label list; detect_objects() rebinds it after reading ./yolov3.txt.
classes = None
# Module-level result holders, each starting as its own empty list.
objects = []
positions = []
coords = []



In [41]:
# Image analysed by the YOLO cell below.
image_name = 'rupesj.jpeg'
img = cv2.imread(image_name)
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail here, where the cause is obvious.
    raise FileNotFoundError("Could not read image: {}".format(image_name))

In [42]:

# Time one full detect_objects() call, including model loading and file output.
# perf_counter() is a monotonic, high-resolution clock; time.time() follows the
# wall clock and can jump while the cell is running.
st = time.perf_counter()
# Keep the results so later cells can use them; the return order is
# (objects, coords, positions).
objects, coords, positions = detect_objects(img, image_name)
et = time.perf_counter()

elapsed_time = et - st
print('Execution time:', elapsed_time, 'seconds')

0.6729645729064941
chair detected with an accuracy of 99.27465319633484 %
311.0
right
person detected with an accuracy of 97.5334882736206 %
left
85.0
chair detected with an accuracy of 81.11701011657715 %
front
person detected with an accuracy of 61.13331913948059 %
290.0
right
chair detected with an accuracy of 59.875690937042236 %
left
53.0
book detected with an accuracy of 56.91693425178528 %
front
book detected with an accuracy of 52.10632681846619 %
front
book detected with an accuracy of 51.14923715591431 %
front
Execution time: 1.0090017318725586 seconds
