# Task 1: Object Detection Using the YOLOv3 Algorithm

# Author : Vinay Sai

# GRIP MAY 21 TASK 2





In [36]:
#importing required libraries

import cv2

import numpy as np

# Build the YOLOv3 network with OpenCV's dnn (deep neural network) module
# from the weights file and its matching configuration file.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Class labels, one per line of coco.names, with surrounding whitespace removed.
# Later cells look labels up by position (classes[class_id]), so this list must
# keep the same order as the file.
classes = []
with open("coco.names", "r") as names_file:
    classes = list(map(str.strip, names_file))


In [37]:
# Resolve the network's output layers; running a forward pass over these
# layers is what produces the object detections.
layer_names = net.getLayerNames()

# getUnconnectedOutLayers() yields nested indices ([[200], [227], ...]) on older
# OpenCV builds but a flat array on newer ones (reported from 4.5.4), so the
# former `layer_names[i[0] - 1]` lookup raises IndexError there. Asking the
# network for the names directly works regardless of that index shape.
output_layers = list(net.getUnconnectedOutLayersNames())

# One random colour (three values in [0, 255)) per class label, used when
# drawing boxes and labels.
colors = np.random.uniform(0, 255, size=(len(classes), 3))

In [38]:
# Load the image to run detection on; change image_path to use your own file.
image_path = "image.jpg"
img = cv2.imread(image_path)

# cv2.imread does not raise for a missing or unreadable file; it returns None,
# which would otherwise surface later as an obscure error inside cv2.resize.
if img is None:
    raise FileNotFoundError("Could not read image file: " + image_path)

# OPTIONAL: shrink the image. Passing None as the output size tells OpenCV to
# derive it from fx and fy, which are scale factors along the horizontal and
# vertical axes (0.2 = 20% of the original size), not absolute dimensions.
img = cv2.resize(img, None, fx=0.2, fy=0.2)

# Pixel dimensions of the (resized) image, needed to scale detections later.
height, width, channels = img.shape


In [39]:
# Detecting objects
# Convert the image into the blob format the network takes as input:
#   scalefactor 0.00392 (about 1/255) rescales pixel values,
#   size is the 416x416 network input resolution,
#   mean (0, 0, 0) subtracts nothing,
#   swapRB=True swaps the red and blue channels (OpenCV images are BGR),
#   crop=False resizes without cropping.
blob = cv2.dnn.blobFromImage(
    img,
    scalefactor=0.00392,
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

# Feed the blob to the network and collect the results of each output layer.
net.setInput(blob)
outs = net.forward(output_layers)


In [40]:
# Gather every detection whose best class score exceeds 0.5. The three lists
# stay index-aligned: boxes[k], confidences[k] and class_ids[k] describe the
# same candidate.
class_ids = []
confidences = []
boxes = []

for layer_output in outs:
    for detection in layer_output:
        # Values from index 5 onward are treated as the per-class scores;
        # the highest one decides the class and its confidence.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if not confidence > 0.5:
            continue

        # The first four values are scaled by the image size to obtain the
        # box centre and box size in pixels.
        center_x, center_y = int(detection[0] * width), int(detection[1] * height)
        box_w, box_h = int(detection[2] * width), int(detection[3] * height)

        # Shift from the centre point to the top-left corner of the box.
        left = int(center_x - box_w / 2)
        top = int(center_y - box_h / 2)

        boxes.append([left, top, box_w, box_h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

In [41]:
# NMS (non-maximum suppression): among overlapping candidate boxes keep only
# the strongest ones. 0.5 is the score threshold, 0.4 the NMS overlap threshold.
# `indexes` holds the positions in `boxes` that survive.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

font = cv2.FONT_HERSHEY_PLAIN

for i in range(len(boxes)):
    if i in indexes:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])

        # `colors` has one row per class, so it must be indexed by class id.
        # Indexing by the box position `i` gave the same class different
        # colours and raised IndexError once there were more candidate boxes
        # than classes.
        color = colors[class_ids[i]]

        # Draw the bounding box; the final argument 2 is the line thickness.
        cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)

        # Write the class label just inside the top-left corner of the box
        # (font scale 3, thickness 3).
        cv2.putText(img, label, (x, y + 30), font, 3, color, 3)

# Object Detection in Image

In [42]:
# Display the annotated picture in a window, wait for a key press, then
# close every OpenCV window.
window_title = "Image"
cv2.imshow(window_title, img)
cv2.waitKey(0)  # a delay of 0 waits indefinitely for a key press
cv2.destroyAllWindows()

In [43]:
# import the necessary packages

import numpy as np

import cv2

import math

# Load Yolo: rebuild the network and class list so the video and webcam cells
# below can be run without re-running the cells at the top of the notebook.
# NOTE: those cells still rely on `colors`, which is only created in the
# earlier image-detection section.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")

# Class labels, one per line of coco.names, in file order.
classes = []
with open("coco.names", "r") as f:
    classes = [line.strip() for line in f]

layer_names = net.getLayerNames()

# getUnconnectedOutLayers() returns nested indices on older OpenCV builds but a
# flat array on newer ones (reported from 4.5.4), which breaks `i[0]`. Fetching
# the output layer names directly avoids depending on that shape.
output_layers = list(net.getUnconnectedOutLayersNames())

# Object Detection in Video

In [46]:
# Run detection frame by frame on a video file and show the annotated frames.
# Press 'q' in the window to stop early.
video = cv2.VideoCapture('video.mp4')
font = cv2.FONT_HERSHEY_PLAIN

try:
    while True:
        ret, img = video.read()
        # read() reports failure through `ret` (end of file, unreadable or
        # missing video); `img` is then None and must not reach the network.
        if not ret:
            break

        # Use this frame's own size. Without it the boxes were scaled with the
        # stale height/width left over from the still-image cell.
        height, width, channels = img.shape

        # Detecting objects
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Candidate detections for this frame (index-aligned lists).
        class_ids = []
        confidences = []
        boxes = []

        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected: scale centre and size to pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Rectangle coordinates (top-left corner)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: keep only the strongest of overlapping boxes.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                # `colors` has one row per class, so index it by class id;
                # indexing by box position could run past the end of the table.
                color = colors[class_ids[i]]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                cv2.putText(img, label, (x, y + 30), font, 3, color, 2)

        cv2.imshow("running video", img)

        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails.
    video.release()
    cv2.destroyAllWindows()

# Object detection in Live Cam

In [37]:
# Run detection on frames from capture device 0 and show the annotated frames.
# Press 'q' in the window to stop.
video = cv2.VideoCapture(0)
font = cv2.FONT_HERSHEY_PLAIN

try:
    while True:
        ret, img = video.read()
        # read() reports failure through `ret` (device missing, busy or
        # disconnected); `img` is then None and `img.shape` would raise.
        if not ret:
            break

        height, width, channels = img.shape

        # Detecting objects
        blob = cv2.dnn.blobFromImage(img, 0.00392, (416, 416), (0, 0, 0), True, crop=False)
        net.setInput(blob)
        outs = net.forward(output_layers)

        # Candidate detections for this frame (index-aligned lists).
        class_ids = []
        confidences = []
        boxes = []

        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > 0.5:
                    # Object detected: scale centre and size to pixels.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Rectangle coordinates (top-left corner)
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-maximum suppression: keep only the strongest of overlapping boxes.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)

        for i in range(len(boxes)):
            if i in indexes:
                x, y, w, h = boxes[i]
                label = str(classes[class_ids[i]])
                # `colors` has one row per class, so index it by class id;
                # indexing by box position could run past the end of the table.
                color = colors[class_ids[i]]
                cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
                cv2.putText(img, label, (x, y + 30), font, 3, color, 2)

        cv2.imshow("live cam", img)

        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Always release the camera and close the window, even after an error,
    # so the device is not left locked by the kernel.
    video.release()
    cv2.destroyAllWindows()