# Katha D Mehta
## Task 1 - Object Detection
### GRIP Feb'21 - The Sparks Foundation

### Image

In [4]:
import cv2
import numpy as np

# Object detection in a single image using YOLOv3 through OpenCV's DNN module.
# Expects 'yolov3.weights', 'yolov3.cfg' and 'coco.names' in the working directory.

CONF_THRESHOLD = 0.5     # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4      # overlap threshold handed to non-max suppression
IMAGE_PATH = 'bus.jpeg'  # input any image file

# Create the network with OpenCV's Deep Neural Network module.
# NOTE: the config path must not carry trailing whitespace, otherwise the
# file is not found on filesystems that treat the space as part of the name.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# One class label per line; the line index is the class id predicted by the net.
with open('coco.names', 'r') as f:
    classes = f.read().splitlines()

img = cv2.imread(IMAGE_PATH)
if img is None:
    # cv2.imread signals failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH!r}")
height, width, _ = img.shape

# Create a 4-dimensional blob from the image: pixel values scaled by 1/255,
# resized to 416x416, no mean subtraction, R and B channels swapped, no crop.
blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)

# The blob becomes the input to the network.
net.setInput(blob)

# Fetch the names of the layers with unconnected outputs and run a forward
# pass through them to compute the network output.
output_layers_names = net.getUnconnectedOutLayersNames()
layerOutputs = net.forward(output_layers_names)

boxes = []
confidences = []
class_ids = []

# Extract bounding boxes, confidences and predicted classes.
# Outer loop: one entry per output layer; inner loop: one row per detection.
for output in layerOutputs:
    for detection in output:
        # Elements from index 5 onwards are the per-class scores.
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > CONF_THRESHOLD:
            # The first four values are scaled by the image size here, i.e.
            # they are treated as box centre and size relative to the image.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert the box centre to the upper-left corner for drawing.
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)

            boxes.append([x, y, w, h])
            confidences.append(float(confidence))
            class_ids.append(int(class_id))

# Remove redundant overlapping boxes with non-max suppression.
indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

font = cv2.FONT_HERSHEY_PLAIN
colors = np.random.uniform(0, 255, size=(len(boxes), 3))

# np.array(...).flatten() copes with an empty result (no detections) as well
# as with nested or flat index arrays, so the loop simply does not run when
# nothing was detected instead of raising on `.flatten()`.
for i in np.array(indexes).flatten():
    i = int(i)
    x, y, w, h = boxes[i]
    label = str(classes[class_ids[i]])
    confidence = str(round(confidences[i], 2))
    color = colors[i].tolist()  # plain Python floats for the drawing call
    cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
    cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

cv2.imshow('Image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()

### Video/Webcam

In [5]:
import cv2
import numpy as np

# Object detection on a video file (or webcam) using YOLOv3 through OpenCV's
# DNN module. Press Esc in the display window to stop.

CONF_THRESHOLD = 0.5          # minimum class score for a detection to be kept
NMS_THRESHOLD = 0.4           # overlap threshold handed to non-max suppression
VIDEO_SOURCE = 'traffic.mp4'  # for webcam footage, replace 'traffic.mp4' with 0

# NOTE: the config path must not carry trailing whitespace, otherwise the
# file is not found on filesystems that treat the space as part of the name.
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')

# One class label per line; the line index is the class id predicted by the net.
with open('coco.names', 'r') as f:
    classes = f.read().splitlines()

# These do not change between frames, so compute them once outside the loop.
output_layers_names = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN

cap = cv2.VideoCapture(VIDEO_SOURCE)
if not cap.isOpened():
    raise IOError(f"Could not open video source: {VIDEO_SOURCE!r}")

try:
    while True:
        ok, img = cap.read()
        if not ok or img is None:
            # End of the video (or a failed grab): stop instead of crashing
            # on `img.shape` with a None frame.
            break
        height, width, _ = img.shape

        blob = cv2.dnn.blobFromImage(img, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(output_layers_names)

        boxes = []
        confidences = []
        class_ids = []

        for output in layerOutputs:
            for detection in output:
                # Elements from index 5 onwards are the per-class scores.
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > CONF_THRESHOLD:
                    # Centre/size values are scaled by the frame size here.
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Convert the box centre to the upper-left corner.
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(int(class_id))

        # Remove redundant overlapping boxes with non-max suppression.
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)
        colors = np.random.uniform(0, 255, size=(len(boxes), 3))

        # np.array(...).flatten() yields nothing when there are no detections,
        # so the drawing loop is skipped for empty frames.
        for i in np.array(indexes).flatten():
            i = int(i)
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            color = colors[i].tolist()  # plain Python floats for the drawing call
            cv2.rectangle(img, (x, y), (x + w, y + h), color, 2)
            cv2.putText(img, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

        cv2.imshow('Video', img)
        # waitKey to break the while loop: 27 is the Esc key.
        key = cv2.waitKey(1) & 0xFF
        if key == 27:
            break
finally:
    # Always free the capture device and close windows, even if a frame
    # raised part-way through processing.
    cap.release()
    cv2.destroyAllWindows()