In [1]:
import cv2
import numpy as np
from matplotlib import pyplot as plt

In [2]:
!wget https://pjreddie.com/media/files/yolov3.weights

--2023-03-20 13:45:01--  https://pjreddie.com/media/files/yolov3.weights
Resolving pjreddie.com (pjreddie.com)... 128.208.4.108
Connecting to pjreddie.com (pjreddie.com)|128.208.4.108|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 248007048 (237M) [application/octet-stream]
Saving to: ‘yolov3.weights’


2023-03-20 13:45:04 (104 MB/s) - ‘yolov3.weights’ saved [248007048/248007048]



In [3]:
class Object_Detector:
    """Detect objects in a single image with a pre-trained YOLOv3 network.

    Constructing an instance runs the whole pipeline up to (but not including)
    non-max suppression: the network is prepared, the image is fed through it
    and the raw detections are stored on the instance. Call `get_output()` to
    apply non-max suppression and get the annotated image.

    The network and the class-name list are loaded from disk once and then
    shared by every instance, so creating one detector per video frame does
    not re-read the weights file each time.

    Attributes set by the constructor:
        image          -- the input image; `get_output()` draws on it in place
        Width, Height  -- image.shape[1] and image.shape[0]
        scale          -- factor applied to pixel values when building the blob
        classes        -- class names, one per line of CLASSES_PATH
        net            -- the cv2.dnn network
        class_ids, confidences, boxes -- parallel lists, one entry per kept
                          detection; each box is [x, y, w, h] in image pixels
    """

    # Locations of the model files (Kaggle layout used by this notebook).
    CLASSES_PATH = '/kaggle/input/images-and-cfg/yolov3.txt'
    WEIGHTS_PATH = '/kaggle/working/yolov3.weights'
    CONFIG_PATH = '/kaggle/input/images-and-cfg/yolov3.cfg'

    # Spatial size the image is resized to when the input blob is built.
    INPUT_SIZE = (416, 416)

    # Default thresholds; can be overridden per instance.
    conf_threshold = 0.5   # minimum class score for a detection to be kept
    nms_threshold = 0.4    # threshold handed to cv2.dnn.NMSBoxes

    # Process-wide cache so the weights and class list are read only once.
    _shared_net = None
    _shared_classes = None

    def __init__(self, image, conf_threshold=None, nms_threshold=None):
        """Prepare the network for `image` and run inference immediately.

        Args:
            image: image array; shape[0] is used as height, shape[1] as width.
            conf_threshold: optional override for the class default (0.5).
            nms_threshold: optional override for the class default (0.4).
        """
        self.image = image
        self.Width = image.shape[1]
        self.Height = image.shape[0]
        self.scale = 0.00392  # ~= 1/255

        if conf_threshold is not None:
            self.conf_threshold = conf_threshold
        if nms_threshold is not None:
            self.nms_threshold = nms_threshold

        self.model_setup()
        self.run_inference()

    def model_setup(self):
        """Load (or reuse) the class names and network, then set the input blob."""
        cls = type(self)

        # Class names: one per line of the text file, read once and cached.
        if cls._shared_classes is None:
            with open(cls.CLASSES_PATH, 'r') as f:
                cls._shared_classes = [line.strip() for line in f]
        self.classes = cls._shared_classes

        # Pre-trained model and config file, read once and cached.
        if cls._shared_net is None:
            cls._shared_net = cv2.dnn.readNet(cls.WEIGHTS_PATH, cls.CONFIG_PATH)
        self.net = cls._shared_net

        # Input blob: pixel values scaled by self.scale, resized to INPUT_SIZE,
        # zero mean subtraction, fifth positional argument (swapRB) True, no crop.
        self._blob = cv2.dnn.blobFromImage(
            self.image, self.scale, self.INPUT_SIZE, (0, 0, 0), True, crop=False
        )

        # Set input blob for the network.
        self.net.setInput(self._blob)

    def get_output_layers(self):
        """Return the names of the network's unconnected output layers."""
        layer_names = self.net.getLayerNames()

        # The returned indices are 1-based. Depending on the OpenCV release
        # they arrive as a flat array or as an N x 1 array, so flatten first.
        out_ids = np.array(self.net.getUnconnectedOutLayers()).flatten()

        return [layer_names[int(i) - 1] for i in out_ids]

    def draw_bounding_box(self, img, class_id, confidence, x, y, x_plus_w, y_plus_h):
        """Draw a green rectangle and the class name on `img` (in place).

        `confidence` is accepted for interface compatibility but is not drawn.
        """
        label = str(self.classes[class_id])

        cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), (0, 255, 0), 3)

        cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    def run_inference(self):
        """Run the network and collect detections scoring above conf_threshold.

        Resets and fills `self.class_ids`, `self.confidences` and `self.boxes`.
        """
        # The network object is shared between instances, so make sure it is
        # fed this instance's blob before the forward pass.
        blob = getattr(self, '_blob', None)
        if blob is not None:
            self.net.setInput(blob)

        # Run inference and gather predictions from the output layers.
        outs = self.net.forward(self.get_output_layers())

        self.class_ids = []
        self.confidences = []
        self.boxes = []

        # Each detection row is read as: [0..3] = box centre x, centre y,
        # width, height as fractions of the image size; [5:] = per-class
        # scores. Element 4 is not used here.
        for out in outs:
            for detection in out:
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > self.conf_threshold:
                    center_x = int(detection[0] * self.Width)
                    center_y = int(detection[1] * self.Height)
                    w = int(detection[2] * self.Width)
                    h = int(detection[3] * self.Height)
                    # Convert the centre to the top-left corner.
                    x = center_x - w / 2
                    y = center_y - h / 2
                    self.class_ids.append(class_id)
                    self.confidences.append(float(confidence))
                    self.boxes.append([x, y, w, h])

    def get_output(self):
        """Apply non-max suppression, draw the surviving boxes, return the image.

        The boxes are drawn directly on `self.image`, which is also the
        return value.
        """
        indices = cv2.dnn.NMSBoxes(self.boxes, self.confidences, self.conf_threshold, self.nms_threshold)

        # NMSBoxes may return a flat array, an N x 1 array, or an empty tuple
        # depending on the OpenCV release and input; flatten handles them all.
        for i in np.array(indices).flatten():
            i = int(i)
            x, y, w, h = self.boxes[i]

            self.draw_bounding_box(
                self.image, self.class_ids[i], self.confidences[i],
                round(x), round(y), round(x + w), round(y + h),
            )

        return self.image


In [4]:
# Run the detector on every frame of the input video and collect the
# annotated frames in img_array.
SNAPSHOT_FRAMES = {12, 200, 400}  # frame indices that are also saved as JPEGs
MAX_FRAMES = None                 # set to a small int to process only the first N frames

vidcap = cv2.VideoCapture('/kaggle/input/car-video/video.mp4')
if not vidcap.isOpened():
    raise IOError("could not open video: /kaggle/input/car-video/video.mp4")

success = True
count = 0
img_array = []
try:
    while MAX_FRAMES is None or count < MAX_FRAMES:
        success, image = vidcap.read()
        if not success or image is None:
            # End of the stream (or a read failure): stop.
            break
        OD = Object_Detector(image)
        img_array.append(OD.get_output())
        if count in SNAPSHOT_FRAMES:
            # Save a snapshot but keep going; stopping here would make the
            # later snapshot indices unreachable and truncate the video.
            cv2.imwrite("frame%d.jpg" % count, img_array[-1])     # save frame as JPEG file
        count += 1
finally:
    # Always free the capture handle, even if detection raises.
    vidcap.release()

In [5]:
# Write the annotated frames to an AVI file at 15 frames per second.
if not img_array:
    raise ValueError("img_array is empty: no frames were processed, nothing to write")

# All frames are written at the size of the first one.
height, width, layers = img_array[0].shape
size = (width,height)

out = cv2.VideoWriter('project_YOLO.avi',cv2.VideoWriter_fourcc(*'DIVX'), 15, size)
if not out.isOpened():
    raise IOError("could not open project_YOLO.avi for writing")

try:
    for frame in img_array:
        out.write(frame)
finally:
    # Release even if a write fails so the file is finalised and closed.
    out.release()