#### Object detection using YOLOv3 on Video

In [1]:
# Import libraries
import cv2
import numpy as np

# Load YOLO
# Tunable detection settings, collected here so they are not repeated as
# magic numbers inside the loop.
CONF_THRESHOLD = 0.5      # minimum best-class score for a detection to be kept
NMS_THRESHOLD = 0.4       # overlap threshold handed to non-max suppression
INPUT_SIZE = (416, 416)   # spatial size the frame is resized to for the blob

net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')
with open('cfg/coco.names', 'r') as f:
    classes = f.read().splitlines()

# Loop-invariant values: computed once instead of once per frame.
output_layers_names = net.getUnconnectedOutLayersNames()
font = cv2.FONT_HERSHEY_PLAIN
# One colour per class. Building this once keeps a class's colour stable
# across frames (regenerating it per frame makes the boxes flicker).
colors = np.random.uniform(0, 255, size=(len(classes), 3))

# Load input video
video_file = 'vid/vid_1.mp4'
cap = cv2.VideoCapture(video_file)
if not cap.isOpened():
    # Without this check a bad path just ends the loop on the first read
    # and the cell appears to "do nothing".
    raise IOError(f"Could not open video file: {video_file}")

# Object Detection Loop
try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        height, width, _ = frame.shape

        # Preprocess input image
        blob = cv2.dnn.blobFromImage(frame, 1/255.0, INPUT_SIZE, swapRB=True, crop=False)
        net.setInput(blob)

        # Forward pass through the network
        layer_outputs = net.forward(output_layers_names)

        # Process detection results
        boxes = []
        confidences = []
        class_ids = []

        for output in layer_outputs:
            for detection in output:
                # Columns 0-3 are used as the box (centre x, centre y, width,
                # height) scaled by the frame size; columns 5+ as class scores.
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if confidence > CONF_THRESHOLD:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Convert centre-based box to top-left corner form.
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Non-max suppression to remove redundant overlapping boxes
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

        # Draw bounding boxes and labels.
        # np.array(...).flatten() copes with an empty result as well as a
        # flat or nested index array, so no separate emptiness check is needed.
        for i in np.array(indexes).flatten():
            i = int(i)
            x, y, w, h = boxes[i]
            label = str(classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            # Index the palette by class id, not by box index: the palette has
            # one row per class, while box indices can exceed that length.
            color = colors[class_ids[i]].tolist()
            cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
            cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

        # Display the output frame
        cv2.imshow("Object Detection", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close windows, even if a frame fails.
    cap.release()
    cv2.destroyAllWindows()


#### GUI Application

In [1]:
import tkinter as tk
from tkinter import filedialog
import cv2
import numpy as np

class ObjectDetectionApp:
    """Tkinter front end for playing a video and running YOLOv3 detection on it.

    The window offers four buttons: select a video file, play it, run object
    detection on it, and exit. Playback and detection are shown in OpenCV
    windows and run synchronously inside the button callback; press 'q' in
    the OpenCV window to stop early.
    """

    # Model files, relative to the current working directory.
    WEIGHTS_PATH = 'cfg/yolov3.weights'
    CONFIG_PATH = 'cfg/yolov3.cfg'
    NAMES_PATH = 'cfg/coco.names'

    CONF_THRESHOLD = 0.5      # minimum best-class score for a detection to be kept
    NMS_THRESHOLD = 0.4       # overlap threshold handed to non-max suppression
    INPUT_SIZE = (416, 416)   # spatial size the frame is resized to for the blob

    def __init__(self, root):
        """Build the widgets inside ``root`` (a ``tk.Tk`` window)."""
        self.root = root
        self.root.title("Object Detection App")
        self.root.geometry("400x300")

        # Create title label
        title_label = tk.Label(self.root, text="Object Detection App", font=("Helvetica", 16, "bold"), pady=10)
        title_label.pack()

        # Create buttons frame
        buttons_frame = tk.Frame(self.root)
        buttons_frame.pack(pady=10)

        # Create buttons
        self.select_button = tk.Button(buttons_frame, text="Select Video", command=self.select_video)
        self.select_button.grid(row=0, column=0, padx=10)

        self.play_button = tk.Button(buttons_frame, text="Play Video", command=self.play_video)
        self.play_button.grid(row=0, column=1, padx=10)

        self.detect_button = tk.Button(buttons_frame, text="Detect Objects", command=self.detect_objects)
        self.detect_button.grid(row=1, column=0, columnspan=2, pady=10)

        self.exit_button = tk.Button(self.root, text="Exit", command=self.root.quit)
        self.exit_button.pack()

        # Video capture object
        self.cap = None

        # Detection model state, loaded on first use and then reused so the
        # weights are not re-read from disk on every "Detect Objects" click.
        self._net = None
        self._classes = None
        self._colors = None
        self._output_layer_names = None

    def select_video(self):
        """Ask the user for an .mp4 file and open it as the current video."""
        from tkinter import messagebox

        # Open file dialog to select video file
        file_path = filedialog.askopenfilename(filetypes=[("Video files", "*.mp4")])
        if not file_path:
            return

        # Release any existing video capture object
        if self.cap is not None:
            self.cap.release()
            self.cap = None

        # Create new video capture object; keep it only if it actually opened,
        # otherwise Play/Detect would silently do nothing.
        capture = cv2.VideoCapture(file_path)
        if not capture.isOpened():
            capture.release()
            messagebox.showerror("Object Detection App", f"Could not open video:\n{file_path}")
            return
        self.cap = capture

    def play_video(self):
        """Play the selected video from the start in a window titled "Video"."""
        if self.cap is None:
            return
        self._rewind()
        try:
            while True:
                ret, frame = self.cap.read()
                if not ret:
                    break
                cv2.imshow("Video", frame)
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            cv2.destroyAllWindows()

    def detect_objects(self):
        """Run detection on the selected video from the start and show annotated frames."""
        if self.cap is None:
            return
        self._ensure_model()
        self._rewind()
        try:
            while True:
                ret, frame = self.cap.read()
                if not ret:
                    break

                height, width, _ = frame.shape
                blob = cv2.dnn.blobFromImage(frame, 1/255.0, self.INPUT_SIZE, swapRB=True, crop=False)
                self._net.setInput(blob)
                layer_outputs = self._net.forward(self._output_layer_names)

                boxes, confidences, class_ids = self._extract_detections(
                    layer_outputs, width, height, self.CONF_THRESHOLD
                )
                self._draw_detections(frame, boxes, confidences, class_ids)

                cv2.imshow("Object Detection", frame)
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break
        finally:
            cv2.destroyAllWindows()

    def _rewind(self):
        """Seek the capture back to the first frame.

        A previous Play/Detect run leaves the capture at its last read
        position (the end of the file after a full run), so without this a
        second click would show nothing.
        """
        self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)

    def _ensure_model(self):
        """Load the network, class names and per-class colours once."""
        if self._net is not None:
            return
        net = cv2.dnn.readNet(self.WEIGHTS_PATH, self.CONFIG_PATH)
        with open(self.NAMES_PATH, 'r') as f:
            classes = f.read().splitlines()
        # Assign only after everything loaded, so a failure part-way through
        # does not leave a half-initialised model behind.
        self._classes = classes
        self._colors = np.random.uniform(0, 255, size=(len(classes), 3))
        self._output_layer_names = net.getUnconnectedOutLayersNames()
        self._net = net

    @staticmethod
    def _extract_detections(layer_outputs, width, height, conf_threshold):
        """Turn raw network output rows into pixel boxes.

        Each row is read as: columns 0-3 = centre x, centre y, width, height
        (multiplied by the frame size here), columns 5+ = per-class scores.
        Rows whose best class score is not above ``conf_threshold`` are dropped.

        Returns:
            (boxes, confidences, class_ids) as three parallel lists, where each
            box is ``[x, y, w, h]`` with ``(x, y)`` the top-left corner.
        """
        boxes = []
        confidences = []
        class_ids = []
        for output in layer_outputs:
            for detection in output:
                scores = detection[5:]
                class_id = int(np.argmax(scores))
                confidence = scores[class_id]
                if confidence > conf_threshold:
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Convert centre-based box to top-left corner form.
                    x = int(center_x - w/2)
                    y = int(center_y - h/2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)
        return boxes, confidences, class_ids

    def _draw_detections(self, frame, boxes, confidences, class_ids):
        """Apply non-max suppression and draw the surviving boxes onto ``frame``."""
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, self.CONF_THRESHOLD, self.NMS_THRESHOLD)
        font = cv2.FONT_HERSHEY_PLAIN
        # np.array(...).flatten() copes with an empty result as well as a
        # flat or nested index array.
        for i in np.array(indexes).flatten():
            i = int(i)
            x, y, w, h = boxes[i]
            label = str(self._classes[class_ids[i]])
            confidence = str(round(confidences[i], 2))
            # Index the palette by class id, not by box index: the palette has
            # one row per class, while box indices can exceed that length.
            color = self._colors[class_ids[i]].tolist()
            cv2.rectangle(frame, (x, y), (x+w, y+h), color, 2)
            cv2.putText(frame, label + " " + confidence, (x, y + 20), font, 2, (255, 255, 255), 2)

if __name__ == "__main__":
    # Build the main window, attach the app, and run the Tk event loop.
    root = tk.Tk()
    app = ObjectDetectionApp(root)
    try:
        root.mainloop()
    finally:
        # The Exit button calls root.quit, which only stops mainloop(); the
        # window and the video capture would otherwise stay alive for as long
        # as the interpreter/kernel does.
        if app.cap is not None:
            app.cap.release()
        cv2.destroyAllWindows()
        try:
            root.destroy()
        except tk.TclError:
            # The window was already destroyed (closed via the title bar).
            pass


2024-04-24 11:24:26.227 Python[8909:363288] +[CATransaction synchronize] called within transaction
2024-04-24 11:24:26.387 Python[8909:363288] +[CATransaction synchronize] called within transaction
2024-04-24 11:24:26.396 Python[8909:363288] +[CATransaction synchronize] called within transaction


#### Tests

In [3]:
import cv2

# Print the version string of the imported cv2 module.
print(cv2.__version__)

4.9.0
