# In [None]:
import tkinter as tk
from tkinter import filedialog
from PIL import Image, ImageTk
import cv2
import numpy as np

# Build the YOLOv3 detector from the weight and config files under cfg/.
net = cv2.dnn.readNet('cfg/yolov3.weights', 'cfg/yolov3.cfg')

# Class names, one per line; detect_objects indexes this list by class id.
classes = []
with open('cfg/coco.names', 'r') as names_file:
    classes = names_file.read().splitlines()

# Main application window.
root = tk.Tk()
root.title("Video Object Detection")
root.geometry("1000x600")
root.configure(bg="white")

# Shared state: path of the chosen video and its OpenCV capture.
# `cap` stays None until select_video assigns a capture.
video_file, cap = "", None

def select_video():
    """Ask the user for a video file and start previewing it.

    Updates the module-level ``video_file`` and ``cap``. If the dialog is
    cancelled, the current video (if any) is left untouched. A previously
    opened capture is released before it is replaced.
    """
    global video_file, cap

    # Tk expects multiple patterns to be space-separated (or a tuple); the
    # "*.mp4;*.avi" form is only understood by the Windows native dialog.
    chosen = filedialog.askopenfilename(
        filetypes=[("Video files", "*.mp4 *.avi")]
    )
    if not chosen:
        # Dialog cancelled: askopenfilename returns an empty string.
        return

    # play_video keeps rescheduling itself for as long as the capture is
    # open, so an open capture means a preview loop is still pending.
    # Evaluated after the dialog returns, since the preview may have
    # finished in the meantime.
    preview_running = cap is not None and cap.isOpened()

    if cap is not None:
        cap.release()

    video_file = chosen
    cap = cv2.VideoCapture(video_file)

    # The pending loop reads the global `cap` and will pick up the new
    # capture on its next tick; starting a second loop would consume frames
    # twice as fast.
    if not preview_running:
        play_video()

def play_video():
    """Display the next frame of ``cap`` in ``label`` and reschedule itself.

    Does nothing if the capture is not open. When no further frame can be
    read, the capture is released and the loop stops.
    """
    if not cap.isOpened():
        return

    grabbed, bgr_frame = cap.read()
    if not grabbed:
        cap.release()
        return

    # OpenCV delivers BGR; PIL expects RGB.
    rgb_frame = cv2.cvtColor(bgr_frame, cv2.COLOR_BGR2RGB)
    photo = ImageTk.PhotoImage(Image.fromarray(rgb_frame))

    label.config(image=photo)
    # Keep a reference on the widget so the image is not garbage-collected.
    label.image = photo
    label.after(10, play_video)

def _collect_detections(layer_outputs, width, height, min_confidence=0.5):
    """Turn raw network output rows into pixel boxes, confidences and class ids.

    Each row is read as: [0:4] box centre x, centre y, width, height as
    fractions of the frame size, and [5:] one score per class.
    Rows whose best class score is not above ``min_confidence`` are dropped.

    Returns three parallel lists: ``[x, y, w, h]`` boxes (top-left corner,
    in pixels), float confidences, and integer class ids.
    """
    boxes, confidences, class_ids = [], [], []
    for output in layer_outputs:
        for detection in output:
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= min_confidence:
                continue

            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)

            # Convert centre-based box to top-left corner form.
            boxes.append([int(center_x - w / 2), int(center_y - h / 2), w, h])
            confidences.append(confidence)
            class_ids.append(class_id)
    return boxes, confidences, class_ids


def _draw_detections(frame, boxes, confidences, class_ids, kept):
    """Draw a rectangle and a "<class> <confidence>" caption for each kept box."""
    box_color = (255, 0, 0)
    text_color = (255, 255, 255)
    for i in kept:
        x, y, w, h = boxes[i]
        caption = f"{classes[class_ids[i]]} {round(confidences[i], 2)}"
        cv2.rectangle(frame, (x, y), (x + w, y + h), box_color, 2)
        cv2.putText(frame, caption, (x, y + 20),
                    cv2.FONT_HERSHEY_PLAIN, 2, text_color, 2)


def detect_objects():
    """Run YOLO detection on the remaining frames of ``cap`` and show them.

    Frames are read from the shared capture, annotated, and displayed in an
    OpenCV window titled "Object Detection" until the video ends or the user
    presses 'q'. The window is closed when the loop finishes.

    Does nothing if no video is open. This runs synchronously inside the Tk
    button callback, so the Tk window does not update until it returns.
    The capture itself is not released here.
    """
    if cap is None or not cap.isOpened():
        return

    # The output layer names depend only on the network, not on the frame.
    output_layers_names = net.getUnconnectedOutLayersNames()

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            height, width, _ = frame.shape

            blob = cv2.dnn.blobFromImage(
                frame, 1 / 255.0, (416, 416), swapRB=True, crop=False
            )
            net.setInput(blob)
            layer_outputs = net.forward(output_layers_names)

            boxes, confidences, class_ids = _collect_detections(
                layer_outputs, width, height
            )

            # NMSBoxes returns an empty tuple, an (N, 1) array or a flat
            # array depending on the OpenCV version; normalise to a flat
            # sequence of indices.
            kept = np.asarray(
                cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4), dtype=int
            ).reshape(-1)

            _draw_detections(frame, boxes, confidences, class_ids, kept)

            cv2.imshow("Object Detection", frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Close the display window, whether the video ended, the user
        # quit, or an error occurred.
        cv2.destroyAllWindows()

# Preview area: play_video puts the current frame into this label.
label = tk.Label(master=root)
label.pack()

# Control buttons, stacked below the preview.
select_button = tk.Button(master=root, command=select_video, text="Select Video")
detect_button = tk.Button(master=root, command=detect_objects, text="Detect Objects")
select_button.pack(pady=10)
detect_button.pack(pady=5)

# Hand control to Tk; returns only when the window is closed.
root.mainloop()
