<a href="https://colab.research.google.com/github/saktheeswaranswan/random-math-addition-data-for-rnn-generator/blob/main/yolo_beeper_yolo_mp3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import cv2
import numpy as np
from playsound import playsound

def get_outputs_names(net):
    """Return the names of the network's unconnected (output) layers.

    Args:
        net: An object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        A list with the name of each output layer, in the order reported
        by ``getUnconnectedOutLayers()``.
    """
    # Get the names of all the layers in the network
    layers_names = net.getLayerNames()

    # Depending on the OpenCV version, getUnconnectedOutLayers() yields either
    # an Nx1 array ([[200], [227]]) or a flat one ([200, 227]). Flattening
    # makes both shapes iterate as plain scalars.
    out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()

    # The reported indices are 1-based, hence the "- 1"
    return [layers_names[int(i) - 1] for i in out_layer_ids]

# Load the YOLOv3-tiny network from its weights and config files
net = cv2.dnn.readNet("yolov3-tiny.weights", "yolov3-tiny.cfg")

# Get the names of the output layers
output_layers_names = get_outputs_names(net)

# Load the class names, one label per line; a label's line index is the
# class ID used to look it up when drawing detections
with open("coco.names", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f]

# Initialize the video capture on device 0
cap = cv2.VideoCapture(0)

# Fail fast with a clear message instead of crashing later on a None frame
if not cap.isOpened():
    raise RuntimeError("Could not open video capture device 0")

# Minimum class score for a detection to be kept (also passed to NMS as its
# score threshold) and the overlap threshold used by non-maximum suppression.
CONF_THRESHOLD = 0.5
NMS_THRESHOLD = 0.4

# Capture -> detect -> annotate -> play sound -> display, until 'q' is pressed
# or the capture stops delivering frames. The try/finally guarantees the
# camera and windows are released even if an iteration raises.
try:
    while True:
        # Read a frame from the video capture; stop when no frame is available
        ok, frame = cap.read()
        if not ok or frame is None:
            break

        # Get the dimensions of the frame (works for 2-D and 3-D arrays)
        height, width = frame.shape[:2]

        # Create a blob from the frame: scale pixels by 1/255, resize to
        # 416x416, swap the R and B channels, no cropping
        blob = cv2.dnn.blobFromImage(
            frame, 1/255, (416, 416), (0, 0, 0), swapRB=True, crop=False
        )

        # Set the input to the network
        net.setInput(blob)

        # Run the forward pass
        outputs = net.forward(output_layers_names)

        # Initialize the bounding boxes, confidences, and class IDs
        boxes = []
        confidences = []
        class_ids = []

        # Loop over each detection
        for output in outputs:
            for detection in output:
                # Entries from index 5 onwards are the per-class scores
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]

                if confidence > CONF_THRESHOLD:
                    # Entries 0-3 are centre x, centre y, width and height,
                    # scaled here to pixels using the frame size
                    center_x = int(detection[0] * width)
                    center_y = int(detection[1] * height)
                    w = int(detection[2] * width)
                    h = int(detection[3] * height)

                    # Get the top-left corner of the bounding box
                    x = int(center_x - w / 2)
                    y = int(center_y - h / 2)

                    # Update the list of bounding boxes, confidences, and class IDs
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    class_ids.append(class_id)

        # Perform non-maximum suppression to eliminate overlapping bounding boxes
        indices = cv2.dnn.NMSBoxes(boxes, confidences, CONF_THRESHOLD, NMS_THRESHOLD)

        # Depending on the OpenCV version the result is an Nx1 array, a flat
        # array, or an empty tuple; normalise to a flat integer array.
        indices = np.asarray(indices, dtype=int).flatten()

        # Draw the bounding boxes on the frame
        for i in indices:
            x, y, w, h = boxes[i]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)

            # Get the label and confidence of the detection
            label = f"{class_names[class_ids[i]]}: {confidences[i]:.2f}"

            # Get the size of the label text
            text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]

            # Get the co-ordinates of the label text
            text_x = x + 5
            text_y = y - 5

            # Ensure the label text is not drawn outside the frame
            if text_x + text_size[0] >= width:
                text_x = x + w - text_size[0] - 5

            if text_y - text_size[1] < 0:
                text_y = y + h + text_size[1] + 5

            # Draw the label text on a filled background rectangle
            cv2.rectangle(
                frame,
                (text_x, text_y - text_size[1]),
                (text_x + text_size[0], text_y),
                (255, 0, 0),
                cv2.FILLED,
            )
            cv2.putText(
                frame, label, (text_x, text_y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1,
            )

        # Play an MP3 file corresponding to the number of objects detected
        # NOTE(review): playsound blocks by default, so the video presumably
        # pauses while the clip plays - confirm this is intended.
        num_objects = len(indices)
        if num_objects > 0:
            playsound(f"{num_objects}.mp3")

        # Show the output frame
        cv2.imshow("Image", frame)

        # Break the loop if the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture and close all windows
    cap.release()
    cv2.destroyAllWindows()

In [None]:
# In this code, playsound is used to play the MP3 file corresponding to the
# number of objects detected in each frame (counted after non-maximum
# suppression). The MP3 files should be named 1.mp3, 2.mp3, 3.mp3, etc. and
# are looked up relative to the current working directory. If there are 2
# objects detected, the code will play 2.mp3, and so on.