In [1]:
import cv2
import numpy as np

# Load the YOLO network from its weights/config pair, then read the class
# labels: one label per line of coco.names, surrounding whitespace removed.
net = cv2.dnn.readNet("yolov3.weights", "yolov3.cfg")
classes = []
with open("coco.names", "r") as names_file:
    classes = [raw_label.strip() for raw_label in names_file]

In [2]:
# Get output layer names
layer_names = net.getLayerNames()
# Ask the network for the names of its unconnected output layers directly.
# Deriving them by hand via `layer_names[i - 1]` over getUnconnectedOutLayers()
# depends on the OpenCV version: some builds return an (N, 1) array, so each
# `i` is a 1-element array and cannot be used as a list index.
output_layers = list(net.getUnconnectedOutLayersNames())

In [3]:
# Load image
image_path = "image.jpeg"
image = cv2.imread(image_path)
# cv2.imread reports failure by returning None instead of raising; without
# this check the problem only shows up later as an AttributeError on
# `image.shape`, far from the actual cause.
if image is None:
    raise OSError(f"Could not read image (missing or undecodable): {image_path}")

In [4]:
# Record the source image dimensions (used later to scale detections back to
# pixels), then build the network input blob from the image.
height, width, channels = image.shape
blob = cv2.dnn.blobFromImage(
    image,
    scalefactor=0.00392,  # roughly 1/255
    size=(416, 416),
    mean=(0, 0, 0),
    swapRB=True,
    crop=False,
)

In [5]:
# Echo the blob as this cell's output (the printed array is abbreviated).
blob

array([[[[0.92120004, 0.92120004, 0.92120004, ..., 0.30184   ,
          0.32144   , 0.36064   ],
         [0.91728   , 0.91728   , 0.91728   , ..., 0.38808   ,
          0.40376002, 0.41944   ],
         [0.91728   , 0.91728   , 0.91728   , ..., 0.47432   ,
          0.43904   , 0.43904   ],
         ...,
         [0.8428    , 0.85064   , 0.7056    , ..., 0.79576004,
          0.8232    , 0.882     ],
         [0.77616   , 0.78400004, 0.83888   , ..., 0.85456   ,
          0.8428    , 0.87416   ],
         [0.8232    , 0.81536   , 0.90944004, ..., 0.90944004,
          0.87024003, 0.83496   ]],

        [[0.99176   , 0.99176   , 0.99176   , ..., 0.36064   ,
          0.38416   , 0.42728   ],
         [0.98784   , 0.98784   , 0.98784   , ..., 0.44688   ,
          0.45864   , 0.48216   ],
         [0.98784   , 0.98784   , 0.98784   , ..., 0.5292    ,
          0.49392   , 0.49392   ],
         ...,
         [0.8624    , 0.86632   , 0.71736   , ..., 0.79576004,
          0.8232    , 0.8

In [6]:
# Forward pass through the network
# Set the preprocessed blob as the network input.
net.setInput(blob)
# Run the network for the layers listed in `output_layers`. The result is
# iterated output-by-output, then detection-by-detection, by the
# post-processing code that follows.
outs = net.forward(output_layers)

In [7]:
# Decode raw network outputs into parallel lists: one pixel-space box
# [x, y, w, h], one confidence, and one class id per accepted detection.
class_ids, confidences, boxes = [], [], []
for layer_output in outs:
    for detection in layer_output:
        # Everything from index 5 onward is treated as the per-class scores.
        class_scores = detection[5:]
        class_id = np.argmax(class_scores)
        confidence = class_scores[class_id]
        # Keep only detections whose best class score exceeds 0.5.
        if not confidence > 0.5:
            continue

        # The first four fields are scaled by the image size to get the box
        # centre and extent in pixels.
        center_x = int(detection[0] * width)
        center_y = int(detection[1] * height)
        w = int(detection[2] * width)
        h = int(detection[3] * height)

        # Convert centre-based coordinates to the top-left corner.
        x = int(center_x - w / 2)
        y = int(center_y - h / 2)

        boxes.append([x, y, w, h])
        confidences.append(float(confidence))
        class_ids.append(class_id)

In [8]:
# Run non-maximum suppression over the candidate boxes; `indexes` receives
# the positions (into `boxes`) of the detections that are kept.
indexes = cv2.dnn.NMSBoxes(
    boxes,
    confidences,
    score_threshold=0.5,
    nms_threshold=0.4,
)

In [9]:
# Echo the indices returned by NMSBoxes as this cell's output.
indexes

array([ 7, 21, 23, 11, 25, 10, 13, 27,  9], dtype=int32)

In [10]:
# Draw bounding boxes and labels
font = cv2.FONT_HERSHEY_SIMPLEX
color = (0, 255, 0)  # same colour for every box, so set it once

# Visit only the boxes that survived NMS instead of testing `i in indexes`
# for every candidate box. flatten() copes with `indexes` being a flat array,
# an (N, 1) array, or an empty result; sorting keeps the original ascending
# draw order.
kept_indices = sorted(int(k) for k in np.array(indexes).flatten())
for i in kept_indices:
    x, y, w, h = boxes[i]
    label = classes[class_ids[i]]
    confidence = confidences[i]
    cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
    # Boxes touching the top/left edge would otherwise put the caption outside
    # the image, where it is clipped; clamp the text origin into the frame.
    text_origin = (max(x, 0), max(y - 10, 15))
    cv2.putText(image, f"{label} {confidence:.2f}", text_origin, font, 0.5, color, 2)

In [11]:
# Display the resulting image
# NOTE(review): cv2.imshow opens a native GUI window rather than rendering
# inline in the notebook; on a headless or remote kernel this will likely fail
# or hang — confirm the target environment.
cv2.imshow("YOLO Object Detection", image)
# Wait for a key press before continuing (0 is passed as the delay).
cv2.waitKey(0)
# Close the window(s) opened above.
cv2.destroyAllWindows()


: 