In [34]:
import cv2
import os

# Locations of the three YOLOv3 artefacts: trained weights, network
# definition and the class-label list.
weights_path = r"C:\Users\DELL\ML\yolov3\yolov3.weights"
config_path = r"C:\Users\DELL\ML\yolov3\yolov3.cfg"
classes_path = r"C:\Users\DELL\ML\yolov3\coco.names"

# Fail fast with a descriptive error if any artefact is missing,
# checked in the order weights -> config -> classes.
if not os.path.exists(weights_path):
    raise FileNotFoundError(f"YOLO weights file not found: {weights_path}")
if not os.path.exists(config_path):
    raise FileNotFoundError(f"YOLO configuration file not found: {config_path}")
if not os.path.exists(classes_path):
    raise FileNotFoundError(f"Classes file not found: {classes_path}")

# One class label per line; surrounding whitespace is stripped.
with open(classes_path, "r") as f:
    classes = list(map(str.strip, f))

print(f"Loaded {len(classes)} classes: {classes}")

# Build the network from the weights and the configuration file.
net = cv2.dnn.readNet(weights_path, config_path)

# Every layer name, plus the ids of the layers whose outputs are not
# consumed by another layer.
layer_names = net.getLayerNames()
unconnected_layers = net.getUnconnectedOutLayers()

# The ids are flattened and shifted by one before being used as indices
# into layer_names.
output_layers = [
    layer_names[layer_id - 1] for layer_id in unconnected_layers.flatten()
]

print("YOLO model loaded successfully.")
print(f"Output layers: {output_layers}")


Loaded 80 classes: ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush']
YOLO model loaded successfully.
Output layers: ['yolo_82', 'yolo_94', 'yolo_106']


Code Reading

Preprocessing the Image

In [35]:
import cv2
import numpy as np

# Load and preprocess image
def preprocess_image(image_path, input_size=416):
    """Read an image from disk and turn it into a YOLO input blob.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.
        input_size: Side length, in pixels, of the square network input
            (416 by default).

    Returns:
        A tuple ``(image, blob, height, width)``: the image as loaded by
        OpenCV, the blob built by ``cv2.dnn.blobFromImage``, and the height
        and width of the loaded image.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None rather than raising, so
    # without this guard the error would surface later as an opaque
    # AttributeError on ``image.shape``.
    if image is None:
        raise ValueError(f"Image not found at the path: {image_path}")
    height, width = image.shape[:2]

    # Scale pixel values by 0.00392 (roughly 1/255), resize to the square
    # network input without cropping, subtract no mean, and swap the R and B
    # channels.
    blob = cv2.dnn.blobFromImage(
        image,
        scalefactor=0.00392,
        size=(input_size, input_size),
        mean=(0, 0, 0),
        swapRB=True,
        crop=False,
    )

    return image, blob, height, width


Loading YOLO Model (with weights and config)

In [36]:
def load_yolo_model(weights_path, config_path):
    """Load a YOLO network with OpenCV's DNN module.

    Args:
        weights_path: Path to the trained weights file.
        config_path: Path to the network configuration file.

    Returns:
        A tuple ``(net, output_layers)`` where ``net`` is the object returned
        by ``cv2.dnn.readNet`` and ``output_layers`` is the list of names of
        the network's unconnected output layers.
    """
    # Load YOLO network using OpenCV DNN module
    net = cv2.dnn.readNet(weights_path, config_path)

    layer_names = net.getLayerNames()

    # Flatten the ids first: depending on the OpenCV release they come back
    # either as a flat array or as an (N, 1) array, and only scalar ids can
    # index ``layer_names``. The ids are shifted by one to obtain the index.
    output_ids = net.getUnconnectedOutLayers().flatten()
    output_layers = [layer_names[i - 1] for i in output_ids]

    return net, output_layers


Obtaining Detections (Forward Pass)

In [37]:
def get_detections(net, blob, output_layers):
    """Run one forward pass and return the raw outputs.

    Args:
        net: Network object exposing ``setInput`` and ``forward``.
        blob: Input blob to feed to the network.
        output_layers: Names of the layers whose outputs are requested.

    Returns:
        Whatever ``net.forward(output_layers)`` returns.
    """
    # Feed the blob, then collect the outputs of the requested layers.
    net.setInput(blob)
    return net.forward(output_layers)


Post-processing the Detections

In [38]:
def post_process(image, outs, confidence_threshold=0.5, nms_threshold=0.4):
    """Turn raw network outputs into pixel boxes and run NMS on them.

    Each detection row is read as: entries 0-3 are the box centre x, centre y,
    width and height, which are multiplied by the image width/height here;
    entries from index 5 onward are per-class scores. Entry 4 is not used.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is used.
        outs: Iterable of per-layer outputs, each an iterable of detection rows.
        confidence_threshold: A detection is kept only if its best class score
            is strictly greater than this value; also passed to NMS.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        A tuple ``(indices, boxes, confidences, class_ids)`` where ``indices``
        is the result of ``cv2.dnn.NMSBoxes`` and the three lists are parallel:
        ``[x, y, w, h]`` boxes (top-left corner plus size, in pixels), float
        confidences and class ids.
    """
    img_h, img_w = image.shape[:2]

    boxes, confidences, class_ids = [], [], []

    for layer_output in outs:
        for row in layer_output:
            class_scores = row[5:]
            best_class = np.argmax(class_scores)
            best_score = class_scores[best_class]

            # Guard clause: ignore anything that does not beat the threshold.
            if not best_score > confidence_threshold:
                continue

            # Scale the relative centre/size values to pixels.
            cx = int(row[0] * img_w)
            cy = int(row[1] * img_h)
            box_w = int(row[2] * img_w)
            box_h = int(row[3] * img_h)

            # Convert centre-based coordinates to a top-left corner.
            left = int(cx - box_w / 2)
            top = int(cy - box_h / 2)

            boxes.append([left, top, box_w, box_h])
            confidences.append(float(best_score))
            class_ids.append(best_class)

    # Non-maximum suppression drops redundant overlapping boxes.
    indices = cv2.dnn.NMSBoxes(
        boxes, confidences, confidence_threshold, nms_threshold
    )

    return indices, boxes, confidences, class_ids


Displaying the Results

In [39]:
def draw_boxes(image, indices, boxes, confidences, class_ids, classes, colors=None):
    """Draw the detections kept by NMS on ``image`` and show it in a window.

    Args:
        image: Image array; it is drawn on in place.
        indices: Indices of the kept boxes as returned by
            ``cv2.dnn.NMSBoxes``. An empty result is accepted.
        boxes: List of ``[x, y, w, h]`` boxes (top-left corner plus size).
        confidences: Confidence for each entry of ``boxes``.
        class_ids: Class id for each entry of ``boxes``.
        classes: Sequence mapping a class id to its label.
        colors: Optional sequence mapping a class id to a colour; when
            omitted every box is drawn in ``(0, 255, 0)``.

    Returns:
        None. The function blocks in ``cv2.waitKey(0)`` until a key is
        pressed, then closes all OpenCV windows.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    # When no box survives, NMSBoxes can hand back an empty tuple, which has
    # no .flatten(); treat any empty result as "nothing to draw" so the image
    # is still displayed instead of raising AttributeError.
    kept = indices.flatten() if len(indices) > 0 else []

    for i in kept:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])  # Get the class label
        confidence = confidences[i]

        # Per-class colour when a palette is supplied, otherwise a fixed one.
        color = (0, 255, 0) if colors is None else colors[class_ids[i]]

        # Box outline, with the "label confidence" caption 10 px above it.
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        cv2.putText(image, f"{label} {confidence:.2f}", (x, y - 10), font, 0.5, color, 2)

    # Display the output image
    cv2.imshow("YOLOv3 Output", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()


In [None]:
import cv2
import numpy as np
import os

def main(image_path, weights_path, config_path, classes_path):
    """Run the full detection pipeline on one image and display the result.

    The steps run in this order: read class labels, preprocess the image,
    load the network, run the forward pass, post-process the outputs, and
    draw/display the surviving boxes.

    Args:
        image_path: Path of the image to analyse.
        weights_path: Path to the YOLO weights file.
        config_path: Path to the YOLO configuration file.
        classes_path: Path to a text file with one class label per line.
    """
    # Class labels, one per line, with surrounding whitespace removed.
    with open(classes_path, "r") as labels_file:
        classes = [raw_line.strip() for raw_line in labels_file]

    # The image dimensions returned here are not needed by the later steps.
    image, blob, _height, _width = preprocess_image(image_path)

    net, output_layers = load_yolo_model(weights_path, config_path)
    raw_outputs = get_detections(net, blob, output_layers)

    # Threshold + NMS, then hand everything to the drawing helper.
    kept, boxes, confidences, class_ids = post_process(image, raw_outputs)
    draw_boxes(image, kept, boxes, confidences, class_ids, classes)

if __name__ == "__main__":
    # Replace with the paths to your files.
    # Input image; bus.jpg in the same folder is another sample to try.
    image_path = r"C:\Users\DELL\ML\yolov3\data\images\zidane.jpg"
    # Specify paths to YOLO files
    weights_path = r"C:\Users\DELL\ML\yolov3\yolov3.weights"
    config_path = r"C:\Users\DELL\ML\yolov3\yolov3.cfg"
    classes_path = r"C:\Users\DELL\ML\yolov3\coco.names"

    # Run the pipeline: detect objects in the image and display the result.
    main(image_path, weights_path, config_path, classes_path)


Inference using trained weights

In [None]:
import cv2
import numpy as np

# Load YOLO
def load_yolo_model(weights_path, config_path):
    """Load a YOLO network with OpenCV's DNN module.

    Args:
        weights_path: Path to the trained weights file.
        config_path: Path to the network configuration file.

    Returns:
        A tuple ``(net, output_layers)`` where ``net`` is the object returned
        by ``cv2.dnn.readNet`` and ``output_layers`` is the list of names of
        the network's unconnected output layers.
    """
    net = cv2.dnn.readNet(weights_path, config_path)
    layer_names = net.getLayerNames()
    # Flatten the ids first: depending on the OpenCV release they come back
    # either as a flat array or as an (N, 1) array, and only scalar ids can
    # index ``layer_names``. The ids are shifted by one to obtain the index.
    output_ids = net.getUnconnectedOutLayers().flatten()
    output_layers = [layer_names[i - 1] for i in output_ids]
    return net, output_layers

# Preprocess image
def preprocess_image(image_path, input_size=416):
    """Read an image and build the blob that is fed to the network.

    Args:
        image_path: Path of the image file handed to ``cv2.imread``.
        input_size: Side length, in pixels, of the square network input.

    Returns:
        A tuple ``(image, blob, height, width)``: the loaded image, the blob
        from ``cv2.dnn.blobFromImage``, and the loaded image's height and width.

    Raises:
        ValueError: If ``cv2.imread`` returns ``None`` for ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"Image not found at the path: {image_path}")

    height, width = image.shape[:2]

    # Positional arguments after the image: scale factor, target size,
    # mean to subtract, and the swap-R/B flag.
    scale = 0.00392
    target_size = (input_size, input_size)
    mean = (0, 0, 0)
    blob = cv2.dnn.blobFromImage(image, scale, target_size, mean, True, crop=False)

    return image, blob, height, width

# Perform object detection and draw bounding boxes
def post_process(image, outs, classes, height, width,
                 confidence_threshold=0.5, nms_threshold=0.4):
    """Filter raw detections, apply NMS and draw the surviving boxes.

    Each detection row is read as: entries 0-3 are the box centre x, centre y,
    width and height, multiplied here by ``width``/``height``; entries from
    index 5 onward are per-class scores. Entry 4 is not used.

    Note: this definition reuses the name of the ``post_process`` defined
    earlier in the notebook but takes different arguments and returns the
    annotated image instead of the detection lists.

    Args:
        image: Image array; boxes and labels are drawn on it in place.
        outs: Iterable of per-layer outputs, each an iterable of detection rows.
        classes: Sequence mapping a class id to its label.
        height: Image height in pixels, used to scale box coordinates.
        width: Image width in pixels, used to scale box coordinates.
        confidence_threshold: A detection is kept only if its best class score
            is strictly greater than this value; also passed to NMS.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        The same ``image`` object, with the kept detections drawn on it.
    """
    class_ids = []
    confidences = []
    boxes = []
    for out in outs:
        for detection in out:
            scores = detection[5:]
            class_id = np.argmax(scores)
            confidence = scores[class_id]
            if confidence > confidence_threshold:
                # Scale the relative centre/size values to pixels, then move
                # from a centre point to a top-left corner.
                center_x = int(detection[0] * width)
                center_y = int(detection[1] * height)
                w = int(detection[2] * width)
                h = int(detection[3] * height)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(float(confidence))
                class_ids.append(class_id)

    indices = cv2.dnn.NMSBoxes(boxes, confidences, confidence_threshold, nms_threshold)

    # Iterate the kept indices directly instead of testing every box for
    # membership. np.asarray copes with an empty tuple as well as with flat
    # or (N, 1) arrays; sorting keeps the drawing order by ascending box index.
    kept = sorted(int(i) for i in np.asarray(indices).flatten())

    color = (0, 255, 0)  # Green color for bounding box
    for i in kept:
        x, y, w, h = boxes[i]
        label = str(classes[class_ids[i]])
        cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
        # Fixed two decimals, the same caption format draw_boxes uses.
        cv2.putText(image, f"{label} {confidences[i]:.2f}", (x, y - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return image

# Main function to perform object detection
def detect_objects(image_path, weights_path, config_path, classes_path,
                   output_image_path="output_image.jpg"):
    """Detect objects in one image, display the result and save it to disk.

    Args:
        image_path: Path of the image to analyse.
        weights_path: Path to the YOLO weights file.
        config_path: Path to the YOLO configuration file.
        classes_path: Path to a text file with one class label per line.
        output_image_path: Where the annotated image is written
            (``"output_image.jpg"`` by default).

    Raises:
        OSError: If ``cv2.imwrite`` reports that the image was not written.
    """
    # Load class names
    with open(classes_path, "r") as f:
        classes = [line.strip() for line in f]

    # Preprocess image
    image, blob, height, width = preprocess_image(image_path)

    # Load YOLO model
    net, output_layers = load_yolo_model(weights_path, config_path)

    # Perform forward pass to get outputs
    net.setInput(blob)
    outs = net.forward(output_layers)

    # Post-process the outputs to draw bounding boxes
    result_image = post_process(image, outs, classes, height, width)

    # Display the resulting image; blocks until a key is pressed.
    cv2.imshow("YOLO Detection", result_image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    # cv2.imwrite returns False on failure instead of raising, so check it
    # before claiming that the file was saved.
    if not cv2.imwrite(output_image_path, result_image):
        raise OSError(f"Failed to write output image: {output_image_path}")
    print(f"Output saved at: {output_image_path}")

# Main execution
if __name__ == "__main__":
    # Image to run detection on.
    image_path = r"C:\Users\DELL\ML\yolov3\data\images\zidane.jpg"

    # Model files: weights, network definition and class labels.
    # NOTE(review): the weights file name suggests a custom training
    # checkpoint; confirm that the cfg and names files match it.
    weights_path = r"C:\Users\DELL\ML\yolov3\yolov3_2000.weights"
    config_path = r"C:\Users\DELL\ML\yolov3\yolov3.cfg"
    classes_path = r"C:\Users\DELL\ML\yolov3\coco.names"

    # Detect, display and save the annotated image.
    detect_objects(
        image_path=image_path,
        weights_path=weights_path,
        config_path=config_path,
        classes_path=classes_path,
    )


Create files for training

Convert Annotations to YOLO Format

In [None]:
import xml.etree.ElementTree as ET
import os
import cv2

# Function to convert Pascal VOC XML annotation to YOLO format
def convert_xml_to_yolo(xml_file, image_width, image_height, class_names=None):
    """Convert one Pascal VOC XML annotation into YOLO-format label lines.

    Every ``<object>`` element becomes one line
    ``"<class_id> <x_center> <y_center> <width> <height>"`` where the four
    box values are divided by the image width/height.

    Args:
        xml_file: Path (or file object) of the XML annotation to parse.
        image_width: Width of the annotated image in pixels.
        image_height: Height of the annotated image in pixels.
        class_names: Optional lookup for class ids. A dict maps a class name
            to its id; any other sequence uses the position of the name as
            the id. When omitted, every object gets class id 0.

    Returns:
        A list of label strings, one per ``<object>`` element.

    Raises:
        ValueError: If ``class_names`` is given and an object's name is not
            in it.
    """
    root = ET.parse(xml_file).getroot()

    yolo_annotations = []
    for obj in root.findall('object'):
        class_name = (obj.findtext('name') or '').strip()

        if class_names is None:
            # No lookup supplied: single-class dataset, everything is id 0.
            class_id = 0
        elif isinstance(class_names, dict):
            if class_name not in class_names:
                raise ValueError(f"Unknown class name in {xml_file}: {class_name!r}")
            class_id = class_names[class_name]
        else:
            if class_name not in class_names:
                raise ValueError(f"Unknown class name in {xml_file}: {class_name!r}")
            class_id = list(class_names).index(class_name)

        # float() rather than int(): accepts coordinates written as "12.0"
        # and yields the same ratios for integer-valued coordinates.
        xmin = float(obj.find('bndbox/xmin').text)
        ymin = float(obj.find('bndbox/ymin').text)
        xmax = float(obj.find('bndbox/xmax').text)
        ymax = float(obj.find('bndbox/ymax').text)

        # Calculate YOLO format coordinates (relative to image size)
        x_center = (xmin + xmax) / 2 / image_width
        y_center = (ymin + ymax) / 2 / image_height
        width = (xmax - xmin) / image_width
        height = (ymax - ymin) / image_height

        yolo_annotations.append(f"{class_id} {x_center} {y_center} {width} {height}")

    return yolo_annotations

# Function to process all XML annotations in a directory and convert to YOLO format
def convert_annotations_to_yolo(annotation_dir, image_dir, output_dir):
    """Convert every ``.xml`` annotation in a directory to a YOLO ``.txt`` file.

    For each ``<name>.xml`` in ``annotation_dir`` the image
    ``<name>.jpg`` in ``image_dir`` is read to obtain its size, and the
    converted labels are written to ``<name>.txt`` in ``output_dir``.
    Annotations whose image cannot be read are skipped with a printed warning.

    Args:
        annotation_dir: Directory containing the XML annotation files.
        image_dir: Directory containing the matching ``.jpg`` images.
        output_dir: Directory for the label files; created if missing.

    Returns:
        The list of label file paths that were written, in sorted file-name
        order.
    """
    os.makedirs(output_dir, exist_ok=True)

    suffix = '.xml'
    written = []
    # Sorted so that processing order and warnings are reproducible.
    for xml_file in sorted(os.listdir(annotation_dir)):
        if not xml_file.endswith(suffix):
            continue

        # Strip only the trailing extension; str.replace would also rewrite
        # any earlier ".xml" occurring inside the file name.
        stem = xml_file[:-len(suffix)]

        # Get the image corresponding to the annotation
        image_path = os.path.join(image_dir, stem + '.jpg')
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread returns None instead of raising; skip this
            # annotation rather than crash on image.shape.
            print(f"Skipping {xml_file}: could not read image {image_path}")
            continue
        image_height, image_width = image.shape[:2]

        # Convert XML annotation to YOLO format
        yolo_annotations = convert_xml_to_yolo(
            os.path.join(annotation_dir, xml_file),
            image_width, image_height
        )

        # Write YOLO annotations to file, one label per line
        yolo_annotation_file = os.path.join(output_dir, stem + '.txt')
        with open(yolo_annotation_file, 'w') as f:
            f.writelines(f"{annotation}\n" for annotation in yolo_annotations)
        written.append(yolo_annotation_file)

    return written

# Example: convert the Simpsons dataset annotations. The XML files and the
# images are read from their own folders and the YOLO label files are written
# to a third one.
annotation_dir = 'simpsons_dataset/annotations'
image_dir = 'simpsons_dataset/images'
output_dir = 'simpsons_dataset/yolo_annotations'

convert_annotations_to_yolo(
    annotation_dir=annotation_dir,
    image_dir=image_dir,
    output_dir=output_dir,
)


Set Up YOLO Training Files

In [None]:
import os

# Function to generate train.txt and test.txt files
def generate_data_files(image_dir, output_dir, split_ratio=0.8, seed=None):
    """Write ``train.txt`` and ``test.txt`` listing the images of a dataset.

    The ``.jpg`` files found in ``image_dir`` are listed in sorted order, so
    the split is the same on every run. The first
    ``int(len(images) * split_ratio)`` paths go to ``train.txt`` and the rest
    to ``test.txt``, one path per line.

    Args:
        image_dir: Directory that is scanned for ``.jpg`` files.
        output_dir: Directory in which the two list files are written;
            created if missing.
        split_ratio: Fraction of the images assigned to the training list.
        seed: Optional seed. When given, the sorted list is shuffled with a
            random generator seeded with this value before splitting; when
            omitted, the sorted order is used as is.
    """
    # os.listdir gives no ordering guarantee, so sort to make the split
    # reproducible.
    images = sorted(
        os.path.join(image_dir, f)
        for f in os.listdir(image_dir)
        if f.endswith('.jpg')
    )

    if seed is not None:
        import random  # local import: only needed for the optional shuffle

        random.Random(seed).shuffle(images)

    train_size = int(len(images) * split_ratio)

    os.makedirs(output_dir, exist_ok=True)

    # Create train.txt and test.txt files
    with open(os.path.join(output_dir, 'train.txt'), 'w') as f_train, \
         open(os.path.join(output_dir, 'test.txt'), 'w') as f_test:
        f_train.writelines(image + '\n' for image in images[:train_size])
        f_test.writelines(image + '\n' for image in images[train_size:])

# Example: list the Simpsons images and write train.txt / test.txt next to
# the images folder, using the default split ratio.
generate_data_files(
    image_dir='simpsons_dataset/images',
    output_dir='simpsons_dataset',
)


(Advanced) Code Reading

The important parts of the YOLOv3 paper that influence the code implementation are:

The architecture of the single convolutional network.
Bounding box regression and anchor boxes.
Detection at multiple scales.
Objectness score (confidence) and class predictions.
The Darknet53 backbone for feature extraction.
The loss function, which combines multiple components.


These concepts are all implemented and reflected in the codebase through the configuration files, network definitions, and training code.