In [None]:
# Single Object Detection in an Image
#
# Classifies one image with a Caffe DenseNet-121 model loaded through
# OpenCV's DNN module, writes the top class name and its softmax probability
# onto the image, displays it and saves it under outputs/.
import os

import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Read the ImageNet class names from a file. splitlines() avoids the empty
# trailing entry that split('\n') produces when the file ends with a newline.
with open('classification_classes_ILSVRC2012.txt', 'r') as f:
    image_net_names = f.read().splitlines()
# Keep only the first comma-separated name of each class
classes = [name.split(',')[0] for name in image_net_names]

# Load the pre-trained neural network model
model = cv2.dnn.readNet(model='DenseNet_121.caffemodel',
                      config='DenseNet_121.prototxt',
                      framework='Caffe')

# Load the image from the disk
image = cv2.imread('image_1.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than inside blobFromImage.
if image is None:
    raise FileNotFoundError("Could not read image 'image_1.jpg'")

# Convert the image to a blob: resized to 224x224, per-channel mean
# subtracted, then scaled by 0.01
blob = cv2.dnn.blobFromImage(image=image, scalefactor=0.01, size=(224, 224),
                             mean=(104, 117, 123))
# Set the blob as input to the neural network
model.setInput(blob)
# Perform a forward pass through the neural network
outputs = model.forward()

# Take the scores of the first image in the batch
final = outputs[0]
# Reshape the scores into a (1000, 1) column, one row per class
final = final.reshape(1000, 1)
# Determine the class label with the highest score
idlabel = np.argmax(final)
# Convert scores to probabilities using softmax. Subtracting the maximum
# score first leaves the result unchanged mathematically but keeps np.exp
# from overflowing on large scores.
exp_scores = np.exp(final - np.max(final))
probability = exp_scores / np.sum(exp_scores)
# Extract the highest probability as a percentage
final_prob = np.max(probability) * 100.
# Map the highest-scoring index to the corresponding class name
out = classes[idlabel]
text = f"{out}, {final_prob:.3f}"

# Annotate the image with the class name and probability
cv2.putText(image, text, (25, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

# Display the image with annotation
cv2_imshow(image)
cv2.waitKey(0)

# Save the annotated image to disk. cv2.imwrite does not create missing
# directories and signals failure only through its return value, so create
# the folder first and check the result.
os.makedirs('outputs', exist_ok=True)
if not cv2.imwrite('outputs/result_image.jpg', image):
    raise IOError("Could not write 'outputs/result_image.jpg'")

In [None]:
# Multi-Object Detection in an Image
#
# Runs a TensorFlow SSD MobileNet v2 detector through OpenCV's DNN module on
# one image, draws a labelled bounding box for every detection whose
# confidence exceeds 0.4, then displays and saves the result.
import cv2
import numpy as np
from google.colab.patches import cv2_imshow

# Load COCO class labels. splitlines() avoids the empty trailing entry that
# split('\n') produces when the file ends with a newline.
with open('/content/object_detection_classes_coco.txt', 'r') as file:
    class_labels = file.read().splitlines()

# Generate one color per class. A seeded generator makes the colors the same
# on every run of the notebook.
colors = np.random.default_rng(42).uniform(0, 255, size=(len(class_labels), 3))

# Load the pre-trained DNN model
dnn_model = cv2.dnn.readNet(model='/content/frozen_inference_graph.pb',
                            config='ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',
                            framework='TensorFlow')

# Load the image from disk
input_image = cv2.imread('image_2.jpg')  # tiger image
# cv2.imread returns None instead of raising when the file cannot be read
if input_image is None:
    raise FileNotFoundError("Could not read image 'image_2.jpg'")
image_height, image_width, _ = input_image.shape

# Convert the image to a blob format
image_blob = cv2.dnn.blobFromImage(image=input_image, size=(300, 300), mean=(104, 117, 123), swapRB=True)

# Set the blob as input to the model
dnn_model.setInput(image_blob)

# Perform forward pass to get detections
detections = dnn_model.forward()

# Iterate over the detections; each row is read below as
# [_, class_id, confidence, x_start, y_start, x_end, y_end]
for detection in detections[0, 0, :, :]:
    # Extract the confidence level of the detection
    detection_confidence = detection[2]

    # Only draw detections above the confidence threshold
    if detection_confidence <= 0.4:
        continue

    # Get the class ID
    class_id = int(detection[1])
    # The label lookup treats the model's class IDs as 1-based. Use the same
    # index for the label and its color so they always refer to the same
    # class and neither lookup can run past the end of its table.
    label_index = class_id - 1
    if 0 <= label_index < len(class_labels):
        class_label = class_labels[label_index]
        color = tuple(float(channel) for channel in colors[label_index])
    else:
        # ID outside the label file: still draw the box, with a neutral label
        # instead of silently wrapping around to the last label.
        class_label = f"id {class_id}"
        color = (255.0, 255.0, 255.0)

    # Scale the box coordinates (multiplied by the image size, as the model
    # output is used as a fraction of width/height) to integer pixels
    x_start = int(detection[3] * image_width)
    y_start = int(detection[4] * image_height)
    x_end = int(detection[5] * image_width)
    y_end = int(detection[6] * image_height)
    # Draw the bounding box
    cv2.rectangle(input_image, (x_start, y_start), (x_end, y_end), color, thickness=2)
    # Prepare label with class name and confidence
    label_text = f"{class_label}, {detection_confidence * 100:.3f}"
    # Put the label just above the box, clamped so that it stays inside the
    # image when the box touches the top edge
    label_y = max(y_start - 10, 20)
    cv2.putText(input_image, label_text, (x_start, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)

# Show the image with detected objects
cv2_imshow(input_image)

# Save the image with bounding boxes and labels; cv2.imwrite reports failure
# only through its return value
if not cv2.imwrite('image_result.jpg', input_image):
    raise IOError("Could not write 'image_result.jpg'")

# Wait for a key press and close all windows
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
# Video Object Detection
#
# Runs a TensorFlow SSD MobileNet v2 detector through OpenCV's DNN module on
# every frame of a video file, draws labelled boxes for detections with
# confidence above 0.4 plus the per-frame inference FPS, displays each frame
# and writes the annotated frames to video_result.mp4.
import cv2
import time
import numpy as np
from google.colab.patches import cv2_imshow

# Load COCO class labels. splitlines() avoids the empty trailing entry that
# split('\n') produces when the file ends with a newline.
with open('object_detection_classes_coco.txt', 'r') as file:
    class_labels = file.read().splitlines()

# Generate one color per class. A seeded generator makes the colors the same
# on every run of the notebook.
colors = np.random.default_rng(42).uniform(0, 255, size=(len(class_labels), 3))

# Load the pre-trained DNN model
dnn_model = cv2.dnn.readNet(model='frozen_inference_graph.pb',
                            config='ssd_mobilenet_v2_coco_2018_03_29.pbtxt.txt',
                            framework='TensorFlow')

# Capture video from file
video_capture = cv2.VideoCapture('video_1.mp4')
# Fail early with a clear message instead of producing an empty output file
if not video_capture.isOpened():
    raise IOError("Could not open video 'video_1.mp4'")

# Get video dimensions for proper saving
frame_width = int(video_capture.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(video_capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Write the result at the source frame rate so playback speed matches the
# input; fall back to 30 when the container does not report one
source_fps = video_capture.get(cv2.CAP_PROP_FPS)
output_fps = source_fps if source_fps > 0 else 30
# Create a VideoWriter object to save the output video
video_writer = cv2.VideoWriter('video_result.mp4', cv2.VideoWriter_fourcc(*'mp4v'), output_fps,
                               (frame_width, frame_height))

try:
    # Process each frame of the video
    while video_capture.isOpened():
        ret, frame = video_capture.read()
        if not ret:
            # No more frames (or a read error): stop processing
            break

        input_frame = frame
        frame_height, frame_width, _ = input_frame.shape
        # Convert the frame to a blob format
        frame_blob = cv2.dnn.blobFromImage(image=input_frame, size=(300, 300), mean=(104, 117, 123),
                                           swapRB=True)
        # Time only the network inference for the FPS figure. perf_counter is
        # a monotonic high-resolution clock meant for measuring intervals.
        start_time = time.perf_counter()
        dnn_model.setInput(frame_blob)
        detections = dnn_model.forward()
        elapsed = time.perf_counter() - start_time
        # Guard against a zero interval to avoid ZeroDivisionError
        fps = 1 / elapsed if elapsed > 0 else 0.0

        # Iterate over the detections; each row is read below as
        # [_, class_id, confidence, x_start, y_start, x_end, y_end]
        for detection in detections[0, 0, :, :]:
            # Extract the confidence level of the detection
            detection_confidence = detection[2]
            # Only draw detections above the confidence threshold
            if detection_confidence <= 0.4:
                continue

            # Get the class ID
            class_id = int(detection[1])
            # The label lookup treats the model's class IDs as 1-based. Use
            # the same index for the label and its color so they always refer
            # to the same class and neither lookup can run past its table.
            label_index = class_id - 1
            if 0 <= label_index < len(class_labels):
                class_label = class_labels[label_index]
                color = tuple(float(channel) for channel in colors[label_index])
            else:
                # ID outside the label file: still draw the box, with a
                # neutral label instead of wrapping around to the last label.
                class_label = f"id {class_id}"
                color = (255.0, 255.0, 255.0)

            # Scale the box coordinates to integer pixels
            x_start = int(detection[3] * frame_width)
            y_start = int(detection[4] * frame_height)
            x_end = int(detection[5] * frame_width)
            y_end = int(detection[6] * frame_height)
            # Draw the bounding box
            cv2.rectangle(input_frame, (x_start, y_start), (x_end, y_end), color, thickness=2)
            # Put the class label just above the box, clamped so that it
            # stays inside the frame when the box touches the top edge
            label_y = max(y_start - 5, 25)
            cv2.putText(input_frame, class_label, (x_start, label_y), cv2.FONT_HERSHEY_SIMPLEX, 1, color, 2)

        # Put FPS text on the top of the frame: once per frame, after the
        # boxes, so it is shown even when nothing was detected and is not
        # redrawn for every detection
        cv2.putText(input_frame, f"{fps:.2f} FPS", (20, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

        cv2_imshow(input_frame)
        video_writer.write(input_frame)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release both handles even if processing fails; releasing the writer is
    # what closes the output file
    video_capture.release()
    video_writer.release()
    cv2.destroyAllWindows()