In [None]:
# Importing the cv2 lib
import cv2

# OS traversal 
import os 

# Progress bars
from tqdm import tqdm 

# Array math 
import numpy as np 

# Plotting
import matplotlib.pyplot as plt

# Importing the NAS vision model 
import super_gradients.training.models

# Build the "yolo_nas_l" detector from super_gradients using its "coco" pretrained weights
yolo_nas = super_gradients.training.models.get(
    "yolo_nas_l",
    pretrained_weights="coco",
)

# Defining the video name
video_name = 'DJI_0763.MP4'

# Every input and output lives under the directory the notebook is run from
base_dir = os.getcwd()

# Path to the input video
video_path = os.path.join(base_dir, 'videos', video_name)

# Directory for the frames extracted from the video
extracted_images_dir = os.path.join(base_dir, 'extracted_images')

# Directory for the annotated / blurred frames
postprocessed_images_dir = os.path.join(base_dir, 'postprocessed_images')

# exist_ok=True makes this cell safe to re-run and avoids the separate
# "exists?" check; it still raises if the path exists but is not a directory.
for output_dir in (extracted_images_dir, postprocessed_images_dir):
    os.makedirs(output_dir, exist_ok=True)

# Splitting the video into images 

In [None]:
# Set to False to reuse the frames already present in the extracted images dir
split_img = True

if split_img:
    # Open the video first and fail loudly if it cannot be read. Doing this
    # before the cleanup below means a wrong path never wipes the frames that
    # were extracted on an earlier run.
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise FileNotFoundError(f"Could not open video: {video_path}")

    try:
        # Removing any file left over from a previous extraction
        # (sub-directories are left alone; os.remove cannot delete them)
        for image in os.listdir(extracted_images_dir):
            stale_path = os.path.join(extracted_images_dir, image)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        # Splitting the video frame by frame; frames are named 0.jpg, 1.jpg, ...
        i = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read
                break
            cv2.imwrite(os.path.join(extracted_images_dir, f'{i}.jpg'), frame)
            i += 1
    finally:
        # Always release the capture handle, even if writing a frame fails
        cap.release()

# Blurring the images 

In [None]:
# Listing the file names in the extracted images dir. The result is not sorted
# here; the batch-processing cell further down re-lists and sorts the frames.
images = os.listdir(extracted_images_dir)

In [None]:
def letterbox(im, new_shape=(960, 960), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
    """Resize an image to fit inside `new_shape` keeping its aspect ratio, then pad it.

    Args:
        im: Image array; only ``im.shape[:2]`` (height, width) is inspected.
        new_shape: Target ``(height, width)``, or a single int used for both.
        color: Constant border value handed to ``cv2.copyMakeBorder``.
        auto: When true, the padding on each axis is reduced modulo ``stride``.
        scaleup: When false, the scale ratio is capped at 1.0.
        stride: Modulus applied to the padding when ``auto`` is true.

    Returns:
        Tuple ``(im, r, (dw, dh))``: the resized and padded image, the scale
        ratio that was applied, and the per-side padding (width, height)
        before it is rounded to whole pixels.
    """
    shape = im.shape[:2]  # (height, width) of the input
    src_h, src_w = shape[0], shape[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Ratio new / old; the smaller axis ratio keeps the whole image inside the target
    r = min(new_shape[0] / src_h, new_shape[1] / src_w)
    if not scaleup:
        r = min(r, 1.0)

    # Size after scaling, as (width, height), and the total padding left over
    resized_wh = (int(round(src_w * r)), int(round(src_h * r)))
    dw = new_shape[1] - resized_wh[0]
    dh = new_shape[0] - resized_wh[1]

    if auto:
        # Keep only the remainder of the padding with respect to the stride
        dw = np.mod(dw, stride)
        dh = np.mod(dh, stride)

    # Split the padding evenly between the two sides of each axis
    dw = dw / 2
    dh = dh / 2

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets send the extra pixel of an odd padding to the
    # bottom / right side instead of rounding both halves the same way
    pad_top = int(round(dh - 0.1))
    pad_bottom = int(round(dh + 0.1))
    pad_left = int(round(dw - 0.1))
    pad_right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(
        im, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=color
    )
    return im, r, (dw, dh)

## Example image pipeline 

In [None]:
# Getting one image path
image_path = os.path.join(extracted_images_dir, '2204.jpg')

# cv2.imread returns None instead of raising when the file is missing or
# unreadable, so check explicitly to get a clear error message
image_bgr = cv2.imread(image_path)
if image_bgr is None:
    raise FileNotFoundError(f"Could not read example image: {image_path}")

# OpenCV loads images as BGR; convert to RGB for matplotlib
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

# Saving the original image's w and h
original_w, original_h = image.shape[1], image.shape[0]

# Plotting the image
plt.imshow(image)

In [None]:
# Run the detector on the example image
hat = yolo_nas.predict(image)

# Detections of the first (and only) result
predictions = hat[0].prediction

# Class names reported by the same result
class_names = hat[0].class_names

# Lookup table: class index -> class name
class_names_dict = dict(enumerate(class_names))

print(f"Number of classes: {len(class_names)}")

In [None]:
# Display the prediction object extracted above as the cell output
predictions

In [None]:
# Pull the three parallel fields out of the prediction:
# boxes as (x0, y0, x1, y1), class indices, and confidence scores
bboxes, labels, confidences = (
    predictions.bboxes_xyxy,
    predictions.labels,
    predictions.confidence,
)

In [None]:
# Load a fresh copy of the example image unconditionally, so that `img` is
# always defined for the plot below, even when the model found no boxes.
img_bgr = cv2.imread(image_path)
if img_bgr is None:
    # cv2.imread signals failure by returning None rather than raising
    raise FileNotFoundError(f"Could not read example image: {image_path}")
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

# Iterating over the boxes (the loop body simply does not run when there are none)
for i, box in enumerate(bboxes):
    # Getting the class name; the label is cast to a plain int for the lookup
    class_name = class_names_dict[int(labels[i])]

    # Box corners (x0, y0) - (x1, y1) as integer pixel coordinates
    x0, y0, x1, y1 = [int(v) for v in box[:4]]

    # Getting the confidence
    confidence = round(float(confidences[i]), 2)

    # Drawing the rectangle
    cv2.rectangle(img, (x0, y0), (x1, y1), (0, 255, 0), 2)

    # Putting the class name just above the box
    cv2.putText(img, f"{class_name} {confidence}", (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

# Plotting the image
plt.imshow(img)

# Applying on all images 

In [None]:
# Detections of these classes get their box region blurred
classes_to_blur = ["person"]

# Detections of these classes get a box and label drawn; other classes are ignored
classes_to_draw = [
    "person",
    "car",
    "bus",
    "truck",
]

# Number of leading frames to process (None means all frames)
n_frames = 500

In [None]:
# Mapping frame file name -> frame index. Only files whose name starts with an
# integer stem (e.g. "12.jpg") are frames; anything else found in the directory
# (hidden OS files, notes, ...) is ignored instead of crashing the int() parse.
image_dict = dict()
for image in os.listdir(extracted_images_dir):
    stem = image.split('.')[0]
    if stem.isdecimal():
        image_dict[image] = int(stem)

# Sorting the images by the frame index
images = sorted(image_dict, key=image_dict.get)

# Keeping only the first n_frames frames (None keeps all of them)
if n_frames is not None:
    images = images[:n_frames]

# Not used by this cell; kept so the name stays defined as before
list_of_images = []

# Iterating over the images
for image in tqdm(images):
    frame_path = os.path.join(extracted_images_dir, image)

    # cv2.imread returns None instead of raising when a frame cannot be read
    img = cv2.imread(frame_path)
    if img is None:
        print(f"{image} - could not be read, skipping")
        continue
    frame_h, frame_w = img.shape[:2]

    # Making the prediction
    hat = yolo_nas.predict(frame_path)

    # Extracting the predictions: boxes (x0, y0, x1, y1), class indices, scores
    predictions = hat[0].prediction
    bboxes = predictions.bboxes_xyxy
    labels = predictions.labels
    confidences = predictions.confidence

    # First pass: select the detections to keep and apply the blur.
    # Blurring happens before ANY drawing so that box outlines and labels are
    # never smeared, including those of overlapping detections.
    kept_detections = []
    for i, box in enumerate(bboxes):
        try:
            # Getting the class name
            class_name = class_names_dict[int(labels[i])]
            if class_name not in classes_to_draw:
                continue

            # Clamp the box to the frame: a negative coordinate would wrap
            # around in numpy slicing and select the wrong (or an empty) region
            x0 = min(max(int(box[0]), 0), frame_w)
            y0 = min(max(int(box[1]), 0), frame_h)
            x1 = min(max(int(box[2]), 0), frame_w)
            y1 = min(max(int(box[3]), 0), frame_h)
            if x1 <= x0 or y1 <= y0:
                # Nothing of the box lies inside the frame
                continue

            # Getting the confidence
            confidence = round(float(confidences[i]), 2)

            if class_name in classes_to_blur:
                # Blurring the box region in place
                img[y0:y1, x0:x1] = cv2.blur(img[y0:y1, x0:x1], (30, 30))

            kept_detections.append((class_name, confidence, x0, y0, x1, y1))
        except (KeyError, cv2.error) as e:
            # Best effort per box: report the problem and move on to the next one
            print(f"{image} - {e}")
            continue

    # Second pass: draw the rectangle and label of every kept detection
    for class_name, confidence, x0, y0, x1, y1 in kept_detections:
        cv2.rectangle(img, (x0, y0), (x1, y1), (0, 255, 0), 2)
        cv2.putText(img, f"{class_name} {confidence}", (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    # Defining the path to the output image
    image_path = os.path.join(postprocessed_images_dir, os.path.basename(image))

    # Saving the image. cv2.imwrite overwrites an existing file, so no prior
    # removal is needed; it reports failure through its return value.
    if not cv2.imwrite(image_path, img):
        print(f"{image} - could not be written to {image_path}")

# Making a video from the images

In [None]:
# Converting all the images in the postprocessed images dir to a video.
# NOTE: every numbered file in the directory is used, including frames left
# there by an earlier run with a larger n_frames.
# Mapping file name -> frame index; files without an integer stem are ignored.
image_dict = dict()
for image in os.listdir(postprocessed_images_dir):
    stem = image.split('.')[0]
    if stem.isdecimal():
        image_dict[image] = int(stem)

# Sorting the images by the index
images = sorted(image_dict, key=image_dict.get)
if not images:
    raise FileNotFoundError(f"No frames found in {postprocessed_images_dir}")

# Defining the output video path
output_video_path = os.path.join(os.getcwd(), 'output_video.mp4')

# Defining the fps
fps = 24

# Defining the size of the video from the first image, as (width, height)
img = cv2.imread(os.path.join(postprocessed_images_dir, images[0]))
if img is None:
    # cv2.imread returns None instead of raising on failure
    raise FileNotFoundError(f"Could not read frame: {images[0]}")
size = (img.shape[1], img.shape[0])

# Defining the video writer. 'mp4v' is the fourcc that matches the .mp4
# container; the previous 'DIVX' tag is not valid for that container.
out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
if not out.isOpened():
    out.release()
    raise RuntimeError(f"Could not open video writer for: {output_video_path}")

try:
    # Iterating over the images
    for image in tqdm(images):
        # Reading the image
        img = cv2.imread(os.path.join(postprocessed_images_dir, image))

        # Frames that cannot be read or do not match the video size are
        # reported and skipped rather than passed to the writer
        if img is None or (img.shape[1], img.shape[0]) != size:
            print(f"{image} - unreadable or wrong size, skipped")
            continue

        # Writing the image
        out.write(img)
finally:
    # Closing the video writer even if a frame fails, so the file is finalized.
    # (No cv2 windows are opened in this notebook, so there is nothing to destroy.)
    out.release()