Name: Rutuja Kokate
SJSU ID: 017453865

In [1]:
pip install ultralytics opencv-python torch torchvision torchaudio moviepy

Collecting torchaudio
  Downloading torchaudio-2.4.1-cp311-cp311-win_amd64.whl.metadata (6.5 kB)
Collecting moviepy
  Downloading moviepy-1.0.3.tar.gz (388 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
INFO: pip is looking at multiple versions of torchaudio to determine which version is compatible with other requirements. This could take a while.
Collecting torchaudio
  Downloading torchaudio-2.4.0-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.3.1-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.3.0-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.2.2-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.2.1-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.2.0-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
  Downloading torchaudio-2.1.2-cp311-cp311-win_amd64.whl.metadata (6.4 kB)
Collecting decorator<5.0,>=4.0.2 (from movie

In [2]:
pip install opencv-python-headless


Collecting opencv-python-headlessNote: you may need to restart the kernel to use updated packages.

  Using cached opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl.metadata (20 kB)
Using cached opencv_python_headless-4.10.0.84-cp37-abi3-win_amd64.whl (38.8 MB)
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.10.0.84


In [3]:
import torch
import cv2
import os
from ultralytics import YOLO
from moviepy.editor import VideoFileClip

In [4]:
# Module-level detector used by run_inference_on_frames; built from the
# "yolov8n.pt" weights (the cell output shows them being downloaded on first use).
model = YOLO("yolov8n.pt")  

# Function to process video into frames
def process_video(video_path, output_frames_dir):
    """Split a video into JPEG images, one file per decoded frame.

    Frames are written to ``output_frames_dir`` as ``frame_00000.jpg``,
    ``frame_00001.jpg``, ... in decoding order. The directory is created when
    it does not exist yet.

    Args:
        video_path: Path of the video handed to ``cv2.VideoCapture``.
        output_frames_dir: Directory that receives the frame images.

    Returns:
        The number of frames read from the video (0 when the capture could not
        be opened or yielded no frames).
    """
    # exist_ok replaces the separate exists() check and its check-then-create race.
    os.makedirs(output_frames_dir, exist_ok=True)

    # Capture video
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # End of stream (or a read failure): stop extracting.
                break
            frame_path = os.path.join(output_frames_dir, f"frame_{frame_count:05d}.jpg")
            cv2.imwrite(frame_path, frame)
            frame_count += 1
    finally:
        # Release the capture handle even when reading or writing a frame raises.
        cap.release()
    return frame_count


Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|█████████████████████████████████████████████████████████████████████████████| 6.23M/6.23M [00:00<00:00, 34.6MB/s]


In [5]:
# Function to run inference and draw bounding boxes on frames
def run_inference_on_frames(input_frames_dir, output_frames_dir,
                            vehicle_classes=("car", "bike", "bicycle", "motorcycle", "bus")):
    """Run the module-level ``model`` on every frame image and save annotated copies.

    Each readable image in ``input_frames_dir`` (processed in sorted filename
    order) is passed to ``model``; detections whose label is in
    ``vehicle_classes`` are drawn as a green box with a ``"<label> <conf>"``
    caption, and the frame is written to ``output_frames_dir`` under the same
    filename. Frames without matching detections are still written, unchanged.

    Args:
        input_frames_dir: Directory containing the frame images to process.
        output_frames_dir: Directory for the annotated frames; created if missing.
        vehicle_classes: Labels (as found in ``model.names``) to annotate. The
            default keeps the historical "bike" label and adds "bicycle" and
            "motorcycle", which are the names the pretrained COCO model uses
            for two-wheelers.

    Returns:
        None. Results are written to ``output_frames_dir``.
    """
    os.makedirs(output_frames_dir, exist_ok=True)
    wanted_labels = set(vehicle_classes)

    for frame_name in sorted(os.listdir(input_frames_dir)):
        frame = cv2.imread(os.path.join(input_frames_dir, frame_name))
        if frame is None:
            # cv2.imread returns None for entries it cannot decode (stray
            # non-image files, sub-directories); skip them instead of passing
            # None to the model and to imwrite.
            continue

        # Run inference
        results = model(frame)

        # Each 'result' contains boxes and class predictions for a single frame
        for result in results:
            for box in result.boxes:
                label = model.names[int(box.cls.item())]
                if label not in wanted_labels:
                    continue

                x1, y1, x2, y2 = (int(value) for value in box.xyxy[0].tolist())
                conf = box.conf.item()

                # Draw bounding box and label on the frame
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                cv2.putText(frame, f"{label} {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

        # Save processed frame
        cv2.imwrite(os.path.join(output_frames_dir, frame_name), frame)


In [6]:
# Function to convert frames back to video
def frames_to_video(input_frames_dir, output_video_path, fps=30):
    """Encode the images in a directory into a single "mp4v" video file.

    Images are appended in sorted filename order. The video size is taken from
    the first readable image. Entries that ``cv2.imread`` cannot decode are
    skipped.

    Args:
        input_frames_dir: Directory containing the frame images.
        output_video_path: Path of the video file to create.
        fps: Frame rate passed to ``cv2.VideoWriter``.

    Raises:
        ValueError: If the directory contains no readable image, so no video
            could be written.
    """
    out = None
    try:
        for frame_name in sorted(os.listdir(input_frames_dir)):
            frame = cv2.imread(os.path.join(input_frames_dir, frame_name))
            if frame is None:
                # Not a decodable image; writing None to the encoder would fail.
                continue

            if out is None:
                # Open the writer lazily so its size comes from a frame that
                # was actually decoded. VideoWriter expects (width, height).
                height, width = frame.shape[:2]
                fourcc = cv2.VideoWriter_fourcc(*"mp4v")
                out = cv2.VideoWriter(output_video_path, fourcc, fps, (width, height))

            out.write(frame)
    finally:
        # Finalize the file even if reading or encoding a frame raises.
        if out is not None:
            out.release()

    if out is None:
        raise ValueError(f"No readable frames found in {input_frames_dir!r}")

In [7]:
# Source clip plus the working folders for the raw and the annotated frames
# (relative paths, resolved against the notebook's working directory).
video_path = "BeforeObjectD.mp4"
input_frames_dir, output_frames_dir = "frames_input/", "frames_output/"

# Dump the clip to individual frame images and keep the frame total.
frame_count = process_video(video_path=video_path, output_frames_dir=input_frames_dir)

In [8]:
# Run YOLO inference on each frame
run_inference_on_frames(input_frames_dir, output_frames_dir)

# Reuse the source clip's frame rate so the annotated video plays at the same
# speed as the input; fall back to 30 fps when no usable rate is reported.
source_cap = cv2.VideoCapture(video_path)
source_fps = source_cap.get(cv2.CAP_PROP_FPS)
source_cap.release()
output_fps = source_fps if source_fps and source_fps > 0 else 30

# Combine processed frames back into a video
output_video_path = "output_inference_video.mp4"
frames_to_video(output_frames_dir, output_video_path, fps=output_fps)

print("Inference video saved at:", output_video_path)


0: 640x384 1 car, 291.4ms
Speed: 16.4ms preprocess, 291.4ms inference, 45.3ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 197.1ms
Speed: 6.0ms preprocess, 197.1ms inference, 3.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 220.2ms
Speed: 5.0ms preprocess, 220.2ms inference, 5.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 (no detections), 204.8ms
Speed: 4.5ms preprocess, 204.8ms inference, 3.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 177.9ms
Speed: 4.0ms preprocess, 177.9ms inference, 4.1ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 203.2ms
Speed: 4.0ms preprocess, 203.2ms inference, 4.2ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 286.2ms
Speed: 6.0ms preprocess, 286.2ms inference, 7.0ms postprocess per image at shape (1, 3, 640, 384)

0: 640x384 1 car, 176.6ms
Speed: 3.0ms preprocess, 176.6ms inference, 5.0ms postprocess per image at shape (1, 3

Inference video saved at: output_inference_video.mp4
