<a href="https://colab.research.google.com/github/nalajalagreeshma/Q-Techsolutions-Internship/blob/main/video_processing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install yolov5
!pip install opencv-python opencv-python-headless

In [None]:
import cv2
import torch
import numpy as np
from google.colab.patches import cv2_imshow
from yolov5 import YOLOv5

# Initialize YOLOv5 model with COCO-trained weights (yolov5s.pt).
# Inference runs on the GPU when CUDA is available, otherwise on the CPU.
model = YOLOv5('yolov5s.pt', device='cuda' if torch.cuda.is_available() else 'cpu')

# Initialize Video Capture
video_path = '/content/4261446-uhd_3840_2160_25fps.mp4'  # Replace with your video file or use 0 for webcam
cap = cv2.VideoCapture(video_path)

# Check if the video capture is opened correctly
if not cap.isOpened():
    print("Error: Could not open video.")
    # Free the capture handle before stopping.
    cap.release()
    # exit() is a helper meant for the interactive interpreter; raising
    # SystemExit is the programmatic way to stop execution here.
    raise SystemExit(1)

# ----- Video Stabilization Preparation -----
# Read the first frame; it seeds both the stabilizer and the motion tracker.
ret, prev_frame = cap.read()
if not ret:
    # A capture that opened successfully can still fail to deliver a frame;
    # stop here instead of letting cv2.cvtColor fail on an empty frame.
    print("Error: Could not read the first frame of the video.")
    cap.release()
    raise SystemExit(1)
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

# Initialize ORB detector for finding keypoints and descriptors
orb = cv2.ORB_create()

# Get keypoints and descriptors for the first frame
# (des1 is None when ORB finds no keypoints in the frame).
kp1, des1 = orb.detectAndCompute(prev_gray, None)

# Per-frame transformation matrices, appended to by the processing loop
transforms = []

# ----- Motion Tracking Preparation -----
# Parameters for Lucas-Kanade optical flow: 15x15 search window, 2 pyramid
# levels, stop after 10 iterations or when the update falls below 0.03.
lk_params = dict(winSize=(15, 15), maxLevel=2,
                 criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))

# Initial corner points for motion tracking, detected on the first frame
# (goodFeaturesToTrack returns None when no corner passes the quality test).
old_gray = prev_gray.copy()
p0 = cv2.goodFeaturesToTrack(old_gray, maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

# ----- Main processing loop -----
# For every frame: stabilize it against the previous frame, overlay
# Lucas-Kanade motion tracks, run YOLOv5 object detection, and display it.

# The matcher is stateless between calls, so build it once instead of per frame.
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

# Corner-detection settings used to re-seed the tracker when every tracked
# point has been lost (same values as the initial detection).
feature_params = dict(maxCorners=100, qualityLevel=0.3, minDistance=7, blockSize=7)

try:
    # Loop through each frame in the video
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video reached or no frames captured.")
            break

        # ----- Video Stabilization -----
        # Convert current frame to grayscale
        curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Find keypoints and descriptors for the current frame
        kp2, des2 = orb.detectAndCompute(curr_gray, None)

        transform = None
        # Descriptors are None when ORB finds no keypoints (e.g. a blank
        # frame); matching would raise in that case, so skip it.
        if des1 is not None and des2 is not None:
            # Match descriptors between frames, best (lowest distance) first
            matches = sorted(bf.match(des1, des2), key=lambda m: m.distance)

            # Too few correspondences cannot give a reliable estimate.
            if len(matches) >= 3:
                pts_prev = np.float32([kp1[m.queryIdx].pt for m in matches]).reshape(-1, 1, 2)
                pts_curr = np.float32([kp2[m.trainIdx].pt for m in matches]).reshape(-1, 1, 2)

                # Estimate the motion from the CURRENT frame back to the
                # PREVIOUS one. warpAffine moves source pixels forward by the
                # given matrix, so this direction cancels the inter-frame
                # motion; a prev->curr matrix would apply the motion twice.
                transform, _ = cv2.estimateAffinePartial2D(pts_curr, pts_prev)

        if transform is None:
            # No usable estimate for this frame: leave the frame unwarped.
            transform = np.eye(2, 3, dtype=np.float64)

        # Store the transformation
        transforms.append(transform)

        # Apply the transformation to stabilize the frame
        height, width = frame.shape[:2]
        frame_stabilized = cv2.warpAffine(frame, transform, (width, height))

        # Update previous keypoints and descriptors for the next iteration.
        # curr_gray is rebuilt every iteration and never modified in place,
        # so no copy is needed.
        prev_gray = curr_gray
        kp1, des1 = kp2, des2

        # ----- Motion Tracking -----
        # Re-seed the tracker when there are no points left to follow;
        # calcOpticalFlowPyrLK cannot run on an empty point set.
        if p0 is None or len(p0) == 0:
            p0 = cv2.goodFeaturesToTrack(old_gray, **feature_params)

        if p0 is not None and len(p0) > 0:
            p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, curr_gray, p0, None, **lk_params)

            if p1 is not None:
                # Select good points (status 1 = flow found for that point)
                tracked = st == 1
                good_new = p1[tracked]
                good_old = p0[tracked]

                # Draw the tracks for motion tracking.
                # NOTE: the points are measured on the unwarped frames, so the
                # overlay is only approximately aligned with the warped image.
                for new, old in zip(good_new, good_old):
                    a, b = new.ravel()
                    c, d = old.ravel()
                    cv2.line(frame_stabilized, (int(a), int(b)), (int(c), int(d)), (0, 255, 0), 2)

                p0 = good_new.reshape(-1, 1, 2)
            else:
                # Tracking failed entirely; force a re-seed on the next frame.
                p0 = None

        old_gray = curr_gray

        # ----- Object Detection with YOLOv5 -----
        # OpenCV frames are BGR, while the Ultralytics YOLOv5 docs pass OpenCV
        # images to the model as RGB, so convert before inference.
        results = model.predict(cv2.cvtColor(frame_stabilized, cv2.COLOR_BGR2RGB))
        detections = results.xyxy[0]  # Bounding boxes for detected objects

        # Loop through the detections and draw boxes and labels
        for *box, conf, cls in detections:
            x1, y1, x2, y2 = map(int, box)
            confidence = float(conf)
            class_id = int(cls)
            label = model.model.names[class_id]  # Get the label for the detected class

            # Draw bounding boxes and put the label with confidence score on the frame
            cv2.rectangle(frame_stabilized, (x1, y1), (x2, y2), (0, 255, 0), 2)
            label_text = f"{label}: {confidence:.2f}"
            # Clamp the baseline so labels of boxes touching the top edge stay visible.
            cv2.putText(frame_stabilized, label_text, (x1, max(y1 - 10, 10)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

        # Display the frame with detected objects, motion tracking, and stabilization
        cv2_imshow(frame_stabilized)

        # Press 'q' to exit the video loop (you can skip this in Google Colab)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the video capture object even if a frame raised an error
    cap.release()
    cv2.destroyAllWindows()