In [1]:
import cv2
import numpy as np

# Classic image-processing walkthrough on one video: every frame is downscaled,
# run through grayscale / background subtraction / thresholding / morphology /
# edge detection / blurring / contour detection, and the nine results are shown
# side by side in a 3x3 grid until the video ends or "q" is pressed.

# Input video file path
video_path = r"D:\Sem-4\Project\Datasets\VISEM-Tracking - Datasets from Paper 2\VISEM_Tracking_Train_v4\Train\11\11.mp4"

# File name shown in the overlay. It never changes, so compute it once here
# instead of on every frame.
file_name = video_path.split("\\")[-1]

# Load the video
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook, exit() asks the kernel
    # to shut down rather than just stopping this cell.
    raise IOError(f"Error: Cannot open video: {video_path}")

# Background subtractor with adjusted parameters
bg_subtractor = cv2.createBackgroundSubtractorMOG2(history=500, varThreshold=25)

# Kernel for morphological operations
kernel = np.ones((3, 3), np.uint8)

# Scaling factor to resize the video frames
scale_factor = 0.5

# Overlay text: font and origin shared by every panel label
font = cv2.FONT_HERSHEY_SIMPLEX
org_text = (10, 30)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video.")
            break

        # Resize the frame
        frame_resized = cv2.resize(frame, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA)

        # 1. Grayscale
        gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)

        # 2. Background Subtraction
        foreground_mask = bg_subtractor.apply(frame_resized)

        # 3. Noise Removal
        # Opening followed by closing with the 3x3 kernel to clean the mask
        foreground_mask_cleaned = cv2.morphologyEx(foreground_mask, cv2.MORPH_OPEN, kernel)
        foreground_mask_cleaned = cv2.morphologyEx(foreground_mask_cleaned, cv2.MORPH_CLOSE, kernel)

        # Smooth the mask using Gaussian blur
        foreground_mask_blurred = cv2.GaussianBlur(foreground_mask_cleaned, (5, 5), 0)

        # Re-binarize the blurred mask: values above 127 become 255, the rest 0
        _, foreground_mask_final = cv2.threshold(foreground_mask_blurred, 127, 255, cv2.THRESH_BINARY)

        # 4. Thresholding on grayscale image
        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

        # 5. Morphological Operations (both applied to `thresh` independently)
        opened = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)  # Opening
        closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)  # Closing

        # 6. Edge Detection
        edges = cv2.Canny(gray, 50, 150)

        # 7. Blurring
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # 8. Contour Detection on the thresholded image, drawn on a copy of the frame
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contour_frame = frame_resized.copy()
        cv2.drawContours(contour_frame, contours, -1, (0, 255, 0), 2)

        # Panels in display order (row-major, three per row). Single-channel
        # results are converted to BGR so they can be stacked with colour frames.
        panels = [
            (frame_resized.copy(), 'Original'),
            (cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR), 'Grayscale'),
            (cv2.cvtColor(foreground_mask_final, cv2.COLOR_GRAY2BGR), 'Foreground'),
            (cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR), 'Threshold'),
            (cv2.cvtColor(opened, cv2.COLOR_GRAY2BGR), 'Opened'),
            (cv2.cvtColor(closed, cv2.COLOR_GRAY2BGR), 'Closed'),
            (cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR), 'Edges'),
            (cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR), 'Blurred'),
            (contour_frame, 'Contours'),
        ]
        labeled = [
            cv2.putText(image, label, org_text, font, 0.6, (255, 255, 255), 2, cv2.LINE_AA)
            for image, label in panels
        ]

        # Combine all images into a single visualization with labels
        top_row = np.hstack(labeled[0:3])
        middle_row = np.hstack(labeled[3:6])
        bottom_row = np.hstack(labeled[6:9])

        # Stack the combined images
        combined = np.vstack([top_row, middle_row, bottom_row])

        # Display the combined visualization with file name and quit instructions
        cv2.putText(combined, f'File: {file_name}', (10, 50), font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(combined, 'Press "q" to quit', (10, 80), font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)

        cv2.imshow('Classic Methods Visualization', combined)

        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the video and close the window, even when the loop is
    # interrupted or a processing step raises.
    cap.release()
    cv2.destroyAllWindows()


In [2]:
import cv2
import numpy as np

# Classic image-processing walkthrough on one video, using the raw MOG2
# foreground mask (no clean-up step). Each frame is downscaled, processed, and
# the nine results are shown in a 3x3 grid until the video ends or "q" is pressed.

# Input video file path
video_path = r"D:\Sem-4\Project\Datasets\VISEM-Tracking - Datasets from Paper 2\VISEM_Tracking_Train_v4\Train\11\11.mp4"

# File name shown in the overlay. It never changes, so compute it once here
# instead of on every frame.
file_name = video_path.split("\\")[-1]

# Load the video
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Raise instead of calling exit(): inside a notebook, exit() asks the kernel
    # to shut down rather than just stopping this cell.
    raise IOError(f"Error: Cannot open video: {video_path}")

# Background subtractor (library defaults)
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

# Kernel for morphological operations
kernel = np.ones((3, 3), np.uint8)

# Scaling factor to resize the video frames
scale_factor = 0.5

# Overlay text: font and origin shared by every panel label
font = cv2.FONT_HERSHEY_SIMPLEX
org_text = (10, 30)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video.")
            break

        # Resize the frame
        frame_resized = cv2.resize(frame, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA)

        # 1. Grayscale
        gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)

        # 2. Background Subtraction
        foreground_mask = bg_subtractor.apply(frame_resized)

        # 3. Thresholding
        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

        # 4. Morphological Operations (both applied to `thresh` independently)
        opened = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)  # Opening
        closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)  # Closing

        # 5. Edge Detection
        edges = cv2.Canny(gray, 50, 150)

        # 6. Blurring
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # 7. Contour Detection on the thresholded image, drawn on a copy of the frame
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contour_frame = frame_resized.copy()
        cv2.drawContours(contour_frame, contours, -1, (0, 255, 0), 2)

        # Panels in display order (row-major, three per row). Single-channel
        # results are converted to BGR so they can be stacked with colour frames.
        panels = [
            (frame_resized.copy(), 'Original'),
            (cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR), 'Grayscale'),
            (cv2.cvtColor(foreground_mask, cv2.COLOR_GRAY2BGR), 'Foreground'),
            (cv2.cvtColor(thresh, cv2.COLOR_GRAY2BGR), 'Threshold'),
            (cv2.cvtColor(opened, cv2.COLOR_GRAY2BGR), 'Opened'),
            (cv2.cvtColor(closed, cv2.COLOR_GRAY2BGR), 'Closed'),
            (cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR), 'Edges'),
            (cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR), 'Blurred'),
            (contour_frame, 'Contours'),
        ]
        labeled = [
            cv2.putText(image, label, org_text, font, 0.6, (255, 255, 255), 2, cv2.LINE_AA)
            for image, label in panels
        ]

        # Combine all images into a single visualization with labels
        top_row = np.hstack(labeled[0:3])
        middle_row = np.hstack(labeled[3:6])
        bottom_row = np.hstack(labeled[6:9])

        # Stack the combined images
        combined = np.vstack([top_row, middle_row, bottom_row])

        # Display the combined visualization with file name and quit instructions
        cv2.putText(combined, f'File: {file_name}', (10, 50), font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)
        cv2.putText(combined, 'Press "q" to quit', (10, 80), font, 0.6, (255, 255, 255), 1, cv2.LINE_AA)

        cv2.imshow('Classic Methods Visualization', combined)

        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the video and close the window, even when the loop is
    # interrupted or a processing step raises.
    cap.release()
    cv2.destroyAllWindows()

In [3]:
import cv2
import numpy as np

# Note: Removed imports for `torch` and `torch.nn` as they are not available in the current environment.

# Contour-debugging view of one video: each frame is downscaled and processed,
# every external contour of the thresholded image gets a bounding box and an
# index label, and six panels are shown in a 2x3 grid until the video ends,
# "q" is pressed, or OpenCV fails while composing the grid.

# Input video file path. This is a raw string, so each backslash is written
# once; doubling them would put literal "\\" pairs into the path.
video_path = r"D:\Sem-4\Project\Datasets\VISEM-Tracking - Datasets from Paper 2\VISEM_Tracking_Train_v4\Train\11\11.mp4"

# Load the video
cap = cv2.VideoCapture(video_path)

if not cap.isOpened():
    # Raise instead of calling exit(1): inside a notebook, exit() asks the
    # kernel to shut down rather than just stopping this cell.
    raise IOError(f"Error: Cannot open video: {video_path}")

# Background subtractor (library defaults)
bg_subtractor = cv2.createBackgroundSubtractorMOG2()

# Kernel for morphological operations
kernel = np.ones((3, 3), np.uint8)

# Scaling factor to resize the video frames
scale_factor = 0.5

# Overlay text: font and origin shared by every panel label
font = cv2.FONT_HERSHEY_SIMPLEX
org_text = (10, 30)

try:
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("End of video.")
            break

        # Resize the frame
        frame_resized = cv2.resize(frame, None, fx=scale_factor, fy=scale_factor, interpolation=cv2.INTER_AREA)

        # 1. Grayscale
        gray = cv2.cvtColor(frame_resized, cv2.COLOR_BGR2GRAY)

        # 2. Background Subtraction
        foreground_mask = bg_subtractor.apply(frame_resized)

        # 3. Thresholding
        _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

        # 4. Morphological Operations (computed, but not part of this cell's grid)
        opened = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)  # Opening
        closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)  # Closing

        # 5. Edge Detection
        edges = cv2.Canny(gray, 50, 150)

        # 6. Blurring
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)

        # 7. Contour Detection on the thresholded image, drawn on a copy of the frame
        contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contour_frame = frame_resized.copy()
        cv2.drawContours(contour_frame, contours, -1, (0, 255, 0), 2)

        # Visualize contours with IDs (for debugging)
        for idx, contour in enumerate(contours):
            x, y, w, h = cv2.boundingRect(contour)
            # Keep the label baseline inside the frame: for a contour touching
            # the top edge, y - 10 is negative and the text would be drawn
            # outside the image.
            label_y = max(y - 10, 10)
            cv2.putText(contour_frame, f'ID: {idx}', (x, label_y), font, 0.5, (255, 0, 0), 1)
            cv2.rectangle(contour_frame, (x, y), (x + w, y + h), (0, 255, 0), 1)

        # Combine all images into a single visualization
        try:
            # Panels in display order (row-major, three per row). Single-channel
            # results are converted to BGR so they can be stacked with colour frames.
            panels = [
                (frame_resized.copy(), 'Original'),
                (cv2.cvtColor(gray, cv2.COLOR_GRAY2BGR), 'Grayscale'),
                (cv2.cvtColor(foreground_mask, cv2.COLOR_GRAY2BGR), 'Foreground'),
                (contour_frame, 'Contours'),
                (cv2.cvtColor(edges, cv2.COLOR_GRAY2BGR), 'Edges'),
                (cv2.cvtColor(blurred, cv2.COLOR_GRAY2BGR), 'Blurred'),
            ]
            labeled = [
                cv2.putText(image, label, org_text, font, 0.6, (255, 255, 255), 2, cv2.LINE_AA)
                for image, label in panels
            ]
            top_row = np.hstack(labeled[0:3])
            bottom_row = np.hstack(labeled[3:6])

            # Stack the combined images
            combined = np.vstack([top_row, bottom_row])

            # Display the combined visualization
            cv2.imshow('SPDConv Visualization', combined)

        except cv2.error as e:
            # Report the OpenCV failure and stop playback; cleanup still runs below.
            print("Error occurred while combining visualization: ", e)
            break

        # Exit if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the video and close the window, even when the loop is
    # interrupted or a processing step raises something other than cv2.error.
    cap.release()
    cv2.destroyAllWindows()
