In [1]:
import argparse
import cv2
import sys
import numpy as np
import cv2.aruco as aruco
import torch
from torchvision import models, transforms
from PIL import Image
import time

In [5]:
# Parameters
video_path = 'stabilized_video.mp4'  # Replace with your video path
output_stabilized = 'stabilized_video2.mp4'  # Output path for stabilized video

## Stabilization

by: https://github.com/krutikabapat/Video-Stabilization-using-OpenCV/blob/master/video_stabilization.py

In [6]:
import numpy as np
import cv2
SMOOTHING_RADIUS = 50

def movingAverage(curve, radius): 
    window_size = 2 * radius + 1
    f = np.ones(window_size) / window_size 
    curve_pad = np.lib.pad(curve, (radius, radius), 'edge') 
    curve_smoothed = np.convolve(curve_pad, f, mode='same') 
    curve_smoothed = curve_smoothed[radius:-radius]
    return curve_smoothed 

def smooth(trajectory): 
    smoothed_trajectory = np.copy(trajectory) 
    for i in range(3):
        smoothed_trajectory[:, i] = movingAverage(trajectory[:, i], radius=SMOOTHING_RADIUS)
    return smoothed_trajectory

def fixBorder(frame):
    s = frame.shape
    T = cv2.getRotationMatrix2D((s[1]/2, s[0]/2), 0, 1.04)
    frame = cv2.warpAffine(frame, T, (s[1], s[0]))
    return frame

# Read input video
cp = cv2.VideoCapture(video_path)

n_frames = int(cp.get(cv2.CAP_PROP_FRAME_COUNT))
print(n_frames)

width = int(cp.get(cv2.CAP_PROP_FRAME_WIDTH)) 
height = int(cp.get(cv2.CAP_PROP_FRAME_HEIGHT))

print("Width:", width)
print("Height:", height)

fps = cp.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter(output_stabilized, fourcc, fps, (width, height))

_, prev = cp.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
transforms = np.zeros((n_frames - 1, 3), np.float32) 

for i in range(n_frames - 2):
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200, qualityLevel=0.01, minDistance=30, blockSize=3)
    succ, curr = cp.read()

    if not succ:
        break

    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
    curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)

    idx = np.where(status == 1)[0]
    prev_pts = prev_pts[idx]
    curr_pts = curr_pts[idx]
    assert prev_pts.shape == curr_pts.shape 

    m, inliers = cv2.estimateAffine2D(prev_pts, curr_pts)
    if m is None:
        print("Could not estimate affine transformation")
        continue

    dx = m[0, 2]
    dy = m[1, 2]
    da = np.arctan2(m[1, 0], m[0, 0])
    transforms[i] = [dx, dy, da] 
    prev_gray = curr_gray

trajectory = np.cumsum(transforms, axis=0) 
smoothed_trajectory = smooth(trajectory)
difference = smoothed_trajectory - trajectory
transforms_smooth = transforms + difference

cp.set(cv2.CAP_PROP_POS_FRAMES, 0) 
for i in range(n_frames - 2):
    success, frame = cp.read() 
    if not success:
        break

    dx = transforms_smooth[i, 0]
    dy = transforms_smooth[i, 1]
    da = transforms_smooth[i, 2]

    m = np.zeros((2, 3), np.float32)
    m[0, 0] = np.cos(da)
    m[0, 1] = -np.sin(da)
    m[1, 0] = np.sin(da)
    m[1, 1] = np.cos(da)
    m[0, 2] = dx
    m[1, 2] = dy

    frame_stabilized = cv2.warpAffine(frame, m, (width, height))
    frame_stabilized = fixBorder(frame_stabilized) 

    frame_out = cv2.hconcat([frame, frame_stabilized])
    
    if frame_out.shape[1] > 1920: 
        frame_out = cv2.resize(frame_out, (int(frame_out.shape[1] / 2), int(frame_out.shape[0] / 2)))
    
    cv2.imshow("Before and After", frame_out)
    cv2.waitKey(10)
    out.write(frame_stabilized)

cp.release()
out.release()
cv2.destroyAllWindows()



430
Width: 1440
Height: 1080


OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


## Select Position

In [3]:
def select_initial_position(video_path):
    """
    Plays the first few frames of the video slowly. When the user presses 's',
    the video pauses, and they can select the initial position of the buoy.
    Returns the (x, y) coordinates of the selected point and the frame number.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return None, None

    initial_position = None
    frame_number = 0

    # Callback function to capture mouse click
    def mouse_callback(event, x, y, flags, param):
        nonlocal initial_position
        if event == cv2.EVENT_LBUTTONDOWN:
            initial_position = (x, y)
            print(f"Initial position selected at: {initial_position} in frame {frame_number}")
            cv2.destroyWindow("Select Initial Position")  # Close the window after selection

    # Set up window and callback
    cv2.namedWindow("Select Initial Position")
    cv2.setMouseCallback("Select Initial Position", mouse_callback)

    frame_delay = 500  # Delay in milliseconds to slow down frames

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Reached the end of the video or encountered an error.")
            break

        frame_number = int(cap.get(cv2.CAP_PROP_POS_FRAMES))  # Current frame number
        cv2.imshow("Select Initial Position", frame)
        key = cv2.waitKey(frame_delay) & 0xFF

        if key == ord('s'):  # Press 's' to select initial position
            print("Press 's' detected. Click on the frame to select the initial position.")
            while initial_position is None:
                cv2.waitKey(1)
            break
        elif key == ord('q'):  # Press 'q' to quit
            print("Selection canceled.")
            break

    cap.release()
    return initial_position, frame_number

def select_roi(frame, initial_position):
    """Zooms into the selected area and allows the user to select the ROI."""
    x, y = initial_position
    zoom_scale = 2.0  # Scale factor for zooming
    h, w = frame.shape[:2]

    # Define the zoomed-in area
    start_x = max(0, int(x - w / (2 * zoom_scale)))
    start_y = max(0, int(y - h / (2 * zoom_scale)))
    end_x = min(frame.shape[1], int(x + w / (2 * zoom_scale)))
    end_y = min(frame.shape[0], int(y + h / (2 * zoom_scale)))

    # Crop and resize the frame for zoom effect
    zoomed_frame = frame[start_y:end_y, start_x:end_x]
    zoomed_frame = cv2.resize(zoomed_frame, (w, h))

    # Show the zoomed-in frame and allow ROI selection
    roi = cv2.selectROI("Select ROI", zoomed_frame, fromCenter=False, showCrosshair=True)
    cv2.destroyWindow("Select ROI")

    # Calculate the bounding box in the original frame
    roi_x, roi_y, roi_w, roi_h = roi
    original_bbox = (start_x + roi_x, start_y + roi_y, roi_w, roi_h)
    
    return original_bbox

## Point tracking

In [14]:
import argparse
import cv2
import sys
import numpy as np
import cv2.aruco as aruco
import torch
from torchvision import models, transforms
from PIL import Image
import time

def apply_motion_model(prev_gray_frame, curr_gray_frame):
    """Estimate motion between two grayscale frames."""
    flow = cv2.calcOpticalFlowFarneback(prev_gray_frame, curr_gray_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = np.median(flow[..., 0]), np.median(flow[..., 1])
    return dx, dy

def apply_background_subtraction(frame, background):
    """Applies background subtraction to reduce wave motion."""
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    background_gray = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
    diff = cv2.absdiff(background_gray, frame_gray)
    _, thresh = cv2.threshold(diff, 50, 255, cv2.THRESH_BINARY) #30 255
    return diff

In [24]:
# Parameters
video_path = 'stabilized_video.mp4'  # Path to the already stabilized video
initial_position, frame_number = (611, 491),  23 # select_initial_position(video_path)

# Initialize video capture using the stabilized video
cap = cv2.VideoCapture(video_path)

# Set the position to the specified frame number
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

# Use the first stabilized frame as the background
success, background = cap.read()
if not success:
    print("Failed to read the background frame.")
    cap.release()
    exit()

# Allow user to select the ROI based on the initial position
# bbox = select_roi(background, initial_position)
bbox = (593, 476, 37, 17) # cv2.selectROI("Select ROI", background, fromCenter=False, showCrosshair=True)
# bbox = cv2.selectROI("Select ROI", background, fromCenter=False, showCrosshair=True)
# print(bbox)

# Initialize the CSRT tracker with the first stabilized frame
tracker = cv2.TrackerCSRT_create()
tracker.init(background, bbox)

# Initialize variables for motion compensation
prev_gray_frame = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
is_paused = False # Control pausing
frame_count = frame_number


# MOG2
MOG2_subtractor = cv2.createBackgroundSubtractorMOG2(detectShadows = True) # exclude shadow areas from the objects you detected
bg_subtractor=MOG2_subtractor

# Begin tracking loop
while True:
    if not is_paused:
        # Read the next frame from the video
        ret, frame = cap.read()
        frame_count+=1
        if not ret:
            break  # Exit if no frames are left
        
        if frame_count == 45:
            success = False
        elif not success:
            bbox = cv2.selectROI("Select ROI", frame, fromCenter=False, showCrosshair=True)
            tracker.init(background, bbox)

        # Convert the current frame to grayscale for motion model
        curr_gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Estimate motion using motion model
        dx, dy = apply_motion_model(prev_gray_frame, curr_gray_frame)
        motion_compensated_frame = cv2.warpAffine(frame, np.float32([[1, 0, -dx], [0, 1, -dy]]), (frame.shape[1], frame.shape[0]))  # Apply motion compensation to the current frame
        motion_free_frame = apply_background_subtraction(motion_compensated_frame, background)  # Apply background subtraction to reduce wave motion

        # foreground_mask = bg_subtractor.apply(frame) 
        # ret , treshold = cv2.threshold(foreground_mask.copy(), 160, 255,cv2.THRESH_BINARY)

        # Update the tracker
        success, bbox = tracker.update(motion_free_frame)

        if success:
            (x, y, w, h) = [int(v) for v in bbox]
            cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(frame, "Buoy", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
        else:
            cv2.putText(frame, "Lost", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        cv2.circle(frame, initial_position, radius=5, color=(0, 0, 255), thickness=-1)
        cv2.putText(frame, str(frame_count), (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Show the frame
        cv2.imshow("Motion free frame", motion_free_frame)
        cv2.imshow("Buoy Tracking", frame)

        # Update the previous gray frame
        prev_gray_frame = curr_gray_frame

    # Check for key presses
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord(' '):  # Space bar to pause/resume
        is_paused = not is_paused  # Toggle pause

# Release video capture and close windows
cap.release()
cv2.destroyAllWindows()

Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
Select a ROI and then press SPACE or ENTER button!
Cancel the selection process by pressing c button!
