In [1]:
import argparse
import cv2
import sys
import numpy as np
import cv2.aruco as aruco
import torch
from torchvision import models, transforms
from PIL import Image
import time

In [21]:
# Parameters
video_path = 'buoy_video.mp4'  # Replace with your video path
output_stabilized = 'stabilized_video3.mp4'  # Output path for stabilized video

## Stabilization

by: https://github.com/krutikabapat/Video-Stabilization-using-OpenCV/blob/master/video_stabilization.py

In [6]:
import numpy as np
import cv2
SMOOTHING_RADIUS = 50

def movingAverage(curve, radius): 
    window_size = 2 * radius + 1
    f = np.ones(window_size) / window_size 
    curve_pad = np.lib.pad(curve, (radius, radius), 'edge') 
    curve_smoothed = np.convolve(curve_pad, f, mode='same') 
    curve_smoothed = curve_smoothed[radius:-radius]
    return curve_smoothed 

def smooth(trajectory): 
    smoothed_trajectory = np.copy(trajectory) 
    for i in range(3):
        smoothed_trajectory[:, i] = movingAverage(trajectory[:, i], radius=SMOOTHING_RADIUS)
    return smoothed_trajectory

def fixBorder(frame):
    s = frame.shape
    T = cv2.getRotationMatrix2D((s[1]/2, s[0]/2), 0, 1.04)
    frame = cv2.warpAffine(frame, T, (s[1], s[0]))
    return frame

# Read input video
cp = cv2.VideoCapture(video_path)

n_frames = int(cp.get(cv2.CAP_PROP_FRAME_COUNT))
print(n_frames)

width = int(cp.get(cv2.CAP_PROP_FRAME_WIDTH)) 
height = int(cp.get(cv2.CAP_PROP_FRAME_HEIGHT))

print("Width:", width)
print("Height:", height)

fps = cp.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter(output_stabilized, fourcc, fps, (width, height))

_, prev = cp.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
transforms = np.zeros((n_frames - 1, 3), np.float32) 

for i in range(n_frames - 2):
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200, qualityLevel=0.01, minDistance=30, blockSize=3)
    succ, curr = cp.read()

    if not succ:
        break

    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
    curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)

    idx = np.where(status == 1)[0]
    prev_pts = prev_pts[idx]
    curr_pts = curr_pts[idx]
    assert prev_pts.shape == curr_pts.shape 

    m, inliers = cv2.estimateAffine2D(prev_pts, curr_pts)
    if m is None:
        print("Could not estimate affine transformation")
        continue

    dx = m[0, 2]
    dy = m[1, 2]
    da = np.arctan2(m[1, 0], m[0, 0])
    transforms[i] = [dx, dy, da] 
    prev_gray = curr_gray

trajectory = np.cumsum(transforms, axis=0) 
smoothed_trajectory = smooth(trajectory)
difference = smoothed_trajectory - trajectory
transforms_smooth = transforms + difference

cp.set(cv2.CAP_PROP_POS_FRAMES, 0) 
for i in range(n_frames - 2):
    success, frame = cp.read() 
    if not success:
        break

    dx = transforms_smooth[i, 0]
    dy = transforms_smooth[i, 1]
    da = transforms_smooth[i, 2]

    m = np.zeros((2, 3), np.float32)
    m[0, 0] = np.cos(da)
    m[0, 1] = -np.sin(da)
    m[1, 0] = np.sin(da)
    m[1, 1] = np.cos(da)
    m[0, 2] = dx
    m[1, 2] = dy

    frame_stabilized = cv2.warpAffine(frame, m, (width, height))
    frame_stabilized = fixBorder(frame_stabilized) 

    frame_out = cv2.hconcat([frame, frame_stabilized])
    
    if frame_out.shape[1] > 1920: 
        frame_out = cv2.resize(frame_out, (int(frame_out.shape[1] / 2), int(frame_out.shape[0] / 2)))
    
    cv2.imshow("Before and After", frame_out)
    cv2.waitKey(10)
    out.write(frame_stabilized)

cp.release()
out.release()
cv2.destroyAllWindows()



430
Width: 1440
Height: 1080


OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


In [7]:
import numpy as np
import cv2

SMOOTHING_RADIUS = 50

def movingAverage(curve, radius): 
    window_size = 2 * radius + 1
    f = np.ones(window_size) / window_size 
    curve_pad = np.lib.pad(curve, (radius, radius), 'edge') 
    curve_smoothed = np.convolve(curve_pad, f, mode='same') 
    curve_smoothed = curve_smoothed[radius:-radius]
    return curve_smoothed 

def smooth(trajectory): 
    smoothed_trajectory = np.copy(trajectory) 
    for i in range(3):
        smoothed_trajectory[:, i] = movingAverage(trajectory[:, i], radius=SMOOTHING_RADIUS)
    return smoothed_trajectory

def fixBorder(frame):
    s = frame.shape
    T = cv2.getRotationMatrix2D((s[1]/2, s[0]/2), 0, 1.04)
    frame = cv2.warpAffine(frame, T, (s[1], s[0]))
    return frame

def detect_horizon(frame):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 150)
    if lines is not None:
        for rho, theta in lines[:, 0]:
            if 0.9 < theta < 2.2:  # Only keep lines close to horizontal
                return theta
    return None

# Read input video
cp = cv2.VideoCapture(video_path)

n_frames = int(cp.get(cv2.CAP_PROP_FRAME_COUNT))
print(n_frames)

width = int(cp.get(cv2.CAP_PROP_FRAME_WIDTH)) 
height = int(cp.get(cv2.CAP_PROP_FRAME_HEIGHT))

print("Width:", width)
print("Height:", height)

fps = cp.get(cv2.CAP_PROP_FPS)
fourcc = cv2.VideoWriter_fourcc(*'MJPG')
out = cv2.VideoWriter(output_stabilized, fourcc, fps, (width, height))

_, prev = cp.read()
prev_gray = cv2.cvtColor(prev, cv2.COLOR_BGR2GRAY)
transforms = np.zeros((n_frames - 1, 3), np.float32) 

for i in range(n_frames - 2):
    prev_pts = cv2.goodFeaturesToTrack(prev_gray, maxCorners=200, qualityLevel=0.01, minDistance=30, blockSize=3)
    succ, curr = cp.read()

    if not succ:
        break

    curr_gray = cv2.cvtColor(curr, cv2.COLOR_BGR2GRAY)
    curr_pts, status, err = cv2.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, None)

    idx = np.where(status == 1)[0]
    prev_pts = prev_pts[idx]
    curr_pts = curr_pts[idx]
    assert prev_pts.shape == curr_pts.shape 

    m, inliers = cv2.estimateAffine2D(prev_pts, curr_pts)
    if m is None:
        print("Could not estimate affine transformation")
        continue

    dx = m[0, 2]
    dy = m[1, 2]
    da = np.arctan2(m[1, 0], m[0, 0])
    
    # Horizon-based correction
    horizon_angle = detect_horizon(curr)
    if horizon_angle is not None:
        correction_angle = horizon_angle - np.pi / 2
        da += correction_angle  # Adjust rotation based on horizon

    transforms[i] = [dx, dy, da] 
    prev_gray = curr_gray

trajectory = np.cumsum(transforms, axis=0) 
smoothed_trajectory = smooth(trajectory)
difference = smoothed_trajectory - trajectory
transforms_smooth = transforms + difference

cp.set(cv2.CAP_PROP_POS_FRAMES, 0) 
for i in range(n_frames - 2):
    success, frame = cp.read() 
    if not success:
        break

    dx = transforms_smooth[i, 0]
    dy = transforms_smooth[i, 1]
    da = transforms_smooth[i, 2]

    m = np.zeros((2, 3), np.float32)
    m[0, 0] = np.cos(da)
    m[0, 1] = -np.sin(da)
    m[1, 0] = np.sin(da)
    m[1, 1] = np.cos(da)
    m[0, 2] = dx
    m[1, 2] = dy

    frame_stabilized = cv2.warpAffine(frame, m, (width, height))
    frame_stabilized = fixBorder(frame_stabilized) 

    frame_out = cv2.hconcat([frame, frame_stabilized])
    
    if frame_out.shape[1] > 1920: 
        frame_out = cv2.resize(frame_out, (int(frame_out.shape[1] / 2), int(frame_out.shape[0] / 2)))
    
    cv2.imshow("Before and After", frame_out)
    cv2.waitKey(10)
    out.write(frame_stabilized)

cp.release()
out.release()
cv2.destroyWindow("Before and After")
cv2.destroyAllWindows()


432
Width: 1440
Height: 1080


OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


: 

In [14]:
def detect_horizon_and_draw(frame):
    # Convert the frame to grayscale
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Use Canny edge detection
    edges = cv2.Canny(gray, 10, 100, apertureSize=7)
    # Use Hough line transform to detect lines
    lines = cv2.HoughLines(edges, 1, np.pi / 180, 150)
    
    horizon_angle = None
    if lines is not None:
        for rho, theta in lines[:, 0]:
            # Only consider lines close to horizontal (angle between 0.9 and 2.2 radians)
            if 0.9 < theta < 2.2:
                horizon_angle = theta
                # Draw the detected line on the frame
                a = np.cos(theta)
                b = np.sin(theta)
                x0 = a * rho
                y0 = b * rho
                x1 = int(x0 + 1000 * (-b))
                y1 = int(y0 + 1000 * a)
                x2 = int(x0 - 1000 * (-b))
                y2 = int(y0 - 1000 * a)
                cv2.line(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                break  # Use the first detected horizon line

    return frame, horizon_angle

def stabilize_video(video_path, output_path):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'MJPG')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detect the horizon and draw it on the frame
        frame_with_horizon, horizon_angle = detect_horizon_and_draw(frame)
        
        # If a horizon angle is detected, use it to correct the frame's rotation
        if horizon_angle is not None:
            # Calculate the rotation needed to align the horizon
            correction_angle = (horizon_angle - np.pi / 2) * (180 / np.pi)
            # Get the rotation matrix
            center = (width // 2, height // 2)
            rotation_matrix = cv2.getRotationMatrix2D(center, correction_angle, 1)
            # Rotate the frame to stabilize the horizon
            frame_with_horizon = cv2.warpAffine(frame_with_horizon, rotation_matrix, (width, height))

        # Write the stabilized frame to the output video
        out.write(frame_with_horizon)
        
        # Display the frame
        frame_out = cv2.hconcat([frame, frame_with_horizon])
    
        if frame_out.shape[1] > 1920: 
            frame_out = cv2.resize(frame_out, (int(frame_out.shape[1] / 2), int(frame_out.shape[0] / 2)))
        
        cv2.imshow("Before and After", frame_out)

        # cv2.imshow("Stabilized Video with Horizon", frame_with_horizon)
        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    out.release()
    cv2.destroyWindow("Before and After")

stabilize_video(video_path, output_stabilized)


OpenCV: FFMPEG: tag 0x47504a4d/'MJPG' is not supported with codec id 7 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


KeyboardInterrupt: 

In [23]:
import cv2
import numpy as np

def detect_horizon_line(image_grayscaled):
    """Detect the horizon's starting and ending points in the given image.

    The horizon line is detected by applying Otsu's threshold method to
    separate the sky from the remainder of the image.
    """
    msg = ('`image_grayscaled` should be a grayscale, 2-dimensional image '
           'of shape (height, width).')
    assert image_grayscaled.ndim == 2, msg

    # Apply Gaussian blur to smooth the image
    image_blurred = cv2.GaussianBlur(image_grayscaled, ksize=(3, 3), sigmaX=0)
    
    # Apply Otsu's thresholding
    _, image_thresholded = cv2.threshold(
        image_blurred, thresh=0, maxval=1,
        type=cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    image_thresholded = image_thresholded - 1
    
    # Apply morphological closing to fill gaps
    image_closed = cv2.morphologyEx(image_thresholded, cv2.MORPH_CLOSE,
                                    kernel=np.ones((9, 9), np.uint8))
    
    horizon_x1 = 0
    horizon_x2 = image_grayscaled.shape[1] - 1
    # Find the maximum y-coordinate where the horizon line is detected
    horizon_y1 = max(np.where(image_closed[:, horizon_x1] == 0)[0])
    horizon_y2 = max(np.where(image_closed[:, horizon_x2] == 0)[0])

    return horizon_x1, horizon_x2, horizon_y1, horizon_y2

# Load the video
# video_path = 'path_to_your_video.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect the horizon line
    try:
        horizon_x1, horizon_x2, horizon_y1, horizon_y2 = detect_horizon_line(gray_frame)
        
        # Draw the horizon line on the frame
        frame_with_horizon = frame.copy()
        cv2.line(frame_with_horizon, (horizon_x1, horizon_y1), (horizon_x2, horizon_y2), (0, 0, 255), 2)

        # Concatenate the original frame and the frame with the horizon line
        combined_frame = cv2.hconcat([frame, frame_with_horizon])

        # Display the combined frame
        cv2.imshow("Before and After", combined_frame)
    except Exception as e:
        print("Error detecting horizon:", e)
        continue

    # Break on 'q' key press
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()


In [22]:
import cv2
import numpy as np

def detect_horizon_line_combined(image_grayscaled):
    # Step 1: Apply Gaussian blur and Otsu's thresholding
    image_blurred = cv2.GaussianBlur(image_grayscaled, (5, 5), 0)
    _, image_thresholded = cv2.threshold(
        image_blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    
    # Step 2: Define a region of interest (ROI) for Hough Line Transform
    height, width = image_grayscaled.shape
    roi = image_grayscaled[:height // 2, :]  # Focus on the upper half of the image
    
    # Detect edges using Canny in the ROI
    edges = cv2.Canny(roi, 50, 150)
    
    # Step 3: Use the Hough Line Transform in the ROI
    lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=80, minLineLength=100, maxLineGap=50)
    
    horizon_y_candidates = []
    if lines is not None:
        for line in lines:
            x1, y1, x2, y2 = line[0]
            if abs(y2 - y1) < 10:  # Consider only nearly horizontal lines
                horizon_y_candidates.append((y1 + y2) // 2)
    
    # Step 4: Calculate the Otsu-based horizon line position
    horizon_y_otsu = max(np.where(image_thresholded[height // 2:, :] == 0)[0]) + height // 2

    # Step 5: Combine the methods
    if horizon_y_candidates:
        horizon_y_hough = int(np.mean(horizon_y_candidates))
        horizon_y = int(0.5 * horizon_y_hough + 0.5 * horizon_y_otsu)
    else:
        horizon_y = horizon_y_otsu

    horizon_x1 = 0
    horizon_x2 = width - 1

    return horizon_x1, horizon_x2, horizon_y, horizon_y

# Load the video or image
# video_path = 'path_to_your_video.mp4'  # Replace with your video path
cap = cv2.VideoCapture('buoy_video.mp4')

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect the horizon line
    try:
        horizon_x1, horizon_x2, horizon_y1, horizon_y2 = detect_horizon_line_combined(gray_frame)
        
        # Draw the horizon line on the frame
        frame_with_horizon = frame.copy()
        cv2.line(frame_with_horizon, (horizon_x1, horizon_y1), (horizon_x2, horizon_y2), (0, 0, 255), 2)

        # Concatenate the original frame and the frame with the horizon line
        combined_frame = cv2.hconcat([frame, frame_with_horizon])

        # Display the combined frame
        cv2.imshow("Original and Horizon Line", combined_frame)
    except Exception as e:
        print("Error detecting horizon:", e)
        continue

    # Break on 'q' key press
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()



KeyboardInterrupt: 

In [27]:
import cv2
import numpy as np

def detect_horizon_using_gradients(image_grayscaled):
    """Detect the horizon line using gradient information."""
    # Step 1: Apply a Gaussian blur to reduce noise
    image_blurred = cv2.GaussianBlur(image_grayscaled, (5, 5), 0)

    # Step 2: Compute the vertical gradient using the Sobel operator
    sobel_y = cv2.Sobel(image_blurred, cv2.CV_64F, 0, 1, ksize=3)
    sobel_y = np.abs(sobel_y)  # Take the absolute value to focus on intensity

    # Step 3: Sum the gradient magnitudes across each row
    gradient_sums = np.sum(sobel_y, axis=1)

    # Step 4: Find the row with the maximum gradient change in the upper half of the image
    height = image_grayscaled.shape[0]
    upper_half = gradient_sums[:height // 2]
    horizon_y = np.argmax(upper_half)

    return horizon_y

# Load the video or image
video_path = 'buoy_video.mp4'  # Replace with your video path
cap = cv2.VideoCapture(video_path)

# Initialize variables for smoothing
horizon_y_smoothed = None
alpha = 0.9  # Smoothing factor (0 < alpha < 1)

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Convert the frame to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    # Detect the horizon line
    try:
        horizon_y_current = detect_horizon_using_gradients(gray_frame)

        # Apply exponential smoothing to make the horizon line smoother
        if horizon_y_smoothed is None:
            horizon_y_smoothed = horizon_y_current  # Initialize on the first frame
        else:
            horizon_y_smoothed = alpha * horizon_y_smoothed + (1 - alpha) * horizon_y_current

        # Draw the smoothed horizon line on the frame
        horizon_x1 = 0
        horizon_x2 = frame.shape[1] - 1
        horizon_y_smoothed_int = int(horizon_y_smoothed)

        frame_with_horizon = frame.copy()
        cv2.line(frame_with_horizon, (horizon_x1, horizon_y_smoothed_int), (horizon_x2, horizon_y_smoothed_int), (0, 0, 255), 2)

        # Concatenate the original frame and the frame with the horizon line
        combined_frame = cv2.hconcat([frame, frame_with_horizon])

        # Display the combined frame
        cv2.imshow("Original and Smoothed Horizon Line", combined_frame)
    except Exception as e:
        print("Error detecting horizon:", e)
        continue

    # Break on 'q' key press
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

# Release the video capture and close all windows
cap.release()
cv2.destroyAllWindows()



## Select Position

In [3]:
def select_initial_position(video_path):
    """
    Plays the first few frames of the video slowly. When the user presses 's',
    the video pauses, and they can select the initial position of the buoy.
    Returns the (x, y) coordinates of the selected point and the frame number.
    """
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Error: Could not open video.")
        return None, None

    initial_position = None
    frame_number = 0

    # Callback function to capture mouse click
    def mouse_callback(event, x, y, flags, param):
        nonlocal initial_position
        if event == cv2.EVENT_LBUTTONDOWN:
            initial_position = (x, y)
            print(f"Initial position selected at: {initial_position} in frame {frame_number}")
            cv2.destroyWindow("Select Initial Position")  # Close the window after selection

    # Set up window and callback
    cv2.namedWindow("Select Initial Position")
    cv2.setMouseCallback("Select Initial Position", mouse_callback)

    frame_delay = 500  # Delay in milliseconds to slow down frames

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Reached the end of the video or encountered an error.")
            break

        frame_number = int(cap.get(cv2.CAP_PROP_POS_FRAMES))  # Current frame number
        cv2.imshow("Select Initial Position", frame)
        key = cv2.waitKey(frame_delay) & 0xFF

        if key == ord('s'):  # Press 's' to select initial position
            print("Press 's' detected. Click on the frame to select the initial position.")
            while initial_position is None:
                cv2.waitKey(1)
            break
        elif key == ord('q'):  # Press 'q' to quit
            print("Selection canceled.")
            break

    cap.release()
    return initial_position, frame_number

def select_roi(frame, initial_position):
    """Zooms into the selected area and allows the user to select the ROI."""
    x, y = initial_position
    zoom_scale = 2.0  # Scale factor for zooming
    h, w = frame.shape[:2]

    # Define the zoomed-in area
    start_x = max(0, int(x - w / (2 * zoom_scale)))
    start_y = max(0, int(y - h / (2 * zoom_scale)))
    end_x = min(frame.shape[1], int(x + w / (2 * zoom_scale)))
    end_y = min(frame.shape[0], int(y + h / (2 * zoom_scale)))

    # Crop and resize the frame for zoom effect
    zoomed_frame = frame[start_y:end_y, start_x:end_x]
    zoomed_frame = cv2.resize(zoomed_frame, (w, h))

    # Show the zoomed-in frame and allow ROI selection
    roi = cv2.selectROI("Select ROI", zoomed_frame, fromCenter=False, showCrosshair=True)
    cv2.destroyWindow("Select ROI")

    # Calculate the bounding box in the original frame
    roi_x, roi_y, roi_w, roi_h = roi
    original_bbox = (start_x + roi_x, start_y + roi_y, roi_w, roi_h)
    
    return original_bbox

## Point tracking

In [2]:
import argparse
import cv2
import sys
import numpy as np
import cv2.aruco as aruco
import torch
from torchvision import models, transforms
from PIL import Image
import time

def apply_motion_model(prev_gray_frame, curr_gray_frame):
    """Estimate motion between two grayscale frames."""
    flow = cv2.calcOpticalFlowFarneback(prev_gray_frame, curr_gray_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
    dx, dy = np.median(flow[..., 0]), np.median(flow[..., 1])
    return dx, dy

def apply_background_subtraction(frame, background):
    """Applies background subtraction to reduce wave motion."""
    frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    background_gray = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
    diff = cv2.absdiff(background_gray, frame_gray)
    _, thresh = cv2.threshold(diff, 50, 255, cv2.THRESH_BINARY) #30 255
    return diff

In [64]:
(major_ver, minor_ver, subminor_ver) = (cv2.__version__).split('.')
def set_CSRT_Params():
    # Don't modify
    default_params = {
        'padding': 3.,
        'template_size': 200.,
        'gsl_sigma': 1.,
        'hog_orientations': 9.,
        'num_hog_channels_used': 18,
        'hog_clip': 2.0000000298023224e-01,
        'use_hog': 1,
        'use_color_names': 1,
        'use_gray': 1,
        'use_rgb': 0,
        'window_function': 'hann',
        'kaiser_alpha': 3.7500000000000000e+00,
        'cheb_attenuation': 45.,
        'filter_lr': 1.9999999552965164e-02,
        'admm_iterations': 4,
        'number_of_scales': 100,
        'scale_sigma_factor': 0.25,
        'scale_model_max_area': 512.,
        'scale_lr': 2.5000000372529030e-02,
        'scale_step': 1.02,
        'use_channel_weights': 1,
        'weights_lr': 1.9999999552965164e-02,
        'use_segmentation': 1,
        'histogram_bins': 16,
        'background_ratio': 2,
        'histogram_lr': 3.9999999105930328e-02,
        'psr_threshold': 3.5000000149011612e-02,
    }
    # modify
    params = {
        # 'scale_lr': 0.5,
        'number_of_scales': 33,
        'scale_step': 1.05,
        # 'filter_lr': 0.01,
        # 'weights_lr': 0.01,
        'padding': 1.5,
        'psr_threshold': 0.05,
        'use_channel_weights': 0

    }
    params = {**default_params, **params}
    tracker = None
    if int(major_ver) == 3 and 3 <= int(minor_ver) <= 4:
        import json
        import os
        with open('tmp.json', 'w') as fid:
            json.dump(params, fid)
        fs_settings = cv2.FileStorage("tmp.json", cv2.FILE_STORAGE_READ)
        tracker = cv2.TrackerCSRT_create()
        tracker.read(fs_settings.root())
        os.remove('tmp.json')
    elif int(major_ver) >= 4:
        param_handler = cv2.TrackerCSRT_Params()
        for key, val in params.items():
            setattr(param_handler, key, val)
        tracker = cv2.TrackerCSRT_create(param_handler)
    else:
        print("Cannot set parameters, using defaults")
        tracker = cv2.TrackerCSRT_create()
    return tracker

In [65]:
import cv2
import numpy as np
import depthCalculation
import importlib
importlib.reload(depthCalculation)

# Parameters
video_path = 'stabilized_video.mp4'  # Path to the already stabilized video
initial_position, frame_number = (611, 491),  23  # select_initial_position(video_path)

# Initialize video capture using the stabilized video
cap = cv2.VideoCapture(video_path)

# Set the position to the specified frame number
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)

# Use the first stabilized frame as the background
success, background = cap.read()
if not success:
    print("Failed to read the background frame.")
    cap.release()
    exit()

# Allow user to select the ROI based on the initial position
init_bbox = (593, 476, 37, 17)  # Predefined initial bounding box
# tracker = cv2.TrackerCSRT_create()
tracker = set_CSRT_Params()
tracker.init(background, init_bbox)

# Initialize variables for motion compensation
prev_gray_frame = cv2.cvtColor(background, cv2.COLOR_BGR2GRAY)
is_paused = False  # Control pausing
frame_count = frame_number

# Function to adjust the bounding box for a search area
def adjust_bbox(bbox, scale=1.5):
    (x, y, w, h) = [int(v) for v in bbox]
    print("bbox", bbox)
    print(x,y,w,h)
    # Expand the bounding box by the scale factor
    return (int(x - w * (scale - 1) / 2), int(y - h * (scale - 1) / 2),
            int(w * scale), int(h * scale))

# Begin tracking loop
while True:
    if not is_paused:
        # Read the next frame from the video
        ret, frame = cap.read()
        frame_count += 1
        if not ret:
            break  # Exit if no frames are left

        # If the buoy is lost and outside the frames to be skipped
        if (frame_count >= 45 and frame_count <= 58) or (frame_count >= 138 and frame_count <= 145):
            success = False  # Mark as lost but do not search again

        # Update the tracker
        success, bbox = tracker.update(frame)

        # if not success:
        #     # If tracking fails and it's not within frames 45-58 or 138-145
        #     if (frame_count < 45 or frame_count > 58) and (frame_count < 138 or frame_count > 145):
        #         print("LOST")
        #         search_bbox = adjust_bbox(init_bbox, scale=5)  # Expand search area
        #         cv2.rectangle(frame, (search_bbox[0], search_bbox[1]),
        #                       (search_bbox[0] + search_bbox[2], search_bbox[1] + search_bbox[3]),
        #                       (255, 0, 0), 2)  # Draw search area rectangle
        #         print(search_bbox)
                # Here you can implement additional logic to search within search_bbox if desired
                # bbox = cv2.selectROI("Select ROI", frame, fromCenter=False, showCrosshair=True)
                # tracker.init(frame, bbox)  # Reinitialize the tracker

        # Convert the current frame to grayscale for motion model
        curr_gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Estimate motion using motion model
        dx, dy = apply_motion_model(prev_gray_frame, curr_gray_frame)
        motion_compensated_frame = cv2.warpAffine(frame, np.float32([[1, 0, -dx], [0, 1, -dy]]), 
                                                                   (frame.shape[1], frame.shape[0]))  # Apply motion compensation

        # Update the tracker
        success, bbox = tracker.update(motion_compensated_frame)

        if success:
            (x, y, w, h) = [int(v) for v in bbox]
            # Dark gray rectangle (2 pixels)
            cv2.rectangle(frame, (x, y), (x + w, y + h), (200, 65, 55), 2)
            # Create a light gray textbox for the distance text
            x_coor = x + w // 2
            y_coor = y + h // 2
            distance_buoy = depthCalculation.detect_horizontal_lines_in_video(frame, x_coor, y_coor)

            text = "Distance " + str(round(distance_buoy,4)) + "m"
            text_size = cv2.getTextSize(text, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)[0]
            text_x = x
            text_y = y - 10
            cv2.rectangle(frame, (text_x, text_y - text_size[1] - 5), 
                             (text_x + text_size[0], text_y + 5), (200, 200, 200), -1)
            cv2.putText(frame, text, (text_x, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (50, 50, 50), 1)
        else:
            # When lost, do not show bounding box or text during frames 45-58
            # if not (frame_count >= 45 and frame_count <= 58):
            cv2.putText(frame, "Buoy Lost", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Dark gray frame count text with prefix
        frame_count_text = f"Frame {frame_count}"
        cv2.putText(frame, frame_count_text, (50, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (50, 50, 50), 2)

        # Show the frame
        cv2.imshow("Motion free frame", motion_compensated_frame)
        cv2.imshow("Buoy Tracking", frame)

        # Update the previous gray frame
        prev_gray_frame = curr_gray_frame

    # Check for key presses
    key = cv2.waitKey(1) & 0xFF
    if key == ord('q'):
        break
    elif key == ord(' '):  # Space bar to pause/resume
        is_paused = not is_paused  # Toggle pause

# Release video capture and close windows
cap.release()
cv2.destroyAllWindows()

