#### Подключение библиотек

In [None]:
import itertools
import math
import os
import time
from pathlib import Path

import cv2 as cv
import numpy as np
from skimage import metrics

#### Вспомогательные функции

In [None]:
# Define a smoothing function based on convolution
def curveSmoothing(curve, radius):
    """Smooth a 1-D curve with a centered moving-average (box) filter.

    The curve is extended on both sides with ``radius`` copies of its edge
    values before filtering, so the output has the same length as the input
    and the ends are averaged against the edge values rather than zeros.

    Args:
        curve: 1-D array-like of samples.
        radius: Non-negative integer half-width of the window; the window
            spans ``2 * radius + 1`` samples. ``0`` leaves the values as is.

    Returns:
        A 1-D float array with the same number of samples as ``curve``.

    Raises:
        ValueError: If ``radius`` is negative.
    """
    if radius < 0:
        raise ValueError("radius must be non-negative")

    curve = np.asarray(curve)
    if curve.size == 0:
        # Edge padding is undefined for an empty curve; nothing to smooth.
        return curve.astype(float)

    # Uniform weights that sum to one.
    window_size = 2 * radius + 1
    fltr = np.ones(window_size) / window_size

    # np.pad is the public entry point; the np.lib.pad alias is not
    # available in NumPy 2.x.
    curve_pad = np.pad(curve, (radius, radius), "edge")

    # "valid" keeps only positions where the window lies fully inside the
    # padded curve: exactly one output per original sample. This avoids the
    # [radius:-radius] slice, which is empty when radius == 0.
    return np.convolve(curve_pad, fltr, mode="valid")

def smooth(trajectory, radius):
    """Return a copy of ``trajectory`` with its first three columns smoothed.

    Columns 0, 1 and 2 (the dx, dy and rotation-angle curves) are each passed
    through ``curveSmoothing`` with the given ``radius``. The input array is
    not modified; any further columns are copied unchanged.
    """
    smoothed_columns = [
        curveSmoothing(trajectory[:, axis], radius) for axis in range(3)
    ]

    result = np.copy(trajectory)
    result[:, :3] = np.column_stack(smoothed_columns)
    return result

In [None]:
def getFramesCount(cap):
    """Count frames by stepping through the capture until it runs out.

    Counting starts at the capture's current position and stops at the first
    frame that cannot be grabbed. Afterwards a rewind to the start is
    requested so the capture can be reused.

    Args:
        cap: A video capture object exposing ``grab()`` and ``set()``
            (a ``cv.VideoCapture``).

    Returns:
        The number of frames successfully grabbed.
    """
    count = 0
    # grab() only advances to the next frame; the pixel data is never needed
    # for counting, so the decode step performed by read() is skipped.
    while cap.grab():
        count += 1

    # Request a rewind to the beginning of the stream.
    # NOTE(review): whether this property is honoured depends on the capture
    # backend - confirm for the formats in use.
    cap.set(cv.CAP_PROP_POS_AVI_RATIO, 0)
    return count

def getMetadata(filename):
    """Measure a video's file size, duration and average bitrate.

    The duration is the counted number of frames divided by the frame rate
    reported by the capture. The bitrate is printed as a side effect.

    Args:
        filename: Path to the video file.

    Returns:
        ``[size_in_bytes, duration_in_seconds, bitrate_in_kbps]``.

    Raises:
        OSError: If the file size cannot be read (e.g. the file is missing).
        ValueError: If no frames could be read or the reported frame rate is
            not positive, so no duration can be computed.
    """
    size_bytes = os.path.getsize(filename)

    time_cap = cv.VideoCapture(filename)
    try:
        n_frames = getFramesCount(time_cap)
        fps = time_cap.get(cv.CAP_PROP_FPS)
    finally:
        # Release the capture even if counting or the property query fails.
        time_cap.release()

    # "not fps > 0" also rejects NaN.
    if n_frames == 0 or not fps > 0:
        raise ValueError(
            f"Cannot determine duration of {filename!r}: "
            f"{n_frames} frames at {fps} fps"
        )

    duration = n_frames / fps

    # bytes * 8 bits / 1000 = kilobits, i.e. bytes / 125.
    bitrate = size_bytes / (125 * duration)
    print(f"Bitrate: {round(bitrate)} kbps")

    return [size_bytes, duration, bitrate]

In [None]:
# Define metrics functions
def rmse(frame1, frame2, w, h):
    """Root of the summed squared frame difference divided by ``w * h``.

    Both frames are converted to float before subtracting, so unsigned
    integer inputs do not wrap around.
    """
    first = frame1.astype("float")
    second = frame2.astype("float")
    squared_error = np.square(first - second).sum()
    pixel_count = w * h
    return math.sqrt(squared_error / pixel_count)

#### Стабилизация видео

In [None]:
def _estimateFrameMotion(prev_gray, curr_gray, maxCorners, qualityLevel, minDistance, blockSize):
    """Estimate the motion between two consecutive grayscale frames.

    Returns:
        ``(motion, n_tracked)`` where ``motion`` is ``(dx, dy, da)`` -
        translation and rotation angle taken from the estimated partial
        affine matrix - or ``None`` when no estimate could be made, and
        ``n_tracked`` is the number of points followed into ``curr_gray``.
    """
    # blockSize must be passed by keyword: the fifth positional parameter of
    # the Python binding is the optional `corners` output, not blockSize.
    prev_points = cv.goodFeaturesToTrack(
        prev_gray, maxCorners, qualityLevel, minDistance, blockSize=blockSize
    )
    if prev_points is None:
        # No corners detected in the previous frame.
        return None, 0

    # Follow the detected corners into the current frame.
    curr_points, status, _err = cv.calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_points, None)

    # Sanity check
    assert prev_points.shape == curr_points.shape

    # Keep only the points whose flow was found.
    tracked = np.where(status == 1)[0]
    prev_points = prev_points[tracked]
    curr_points = curr_points[tracked]
    n_tracked = len(curr_points)

    # A rotation + translation estimate needs at least two correspondences.
    if n_tracked < 2:
        return None, n_tracked

    transform_matrix = cv.estimateAffinePartial2D(prev_points, curr_points)[0]
    if transform_matrix is None:
        # The estimator produced no matrix for these correspondences.
        return None, n_tracked

    dx = transform_matrix[0, 2]
    dy = transform_matrix[1, 2]
    da = np.arctan2(transform_matrix[1, 0], transform_matrix[0, 0])
    return (dx, dy, da), n_tracked


def _affineMatrix(dx, dy, da):
    """Build the 2x3 rotation-plus-translation matrix for ``(dx, dy, da)``."""
    matrix = np.zeros((2, 3), np.float32)
    matrix[0, 0] = np.cos(da)
    matrix[0, 1] = -np.sin(da)
    matrix[1, 0] = np.sin(da)
    matrix[1, 1] = np.cos(da)
    matrix[0, 2] = dx
    matrix[1, 2] = dy
    return matrix


def stabilizeVideo(filename, params, output_name):
    """Stabilize a video and report the mean RMSE between output frames.

    The video is processed in two passes. The first pass estimates the
    frame-to-frame motion (dx, dy, rotation angle) from tracked corner
    points. The cumulative trajectory is then smoothed with ``smooth`` and
    the second pass warps every frame by its corrected transform, zooms it
    about the frame center to hide border artifacts, and writes it to
    ``output_name``. Progress is printed along the way.

    Args:
        filename: Path of the input video.
        params: Sequence of six values, in this order:
            maxCorners - maximum number of corners to return; if more are
                found, the strongest of them are returned.
            qualityLevel - minimal accepted corner quality, relative to the
                best corner's quality measure; weaker corners are rejected.
            minDistance - minimum Euclidean distance between returned
                corners.
            blockSize - size of the neighbourhood used to compute the
                derivative matrix at each pixel.
            radius - half-width of the trajectory smoothing window.
            scale - zoom factor applied about the frame center after
                warping.
        output_name: Path of the stabilized video to write.

    Returns:
        The RMSE between consecutive written (grayscale) frames, averaged
        over the frames actually written; ``0.0`` if only the first frame
        could be written.

    Raises:
        ValueError: If the first frame of the input cannot be read.
    """
    maxCorners, qualityLevel, minDistance, blockSize, radius, scale = params

    cap = cv.VideoCapture(filename)
    out = None
    try:
        n_frames = int(cap.get(cv.CAP_PROP_FRAME_COUNT))
        print(f"Frames count: {n_frames}")

        # Get width and height of video stream
        w = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
        h = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
        print(f"Video size: {w}×{h} pixels")

        # Get video stream fps
        fps = cap.get(cv.CAP_PROP_FPS)
        print(f"Video fps: {fps:.5} frames per second")

        # Set up output video
        fourcc = cv.VideoWriter_fourcc(*"I420")
        out = cv.VideoWriter(output_name, fourcc, fps, (w, h))

        res, prev_frame = cap.read()
        if not res:
            raise ValueError(f"Cannot read the first frame of {filename!r}")
        prev_gray = cv.cvtColor(prev_frame, cv.COLOR_BGR2GRAY)

        # ---- Pass 1: estimate the motion between consecutive frames ----
        # The reported frame count is only an upper bound here; reading may
        # stop earlier, in which case the unused rows are dropped below.
        transforms = np.zeros((max(n_frames - 1, 0), 3), np.float32)
        n_estimated = 0
        for i in range(len(transforms)):
            res, curr_frame = cap.read()
            if not res:
                print(f"Error: {i} frame")
                break
            curr_gray = cv.cvtColor(curr_frame, cv.COLOR_BGR2GRAY)

            motion, n_tracked = _estimateFrameMotion(
                prev_gray, curr_gray, maxCorners, qualityLevel, minDistance, blockSize
            )
            if motion is None:
                # No usable estimate for this pair: assume the camera kept
                # moving as in the previous pair (no motion for the first).
                motion = transforms[i - 1] if i > 0 else (0.0, 0.0, 0.0)
            transforms[i] = motion

            prev_gray = curr_gray
            n_estimated += 1

            print(f"Frame {i + 2}/{n_frames} — tracked points: {n_tracked}")

        # Drop rows for frames that were never read so they do not enter the
        # trajectory as spurious zero-motion steps.
        transforms = transforms[:n_estimated]

        if len(transforms) > 0:
            # Trajectory is the running sum of the per-frame motion.
            trajectory = np.cumsum(transforms, axis=0)
            smoothed_trajectory = smooth(trajectory, radius)
            # Shift each transform by the amount needed to land on the
            # smoothed trajectory.
            smoothed_transforms = transforms + (smoothed_trajectory - trajectory)
        else:
            # Single-frame input: nothing to smooth.
            smoothed_transforms = transforms

        # ---- Pass 2: warp and write the frames ----
        cap.set(cv.CAP_PROP_POS_FRAMES, 0)
        res, prev_frame = cap.read()
        if not res:
            raise ValueError(f"Cannot read the first frame of {filename!r}")
        out.write(prev_frame)
        prev_gray = cv.cvtColor(prev_frame, cv.COLOR_BGR2GRAY)

        # Zoom about the frame center; identical for every frame, so it is
        # built once. Uses the caller-supplied scale.
        zoom_matrix = cv.getRotationMatrix2D((w / 2, h / 2), 0, scale)

        total_rmse = 0.0
        n_written = 0
        for dx, dy, da in smoothed_transforms:
            res, curr_frame = cap.read()
            if not res:
                print(f"Error: {n_written} frame")
                break

            # Apply the corrected motion, then zoom to hide the borders.
            frame_stabilized = cv.warpAffine(curr_frame, _affineMatrix(dx, dy, da), (w, h))
            frame_stabilized = cv.warpAffine(frame_stabilized, zoom_matrix, (w, h))

            out.write(frame_stabilized)

            # Accumulate the difference to the previously written frame.
            curr_gray = cv.cvtColor(frame_stabilized, cv.COLOR_BGR2GRAY)
            total_rmse += rmse(curr_gray, prev_gray, w, h)
            prev_gray = curr_gray
            n_written += 1

        # Average over the frame pairs that were actually compared.
        return total_rmse / n_written if n_written else 0.0
    finally:
        # Release the capture and writer even when an error occurred.
        cap.release()
        if out is not None:
            out.release()
        cv.destroyAllWindows()

#### Подбор параметров

In [None]:
# Candidate values for the grid search over stabilizeVideo parameters.
# Each list holds the values tried for the parameter of the same name.
MAX_CORNERS = [100, 150, 200]
QUALITY_LEVEL = [0.01, 0.1, 0.25]
MIN_DISTANCE = [10, 30, 60]
BLOCK_SIZE = [3, 5, 9]
RADIUS = [50, 75, 100]
# SCALE is not searched independently: it is paired with RADIUS by position
# (SCALE[k] is used together with RADIUS[k]), so both lists must have the
# same length.
SCALE = [1.04, 1.06, 1.08]
# Placeholder RMSE for result slots that have not been filled yet; any real
# score is expected to be smaller.
BIG_B = 1e+6

In [None]:
# Grid search: run the stabilizer for every parameter combination and keep
# the five outputs with the lowest RMSE; all other output files are deleted.
best_rmse = [BIG_B] * 5
best_videos = [""] * 5
best_params = [[] for _ in range(5)]

input_name = "in/piano_boy.avi"

# Make sure the output directory exists before any video is written.
os.makedirs("out", exist_ok=True)

index = 0

# SCALE is tied to RADIUS by position, so the two are iterated as pairs.
search_grid = itertools.product(
    MAX_CORNERS, QUALITY_LEVEL, MIN_DISTANCE, BLOCK_SIZE, zip(RADIUS, SCALE)
)

for max_corners, quality_level, min_distance, block_size, (radius, scale) in search_grid:
    output_name = f"out/piano_boy_stabilized_example_{index}.avi"
    params = [max_corners, quality_level, min_distance, block_size, radius, scale]

    video_rmse = stabilizeVideo(input_name, params, output_name)

    # The slot holding the worst kept score is the one to beat.
    max_best = max(best_rmse)
    i_max = best_rmse.index(max_best)
    if video_rmse < max_best:
        # Evict the previous occupant of the slot (if any) from disk.
        if best_videos[i_max] != "":
            os.remove(best_videos[i_max])
        best_rmse[i_max] = video_rmse
        best_videos[i_max] = output_name
        best_params[i_max] = params
    else:
        # Not among the best: discard the file just written.
        os.remove(output_name)

    index += 1
    print(f"Index: {index}")

In [None]:
# Report the kept videos together with the parameters that produced them.
print("Best videos and their parametrs:")
for kept_video, kept_rmse, kept_params in zip(best_videos, best_rmse, best_params):
    if not kept_params:
        # Slot was never filled (fewer runs than slots); nothing to report.
        continue
    print(f"File name: {kept_video} | RMSE: {kept_rmse}")
    print(f" maxCorners: {kept_params[0]}, qualityLevel: {kept_params[1]}, minDistance: {kept_params[2]}, blockSize: {kept_params[3]},radius: {kept_params[4]}, scale: {kept_params[5]}")