In [21]:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import math
import sys

In [19]:
# Load video
video_path = "./monkey.avi"
cap = cv2.VideoCapture(video_path)

# Get video frame dimensions 
frame_width = int(cap.get(3))
frame_height = int(cap.get(4))

# Create an output video writer 
out = cv2.VideoWriter('output_video.mp4', cv2.VideoWriter_fourcc(*'MP4V'), 30, (frame_width, frame_height))

# Parameters
k = 4 # Explanation for this in README.txt
Tmin = 200 # Min threshold for noise filtering 
Tmax = 240 # Max threshold for noise filtering

frames = []

while True:
    return_flag, frame = cap.read()
    
    if not return_flag:
        print('Video Reach End')
        break

    frames.append(frame)

    if cv2.waitKey(30) & 0xff == ord('q'):
        break

cap.release()

frames = np.array(frames)
    

OpenCV: FFMPEG: tag 0x5634504d/'MP4V' is not supported with codec id 12 and format 'mp4 / MP4 (MPEG-4 Part 14)'
OpenCV: FFMPEG: fallback to use tag 0x7634706d/'mp4v'


Video Reach End


In [4]:
# SSD Calculator
def SSD(current_frame, next_frame, current_centroid, search_centroid, grid_size):
    k = grid_size // 2
    sum = 0

    for x in range(-k, k + 1):
        for y in range(-k, k + 1):
            for c in range(3):
                current_x = current_centroid[0] + x
                current_y = current_centroid[1] + y
                search_x = search_centroid[0] + x
                search_y = search_centroid[1] + y
                if 0 <= current_x < current_frame.shape[0] and 0 <= current_y < current_frame.shape[1] and 0 <= search_x < next_frame.shape[0] and 0 <= search_y < next_frame.shape[1]:
                    sum += (int(current_frame[current_centroid[0] + x, current_centroid[1] + y, c]) - int(next_frame[search_centroid[0] + x, search_centroid[1] + y, c])) ** 2
    return math.sqrt(sum)

In [5]:
# Helper function to draw arrows
def arrowdraw(img, x1, y1, x2, y2):
    radians = math.atan2(x1-x2, y2-y1)
    x11 = 0
    y11 = 0
    x12 = -10
    y12 = -10

    u11 = 0
    v11 = 0
    u12 = 10
    v12 = -10
    
    x11_ = x11 * math.cos(radians) - y11 * math.sin(radians) + x2
    y11_ = x11 * math.sin(radians) + y11 * math.cos(radians) + y2

    x12_ = x12 * math.cos(radians) - y12 * math.sin(radians) + x2
    y12_ = x12 * math.sin(radians) + y12 * math.cos(radians) + y2
    
    u11_ = u11 * math.cos(radians) - v11 * math.sin(radians) + x2
    v11_ = u11 * math.sin(radians) + v11 * math.cos(radians) + y2

    u12_ = u12 * math.cos(radians) - v12 * math.sin(radians) + x2
    v12_ = u12 * math.sin(radians) + v12 * math.cos(radians) + y2
    img = cv2.line(img, (x1, y1), (x2, y2), (255, 0, 0), 5)
    img = cv2.line(img, (int(x11_), int(y11_)), (int(x12_), int(y12_)), 
    (255, 255, 255), 5)
    img = cv2.line(img, (int(u11_), int(v11_)), (int(u12_), int(v12_)), 
    (255, 255, 255), 5)
    
    return img

In [6]:
# Show image in RGB
def img_show(img):
    new_img = img.copy()
    img_R = img[:, :, 2]
    img_B = img[:, :, 0]
    new_img[:, :, 2] = img_B
    new_img[:, :, 0] = img_R
    new_img = np.uint8(new_img)

    plt.figure()
    plt.imshow(new_img)
    plt.axis('off')
    plt.show() 

In [22]:
# Main code for performing Motion Estimation

BLUE = 120 # Threshold for Blue pixels

# Capture the first frame from monkey.avi and use it as the background
cap = cv2.VideoCapture('./monkey.avi')

ret, background_image = cap.read()
if not ret:
    print('Failed to capture the first frame as the background')
    sys.exit(1)
    
# Iterate through all frames
while True:
    ret, monkeyframe = cap.read()
    if not ret:
        break

    # Create a mask to isolate the monkey from the blue background
    monkey_mask = cv2.inRange(monkeyframe, (0, 50, 50), (BLUE, 200, 200))

    # Create an array to store the displacement vectors
    displacement_vectors = np.zeros((monkeyframe.shape[0], monkeyframe.shape[1], 2), dtype=np.float32)

    # Iterate through grid blocks of size 2k+1 * 2k+1 in the monkey frame
    for y in range(k, monkeyframe.shape[0] - k, 2 * k + 1):
        for x in range(k, monkeyframe.shape[1] - k, 2 * k + 1):
            # Shrinking image search area:
            if monkey_mask[y, x] == 255:  # Process only within monkey region by checking pixels aren't blue

                # Extract source block from monkey frame
                source_block = monkeyframe[y - k:y + k + 1, x - k:x + k + 1]

                # Define search area in the next frame  
                # Finding leftmost pixel in search area with min, and rightmost with max
                search_area = monkeyframe[max(y - k, 0):min(y + k, monkeyframe.shape[0]), max(x - k, 0):min(x + k, monkeyframe.shape[1])]

                # Calculate SSD
                ssd =  SSD(source_block, search_area, (k, k), (k, k), 2 * k + 1)

                # Check if calculated SSD is within range
                if Tmin <= ssd <= Tmax:
                    dx, dy = k, k
                else:
                    dx, dy = 0, 0

                displacement_vectors[y, x] = [dx, dy]

    # Visualize displacement vectors as arrows on the monkey frame    
    for y in range(k, monkeyframe.shape[0] - k, 2 * k + 1):
        for x in range(k, monkeyframe.shape[1] - k, 2 * k + 1):
            dx, dy = displacement_vectors[y, x]
            if monkey_mask[y, x] == 255:
                
                # Calculate coordinates for drawing arrow
                x1 = x
                y1 = y
                x2 = int(x + dx)
                y2 = int(y + dy)

                # Draw arrow on the monkey frame
                monkeyframe = arrowdraw(monkeyframe, x1, y1, x2, y2)

    cv2.imshow('Motion Estimation', monkeyframe)
    out.write(monkeyframe) # Save frame to output video
    cv2.waitKey(30)
   

cap.release()
out.release()
cv2.destroyAllWindows()


KeyboardInterrupt: 