# Lab Assigment 2 Estimation of Apparent Motion

<p style="text-align:left;">
    José Pedro Cruz
    <span style="float:right;">
        up201504646
    </span>
</p>
<p style="text-align:left;">
    Martinho Figueiredo
    <span style="float:right;">
        up201506179
    </span>
</p>
<p style="text-align:left;">
    Nuno Nascimento
    <span style="float:right;">
        up201907933
    </span>
</p>


[![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/martinhofigueiredo/VC)



## Scheme
```mermaid
flowchart LR
    A[Ingest Footage] -->|mp4 or mpegs| B{Multi Channel?}
    B -->|Yes| C[Split into Channels]
    B -->|No| D[Gray]-->H
    C --> E[R]-->H
    C --> F[G]-->H
    C --> G[B]-->H
    H{Multi Resolution?}-->|Yes|I[n Pyramid DownSampling Average]-->K
    H-->|No|J[One Shot]-->K
    K{algo} -->|HornShunck| M((.flo file))
    K -->|LucasKanade| M
```


## Benchmarking

```mermaid
flowchart LR
    GT[Ground truth] --> B{BenchMark}
    A(Flow Calculated) --> B
    B --> AE(Angular Error and Std Dev)
    B --> EE(Endpoint Error and Std Dev)
```


## Visualization
```mermaid
flowchart LR
    A[import .flo file]-->C{Type}
    C-->VF[Vector Field]
    C-->MF[Middlebury Flow]
    
```

# Setup

In [3]:
import cv2
import numpy as np
import yaml
import os
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import struct
import argparse
import math
%matplotlib widget


## Import Dataset

In [27]:
# IMport dataset with public ground truth
%pip install wget
import os
import subprocess
import wget
import zipfile

# Specify the URLs of the files to download
url1 = "https://vision.middlebury.edu/flow/data/comp/zip/other-gt-flow.zip"
url2 = "https://vision.middlebury.edu/flow/data/comp/zip/other-color-allframes.zip"
url3 = "https://vision.middlebury.edu/flow/data/comp/zip/eval-color-allframes.zip"
# Specify the destination folder to store the downloaded files
destination_folder = "dataset/"

# Download the files using wget
file1 = wget.download(url1, out=destination_folder)
file2 = wget.download(url2, out=destination_folder)
file3 = wget.download(url3, out=destination_folder)

# Unzip the files
with zipfile.ZipFile(file1, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)

with zipfile.ZipFile(file2, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)
    
with zipfile.ZipFile(file3, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)

# Remove the zip files if needed
os.remove(file1)
os.remove(file2)
os.remove(file3)

4811.22s - pydevd: Sending message related to process being replaced timed-out after 5 seconds
Note: you may need to restart the kernel to use updated packages.


## Ingest 

In [12]:
# Find all jpgs and pngs in a folder and returns a list of their respective path 
def get_frame_paths(folder_path):
    frame_paths = []
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg') or filename.endswith('.png'):
            frame_path = os.path.join(folder_path, filename)
            frame_paths.append(frame_path)
    frame_paths.sort()
    print(frame_paths)
    return frame_paths

In [13]:
# Checks if the inut file is a directory and if it is it will try and get the frames inside the folder
# it will check if it is and mp4 and if it is it will read it and create a list of frames
def get_input_frames(input_path):
    frames = []
    if os.path.isdir(input_path):
        frame_paths = get_frame_paths(input_path)
        print(f"{frame_paths}")
        frames = [cv2.imread(frame_path) for frame_path in frame_paths]
    else :
        if input_path.endswith('.mp4'):
            cap = cv2.VideoCapture(total_path)
            frames = []
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                frames.append(frame)
            cap.release()
    return frames

## Load Algo Config from file

In [14]:
# Loads a yaml file with the parameters to run the code
def load_config(filename):
    with open(filename, 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
        config['input_path']= os.getcwd()+config['input_path'] # Fix to run in the location it called
        print(config)
    return config

## Flo file format Aux Functions

In [6]:
import struct

def write_flow_field(flow, filename):
    height, width, _ = flow.shape
    with open(filename, 'wb') as f:
        # Write the magic number: 'PIEH' in ASCII
        f.write(b'PIEH')

        # Write the width and height of the flow field
        f.write(np.array(width).astype(np.int32).tobytes())
        f.write(np.array(height).astype(np.int32).tobytes())

        # Interleave the u and v values and write the flow field data
        flow_data = np.concatenate((flow[..., 0], flow[..., 1]), axis=1)
        f.write(flow_data.astype(np.float32).tobytes())

def read_flow_field(filename):
    with open(filename, 'rb') as f:
        # Read the magic number
        magic = f.read(4).decode()
        if magic != 'PIEH':
            raise ValueError('Invalid flow file format.')

        # Read the width and height of the flow field
        width = np.frombuffer(f.read(4), dtype=np.int32)[0]
        height = np.frombuffer(f.read(4), dtype=np.int32)[0]

        # Read the flow field data
        flow_data = np.frombuffer(f.read(), dtype=np.float32)
        flow = flow_data.reshape((height, width, 2))

    return flow


def calculate_angular_error(flow_gt, flow_pred):
    flow_gt_norm = np.sqrt(np.sum(flow_gt ** 2, axis=2))
    flow_pred_norm = np.sqrt(np.sum(flow_pred ** 2, axis=2))
    dot_product = np.sum(flow_gt * flow_pred, axis=2)
    cos_theta = np.clip(dot_product / (flow_gt_norm * flow_pred_norm), -1.0, 1.0)
    angular_error = np.arccos(cos_theta)
    return angular_error

def calculate_endpoint_error(flow_gt, flow_pred):
    endpoint_error = np.sqrt(np.sum((flow_gt - flow_pred) ** 2, axis=2))
    return endpoint_error


def calculate_error_statistics(error):
    mean_error = np.mean(angular_error)
    std_error = np.std(angular_error)
    return mean_error, std_error

# Example usage
flow_gt = read_flow_field('/workspace/VC/assignment2/dataset/other-gt-flow/Urban2/flow10.flo')
write_flow_field(flow_gt, "flow_gt.flo")
flow_pred = read_flow_field('flow_gt.flo')

angular_error = calculate_angular_error(flow_gt, flow_pred)
angular_mean, angular_std = calculate_error_statistics(angular_error)

print('Mean Angular Error:', angular_mean)
print('Standard Deviation of Angular Error:', angular_std)

endpoint_error = calculate_endpoint_error(flow_gt, flow_pred)
endpoint_mean, endpoint_std = calculate_error_statistics(endpoint_error)

print('Mean Endpoint Error:', endpoint_mean)
print('Standard Deviation of Endpoint Error:', endpoint_std)


Mean Angular Error: 1.5791076
Standard Deviation of Angular Error: 0.5013132
Mean Endpoint Error: 1.5791076
Standard Deviation of Endpoint Error: 0.5013132


# Lucas-Kanade

In [None]:
print(f"Loading File \'config_LK.yml\'")

config = load_config('config_LK.yml')

input_path = config['input_path']

frames = get_input_frames(input_path)


feature_params = dict(
    maxCorners=config['max_corners'],
    qualityLevel=config['quality_level'],
    minDistance=config['min_distance'],
    blockSize=config['block_size']
)
lk_params = dict(
    winSize=(config['window_size'], config['window_size']),
    maxLevel=config['max_level'],
    criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, config['max_iterations'], config['epsilon'])
)
color = np.random.randint(0, 255, (100, 3))
old_frame = frames[0]
old_gray = cv2.cvtColor(old_frame, cv2.COLOR_BGR2GRAY)
p0 = cv2.goodFeaturesToTrack(old_gray, mask=None, **feature_params)

mask = np.zeros_like(old_frame)
height, width, channels = old_frame.shape
mask_flow = np.zeros((height,width,2))
print("DIM: ", mask_flow.shape)

for frame_num in frames[1:]:
    
    current_frame = frame_num
    frame_gray = cv2.cvtColor(frame_num, cv2.COLOR_BGR2GRAY)
    
    p1, st, err = cv2.calcOpticalFlowPyrLK(old_gray, frame_gray, p0, None, **lk_params)
    print("p1: ", np.asarray(p1).shape)
    print(np.asarray(p1).shape)
    print(p1)
    if p1 is not None:
        
        good_new = p1[st==1]
        good_old = p0[st==1]
    
    for i, (new, old) in enumerate(zip(good_new, good_old)):
        a, b = new.ravel()
        c, d = old.ravel()
        mask = cv2.line(mask, (int(a), int(b)), (int(c), int(d)), color[i].tolist(), 2)
        
    frame_with_flow = cv2.add(current_frame, mask)
    old_gray = frame_gray.copy()
    p0 = good_new.reshape(-1,1,2)
    

print("mask: ",mask.shape)
cv2.imwrite('flow.png', frame_with_flow)
write_flow_field(mask,'flow_field.flo')


flow_data = read_flow_field('/Users/nmcna/Desktop/FEUP/4oano/2osemestre/VC/assignment2/dataset/other-gt-flow/Urban2/flow10.flo')  # Read the .flo file

# Compute the magnitude and angle of the flow vectors
magnitude, angle = cv2.cartToPolar(flow_data[..., 0], flow_data[..., 1])

# Visualize the flow field
hue = angle * 180 / np.pi / 2
saturation = np.ones_like(magnitude)
value = cv2.normalize(magnitude, None, 0, 255, cv2.NORM_MINMAX)
flow_visualization = cv2.cvtColor(cv2.merge((hue, saturation, value)).astype(np.uint8), cv2.COLOR_HSV2BGR)
#teste = cv2.merge((hue, saturation, value)).astype(np.uint8)
# Display the flow visualization
cv2.imshow('Optical Flow', flow_visualization)
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
image = cv2.imread('dataset/other-data/Urban2/frame07.png')

flow = read_flow_field('dataset/other-gt-flow/Urban2/flow10.flo')

warped = cv2.remap(image, flow, None, cv2.INTER_LINEAR)

cv2.imshow('warp', warped)
cv2.waitKey(0)
cv2.destroyAllWindows

# Horn-Shunchk

In [None]:
import cv2
import numpy as np
import yaml
import os
import matplotlib.pyplot as plt
from IPython.display import clear_output
from ipywidgets import Image
from io import BytesIO
import PIL.Image

def display_image(image):
    # Convert the OpenCV image to a PIL image
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    pil_image = PIL.Image.fromarray(image)
    
    # Display the image using the Jupyter inline magic
    buffered = BytesIO()
    pil_image.save(buffered, format="JPEG")
    display(Image(data=buffered.getvalue()))


# Load the tuning parameters from the config file
with open('config_HS.yml', 'r') as f:
    config = yaml.safe_load(f)

# Load the input frames
input_path = os.getcwd()+config['input_path']
input_frames = get_input_frames(input_path)

# Initialize the flow field for the first frame with zeros
prev_frame = input_frames[0]
flow = np.zeros((prev_frame.shape[0], prev_frame.shape[1], 2), dtype=np.float32)

# Set the Horn-Schunck parameters
alpha = config['alpha']
num_iterations = config['num_iterations']
epsilon = config['epsilon']

# Define the Gaussian pyramid levels
num_levels = config['num_levels']
pyramid_scale = config['pyramid_scale']

# Create an output widget for displaying images
output_widget = Image()

# Compute the optical flow for each level of the pyramid
for level in range(num_levels):
    # Downsample the input frames and flow field
    curr_frame = cv2.resize(prev_frame, None, fx=pyramid_scale, fy=pyramid_scale)
    curr_flow = cv2.resize(flow, None, fx=pyramid_scale, fy=pyramid_scale)

    # Compute the optical flow using the multi-channel Horn-Schunck algorithm
    for i in range(num_iterations):
        # Split the flow field into x and y components
        fx, fy = np.split(curr_flow, 2, axis=2)

        # Compute the Laplacian of the flow field
        fxx, _ = np.gradient(fx)
        _, fyy = np.gradient(fy)
        fxy = np.gradient(fx, axis=0)[0] + np.gradient(fy, axis=1)[1]

        # Compute the temporal derivative of the flow field
        ft = next_frame_gray - prev_frame_gray + np.sum(curr_flow * np.dstack((fx, fy)), axis=2)

        # Compute the update to the flow field
        numerator = fxx * fy ** 2 - 2 * fx * fy * fxy + fyy * fx ** 2 - ft * fx * fy
        denominator = fx ** 2 + fy ** 2 + alpha
        update = numerator / (denominator[..., np.newaxis] + epsilon)

        # Update the flow field
        curr_flow += update

        # Visualize the current flow field
        if display:
            flow_image = flow_to_color(curr_flow)
            display_image(flow_image)

    # Convert the flow field to polar coordinates
    magnitude, angle = cv2.cartToPolar(curr_flow[..., 0], curr_flow[..., 1], angleInDegrees=True)

    #return curr_flow, magnitude, angle

# Visualization

Done Using [flowvid](https://github.com/diegoroyo/flowvid)

In [19]:
import flowvid
import numpy as np
from IPython.display import HTML

import flowvid as fv

flo_data = fv.input.flo('flow_gt.flo')

# You can normalize by frame OR the whole video
# Normalize each flo file independently
flo_frame = fv.normalize_frame(flo_data)
# Normalize all flo files at once, applying a clamp/gamma curve
flo_video = fv.normalize_video(flo_data, clamp_pct=0.8, gamma=1.5)

# Conversion from flow data to RGB
rgb_frames = fv.flow_to_rgb(flo_video)

out5 = fv.output.show_plot(title='Flow colors', framerate=10)
out5.show_all(rgb_frames, show_count=True)





AssertionError: File flow_gt.flo has wrong tag (1212500352.0)

In [1]:
# IMport dataset with public ground truth
%pip install wget
import os
import subprocess
import wget
import zipfile

# Specify the URLs of the files to download
url1 = "https://vision.middlebury.edu/flow/data/comp/zip/other-gt-flow.zip"
url2 = "https://vision.middlebury.edu/flow/data/comp/zip/other-color-allframes.zip"
url3 = "https://vision.middlebury.edu/flow/data/comp/zip/eval-color-allframes.zip"
# Specify the destination folder to store the downloaded files
destination_folder = "dataset/"

# Download the files using wget
file1 = wget.download(url1, out=destination_folder)
file2 = wget.download(url2, out=destination_folder)
file3 = wget.download(url3, out=destination_folder)

# Unzip the files
with zipfile.ZipFile(file1, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)

with zipfile.ZipFile(file2, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)
    
with zipfile.ZipFile(file3, 'r') as zip_ref:
    zip_ref.extractall(destination_folder)

# Remove the zip files if needed
os.remove(file1)
os.remove(file2)
os.remove(file3)

Note: you may need to restart the kernel to use updated packages.


# Bench Marking


In [None]:
import pandas as pd

def display_results():
    # Create an empty dataframe to store the results
    results_df = pd.DataFrame(columns=['DataSet', 'Average Angular Error '])
    
    # Iterate through the function calls and store the results
    for fc in fcalls:
        # Call the function
        result = fc()
        
        # Add the function name and result to the dataframe
        results_df = results_df.append({'Function': fc.__name__, 'Result': result}, ignore_index=True)
    
    # Display the results as a markdown table
    display(results_df.to_markdown(index=False))