### Optical flow calculation per video

In [9]:
import numpy as np
import cv2
from cv2 import VideoCapture, cvtColor, COLOR_BGR2GRAY, calcOpticalFlowFarneback
import os
import glob
import pandas as pd
import re
from tqdm import tqdm

In [10]:
def calculate_optical_flow(video_path, L=5):
    """Compute dense Farneback optical flow for the first frame pairs of a video.

    Consecutive frames are converted to grayscale and compared pairwise.
    For every pair three 2-D maps are produced: the horizontal flow, the
    vertical flow and the flow angle (``arctan2(vertical, horizontal)``,
    in radians). Processing stops once ``3 * L`` maps have been collected
    or the video ends, whichever comes first.

    Args:
        video_path: Path of the video file to read.
        L: Number of frame pairs to process (the result has at most
            ``3 * L`` channels).

    Returns:
        An array of shape ``(height, width, 3 * k)`` where ``k <= L`` is the
        number of frame pairs actually processed. Channels are interleaved
        per pair: ``3*i`` is horizontal flow, ``3*i + 1`` is vertical flow
        and ``3*i + 2`` is the angle of pair ``i``.
        An empty list is returned (after printing a message) when the video
        cannot be opened, its first frame cannot be read, or it holds fewer
        than two readable frames.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print(f"Failed to open video: {video_path}")
            return []

        ret, first_frame = cap.read()
        if not ret:
            # Without a first frame there is nothing to compare against.
            print(f"Failed to read first frame: {video_path}")
            return []
        prev_gray = cv2.cvtColor(first_frame, cv2.COLOR_BGR2GRAY)

        # Flat list of 2-D maps: [h0, v0, angle0, h1, v1, angle1, ...]
        flow_stack = []

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            curr_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Positional arguments after the two images and the output flow,
            # per the OpenCV signature: pyr_scale=0.5, levels=3, winsize=15,
            # iterations=3, poly_n=5, poly_sigma=1.2, flags=0.
            flow = cv2.calcOpticalFlowFarneback(
                prev_gray, curr_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0
            )

            # Split the flow into horizontal and vertical components
            flow_horizontal, flow_vertical = flow[..., 0], flow[..., 1]

            # Direction of the flow vector at every pixel
            angle = np.arctan2(flow_vertical, flow_horizontal)

            flow_stack.extend([flow_horizontal, flow_vertical, angle])

            # Stop as soon as L frame pairs (3 maps each) have been collected
            if len(flow_stack) == 3 * L:
                break

            prev_gray = curr_gray
    finally:
        # Release the capture handle even if OpenCV raised mid-way.
        cap.release()

    if not flow_stack:
        # np.stack cannot stack an empty list; report it like other failures.
        print(f"No optical flow could be computed for video: {video_path}")
        return []

    return np.stack(flow_stack, axis=-1)

In [6]:
def process_batch(video_paths, L=5):
    """Compute optical-flow stacks and summary statistics for a list of videos.

    For each path the file identifier is taken from the four leading digits
    of the file name (``"NNNN_..."``; ``"Unknown"`` when the name does not
    match), the flow stack is computed with ``calculate_optical_flow`` and
    the mean / maximum of the flow components and of the angles are derived.

    The flow stack interleaves its channels per frame pair as
    (horizontal, vertical, angle), so the components are selected with a
    stride of 3 rather than by splitting the channel axis at ``2 * L``.

    Args:
        video_paths: Iterable of video file paths.
        L: Number of frame pairs per video, forwarded to
            ``calculate_optical_flow``.

    Returns:
        A ``pandas.DataFrame`` with one row per video and the columns
        ``File_ID``, ``Optical_Flow_Stack``, ``Average_Flow``,
        ``Maximum_Flow``, ``Average_Angle`` and ``Maximum_Angle``. Videos for
        which no flow could be computed keep their row, with the stack as
        returned and NaN for the four statistics.
    """
    file_identifiers = []
    all_flow_stacks = []
    flow_averages = []
    flow_maximums = []
    angle_averages = []
    angle_maximums = []

    for path in tqdm(video_paths, desc="Processing Videos"):
        # Extract file identifier
        file_name = os.path.basename(path)
        identifier = re.match(r'(\d{4})_', file_name)
        file_id = identifier.group(1) if identifier else "Unknown"

        # Calculate optical flow stack
        flow_stack = calculate_optical_flow(path, L)

        if len(flow_stack) == 0:
            # The video could not be processed: keep the row so identifiers
            # stay aligned, but mark the statistics as missing instead of
            # aborting the whole batch.
            avg_flow = max_flow = avg_angle = max_angle = np.nan
        else:
            # Channel layout is [h0, v0, a0, h1, v1, a1, ...].
            horizontal = flow_stack[..., 0::3]
            vertical = flow_stack[..., 1::3]
            angles = flow_stack[..., 2::3]

            # Horizontal and vertical parts have the same number of elements,
            # so the mean over both is the mean of their means.
            avg_flow = (np.mean(horizontal) + np.mean(vertical)) / 2
            max_flow = max(np.max(horizontal), np.max(vertical))

            avg_angle = np.mean(angles)
            max_angle = np.max(angles)

        # Append to lists
        file_identifiers.append(file_id)
        all_flow_stacks.append(flow_stack)
        flow_averages.append(avg_flow)
        flow_maximums.append(max_flow)
        angle_averages.append(avg_angle)
        angle_maximums.append(max_angle)

    # Create DataFrame for the batch
    batch_df = pd.DataFrame({
        'File_ID': file_identifiers,
        'Optical_Flow_Stack': all_flow_stacks,
        'Average_Flow': flow_averages,
        'Maximum_Flow': flow_maximums,
        'Average_Angle': angle_averages,
        'Maximum_Angle': angle_maximums
    })

    return batch_df

In [5]:
# Directory containing the videos
video_directory = "./AlgonautsVideos268_All_30fpsmax"
# glob returns matches in arbitrary order; sort them so that a batch index
# refers to the same videos on every run (resuming by batch number relies on it).
video_paths = sorted(glob.glob(os.path.join(video_directory, "*.mp4")))

In [None]:
import gc

# Batching configuration
batch_size = 100
total_videos = len(video_paths)
# Ceiling division: a trailing partial batch still counts as one batch.
num_batches = (total_videos + batch_size - 1) // batch_size


# Function to read the batch number stored in the checkpoint file
def get_last_processed_batch():
    """Return the batch number stored in ``last_processed_batch.txt``.

    Returns:
        The integer written in the file, or 0 when the file does not exist
        or does not contain a valid integer (for example an empty file left
        behind by an interrupted write). Falling back to 0 restarts the run
        from the beginning instead of failing on every start.
    """
    try:
        with open('last_processed_batch.txt', 'r') as file:
            return int(file.read().strip())
    except FileNotFoundError:
        return 0
    except ValueError:
        print("Checkpoint file 'last_processed_batch.txt' is empty or corrupt; starting from batch 0")
        return 0

# Function to save a batch number to the checkpoint file
def save_last_processed_batch(batch_number):
    """Write ``batch_number`` to ``last_processed_batch.txt``.

    The value is first written to a temporary sibling file and then moved
    over the checkpoint with ``os.replace``, so an interruption during the
    write cannot leave a truncated checkpoint behind.

    Args:
        batch_number: Batch number to store; written as ``str(batch_number)``.
    """
    checkpoint_path = 'last_processed_batch.txt'
    temp_path = checkpoint_path + '.tmp'
    with open(temp_path, 'w') as file:
        file.write(str(batch_number))
    # Swap the fully written temp file into place in a single step.
    os.replace(temp_path, checkpoint_path)


# Resume from the checkpoint. The value stored after each successful batch is
# the index of the NEXT batch to run (see the save call below), so a finished
# batch is not recomputed on restart; a missing checkpoint yields 0.
start_batch = get_last_processed_batch()

for batch_number in tqdm(range(start_batch, num_batches), desc="Processing Batches"):
    try:
        start_index = batch_number * batch_size
        end_index = min(start_index + batch_size, total_videos)
        current_batch_paths = video_paths[start_index:end_index]

        # Process the batch and create a dataframe
        batch_df = process_batch(current_batch_paths)

        # Save each column with File_IDs in separate NPZ files
        # NOTE(review): these columns are presumably object-dtype, which NumPy
        # pickles on save; loading would then need allow_pickle=True - confirm.
        for column in ['Optical_Flow_Stack', 'Average_Flow', 'Maximum_Flow', 'Average_Angle', 'Maximum_Angle']:
            np.savez_compressed(f'batch_{batch_number}_{column}.npz', File_ID=batch_df['File_ID'].to_numpy(), Data=batch_df[column].to_numpy())

        # Clear memory before the next batch is loaded
        del batch_df
        gc.collect()

        # Checkpoint only after all files of this batch were written: store
        # the index of the next batch so a restart continues after this one.
        save_last_processed_batch(batch_number + 1)

    except Exception as e:
        # Stop at the first failing batch; the checkpoint still points at it,
        # so the next run retries this batch.
        print(f"Error occurred in batch {batch_number}: {e}")
        break