### Optical flow calculation per video

In [8]:
import numpy as np
import cv2
from cv2 import VideoCapture, cvtColor, COLOR_BGR2GRAY, calcOpticalFlowFarneback
import os
import glob
import pandas as pd
import re
from tqdm import tqdm
import gc

In [9]:
def calculate_optical_flow(video_path, L=8, skip_frames=9):
    """
    Calculate dense Farneback optical flow for a video file.

    The first frame is used as the initial reference. After that, every
    ``skip_frames``-th frame is compared against the previously sampled frame,
    and the horizontal and vertical flow components of each comparison are
    appended to the output stack.

    :param video_path: Path to the video file.
    :param L: Maximum number of flow fields to compute (each contributes two
        channels: horizontal, then vertical).
    :param skip_frames: Sampling stride; only frames whose running index is a
        multiple of this value are used. Must be non-zero (it is used as a
        modulo divisor).
    :return: Array of shape [height, width, 2 * n] with n <= L flow fields, or
        an empty array if the video cannot be opened/read or is too short to
        yield a single sampled frame.
    """
    cap = cv2.VideoCapture(video_path)
    flow_stack = []

    # try/finally guarantees the capture handle is released on every exit
    # path, including the early returns and any exception raised by OpenCV.
    try:
        if not cap.isOpened():
            print(f"Failed to open video: {video_path}")
            return np.array([])

        ret, frame1 = cap.read()
        if not ret:
            print(f"Failed to read the first frame from {video_path}")
            return np.array([])

        prvs = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
        frame_counter = 0

        while len(flow_stack) < 2 * L:
            ret, frame2 = cap.read()
            if not ret:
                # End of video (or read error): keep whatever was collected.
                break

            frame_counter += 1
            if frame_counter % skip_frames != 0:
                continue

            next_frame = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
            # Positional arguments after the flow placeholder (None):
            # pyr_scale=0.5, levels=3, winsize=15, iterations=3,
            # poly_n=5, poly_sigma=1.2, flags=0
            flow = cv2.calcOpticalFlowFarneback(prvs, next_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            flow_horizontal, flow_vertical = flow[..., 0], flow[..., 1]
            flow_stack.append(flow_horizontal)
            flow_stack.append(flow_vertical)

            # Flow is measured between consecutive *sampled* frames.
            prvs = next_frame
    finally:
        cap.release()

    if flow_stack:
        return np.dstack(flow_stack)
    return np.array([])

In [10]:
def process_batch(video_paths):
    """
    Process a batch of video files to calculate optical flow.

    For each video the flow stack is computed with ``calculate_optical_flow``
    and summarised by its mean and maximum over all elements. Videos for which
    no flow could be computed (empty stack) are kept in the result with NaN
    statistics so that one bad file does not abort the whole batch.

    :param video_paths: List of paths to video files.
    :return: DataFrame with one row per input path and the columns
        'File_ID', 'Optical_Flow_Stack', 'Average_Flow' and 'Maximum_Flow'.
    """
    file_identifiers = []
    all_flow_stacks = []
    flow_averages = []
    flow_maximums = []

    for path in tqdm(video_paths, desc="Processing Videos"):
        file_name = os.path.basename(path)
        # The ID is the four digits preceding the first underscore at the
        # start of the file name; anything else is labelled "Unknown".
        identifier = re.match(r'(\d{4})_', file_name)
        file_id = identifier.group(1) if identifier else "Unknown"

        flow_stack = calculate_optical_flow(path)

        if flow_stack.size == 0:
            # np.max raises ValueError on a zero-size array (and np.mean
            # yields NaN with a warning), so record NaN explicitly instead.
            print(f"No optical flow computed for {path}; recording NaN statistics")
            avg_flow = np.nan
            max_flow = np.nan
        else:
            avg_flow = np.mean(flow_stack)
            max_flow = np.max(flow_stack)

        file_identifiers.append(file_id)
        all_flow_stacks.append(flow_stack)
        flow_averages.append(avg_flow)
        flow_maximums.append(max_flow)

    batch_df = pd.DataFrame({
        'File_ID': file_identifiers,
        'Optical_Flow_Stack': all_flow_stacks,
        'Average_Flow': flow_averages,
        'Maximum_Flow': flow_maximums,
    })

    return batch_df

In [11]:
def get_last_processed_batch():
    """
    Read the stored batch number from 'last_processed_batch.txt'.

    :return: The integer stored in the file, or 0 if the file does not exist
        or does not contain a valid integer (e.g. it is empty because a
        previous write was interrupted).
    """
    try:
        with open('last_processed_batch.txt', 'r') as file:
            return int(file.read().strip())
    except FileNotFoundError:
        return 0
    except ValueError:
        # An empty or corrupt checkpoint is treated like a missing one rather
        # than crashing the notebook before any batch is processed.
        print("Invalid content in last_processed_batch.txt; starting from batch 0")
        return 0

def save_last_processed_batch(batch_number):
    """
    Write a batch number to 'last_processed_batch.txt', replacing any
    previous content.

    :param batch_number: The batch number to save.
    """
    serialized = str(batch_number)
    with open('last_processed_batch.txt', 'w') as checkpoint:
        checkpoint.write(serialized)


In [12]:
# Directory containing the videos
video_directory = "./videos_adjusted"
# glob returns matches in arbitrary order; sort them so that the index-based
# batch slicing below selects the same videos on every run (and on resume).
video_paths = sorted(glob.glob(os.path.join(video_directory, "*.mp4")))

In [14]:
import math

# Parameters
batch_size = 1102
total_videos = len(video_paths)
num_batches = math.ceil(total_videos / batch_size)
# The checkpoint holds the index of the next batch to run (equivalently, the
# number of batches already completed); it is 0 when nothing has been saved.
start_batch = get_last_processed_batch()

for batch_number in tqdm(range(start_batch, num_batches), desc="Processing Batches"):
    try:
        start_index = batch_number * batch_size
        end_index = min(start_index + batch_size, total_videos)
        current_batch_paths = video_paths[start_index:end_index]

        batch_df = process_batch(current_batch_paths)

        # One compressed archive per column, each carrying the File_ID array
        # alongside the column data so the files can be used independently.
        file_ids = batch_df['File_ID'].to_numpy()
        for column in ['Optical_Flow_Stack', 'Average_Flow', 'Maximum_Flow']:
            np.savez_compressed(f'batch_{batch_number}_{column}.npz', File_ID=file_ids, Data=batch_df[column].to_numpy())

        # Free the batch before starting the next one to limit peak memory.
        del batch_df
        gc.collect()

        # Store batch_number + 1 so a restart resumes at the first batch that
        # has not been written yet instead of recomputing this one.
        save_last_processed_batch(batch_number + 1)

    except Exception as e:
        # Stop at the first failing batch; the checkpoint still points at it,
        # so re-running this cell retries from here.
        print(f"Error occurred in batch {batch_number}: {e}")
        break

Processing Batches:   0%|          | 0/1 [00:00<?, ?it/s]
Processing Videos:   0%|          | 0/1102 [00:00<?, ?it/s][A
Processing Videos:   0%|          | 2/1102 [00:00<01:06, 16.54it/s][A
Processing Videos:   0%|          | 4/1102 [00:00<01:07, 16.36it/s][A
Processing Videos:   1%|          | 6/1102 [00:00<01:07, 16.23it/s][A
Processing Videos:   1%|          | 8/1102 [00:00<01:05, 16.64it/s][A
Processing Videos:   1%|          | 10/1102 [00:00<01:05, 16.59it/s][A
Processing Videos:   1%|          | 12/1102 [00:00<01:06, 16.39it/s][A
Processing Videos:   1%|▏         | 14/1102 [00:00<01:04, 16.85it/s][A
Processing Videos:   1%|▏         | 16/1102 [00:00<01:04, 16.75it/s][A
Processing Videos:   2%|▏         | 18/1102 [00:01<01:05, 16.44it/s][A
Processing Videos:   2%|▏         | 20/1102 [00:01<01:06, 16.35it/s][A
Processing Videos:   2%|▏         | 22/1102 [00:01<01:05, 16.52it/s][A
Processing Videos:   2%|▏         | 24/1102 [00:01<01:04, 16.73it/s][A
Processing Videos: 