### Motion Calculation in batches for all video files

In [1]:
import numpy as np
from cv2 import VideoCapture, cvtColor, COLOR_BGR2GRAY, calcOpticalFlowFarneback
import os
import glob
import pandas as pd
import re
from tqdm import tqdm

In [2]:
def calculate_optical_flow(video_path):
    # Open the video file
    cap = VideoCapture(video_path)
    if not cap.isOpened():
        print(f"Failed to open video: {video_path}")
        return []
    ret, frame1 = cap.read()
    prvs = cvtColor(frame1, COLOR_BGR2GRAY)
    flow_data = []

    while True:
        ret, frame2 = cap.read()
        if not ret:
            break
        next = cvtColor(frame2, COLOR_BGR2GRAY)

        # Calculate optical flow
        flow = calcOpticalFlowFarneback(prvs, next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
        print(flow.shape)
        flow_data.append(flow)

        prvs = next

    cap.release()
    return flow_data

In [3]:
# Directory containing the videos
video_directory = "./AlgonautsVideos268_All_30fpsmax"
video_paths = glob.glob(os.path.join(video_directory, "*.mp4"))

Optical Flow Data Structure: Each element in flow_data is a 2D array representing the optical flow between two frames of a video. This array contains flow vectors, where each vector has two components: horizontal (x) and vertical (y) flow.

In [4]:
def process_batch(video_paths):
    file_identifiers = []
    all_flow_data = []
    flow_averages = []
    flow_maximums = []
    flow_sums = []

    for path in tqdm(video_paths, desc="Processing Videos"):
        # Extract file identifier
        file_name = os.path.basename(path)
        identifier = re.match(r'(\d{4})_', file_name)
        file_id = identifier.group(1) if identifier else "Unknown"
    
        # Calculate optical flow
        flow_data = calculate_optical_flow(path)
        
        np.linalg.norm(flow_data, axis=0)
    
        # Calculate average and maximum flow
        flow_magnitudes = [np.linalg.norm(frame_flow, axis=2) for frame_flow in flow_data]
        avg_flow = np.mean([np.mean(mag) for mag in flow_magnitudes])
        max_flow = np.max([np.max(mag) for mag in flow_magnitudes])
    
        # Append to lists
        file_identifiers.append(file_id)
        all_flow_data.append(flow_data)
        flow_averages.append(avg_flow)
        flow_maximums.append(max_flow)
        sum_flow = np.sum(np.stack(flow_data), axis=0)
        flow_sums.append(sum_flow) 

    # Create DataFrame for the batch
    batch_df = pd.DataFrame({
        'File_ID': file_identifiers,
        'Optical_Flow_Data': all_flow_data,
        'Average_Flow': flow_averages,
        'Maximum_Flow': flow_maximums,
        'Summed_Flow': flow_sums
    })

    return batch_df

In [5]:
import gc

# Parameters
batch_size = 100
total_videos = len(video_paths)
num_batches = total_videos // batch_size + (1 if total_videos % batch_size != 0 else 0)


# Function to get the last processed batch number
def get_last_processed_batch():
    try:
        with open('last_processed_batch.txt', 'r') as file:
            return int(file.read().strip())
    except FileNotFoundError:
        return 0

# Function to save the last processed batch number
def save_last_processed_batch(batch_number):
    with open('last_processed_batch.txt', 'w') as file:
        file.write(str(batch_number))


# Start processing from the last processed batch
start_batch = get_last_processed_batch()

for batch_number in tqdm(range(start_batch, num_batches), desc="Processing Batches"):
    try:
        start_index = batch_number * batch_size
        end_index = min(start_index + batch_size, total_videos)
        current_batch_paths = video_paths[start_index:end_index]

        # Process the batch and create a dataframe
        batch_df = process_batch(current_batch_paths)

        # Save each column with File_IDs in separate NPZ files
        for column in ['Optical_Flow_Data', 'Average_Flow', 'Maximum_Flow', 'Summed_Flow']:
            np.savez_compressed(f'batch_{batch_number}_{column}.npz', File_ID=batch_df['File_ID'].to_numpy(), Data=batch_df[column].to_numpy())

        # Clear memory
        del batch_df
        gc.collect()

        # Save the last processed batch number
        save_last_processed_batch(batch_number)

    except Exception as e:
        print(f"Error occurred in batch {batch_number}: {e}")
        break

# Function to load and concatenate data from NPZ files
def load_and_concatenate_data(column_name, num_batches):
    file_ids = []
    data = []
    for i in range(num_batches):
        try:
            npz_file = np.load(f'batch_{i}_{column_name}.npz')
            file_ids.append(npz_file['File_ID'])
            data.append(npz_file['Data'])
        except FileNotFoundError:
            break
    return np.concatenate(file_ids), np.concatenate(data)

Processing Batches:   0%|          | 0/12 [00:00<?, ?it/s]
Processing Videos:   0%|          | 0/100 [00:00<?, ?it/s][A

(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 


Processing Videos:   1%|          | 1/100 [00:01<02:14,  1.35s/it]

(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)


[A

(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 


Processing Videos:   2%|▏         | 2/100 [00:02<02:10,  1.33s/it][A

(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 268, 2)
(268, 


Processing Videos:   3%|▎         | 3/100 [00:04<02:10,  1.35s/it][A
Processing Batches:   0%|          | 0/12 [00:04<?, ?it/s]


(268, 268, 2)
(268, 268, 2)


KeyboardInterrupt: 