# In [None]:
import os
import cv2
import numpy as np
import librosa
import pandas as pd
from scipy.optimize import linear_sum_assignment
from scipy.signal import resample
from concurrent.futures import ProcessPoolExecutor, as_completed
import multiprocessing

# Number of samples every feature time series is resampled to, so that audio
# and video feature vectors share one length and can be compared directly.
# Kept small on purpose: lower temporal resolution in exchange for speed.
FIXED_LENGTH = 50  # Lower resolution trade-off for speed

# Number of video frames read and discarded between two analysed frames.
# With a value of 100, only every 101st frame contributes to the motion feature.
FRAME_SKIP = 100

def extract_video_features(video_path):
    """
    Build a fixed-length motion feature vector for one video file.

    The first frame is used as the reference. After that, FRAME_SKIP frames
    are skipped, the next frame is decoded, and the mean absolute grayscale
    difference to the previously analysed frame is recorded. The resulting
    series is z-score normalized and resampled to FIXED_LENGTH samples.

    Args:
        video_path: Path of the video file to analyse.

    Returns:
        A 1-D numpy array of length FIXED_LENGTH. An all-zero vector is
        returned when the video yields no frame at all or no frame pair
        to compare.
    """
    cap = cv2.VideoCapture(video_path)
    motion = []
    try:
        ret, prev_frame = cap.read()
        if not ret:
            return np.zeros(FIXED_LENGTH)
        prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)

        while True:
            # grab() advances the stream without decoding the frame, which is
            # all that is needed for frames that are thrown away anyway.
            # all() stops at the first failed grab (end of stream).
            if not all(cap.grab() for _ in range(FRAME_SKIP)):
                break
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            motion.append(np.mean(cv2.absdiff(gray, prev_gray)))
            prev_gray = gray
    finally:
        # Release the capture even if a conversion above raises.
        cap.release()

    if not motion:
        return np.zeros(FIXED_LENGTH)

    features = np.array(motion)
    std_val = np.std(features)
    if not std_val > 0:
        # Constant series: avoid dividing by zero, keep the centered values.
        std_val = 1
    features = (features - np.mean(features)) / std_val
    return resample(features, FIXED_LENGTH)

def extract_audio_features(audio_path):
    """
    Build a fixed-length acoustic feature vector for one audio file.

    The file is loaded at 22050 Hz, its onset strength envelope is computed,
    z-score normalized, and resampled to FIXED_LENGTH samples. An all-zero
    vector of that length is returned when the envelope is empty.
    """
    signal, sample_rate = librosa.load(audio_path, sr=22050)
    envelope = librosa.onset.onset_strength(y=signal, sr=sample_rate)

    # Nothing to normalize: fall back to a neutral vector.
    if len(envelope) == 0:
        return np.zeros(FIXED_LENGTH)

    spread = np.std(envelope)
    # A flat envelope has zero spread; divide by 1 instead of 0.
    scale = spread if spread > 0 else 1
    centered = envelope - np.mean(envelope)
    normalized = centered / scale
    return resample(normalized, FIXED_LENGTH)

# Paths to your folders (update as needed)
audio_folder = "audio_only"  # Contains files like: audio_only_<ID>_.wav
video_folder = "video_only"  # Contains files like: video_only_<ID>_.mp4

# The guard keeps worker processes (which re-import the main module under the
# "spawn" start method) from re-running the whole pipeline themselves.
if __name__ == "__main__":
    audio_files = sorted(f for f in os.listdir(audio_folder) if f.endswith('.wav'))
    video_files = sorted(f for f in os.listdir(video_folder) if f.endswith('.mp4'))

    audio_paths = [os.path.join(audio_folder, f) for f in audio_files]
    video_paths = [os.path.join(video_folder, f) for f in video_files]

    # Extract features in parallel, one worker per CPU core.
    # executor.map yields results in the order of its input, so row i of each
    # feature matrix belongs to audio_files[i] / video_files[i]. Collecting
    # with as_completed would return them in completion order and scramble
    # the filename <-> feature correspondence used for the final matches.
    num_workers = multiprocessing.cpu_count()
    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        # Both calls submit their work immediately, so audio and video jobs
        # share the pool instead of running as two separate phases.
        audio_results = executor.map(extract_audio_features, audio_paths)
        video_results = executor.map(extract_video_features, video_paths)
        audio_features = list(audio_results)
        video_features = list(video_results)

    # Shape: (number of files, FIXED_LENGTH), rows aligned with the file lists.
    audio_features = np.stack(audio_features)
    video_features = np.stack(video_features)

    # Pairwise similarity of every audio vector with every video vector:
    # dot product scaled by the vector length.
    similarity_matrix = np.dot(audio_features, video_features.T) / FIXED_LENGTH

    # Hungarian algorithm minimizes cost, so negate to maximize similarity.
    row_ind, col_ind = linear_sum_assignment(-similarity_matrix)

    # Build the match list and export to CSV.
    matches = [(audio_files[i], video_files[j]) for i, j in zip(row_ind, col_ind)]
    df = pd.DataFrame(matches, columns=['Audio File', 'Video File'])
    output_csv = "matches_fast_optimized.csv"
    df.to_csv(output_csv, index=False)
    print(f"Matching completed. Results saved to {output_csv}")
