In [7]:
import pandas as pd
import cv2
import numpy as np
from tqdm import tqdm
import os
from pathlib import Path

# Base directory (adjust if needed). All other locations are derived from it.
PROJECT_ROOT = Path("/Users/Video anomaly")  # Update to your actual root
PROCESSED_DIR = PROJECT_ROOT / "data" / "processed"
RAW_VIDEOS_DIR = PROJECT_ROOT / "data" / "raw_videos"

# Load metadata. Only the read itself sits inside the `try`, so an unrelated
# error is never mistaken for a missing metadata file.
metadata_path = PROCESSED_DIR / "metadata" / "video_metadata.csv"
try:
    df = pd.read_csv(metadata_path)
except FileNotFoundError:
    print(f"❌ Metadata file not found at {metadata_path}")
    # `exit()` is an interactive helper injected by the `site` module and
    # terminates with status 0; raise SystemExit with a non-zero status so
    # callers (shell scripts, CI) can detect the failure.
    raise SystemExit(1) from None
print("🔍 Metadata columns:", df.columns.tolist())

# Fix features path to absolute. pathlib keeps `x` unchanged when it is
# already absolute, so absolute entries in the CSV are left as they are.
df['features_path'] = df['features_path'].map(lambda x: str(PROCESSED_DIR / x))

# Build absolute video paths using video_id and category.
# A zip over the two columns avoids row-wise `DataFrame.apply`, which is slow
# and returns a DataFrame (not a Series) when the frame is empty.
df['video_path_abs'] = [
    str(RAW_VIDEOS_DIR / category / f"{video_id}.mp4")  # Assuming .mp4, adjust extension if needed
    for category, video_id in zip(df['category'], df['video_id'])
]

# Verify file existence. `astype(bool)` keeps the masks boolean even when the
# metadata file has no rows.
df['video_exists'] = df['video_path_abs'].map(os.path.exists).astype(bool)
df['features_exists'] = df['features_path'].map(os.path.exists).astype(bool)
files_present = df['video_exists'] & df['features_exists']
valid_df = df[files_present].copy()
print(f"📊 Valid videos: {len(valid_df)}/{len(df)}")

if len(valid_df) < len(df):
    print("⚠️ Missing files sample:")
    # Reuse the boolean mask instead of reconstructing it from the index.
    print(df.loc[~files_present, ['video_id', 'video_path_abs', 'features_path']].head())

# Optical flow calculation
def calc_optical_flow(video_path):
    """Summarise frame-to-frame motion of a video using dense optical flow.

    Every pair of consecutive frames is converted to grayscale and passed to
    OpenCV's Farneback dense optical flow. The mean flow-vector magnitude of
    each pair is recorded, and the mean and maximum of those per-pair values
    are returned.

    Args:
        video_path: Location of the video file. A ``str`` is passed to OpenCV
            unchanged; ``os.PathLike`` objects (e.g. ``pathlib.Path``) are
            converted with ``os.fspath`` first.

    Returns:
        ``None`` if the video cannot be opened (a message is printed).
        Otherwise a dict with keys ``'mean_flow'`` and ``'max_flow'``; both
        are ``0`` when the video yields fewer than two frames.
    """
    # Convert path objects to a plain string before handing them to OpenCV.
    source = os.fspath(video_path) if isinstance(video_path, os.PathLike) else video_path
    cap = cv2.VideoCapture(source)
    flow_magnitudes = []
    try:
        if not cap.isOpened():
            print(f"❌ Cannot open: {video_path}")
            return None

        prev_frame = None
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            if prev_frame is not None:
                # Positional arguments after the (unused) output buffer:
                # pyr_scale=0.5, levels=3, winsize=15, iterations=3,
                # poly_n=5, poly_sigma=1.2, flags=0.
                flow = cv2.calcOpticalFlowFarneback(prev_frame, gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                # Per-pixel magnitude of the (dx, dy) flow vectors.
                magnitude = np.sqrt(flow[..., 0]**2 + flow[..., 1]**2)
                flow_magnitudes.append(np.mean(magnitude))
            prev_frame = gray
    finally:
        # Always free the capture handle, including when OpenCV raises on a
        # corrupt frame; callers that catch the exception and continue with
        # the next video would otherwise leak one handle per failure.
        cap.release()

    return {
        'mean_flow': np.mean(flow_magnitudes) if flow_magnitudes else 0,
        'max_flow': np.max(flow_magnitudes) if flow_magnitudes else 0
    }

# Process videos: combine each video's stored feature dict with freshly
# computed optical-flow statistics. A failure on one video is reported and
# skipped so the remaining videos are still processed.
enhanced_data = []
progress = tqdm(valid_df.iterrows(), total=len(valid_df), desc="Processing")
for _, record in progress:
    try:
        # The feature file is loaded with allow_pickle=True and unwrapped via
        # .item(); the result is unpacked below as a dict.
        # NOTE: allow_pickle=True can execute arbitrary code while loading,
        # so only point this at feature files you trust.
        stored = np.load(record['features_path'], allow_pickle=True).item()
        motion = calc_optical_flow(record['video_path_abs'])
        if not motion:
            # The video could not be opened; calc_optical_flow already said so.
            continue
        enhanced_data.append(
            {
                'video_id': record['video_id'],
                'label': record['label'],
                **stored,
                **motion,
            }
        )
    except Exception as err:
        print(f"❌ Failed {record['video_id']}: {err}")

# Save enhanced features (only if at least one video was processed)
if not enhanced_data:
    print("❌ No videos processed. Check paths and files.")
else:
    output_path = PROCESSED_DIR / "metadata" / "enhanced_features.csv"
    enhanced_df = pd.DataFrame(enhanced_data)
    enhanced_df.to_csv(output_path, index=False)
    print(f"✅ Saved enhanced features for {len(enhanced_data)} videos to {output_path}")

🔍 Metadata columns: ['video_id', 'category', 'label', 'features_path', 'frames_dir', 'frame_count', 'mean_motion', 'max_motion', 'std_motion']
📊 Valid videos: 199/199


Processing: 100%|██████████| 199/199 [2:21:18<00:00, 42.61s/it]    

✅ Saved enhanced features for 199 videos to /Users/Video anomaly/data/processed/metadata/enhanced_features.csv



