In [2]:
import os
import pandas as pd

# 📂 Base Path
base_dir = "/Users/Video anomaly/data/raw_videos"

# 🔍 Scan for videos
video_data = []
for category in ["anomaly", "normal"]:
    folder_path = os.path.join(base_dir, category)
    
    if not os.path.exists(folder_path):
        print(f"❌ Folder missing: {folder_path}")
        continue
    
    for file in os.listdir(folder_path):
        if file.endswith((".mp4", ".avi", ".mov")):  # ✅ Support multiple formats
            video_id = os.path.splitext(file)[0]  # Remove extension
            video_path = os.path.join(folder_path, file)
            video_data.append({"video_id": video_id, "category": category, "video_path": video_path})

# 📄 Save to CSV
csv_path = "/Users/Video anomaly/data/video_paths.csv"
pd.DataFrame(video_data).to_csv(csv_path, index=False)
print(f"✅ Video paths saved to {csv_path} ({len(video_data)} videos found)")

✅ Video paths saved to /Users/Video anomaly/data/video_paths.csv (199 videos found)


In [3]:
import pandas as pd

csv_path = "/Users/Video anomaly/data/video_paths.csv"
df = pd.read_csv(csv_path)

print(df.head())  # Show the first few rows

        video_id category                                         video_path
0  Abuse027_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
1  Abuse026_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
2  Abuse019_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
3  Abuse018_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
4  Abuse013_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...


In [1]:
import os
import pandas as pd

# 📄 Load CSV
csv_path = "/Users/Video anomaly/data/video_paths.csv"
df = pd.read_csv(csv_path)

# 🔍 Check if each video exists
df["exists"] = df["video_path"].apply(lambda x: os.path.exists(x))

# 📊 Summary
valid_count = df["exists"].sum()
invalid_count = len(df) - valid_count

print(f"✅ Valid videos: {valid_count}/{len(df)}")
print(f"❌ Missing videos: {invalid_count}")

# 🚨 Show missing videos
if invalid_count > 0:
    print("\n🚨 Missing Video Paths:")
    print(df[~df["exists"]][["video_id", "video_path"]])

✅ Valid videos: 199/199
❌ Missing videos: 0


In [1]:
import os
import pandas as pd

csv_path = "/Users/Video anomaly/data/video_paths.csv"
df = pd.read_csv(csv_path)

# Show first 5 rows
print(df.head())

# Check if the first video file actually exists
video_path = df['video_path'][0]
print(f"Checking file: {video_path}")
print("Exists:", os.path.exists(video_path))

        video_id category                                         video_path
0  Abuse027_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
1  Abuse026_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
2  Abuse019_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
3  Abuse018_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
4  Abuse013_x264  anomaly  /Users/Video anomaly/data/raw_videos/anomaly/A...
Checking file: /Users/Video anomaly/data/raw_videos/anomaly/Abuse027_x264.mp4
Exists: True


In [2]:
df['video_path'] = df['video_path'].str.strip()
df['video_exists'] = df['video_path'].apply(lambda x: os.path.exists(x))

In [3]:
df['video_path'] = df['video_path'].apply(lambda x: os.path.abspath(x))

In [4]:
df['features_path'] = df['video_path'].str.replace(
    "raw_videos", "processed/features"
).str.replace(".mp4", ".npy")

# Check if the first feature file exists
print("Feature Exists:", os.path.exists(df['features_path'][0]))

Feature Exists: False


In [5]:
import os
import pandas as pd

csv_path = "/Users/Video anomaly/data/video_paths.csv"
df = pd.read_csv(csv_path)

# Generate feature file paths based on video paths
df['features_path'] = df['video_path'].str.replace(
    "raw_videos", "processed/features"
).str.replace(".mp4", ".npy")

# Check if feature files exist
df['features_exists'] = df['features_path'].apply(lambda x: os.path.exists(x))

# Count missing features
missing_features = df[~df['features_exists']]
print(f"❌ Missing features: {len(missing_features)}/{len(df)}")

# Print the first missing feature path
if not missing_features.empty:
    print("🚨 First missing feature file:", missing_features['features_path'].iloc[0])

❌ Missing features: 199/199
🚨 First missing feature file: /Users/Video anomaly/data/processed/features/anomaly/Abuse027_x264.npy


In [14]:
import pandas as pd

# Load CSVs
video_path_db = pd.read_csv(r"/Users/Video anomaly/data/processed/video_path_db.csv")
video_metadata = pd.read_csv(r"/Users/Video anomaly/data/processed/metadata/video_metadata.csv")

# Print column names
print("Columns in video_path_db.csv:", video_path_db.columns.tolist())
print("Columns in video_metadata.csv:", video_metadata.columns.tolist())

Columns in video_path_db.csv: ['video_id', 'category', 'video_path_abs']
Columns in video_metadata.csv: ['video_id', 'category', 'label', 'features_path', 'frames_dir', 'frame_count', 'mean_motion', 'max_motion', 'std_motion']


In [15]:
import pandas as pd
import os

# Define file paths
video_path_db_path = r"/Users/Video anomaly/data/processed/video_path_db.csv"
video_metadata_path = r"/Users/Video anomaly/data/processed/metadata/video_metadata.csv"

# Load CSVs
df_paths = pd.read_csv(video_path_db_path)
df_metadata = pd.read_csv(video_metadata_path)

# Ensure required columns exist
if "video_id" not in df_paths.columns or "video_path_abs" not in df_paths.columns:
    raise KeyError("video_path_db.csv must contain 'video_id' and 'video_path_abs' columns")

if "video_id" not in df_metadata.columns or "features_path" not in df_metadata.columns:
    raise KeyError("video_metadata.csv must contain 'video_id' and 'features_path' columns")

# Merge both dataframes on 'video_id' and 'category'
df = pd.merge(df_metadata, df_paths, on=['video_id', 'category'], how='left')

# Check for missing paths
df['video_exists'] = df['video_path_abs'].apply(lambda x: os.path.exists(str(x)) if pd.notna(x) else False)
df['features_exists'] = df['features_path'].apply(lambda x: os.path.exists(str(x)) if pd.notna(x) else False)

# Show missing file paths
missing_videos = df[~df['video_exists']]
missing_features = df[~df['features_exists']]

print(f"Total Videos: {len(df)}")
print(f"Missing Videos: {len(missing_videos)}")
print(f"Missing Features: {len(missing_features)}")

# Save results
df.to_csv(r"/Users/Video anomaly/data/processed/merged_video_data.csv", index=False)
print("\n✅ Merged data saved to 'merged_video_data.csv'")

Total Videos: 199
Missing Videos: 0
Missing Features: 199

✅ Merged data saved to 'merged_video_data.csv'
