In [1]:
import os
import json
import yaml
import h5py
import pickle
import librosa
import numpy as np
import pandas as pd
import cv2
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ---------------- CONFIG ----------------
# Root folder of the project; every output file below is placed directly in it.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder"

# (video file, text file) name pairs: "1.mp4"/"1.txt" through "5.mp4"/"5.txt".
INPUTS = [(f"{n}.mp4", f"{n}.txt") for n in range(1, 6)]

# Output artefact paths, each one BASE_DIR joined with a fixed file name.
(
    OUT_H5,
    OUT_SCENE_MODEL,
    OUT_MOOD_MODEL,
    OUT_JSON,
    OUT_YAML,
) = (
    os.path.join(BASE_DIR, filename)
    for filename in (
        "processed_video.h5",
        "scene_model.pkl",
        "mood_model.pkl",
        "insights.json",
        "build_metadata.yaml",
    )
)

# ---------------- HELPERS ----------------
def extract_video_features(video_path, frame_skip=30):
    """Extract per-frame colour histograms as simple scene features.

    Every ``frame_skip``-th frame read from the video (starting with the
    first) is reduced to an 8x8x8 histogram over its three channels, passed
    through ``cv2.normalize`` and flattened to 512 values.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        frame_skip: Sample one frame out of every ``frame_skip`` frames read.

    Returns:
        A 2-D array with one 512-value row per sampled frame. If the video
        cannot be opened or yields no frames, a warning is printed (for the
        unopenable case) and an empty array of shape ``(0, 512)`` is returned
        so callers always receive a 2-D result.
    """
    bins = [8, 8, 8]
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            print(f"[WARN] Could not open video {video_path}")
        idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if idx % frame_skip == 0:
                hist = cv2.calcHist([frame], [0, 1, 2], None, bins,
                                    [0, 256, 0, 256, 0, 256])
                hist = cv2.normalize(hist, hist).flatten()
                frames.append(hist)
            idx += 1
    finally:
        # Release the capture even if histogram computation raises.
        cap.release()

    if not frames:
        # Keep the (n_frames, n_bins) layout for the empty case; a bare
        # np.array([]) would be 1-D and break axis-0 reductions downstream.
        return np.empty((0, bins[0] * bins[1] * bins[2]), dtype=np.float32)
    return np.array(frames)

def extract_audio_features(video_path):
    """Extract mean MFCC audio features from a media file.

    Args:
        video_path: Path passed to ``librosa.load`` (loaded at ``sr=22050``).

    Returns:
        A 13-element array holding each of the 13 MFCC rows averaged along
        axis 1. Extraction is best effort: on any failure a warning is
        printed and an array of 13 zeros is returned instead.
    """
    try:
        y, sr = librosa.load(video_path, sr=22050)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        return mfcc.mean(axis=1)
    except Exception as e:
        # Some failures carry an empty message, which left the warning blank
        # after the colon; the exception type keeps the log line diagnosable.
        print(
            f"[WARN] Audio extraction failed for {video_path}: "
            f"{type(e).__name__}: {e}"
        )
        return np.zeros(13)

def extract_text_features(sub_path):
    """Score the sentiment of a subtitle/text file with VADER.

    Args:
        sub_path: Path of a text file, read as UTF-8.

    Returns:
        A 4-element array ordered as the analyzer's ``neg``, ``neu``, ``pos``
        and ``compound`` scores for the whole file content. If reading or
        scoring fails, a warning is printed and four zeros are returned.
    """
    sentiment = SentimentIntensityAnalyzer()
    try:
        with open(sub_path, "r", encoding="utf-8") as handle:
            polarity = sentiment.polarity_scores(handle.read())
        features = np.array(
            [polarity[key] for key in ("neg", "neu", "pos", "compound")]
        )
    except Exception as err:
        print(f"[WARN] Text extraction failed for {sub_path}: {err}")
        features = np.zeros(4)
    return features

# ---------------- MAIN ----------------
def main():
    """Build features for every input pair, train placeholder models, write outputs.

    For each ``(video, text)`` pair in ``INPUTS`` (looked up under
    ``BASE_DIR/archive``) the video, audio and text feature vectors are
    concatenated into one row. The rows and randomly drawn 0/1 dummy labels
    are stored in ``OUT_H5``; a scaler + logistic-regression pipeline is
    fitted on them and pickled to both ``OUT_SCENE_MODEL`` and
    ``OUT_MOOD_MODEL``; summary files go to ``OUT_JSON`` and ``OUT_YAML``.
    """
    os.makedirs(BASE_DIR, exist_ok=True)

    all_features = []
    all_labels = []  # dummy labels for training

    # Collect features
    for vid, sub in INPUTS:
        vpath = os.path.join(BASE_DIR, "archive", vid)
        spath = os.path.join(BASE_DIR, "archive", sub)
        print(f"[INFO] Processing {vpath} + {spath}")

        frames = extract_video_features(vpath)
        if frames.size == 0:
            # No frames (missing/unreadable video): averaging would produce
            # NaN and break the concatenation, so fall back to zeros like the
            # audio and text helpers do. 512 = 8 * 8 * 8 histogram bins.
            print(f"[WARN] No video frames extracted from {vpath}; using zeros")
            vfeat = np.zeros(512)
        else:
            vfeat = frames.mean(axis=0)
        afeat = extract_audio_features(vpath)
        tfeat = extract_text_features(spath)

        all_features.append(np.concatenate([vfeat, afeat, tfeat]))
        all_labels.append(np.random.randint(0, 2))  # fake labels (0/1)

    X = np.array(all_features)
    y = np.array(all_labels)

    # The classifier cannot be fitted on a single class; with only a handful
    # of random labels that happens regularly, so flip one label if needed.
    if len(y) >= 2 and np.unique(y).size < 2:
        y[0] = 1 - y[0]

    # Save to HDF5
    with h5py.File(OUT_H5, "w") as hf:
        hf.create_dataset("features", data=X)
        hf.create_dataset("labels", data=y)

    # Train dummy models
    pipe = Pipeline([
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression())
    ])
    pipe.fit(X, y)

    # Both model files currently hold the same placeholder pipeline.
    for model_path in (OUT_SCENE_MODEL, OUT_MOOD_MODEL):
        with open(model_path, "wb") as f:
            pickle.dump(pipe, f)

    # Save insights.json
    classes, counts = np.unique(y, return_counts=True)
    insights = {
        "num_videos": len(INPUTS),
        "feature_shape": list(X.shape),
        "label_distribution": {int(k): int(v) for k, v in zip(classes, counts)}
    }
    with open(OUT_JSON, "w", encoding="utf-8") as f:
        json.dump(insights, f, indent=4)

    # Save metadata.yaml
    metadata = {
        "project": "SceneSense",
        "generated_on": datetime.now().isoformat(),
        "inputs": [f"{v}+{s}" for v, s in INPUTS],
        "outputs": {
            "h5": OUT_H5,
            "scene_model": OUT_SCENE_MODEL,
            "mood_model": OUT_MOOD_MODEL,
            "json": OUT_JSON,
            "yaml": OUT_YAML
        }
    }
    with open(OUT_YAML, "w", encoding="utf-8") as f:
        yaml.dump(metadata, f)

    print("[DONE] Outputs written to", BASE_DIR)

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()


[INFO] Processing C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.txt


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.mp4: 
[INFO] Processing C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.txt
[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.mp4: 
[INFO] Processing C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.txt
[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.mp4: 
[INFO] Processing C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.txt
[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.mp4: 
[INFO] Processing C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\5.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\5.txt
[WARN] Audio ext