In [1]:
import os
import pickle
import numpy as np
import pandas as pd
import cv2
import librosa
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# ---------------- CONFIG ----------------
# Project root: holds the trained model, the "archive" input folder and the output CSV.
BASE_DIR = r"C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder"

# (video, subtitle) file-name pairs: "1.mp4"/"1.txt" through "5.mp4"/"5.txt".
INPUTS = [(f"{n}.mp4", f"{n}.txt") for n in range(1, 6)]

# Pickled classifier to load, and where the prediction table is written.
MODEL_PATH = os.path.join(BASE_DIR, "mood_model.pkl")
OUT_CSV = os.path.join(BASE_DIR, "predictions.csv")

# ---------------- HELPERS ----------------
def extract_video_features(video_path, frame_skip=30):
    """Return the mean colour histogram of frames sampled from a video.

    Every ``frame_skip``-th frame (starting with the first) is reduced to an
    8x8x8 histogram over its three colour channels, normalized with
    ``cv2.normalize`` and flattened to 512 values. The per-frame vectors are
    then averaged.

    Args:
        video_path: Path of the video file to open with ``cv2.VideoCapture``.
        frame_skip: Sampling stride in frames; one frame out of every
            ``frame_skip`` is used.

    Returns:
        A 1-D array of 512 values. If the video cannot be opened or yields no
        frames, a warning is printed and an all-zero vector is returned, so
        callers always receive a vector of the same length.
    """
    bins = [8, 8, 8]
    cap = cv2.VideoCapture(video_path)
    frames = []
    idx = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if idx % frame_skip == 0:
                hist = cv2.calcHist([frame], [0, 1, 2], None, bins,
                                    [0, 256, 0, 256, 0, 256])
                hist = cv2.normalize(hist, hist).flatten()
                frames.append(hist)
            idx += 1
    finally:
        # Release the capture handle even if histogram computation raises.
        cap.release()

    if not frames:
        # Averaging an empty list would yield a 0-d NaN, which cannot be
        # concatenated with the other feature vectors downstream.
        print(f"[WARN] Video extraction failed for {video_path}: no frames could be read")
        return np.zeros(int(np.prod(bins)), dtype=np.float32)

    return np.array(frames).mean(axis=0)

def extract_audio_features(video_path):
    """Return the time-averaged MFCC vector of a file's audio track.

    The audio is loaded with ``librosa.load`` at 22050 Hz, 13 MFCCs are
    computed, and each coefficient is averaged over time.

    Args:
        video_path: Path of the media file whose audio should be decoded.

    Returns:
        A 1-D array of 13 values. Extraction is best-effort: on any failure
        (including a decode that yields no samples) a warning is printed and
        an all-zero vector is returned.
    """
    n_mfcc = 13
    try:
        y, sr = librosa.load(video_path, sr=22050)
        if y.size == 0:
            # An empty signal cannot produce meaningful coefficients; route it
            # through the same fallback as a decode failure.
            raise ValueError("no audio samples decoded")
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        return mfcc.mean(axis=1)
    except Exception as e:
        # Include the exception type: some decoder errors carry an empty
        # message, which would otherwise leave the warning without a cause.
        print(f"[WARN] Audio extraction failed for {video_path}: {type(e).__name__}: {e}")
        return np.zeros(n_mfcc)

def extract_text_features(sub_path):
    """Score the sentiment of a subtitle/text file with VADER.

    Args:
        sub_path: Path of a UTF-8 text file to read in full.

    Returns:
        A 1-D array ``[neg, neu, pos, compound]`` taken from
        ``polarity_scores``. If reading or scoring fails, a warning is printed
        and four zeros are returned.
    """
    sentiment = SentimentIntensityAnalyzer()
    try:
        with open(sub_path, "r", encoding="utf-8") as handle:
            contents = handle.read()
        scores = sentiment.polarity_scores(contents)
        ordered_keys = ("neg", "neu", "pos", "compound")
        return np.array([scores[key] for key in ordered_keys])
    except Exception as e:
        print(f"[WARN] Text extraction failed for {sub_path}: {e}")
        return np.zeros(4)

# ---------------- MAIN ----------------
def main():
    """Predict a label for every INPUTS pair and save the table to OUT_CSV.

    For each (video, subtitle) pair under ``BASE_DIR/archive`` the video,
    audio and text feature vectors are concatenated into one row, passed to
    the pickled model, and the predicted label plus the highest class
    probability are recorded. The resulting table is written to ``OUT_CSV``
    and printed.

    Raises:
        FileNotFoundError: If ``MODEL_PATH`` does not exist.
    """
    model_missing = not os.path.exists(MODEL_PATH)
    if model_missing:
        raise FileNotFoundError(f"Model not found at {MODEL_PATH}. Run training first.")

    # NOTE(review): unpickling can execute arbitrary code; only load model
    # files that come from a trusted training run.
    with open(MODEL_PATH, "rb") as model_file:
        model = pickle.load(model_file)

    archive_dir = os.path.join(BASE_DIR, "archive")
    rows = []

    for vid, sub in INPUTS:
        vpath = os.path.join(archive_dir, vid)
        spath = os.path.join(archive_dir, sub)

        print(f"[INFO] Predicting {vpath} + {spath}")

        # Extraction order is kept (video, audio, text) so warnings print in
        # the same sequence.
        parts = [
            extract_video_features(vpath),
            extract_audio_features(vpath),
            extract_text_features(spath),
        ]
        # The model expects a single-row 2-D input.
        row = np.concatenate(parts)[np.newaxis, :]

        label = model.predict(row)[0]
        class_probs = model.predict_proba(row)[0]

        record = {
            "video": vid,
            "subtitle": sub,
            "predicted_label": int(label),
            "confidence": float(class_probs.max()),
        }
        rows.append(record)

    table = pd.DataFrame(rows)
    table.to_csv(OUT_CSV, index=False)

    print(f"[DONE] Predictions saved to {OUT_CSV}")
    print(table)

# Run the prediction pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


[INFO] Predicting C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.txt


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\1.mp4: 
[INFO] Predicting C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.txt


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\2.mp4: 
[INFO] Predicting C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.txt


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\3.mp4: 
[INFO] Predicting C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.txt


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\4.mp4: 
[INFO] Predicting C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\5.mp4 + C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\5.txt
[WARN] Audio extraction failed for C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\archive\5.mp4: 
[DONE] Predictions saved to C:\Users\NXTWAVE\Downloads\AI Movie Scene Finder\predictions.csv
   video subtitle  predicted_label  confidence
0  1.mp4    1.txt                1    0.989873
1  2.mp4    2.txt                1    1.000000
2  3.mp4    3.txt                1    0.992424
3  4.mp4    4.txt                0    0.982196
4  5.mp4    5.txt                0    0.918798


  y, sr = librosa.load(video_path, sr=22050)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
