In [1]:
# ============================================================
# 1. Imports & Setup
# ============================================================
import pandas as pd
import subprocess
import json
import yt_dlp
import warnings
warnings.filterwarnings("ignore")


# ============================================================
# 2. Movie List (30 Movies: Blockbusters + Hits + Flops)
# ============================================================
movies = [
    # ----------------- BLOCKBUSTERS -----------------
    ("Avengers: Endgame", 2019, "blockbuster"),
    ("Avatar: The Way of Water", 2022, "blockbuster"),
    ("Spider-Man: No Way Home", 2021, "blockbuster"),
    ("Top Gun: Maverick", 2022, "blockbuster"),
    ("Barbie", 2023, "blockbuster"),
    ("Oppenheimer", 2023, "blockbuster"),
    ("Frozen II", 2019, "blockbuster"),
    ("Joker", 2019, "blockbuster"),
    ("Black Panther", 2018, "blockbuster"),
    ("Incredibles 2", 2018, "blockbuster"),

    # ---------------------- HITS ---------------------
    ("Dune", 2021, "hit"),
    ("The Batman", 2022, "hit"),
    ("Interstellar", 2014, "hit"),
    ("Inception", 2010, "hit"),
    ("John Wick", 2014, "hit"),
    ("The Hunger Games", 2012, "hit"),
    ("Get Out", 2017, "hit"),
    ("La La Land", 2016, "hit"),
    ("Mission Impossible: Fallout", 2018, "hit"),
    ("Jumanji: Welcome to the Jungle", 2017, "hit"),

    # ---------------------- FLOPS --------------------
    ("The Flash", 2023, "flop"),
    ("Morbius", 2022, "flop"),
    ("Cats", 2019, "flop"),
    ("Green Lantern", 2011, "flop"),
    ("The Mummy", 2017, "flop"),
    ("John Carter", 2012, "flop"),  
    ("Dark Phoenix", 2019, "flop"),
    ("Pan", 2015, "flop"),
    ("Jupiter Ascending", 2015, "flop"),
    ("King Arthur: Legend of the Sword", 2017, "flop"),
]

df_movies = pd.DataFrame(movies, columns=["title", "year", "category"])

# Add empty fields for later population
df_movies["trailer_url"] = None
df_movies["youtube_id"] = None
df_movies["trends_keyword"] = df_movies["title"]



# ============================================================
# 3. Year-Aware Trailer Fetcher
# ============================================================
def get_trailer(title, year):
    """
    Search YouTube using yt-dlp.
    Priority:
    1. Search top 5 results for one containing the YEAR
    2. Else fallback to first result
    """
    query = f"ytsearch5:{title} official trailer"
    command = ["yt-dlp", query, "--dump-json", "--no-check-certificate"]

    try:
        result = subprocess.run(
            command,
            capture_output=True,
            text=True
        )

        # Parse each line (yt-dlp outputs 1 JSON per line)
        entries = []
        for line in result.stdout.splitlines():
            try:
                entries.append(json.loads(line))
            except:
                pass

        # Try to match the YEAR in the title
        for e in entries:
            if str(year) in str(e.get("title", "")):
                return e.get("webpage_url"), e.get("id")

        # Fallback to the first result if no year match
        if len(entries) > 0:
            return entries[0].get("webpage_url"), entries[0].get("id")

        return None, None

    except Exception as e:
        print(f"Error fetching trailer for {title}: {e}")
        return None, None



# ============================================================
# 4. Loop Over Movies and Populate Trailer Info
# ============================================================
trailer_urls = []
youtube_ids = []

for title, year in zip(df_movies["title"], df_movies["year"]):
    url, vid_id = get_trailer(title, year)
    trailer_urls.append(url)
    youtube_ids.append(vid_id)

df_movies["trailer_url"] = trailer_urls
df_movies["youtube_id"] = youtube_ids



# ============================================================
# 5. Final Output
# ============================================================
df_movies


Unnamed: 0,title,year,category,trailer_url,youtube_id,trends_keyword
0,Avengers: Endgame,2019,blockbuster,https://www.youtube.com/watch?v=TcMBFSGVi1c,TcMBFSGVi1c,Avengers: Endgame
1,Avatar: The Way of Water,2022,blockbuster,https://www.youtube.com/watch?v=d9MyW72ELq0,d9MyW72ELq0,Avatar: The Way of Water
2,Spider-Man: No Way Home,2021,blockbuster,https://www.youtube.com/watch?v=JfVOs4VSpmA,JfVOs4VSpmA,Spider-Man: No Way Home
3,Top Gun: Maverick,2022,blockbuster,https://www.youtube.com/watch?v=qSqVVswa420,qSqVVswa420,Top Gun: Maverick
4,Barbie,2023,blockbuster,https://www.youtube.com/watch?v=GZuubCclyS0,GZuubCclyS0,Barbie
5,Oppenheimer,2023,blockbuster,https://www.youtube.com/watch?v=bK6ldnjE3Y0,bK6ldnjE3Y0,Oppenheimer
6,Frozen II,2019,blockbuster,https://www.youtube.com/watch?v=suVQt0pfOLc,suVQt0pfOLc,Frozen II
7,Joker,2019,blockbuster,https://www.youtube.com/watch?v=zAGVQLHvwOY,zAGVQLHvwOY,Joker
8,Black Panther,2018,blockbuster,https://www.youtube.com/watch?v=xjDjIWPwcPU,xjDjIWPwcPU,Black Panther
9,Incredibles 2,2018,blockbuster,https://www.youtube.com/watch?v=c8GELmpvzuk,c8GELmpvzuk,Incredibles 2


In [2]:
# =============================================================
#   üéû TRAILER EMOTION PIPELINE (TRANSFER LEARNING + FINE-TUNE READY)
# =============================================================

import os
import cv2
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import onnxruntime as ort
from insightface.app import FaceAnalysis

# PyTorch (fine-tuned model support)
import torch
import torch.nn as nn


# =============================================================
# 0. CONFIG
# =============================================================

TRAILER_DIR = "/Users/shrey24/Desktop/trailers"
EMOTION_MODEL_PATH = "emotion-ferplus-8.onnx"

USE_FINETUNED_MODEL = False             # ‚Üê flip to True after training
SAVE_FACES = True                  # ‚Üê flip to True to build training dataset
DATASET_SAVE_DIR = "face_dataset"
os.makedirs(DATASET_SAVE_DIR, exist_ok=True)


# =============================================================
# 1. LOAD MODELS (SCRFD + FER+ + Fine-Tuned Model Option)
# =============================================================

# ---------- SCRFD FACE DETECTOR ----------
face_app = FaceAnalysis(
    name="scrfd_2.5g",
    providers=["CPUExecutionProvider"]
)
face_app.prepare(ctx_id=0, det_size=(640, 640))


# ---------- ONNX FER+ MODEL ----------
emotion_sess = ort.InferenceSession(
    EMOTION_MODEL_PATH,
    providers=["CPUExecutionProvider"]
)

EMO_LABELS = [
    "neutral", "happiness", "surprise",
    "sadness", "anger", "disgust", "fear", "contempt"
]


# ---------- FINETUNED PYTORCH MODEL ----------
class FineTunedEmotionNet(nn.Module):
    def __init__(self, num_classes=8):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1))
        )
        self.classifier = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)


if USE_FINETUNED_MODEL:
    FINETUNED_PATH = "finetuned_trailer_emotion.pth"
    ft_model = FineTunedEmotionNet()
    ft_model.load_state_dict(torch.load(FINETUNED_PATH, map_location="cpu"))
    ft_model.eval()
else:
    ft_model = None


# =============================================================
# 2. HELPERS
# =============================================================

def preprocess_face(face_crop):
    if face_crop is None or face_crop.size == 0:
        return None

    gray = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (64, 64))

    # FER+ expects raw 0‚Äì255 float input, NOT normalized to 0‚Äì1
    tensor = resized.astype("float32")
    
    # reshape to (1, 1, 64, 64)
    tensor = tensor.reshape(1, 1, 64, 64)

    return tensor



# ---------- FACE SAVER FOR FINE-TUNING ----------
def save_face_for_training(face_crop, movie_title, timestamp):
    # skip invalid or empty crops
    if face_crop is None or face_crop.size == 0:
        return
    
    movie_dir = os.path.join(DATASET_SAVE_DIR, movie_title)
    os.makedirs(movie_dir, exist_ok=True)
    
    filename = os.path.join(movie_dir, f"{timestamp:.2f}.jpg")
    cv2.imwrite(filename, face_crop)


def auto_label_face(face_crop, movie_title, timestamp):
    """Use FER+ to auto-label and write to labels.csv."""
    emo_dict = predict_emotion(face_crop)  # FER+ runs here

    if emo_dict is None:
        return

    # pick max prob emotion
    emo = max(emo_dict, key=emo_dict.get)

    # Ensure movie folder exists
    movie_dir = os.path.join(DATASET_SAVE_DIR, movie_title)
    os.makedirs(movie_dir, exist_ok=True)

    # Append to CSV
    label_path = os.path.join(movie_dir, "labels.csv")
    file_exists = os.path.exists(label_path)

    with open(label_path, "a") as f:
        if not file_exists:
            f.write("filename,emotion\n")
        f.write(f"{timestamp:.2f}.jpg,{emo}\n")



# =============================================================
# 3. UNIFIED EMOTION PREDICTOR (FER+ or FINETUNED MODEL)
# =============================================================
def predict_emotion(face_crop):
    tensor = preprocess_face(face_crop)
    if tensor is None:
        return None

    # ----- CASE 1: USE FINE-TUNED MODEL -----
    if USE_FINETUNED_MODEL and ft_model is not None:
        with torch.no_grad():
            x = torch.tensor(tensor).float()
            logits = ft_model(x)
            probs = torch.softmax(logits, dim=1)[0].numpy()
        return dict(zip(EMO_LABELS, probs))

    # ----- CASE 2: FER+ (ONNX) -----
    input_name = emotion_sess.get_inputs()[0].name   # <-- FIXES NEUTRAL ISSUE
    ort_inputs = {input_name: tensor}

    raw = emotion_sess.run(None, ort_inputs)[0][0]
    probs = np.exp(raw) / np.exp(raw).sum()
    return dict(zip(EMO_LABELS, probs))


# =============================================================
# 4. EMOTION MAPPINGS (Arousal + Valence)
# =============================================================
AROUSAL_EMOTIONS = ["happiness", "surprise", "fear", "anger"]
NEGATIVE_EMOTIONS = ["fear", "anger", "sadness"]
POSITIVE_EMOTIONS = ["happiness"]


# =============================================================
# 5. FRAME ANALYSIS (SCRFD ‚Üí Face ‚Üí Emotion)
# =============================================================
def analyze_trailer_frames(video_path, frame_sample_rate=1.5):

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 24
    frame_interval = int(frame_sample_rate * fps)

    rows = []
    frame_idx = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    movie_title = os.path.basename(video_path).replace(".mp4","")

    pbar = tqdm(total=total_frames, desc="Frames", leave=False)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if frame_idx % frame_interval == 0:

            faces = face_app.get(frame)

            if len(faces) > 0:
                H, W = frame.shape[:2]
                best_face = None
                best_dist = 1e9

                # pick most central face
                for f in faces:
                    x1, y1, x2, y2 = map(int, f.bbox)
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    dist = (cx - W/2)**2 + (cy - H/2)**2
                    if dist < best_dist:
                        best_dist = dist
                        best_face = (x1, y1, x2, y2)

                if best_face:
                    x1, y1, x2, y2 = best_face
                    face_crop = frame[y1:y2, x1:x2]

                    # üö® Skip tiny or invalid face crops
                    if face_crop is None or face_crop.size == 0:
                        continue
                    if face_crop.shape[0] < 20 or face_crop.shape[1] < 20:
                        continue

                    # SAVE FOR FINE-TUNING
                    if SAVE_FACES:
                        timestamp = frame_idx / fps
                        save_face_for_training(face_crop, movie_title, timestamp)
                        auto_label_face(face_crop, movie_title, timestamp)

                    emo = predict_emotion(face_crop)
                    if emo:
                        rows.append(emo)

        frame_idx += 1
        pbar.update(1)

    pbar.close()
    cap.release()

    if not rows:
        return None

    df = pd.DataFrame(rows)
    df["time_sec"] = np.arange(len(df)) * frame_sample_rate

    df[EMO_LABELS] = df[EMO_LABELS].div(df[EMO_LABELS].sum(axis=1), axis=0)

    df["arousal"] = df[AROUSAL_EMOTIONS].sum(axis=1)
    df["valence"] = df[POSITIVE_EMOTIONS].sum(axis=1) - df[NEGATIVE_EMOTIONS].sum(axis=1)

    return df



# =============================================================
# 6. AUDIO RMS
# =============================================================
def extract_audio_rms(video_path, sr_target=22050, hop=1024):
    y, sr = librosa.load(video_path, sr=sr_target, mono=True)
    rms = librosa.feature.rms(y=y, hop_length=hop)[0]
    t = librosa.frames_to_time(np.arange(len(rms)), sr=sr, hop_length=hop)
    return t, rms


# =============================================================
# 7. GENRE MISMATCH
# =============================================================
GENRE_PROTOTYPES = {
    "Action": {"arousal":0.8,"valence":0.1},
    "Adventure": {"arousal":0.7,"valence":0.2},
    "Animation": {"arousal":0.6,"valence":0.4},
    "Comedy": {"arousal":0.5,"valence":0.5},
    "Drama": {"arousal":0.4,"valence":0.1},
    "Horror": {"arousal":0.9,"valence":-0.3},
    "Thriller": {"arousal":0.85,"valence":-0.1},
    "Family": {"arousal":0.5,"valence":0.4},
    "Romance": {"arousal":0.4,"valence":0.3},
    "Science Fiction": {"arousal":0.7,"valence":0.1},
}

def compute_genre_mismatch(df, genre_label):
    if df is None:
        return np.nan

    if genre_label not in GENRE_PROTOTYPES:
        return np.nan

    mean_ar = df["arousal"].mean()
    mean_val = df["valence"].mean()
    proto = GENRE_PROTOTYPES[genre_label]

    return np.sqrt(
        (mean_ar - proto["arousal"])**2 +
        (mean_val - proto["valence"])**2
    )


# =============================================================
# 8. BEST TRAILER MATCH
# =============================================================
files = [f for f in os.listdir(TRAILER_DIR) if f.lower().endswith(".mp4")]
clean_map = {f: f.replace(".mp4","").lower().strip() for f in files}
from difflib import get_close_matches

def find_best_trailer(title):
    title_clean = title.lower().strip()
    match = get_close_matches(title_clean, clean_map.values(), n=1, cutoff=0.6)
    if not match:
        return None
    for fname, clean in clean_map.items():
        if clean == match[0]:
            return os.path.join(TRAILER_DIR, fname)


# =============================================================
# 9. MAIN FEATURE EXTRACTOR
# =============================================================
def extract_trailer_emotion_features(video_path, genre_label=None):

    df = analyze_trailer_frames(video_path)
    if df is None:
        return {"emotion_valid": 0}

    t = df["time_sec"].values
    ar = df["arousal"].values
    val = df["valence"].values

    feats = {
        "emotion_valid": 1,
        "excitement_peak_density": (ar > np.quantile(ar, 0.75)).mean(),
        "fear_peak_density": (df["fear"] > np.quantile(df["fear"], 0.75)).mean(),
        "emotional_arc_slope": np.polyfit(t, val, 1)[0] if len(t) > 1 else np.nan,
        "arousal_volatility": np.std(ar),
        "genre_emotion_mismatch": compute_genre_mismatch(df, genre_label),
    }

    for emo in ["happiness", "fear", "sadness", "arousal", "valence"]:
        feats[f"emo_emb_{emo}_mean"] = df[emo].mean()

    ta, rms = extract_audio_rms(video_path)
    mask = ta <= t[-1]

    if mask.sum() > 5:
        a_interp = np.interp(ta[mask], t, ar)
        feats["music_emotion_corr"] = np.corrcoef(a_interp, rms[mask])[0, 1]
    else:
        feats["music_emotion_corr"] = np.nan

    return feats


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/shrey24/.insightface/models/scrfd_2.5g/scrfd_2.5g.onnx detection [1, 3, '?', '?'] 127.5 128.0
set det-size: (640, 640)


In [3]:
df_movies["trailer_path"] = df_movies["title"].apply(find_best_trailer)

emotion_rows = []
for _, r in df_movies.iterrows():
    print("üé¨", r["title"])
    feats = extract_trailer_emotion_features(r["trailer_path"], r.get("genre"))
    feats["title"] = r["title"]
    emotion_rows.append(feats)

df_emotion = pd.DataFrame(emotion_rows)
display(df_emotion)


üé¨ Avengers: Endgame


                                                            

üé¨ Avatar: The Way of Water


                                                            

üé¨ Spider-Man: No Way Home


                                                            

üé¨ Top Gun: Maverick


                                                            

üé¨ Barbie


                                                             

üé¨ Oppenheimer


                                                            

üé¨ Frozen II


                                                             

üé¨ Joker


                                                            

üé¨ Black Panther


                                                            

üé¨ Incredibles 2


                                                            

üé¨ Dune


                                                             

üé¨ The Batman


                                                             

üé¨ Interstellar


                                                             

üé¨ Inception


                                                            

üé¨ John Wick


                                                            

üé¨ The Hunger Games


                                                            

üé¨ Get Out


                                                            

üé¨ La La Land


                                                            

üé¨ Mission Impossible: Fallout


                                                             

üé¨ Jumanji: Welcome to the Jungle


                                                             

üé¨ The Flash


                                                                

üé¨ Morbius


                                                            

üé¨ Cats


                                                             

üé¨ Green Lantern


                                                             

üé¨ The Mummy


                                                             

üé¨ John Carter


                                                             

üé¨ Dark Phoenix


                                                             

üé¨ Pan


                                                             

üé¨ Jupiter Ascending


                                                             

üé¨ King Arthur: Legend of the Sword


                                                             

Unnamed: 0,emotion_valid,excitement_peak_density,fear_peak_density,emotional_arc_slope,arousal_volatility,genre_emotion_mismatch,emo_emb_happiness_mean,emo_emb_fear_mean,emo_emb_sadness_mean,emo_emb_arousal_mean,emo_emb_valence_mean,music_emotion_corr,title
0,1,0.255319,0.255319,0.001413,0.14163,,0.043073,0.000733,0.12879,0.082249,-0.115831,0.180361,Avengers: Endgame
1,1,0.263158,0.263158,-0.003414,0.327082,,0.125967,0.011369,0.103021,0.219728,0.000615,0.212467,Avatar: The Way of Water
2,1,0.254902,0.254902,-0.005491,0.319219,,0.177731,0.000582,0.109718,0.224711,0.026007,0.019042,Spider-Man: No Way Home
3,1,0.269231,0.269231,-0.002649,0.27943,,0.213196,0.00135,0.096297,0.237388,0.102345,-0.159983,Top Gun: Maverick
4,1,0.261905,0.261905,-0.007611,0.396182,,0.325242,0.012539,0.082294,0.372972,0.226864,-0.178338,Barbie
5,1,0.25,0.25,0.002182,0.094521,,0.035162,0.001086,0.170852,0.06116,-0.152014,0.424367,Oppenheimer
6,1,0.277778,0.277778,-0.002644,0.233632,,0.035907,0.024676,0.044519,0.189399,-0.046925,0.07993,Frozen II
7,1,0.257576,0.257576,0.003309,0.355627,,0.244318,0.000847,0.09957,0.277823,0.119234,0.124842,Joker
8,1,0.25,0.25,0.003159,0.286395,,0.142109,0.000966,0.076588,0.175195,0.044596,-0.00473,Black Panther
9,1,0.25,0.25,0.00606,0.374458,,0.192818,0.035334,0.082578,0.452143,0.037109,0.118116,Incredibles 2


In [4]:
# =============================================================
#   üéì FINE-TUNING EMOTION MODEL ‚Äî NO NUMPY VERSION
# =============================================================

import os
import pandas as pd
from PIL import Image, ImageOps
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


DATASET_DIR = "face_dataset"
SAVE_MODEL_PATH = "finetuned_trailer_emotion.pth"
BATCH_SIZE = 32
LR = 1e-4
EPOCHS = 10
DEVICE = "mps" if torch.backends.mps.is_available() else "cpu"

EMO_LABELS = [
    "neutral", "happiness", "surprise",
    "sadness", "anger", "disgust", "fear", "contempt"
]
EMO2IDX = {e: i for i, e in enumerate(EMO_LABELS)}


# -------------------------------------------------------------
# 1. MANUAL TRANSFORMS (NO NUMPY, NO TORCHVISION)
# -------------------------------------------------------------
def pil_to_tensor(img):
    """Convert PIL grayscale image to torch tensor (1,64,64) without numpy."""
    data = img.tobytes()             # raw bytes
    tensor = torch.ByteTensor(torch.ByteStorage.from_buffer(data))
    tensor = tensor.view(64, 64).float() / 255.0
    return tensor.unsqueeze(0)


def random_horizontal_flip(img, p=0.5):
    return img.transpose(Image.FLIP_LEFT_RIGHT) if random.random() < p else img


def random_rotation(img, max_deg=8):
    deg = random.uniform(-max_deg, max_deg)
    return img.rotate(deg)


def train_transform(img):
    img = ImageOps.grayscale(img)
    img = img.resize((64, 64))
    img = random_horizontal_flip(img)
    img = random_rotation(img)
    return pil_to_tensor(img)


def val_transform(img):
    img = ImageOps.grayscale(img)
    img = img.resize((64, 64))
    return pil_to_tensor(img)


# -------------------------------------------------------------
# 2. DATASET
# -------------------------------------------------------------
class EmotionDataset(Dataset):
    def __init__(self, root, transform):
        self.items = []
        self.transform = transform

        for movie in sorted(os.listdir(root)):
            csv_path = os.path.join(root, movie, "labels.csv")
            if not os.path.exists(csv_path):
                continue

            df = pd.read_csv(csv_path)
            for _, r in df.iterrows():
                imgp = os.path.join(root, movie, r["filename"])
                if os.path.exists(imgp):
                    self.items.append((imgp, EMO2IDX[r["emotion"]]))

        print("Loaded samples:", len(self.items))

    def __len__(self):
        return len(self.items)

    def __getitem__(self, idx):
        path, label = self.items[idx]
        img = Image.open(path)
        img = self.transform(img)
        return img, label


# -------------------------------------------------------------
# 3. MODEL
# -------------------------------------------------------------
class FineTunedEmotionNet(nn.Module):
    def __init__(self, num_classes=8):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1))
        )
        self.classifier = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)


# -------------------------------------------------------------
# 4. LOAD DATA
# -------------------------------------------------------------
full_ds = EmotionDataset(DATASET_DIR, transform=None)

val_size = int(0.2 * len(full_ds))
train_size = len(full_ds) - val_size

train_ds, val_ds = torch.utils.data.random_split(full_ds, [train_size, val_size])

train_ds.dataset.transform = train_transform
val_ds.dataset.transform = val_transform

train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE)


# -------------------------------------------------------------
# 5. TRAINING
# -------------------------------------------------------------
model = FineTunedEmotionNet().to(DEVICE)
criterion = nn.CrossEntropyLoss()
opt = optim.Adam(model.parameters(), lr=LR)

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for X, y in train_loader:
        X, y = X.to(DEVICE), y.to(DEVICE)
        opt.zero_grad()
        logits = model(X)
        loss = criterion(logits, y)
        loss.backward()
        opt.step()
        total_loss += loss.item()

    # validation
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X, y in val_loader:
            X, y = X.to(DEVICE), y.to(DEVICE)
            preds = model(X).argmax(1)
            correct += (preds == y).sum().item()
            total += len(y)

    print(f"Epoch {epoch+1}/{EPOCHS} | Loss={total_loss:.3f} | Val Acc={correct/total:.3f}")

torch.save(model.state_dict(), SAVE_MODEL_PATH)
print("Saved:", SAVE_MODEL_PATH)


Loaded samples: 2828
Epoch 1/10 | Loss=141.810 | Val Acc=0.869
Epoch 2/10 | Loss=106.124 | Val Acc=0.869
Epoch 3/10 | Loss=64.638 | Val Acc=0.869
Epoch 4/10 | Loss=47.545 | Val Acc=0.869
Epoch 5/10 | Loss=42.685 | Val Acc=0.869
Epoch 6/10 | Loss=40.468 | Val Acc=0.869
Epoch 7/10 | Loss=39.190 | Val Acc=0.869
Epoch 8/10 | Loss=38.297 | Val Acc=0.869
Epoch 9/10 | Loss=37.590 | Val Acc=0.869
Epoch 10/10 | Loss=37.084 | Val Acc=0.869
Saved: finetuned_trailer_emotion.pth


In [5]:
# =============================================================
#   üéû TRAILER EMOTION PIPELINE (TRANSFER LEARNING + FINE-TUNE READY)
# =============================================================

import os
import cv2
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
import onnxruntime as ort
from insightface.app import FaceAnalysis

# PyTorch (fine-tuned model support)
import torch
import torch.nn as nn


# =============================================================
# 0. CONFIG
# =============================================================

TRAILER_DIR = "/Users/shrey24/Desktop/trailers"
EMOTION_MODEL_PATH = "emotion-ferplus-8.onnx"

USE_FINETUNED_MODEL = True             # ‚Üê flip to True after training
SAVE_FACES = False                  # ‚Üê flip to True to build training dataset
DATASET_SAVE_DIR = "face_dataset"
os.makedirs(DATASET_SAVE_DIR, exist_ok=True)


# =============================================================
# 1. LOAD MODELS (SCRFD + FER+ + Fine-Tuned Model Option)
# =============================================================

# ---------- SCRFD FACE DETECTOR ----------
face_app = FaceAnalysis(
    name="scrfd_2.5g",
    providers=["CPUExecutionProvider"]
)
face_app.prepare(ctx_id=0, det_size=(640, 640))


# ---------- ONNX FER+ MODEL ----------
emotion_sess = ort.InferenceSession(
    EMOTION_MODEL_PATH,
    providers=["CPUExecutionProvider"]
)

EMO_LABELS = [
    "neutral", "happiness", "surprise",
    "sadness", "anger", "disgust", "fear", "contempt"
]


# ---------- FINETUNED PYTORCH MODEL ----------
class FineTunedEmotionNet(nn.Module):
    def __init__(self, num_classes=8):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1,1))
        )
        self.classifier = nn.Linear(64, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.classifier(x)


if USE_FINETUNED_MODEL:
    FINETUNED_PATH = "finetuned_trailer_emotion.pth"
    ft_model = FineTunedEmotionNet()
    ft_model.load_state_dict(torch.load(FINETUNED_PATH, map_location="cpu"))
    ft_model.eval()
else:
    ft_model = None


# =============================================================
# 2. HELPERS
# =============================================================

def preprocess_face(face_crop):
    if face_crop is None or face_crop.size == 0:
        return None

    gray = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY)
    resized = cv2.resize(gray, (64, 64))

    # FER+ expects raw 0‚Äì255 float input, NOT normalized to 0‚Äì1
    tensor = resized.astype("float32")
    
    # reshape to (1, 1, 64, 64)
    tensor = tensor.reshape(1, 1, 64, 64)

    return tensor



# ---------- FACE SAVER FOR FINE-TUNING ----------
def save_face_for_training(face_crop, movie_title, timestamp):
    # skip invalid or empty crops
    if face_crop is None or face_crop.size == 0:
        return
    
    movie_dir = os.path.join(DATASET_SAVE_DIR, movie_title)
    os.makedirs(movie_dir, exist_ok=True)
    
    filename = os.path.join(movie_dir, f"{timestamp:.2f}.jpg")
    cv2.imwrite(filename, face_crop)


def auto_label_face(face_crop, movie_title, timestamp):
    """Use FER+ to auto-label and write to labels.csv."""
    emo_dict = predict_emotion(face_crop)  # FER+ runs here

    if emo_dict is None:
        return

    # pick max prob emotion
    emo = max(emo_dict, key=emo_dict.get)

    # Ensure movie folder exists
    movie_dir = os.path.join(DATASET_SAVE_DIR, movie_title)
    os.makedirs(movie_dir, exist_ok=True)

    # Append to CSV
    label_path = os.path.join(movie_dir, "labels.csv")
    file_exists = os.path.exists(label_path)

    with open(label_path, "a") as f:
        if not file_exists:
            f.write("filename,emotion\n")
        f.write(f"{timestamp:.2f}.jpg,{emo}\n")



# =============================================================
# 3. UNIFIED EMOTION PREDICTOR (FER+ or FINETUNED MODEL)
# =============================================================
def predict_emotion(face_crop):
    tensor = preprocess_face(face_crop)
    if tensor is None:
        return None
    
    # --------------------------------------------------------
    # CASE 1: USE FINETUNED PYTORCH MODEL (NO NUMPY ANYWHERE)
    # --------------------------------------------------------
    if USE_FINETUNED_MODEL and ft_model is not None:

        # manual grayscale ‚Üí float32 array in 0‚Äì1 range (MATCH TRAINING)
        gray = cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(gray, (64, 64))

        # NORMALIZE HERE ‚úî
        arr = resized.astype("float32") / 255.0

        # convert raw bytes ‚Üí float storage
        arr_flat = arr.reshape(-1)
        raw = arr_flat.tobytes()

        storage = torch.UntypedStorage.from_buffer(
            raw,
            dtype=torch.float32,
            byte_order="little"
        )

        x = torch.FloatTensor(storage).view(1, 1, 64, 64)

        with torch.no_grad():
            logits = ft_model(x)
            probs = torch.softmax(logits, dim=1)[0]

        return dict(zip(EMO_LABELS, probs.tolist()))

    # --------------------------------------------------------
    # CASE 2: BASELINE FER+ (ONNX)
    # --------------------------------------------------------
    input_name = emotion_sess.get_inputs()[0].name
    ort_inputs = {input_name: tensor}
    raw = emotion_sess.run(None, ort_inputs)[0][0]

    # softmax manually without numpy
    exp_vals = [torch.exp(torch.tensor(v)).item() for v in raw]
    denom = sum(exp_vals)
    probs_list = [v / denom for v in exp_vals]

    return dict(zip(EMO_LABELS, probs_list))






# =============================================================
# 4. EMOTION MAPPINGS (Arousal + Valence)
# =============================================================
AROUSAL_EMOTIONS = ["happiness", "surprise", "fear", "anger"]
NEGATIVE_EMOTIONS = ["fear", "anger", "sadness"]
POSITIVE_EMOTIONS = ["happiness"]


# =============================================================
# 5. FRAME ANALYSIS (SCRFD ‚Üí Face ‚Üí Emotion)
# =============================================================
def analyze_trailer_frames(video_path, frame_sample_rate=1.5):

    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 24
    frame_interval = int(frame_sample_rate * fps)

    rows = []
    frame_idx = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    movie_title = os.path.basename(video_path).replace(".mp4","")

    pbar = tqdm(total=total_frames, desc="Frames", leave=False)

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        if frame_idx % frame_interval == 0:

            faces = face_app.get(frame)

            if len(faces) > 0:
                H, W = frame.shape[:2]
                best_face = None
                best_dist = 1e9

                # pick most central face
                for f in faces:
                    x1, y1, x2, y2 = map(int, f.bbox)
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    dist = (cx - W/2)**2 + (cy - H/2)**2
                    if dist < best_dist:
                        best_dist = dist
                        best_face = (x1, y1, x2, y2)

                if best_face:
                    x1, y1, x2, y2 = best_face
                    face_crop = frame[y1:y2, x1:x2]

                    # üö® Skip tiny or invalid face crops
                    if face_crop is None or face_crop.size == 0:
                        continue
                    if face_crop.shape[0] < 20 or face_crop.shape[1] < 20:
                        continue

                    # SAVE FOR FINE-TUNING
                    if SAVE_FACES:
                        timestamp = frame_idx / fps
                        save_face_for_training(face_crop, movie_title, timestamp)
                        auto_label_face(face_crop, movie_title, timestamp)

                    emo = predict_emotion(face_crop)
                    if emo:
                        rows.append(emo)

        frame_idx += 1
        pbar.update(1)

    pbar.close()
    cap.release()

    if not rows:
        return None

    df = pd.DataFrame(rows)
    df["time_sec"] = np.arange(len(df)) * frame_sample_rate

    df[EMO_LABELS] = df[EMO_LABELS].div(df[EMO_LABELS].sum(axis=1), axis=0)

    df["arousal"] = df[AROUSAL_EMOTIONS].sum(axis=1)
    df["valence"] = df[POSITIVE_EMOTIONS].sum(axis=1) - df[NEGATIVE_EMOTIONS].sum(axis=1)

    return df



# =============================================================
# 6. AUDIO RMS
# =============================================================
def extract_audio_rms(video_path, sr_target=22050, hop=1024):
    y, sr = librosa.load(video_path, sr=sr_target, mono=True)
    rms = librosa.feature.rms(y=y, hop_length=hop)[0]
    t = librosa.frames_to_time(np.arange(len(rms)), sr=sr, hop_length=hop)
    return t, rms


# =============================================================
# 7. GENRE MISMATCH
# =============================================================
GENRE_PROTOTYPES = {
    "Action": {"arousal":0.8,"valence":0.1},
    "Adventure": {"arousal":0.7,"valence":0.2},
    "Animation": {"arousal":0.6,"valence":0.4},
    "Comedy": {"arousal":0.5,"valence":0.5},
    "Drama": {"arousal":0.4,"valence":0.1},
    "Horror": {"arousal":0.9,"valence":-0.3},
    "Thriller": {"arousal":0.85,"valence":-0.1},
    "Family": {"arousal":0.5,"valence":0.4},
    "Romance": {"arousal":0.4,"valence":0.3},
    "Science Fiction": {"arousal":0.7,"valence":0.1},
}

def compute_genre_mismatch(df, genre_label):
    if df is None:
        return np.nan

    if genre_label not in GENRE_PROTOTYPES:
        return np.nan

    mean_ar = df["arousal"].mean()
    mean_val = df["valence"].mean()
    proto = GENRE_PROTOTYPES[genre_label]

    return np.sqrt(
        (mean_ar - proto["arousal"])**2 +
        (mean_val - proto["valence"])**2
    )


# =============================================================
# 8. BEST TRAILER MATCH
# =============================================================
files = [f for f in os.listdir(TRAILER_DIR) if f.lower().endswith(".mp4")]
clean_map = {f: f.replace(".mp4","").lower().strip() for f in files}
from difflib import get_close_matches

def find_best_trailer(title):
    title_clean = title.lower().strip()
    match = get_close_matches(title_clean, clean_map.values(), n=1, cutoff=0.6)
    if not match:
        return None
    for fname, clean in clean_map.items():
        if clean == match[0]:
            return os.path.join(TRAILER_DIR, fname)


# =============================================================
# 9. MAIN FEATURE EXTRACTOR
# =============================================================
def extract_trailer_emotion_features(video_path, genre_label=None):

    df = analyze_trailer_frames(video_path)
    if df is None:
        return {"emotion_valid": 0}

    t = df["time_sec"].values
    ar = df["arousal"].values
    val = df["valence"].values

    feats = {
        "emotion_valid": 1,
        "excitement_peak_density": (ar > np.quantile(ar, 0.75)).mean(),
        "fear_peak_density": (df["fear"] > np.quantile(df["fear"], 0.75)).mean(),
        "emotional_arc_slope": np.polyfit(t, val, 1)[0] if len(t) > 1 else np.nan,
        "arousal_volatility": np.std(ar),
        "genre_emotion_mismatch": compute_genre_mismatch(df, genre_label),
    }

    for emo in ["happiness", "fear", "sadness", "arousal", "valence"]:
        feats[f"emo_emb_{emo}_mean"] = df[emo].mean()

    ta, rms = extract_audio_rms(video_path)
    mask = ta <= t[-1]

    if mask.sum() > 5:
        a_interp = np.interp(ta[mask], t, ar)
        feats["music_emotion_corr"] = np.corrcoef(a_interp, rms[mask])[0, 1]
    else:
        feats["music_emotion_corr"] = np.nan

    return feats


Applied providers: ['CPUExecutionProvider'], with options: {'CPUExecutionProvider': {}}
find model: /Users/shrey24/.insightface/models/scrfd_2.5g/scrfd_2.5g.onnx detection [1, 3, '?', '?'] 127.5 128.0
set det-size: (640, 640)


In [6]:
df_movies["trailer_path"] = df_movies["title"].apply(find_best_trailer)

emotion_rows = []
for _, r in df_movies.iterrows():
    print("üé¨", r["title"])
    feats = extract_trailer_emotion_features(r["trailer_path"], r.get("genre"))
    feats["title"] = r["title"]
    emotion_rows.append(feats)

df_emotion = pd.DataFrame(emotion_rows)
display(df_emotion)


üé¨ Avengers: Endgame


                                                            

üé¨ Avatar: The Way of Water


                                                             

üé¨ Spider-Man: No Way Home


                                                             

üé¨ Top Gun: Maverick


                                                            

üé¨ Barbie


                                                             

üé¨ Oppenheimer


                                                             

üé¨ Frozen II


                                                             

üé¨ Joker


                                                            

üé¨ Black Panther


                                                             

üé¨ Incredibles 2


                                                            

üé¨ Dune


                                                             

üé¨ The Batman


                                                             

üé¨ Interstellar


                                                             

üé¨ Inception


                                                            

üé¨ John Wick


                                                             

üé¨ The Hunger Games


                                                             

üé¨ Get Out


                                                             

üé¨ La La Land


                                                            

üé¨ Mission Impossible: Fallout


                                                            

üé¨ Jumanji: Welcome to the Jungle


                                                             

üé¨ The Flash


                                                               

üé¨ Morbius


                                                             

üé¨ Cats


                                                             

üé¨ Green Lantern


                                                             

üé¨ The Mummy


                                                             

üé¨ John Carter


                                                            

üé¨ Dark Phoenix


                                                            

üé¨ Pan


                                                            

üé¨ Jupiter Ascending


                                                             

üé¨ King Arthur: Legend of the Sword


                                                             

Unnamed: 0,emotion_valid,excitement_peak_density,fear_peak_density,emotional_arc_slope,arousal_volatility,genre_emotion_mismatch,emo_emb_happiness_mean,emo_emb_fear_mean,emo_emb_sadness_mean,emo_emb_arousal_mean,emo_emb_valence_mean,music_emotion_corr,title
0,1,0.255319,0.255319,-0.000119,0.003446,,0.097735,0.002731,0.006239,0.125078,0.075933,-0.229651,Avengers: Endgame
1,1,0.263158,0.263158,-3.6e-05,0.004198,,0.096971,0.002804,0.006392,0.124814,0.074706,-0.018933,Avatar: The Way of Water
2,1,0.254902,0.254902,5e-06,0.005088,,0.09768,0.002832,0.006263,0.12468,0.075973,-0.202825,Spider-Man: No Way Home
3,1,0.269231,0.269231,0.000176,0.003318,,0.097913,0.002268,0.005429,0.122639,0.078563,-0.197172,Top Gun: Maverick
4,1,0.261905,0.261905,0.00022,0.002074,,0.108425,0.001324,0.003508,0.126107,0.095225,-0.0129,Barbie
5,1,0.25,0.25,3.8e-05,0.004212,,0.096074,0.002957,0.006657,0.124917,0.072948,-0.023883,Oppenheimer
6,1,0.277778,0.277778,-0.000165,0.000849,,0.102437,0.002181,0.005289,0.126744,0.08351,0.132152,Frozen II
7,1,0.257576,0.257576,5.3e-05,0.004768,,0.097278,0.002891,0.006494,0.125315,0.074766,-0.130781,Joker
8,1,0.25,0.25,7.1e-05,0.001734,,0.092561,0.003991,0.008502,0.127524,0.063789,0.045129,Black Panther
9,1,0.25,0.25,0.000221,0.004183,,0.10505,0.001489,0.003694,0.122916,0.091444,-0.091513,Incredibles 2


In [7]:
df_emotion.to_parquet("df_emotion.parquet", index=False)
print("Saved as df_emotion.parquet")

Saved as df_emotion.parquet
