In [5]:
# multimodal_ensemble_train.py
"""
Multimodal training + optional stacking meta-learner.

Usage:
 - Edit paths under USER CONFIG.
 - Run: python multimodal_ensemble_train.py
Notes:
 - If ALIGN_CSV is set to a CSV that contains aligned samples across modalities with
   columns ['label','text','image_path','audio_feat_path'] then a stacking meta-learner
   (logistic regression) will be trained after the base models using validation predictions.
 - If ALIGN_CSV is None (default) the script will only train/evaluate each base model.
"""

import os
import time
import copy
import random
import pickle
from typing import List, Optional, Tuple, Dict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image

from transformers import BertTokenizer, BertModel, get_linear_schedule_with_warmup
from torch.optim import AdamW

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, label_binarize
from sklearn.metrics import confusion_matrix, roc_curve, auc, precision_score, recall_score, f1_score
from tqdm import tqdm

# ------------------------
# USER CONFIG (edit paths & hyperparams)
# ------------------------
# Text CSVs used by text model
CSV_PATHS = [
    r"K:\Code\Project\Research Paper\Emotion Detection\archive_text\tweet_emotions.csv",
    r"K:\Code\Project\Research Paper\Emotion Detection\archive_text\dataset2.csv",
    r"K:\Code\Project\Research Paper\Emotion Detection\archive_text\dataset3_excel.csv"
]
# Face data roots used by face model
ROOT_DATA_DIRS = [ r"K:\Code\Project\Research Paper\Emotion Detection\facial_data\archive" ]
# Speech cached features path used by speech model (cached_features.pkl expected)
SPEECH_FEATURES_PKL = "cached_features.pkl"  # your file produced during speech training

# Optional: an alignment CSV with rows that have label + modalities (for stacking)
# required columns if using stacking: label, content, image_path, audio_feat_path
ALIGN_CSV = r"K:\Code\Project\Research Paper\Emotion Detection\meld dataset\MELD.Raw\self\fusion_dataset.csv"  # Example: r"K:\...\fusion_dataset_aligned.csv"  (set to None to skip stacking)

# Models / labels save paths
TEXT_MODEL_NAME = "bert-base-multilingual-cased"
TEXT_BEST_MODEL = r"K:\Code\Project\Research Paper\Emotion Detection\Code\bert_emotion_text_final.pth"
TEXT_LABELS_NPY = r"K:\Code\Project\Research Paper\Emotion Detection\Code\label_classes.npy"

FACE_MODEL_PATH = r"K:\Code\Project\Research Paper\Emotion Detection\inceptionresnetv3_face_emotion.pth"
FACE_LABELS_NPY = r"K:\Code\Project\Research Paper\Emotion Detection\emotions_face.npy"

SPEECH_MODEL_PATH = "./best_transformer_speech_model.pth"

# Hyperparams
SEED = 42
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
BATCH_SIZE = 64
NUM_WORKERS = 0
PIN_MEMORY = True

# Text hyperparams
MAX_LEN = 64
TEXT_EPOCHS = 2
TEXT_LR = 2e-5
TEXT_BATCH = 64

# Face hyperparams
IMG_SIZE = 299
FACE_EPOCHS = 2
FACE_LR = 1e-4
FACE_BATCH = 64

# Speech hyperparams
SPEECH_EPOCHS = 2
SPEECH_LR = 1e-4
SPEECH_BATCH = 64

# Ensemble stacking config
STACKER_SOLVER = 'liblinear'  # logistic regression solver (scikit-learn)
STACKER_C = 1.0

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)
torch.backends.cudnn.benchmark = True

print(f"[INFO] Device: {DEVICE}")

# ===========================================
# ================ TEXT PART ================
# ===========================================
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained(TEXT_MODEL_NAME, use_fast=True)

class SimpleTextDataset(Dataset):
    def __init__(self, texts: List[str], labels: List[int], tokenizer, max_len: int):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self): return len(self.labels)

    def __getitem__(self, idx):
        txt = self.texts[idx]
        enc = self.tokenizer(txt, padding="max_length", truncation=True, max_length=self.max_len, return_tensors="pt")
        return {
            "input_ids": enc["input_ids"].squeeze(0),
            "attention_mask": enc["attention_mask"].squeeze(0),
            "labels": torch.tensor(self.labels[idx], dtype=torch.long)
        }

class BERTClassifier(nn.Module):
    def __init__(self, model_name: str, num_classes: int, dropout: float = 0.2):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        self.classifier = nn.Linear(self.bert.config.hidden_size, num_classes)

    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        pooled = out.pooler_output
        x = self.dropout(pooled)
        return self.classifier(x)

def load_csvs(paths: List[str]) -> pd.DataFrame:
    dfs = []
    for p in paths:
        if os.path.exists(p):
            dfs.append(pd.read_csv(p))
        else:
            print(f"[WARN] CSV missing, skipping: {p}")

    if not dfs:
        raise RuntimeError("No CSV files found. Update CSV_PATHS.")

    df = pd.concat(dfs, ignore_index=True)

    assert "content" in df.columns and "label" in df.columns, \
        "CSV must have 'text' and 'label'"

    df = df.dropna(subset=["content", "label"])
    df["content"] = df["content"].astype(str).str.strip()
    df = df[df["content"] != ""]

    #  Drop label "pray"
    df = df[df["label"] != "pray"]

    return df


def train_text_model():
    df = load_csvs(CSV_PATHS)
    le = LabelEncoder()
    df["label"] = le.fit_transform(df["label"].astype(str))
    texts = df["content"].tolist()
    labels = df["label"].tolist()
    class_names = list(le.classes_)
    print(f"[TEXT] Samples: {len(texts)} | Classes: {class_names}")

    np.save(TEXT_LABELS_NPY, class_names)

    idxs = list(range(len(texts)))
    train_idx, val_idx = train_test_split(idxs, test_size=0.2, random_state=SEED, stratify=labels)

    train_texts = [texts[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    val_texts = [texts[i] for i in val_idx]
    val_labels = [labels[i] for i in val_idx]

    train_ds = SimpleTextDataset(train_texts, train_labels, tokenizer, MAX_LEN)
    val_ds = SimpleTextDataset(val_texts, val_labels, tokenizer, MAX_LEN)

    train_loader = DataLoader(train_ds, batch_size=TEXT_BATCH, shuffle=True, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)
    val_loader = DataLoader(val_ds, batch_size=TEXT_BATCH, shuffle=False, num_workers=NUM_WORKERS, pin_memory=PIN_MEMORY)

    model = BERTClassifier(TEXT_MODEL_NAME, num_classes=len(class_names)).to(DEVICE)
    optimizer = AdamW(model.parameters(), lr=TEXT_LR, weight_decay=1e-2)
    total_steps = len(train_loader) * TEXT_EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer, int(0.1 * total_steps), total_steps)
    criterion = nn.CrossEntropyLoss()

    best_val = -1.0
    train_accs, val_accs = [], []
    all_val_probs, all_val_targets = [], []

    for epoch in range(1, TEXT_EPOCHS+1):
        model.train()
        running_correct = 0; running_total = 0
        pbar = tqdm(train_loader, desc=f"[TEXT] Epoch {epoch}/{TEXT_EPOCHS}", leave=False)
        for batch in pbar:
            input_ids = batch["input_ids"].to(DEVICE)
            attention_mask = batch["attention_mask"].to(DEVICE)
            labels = batch["labels"].to(DEVICE)

            optimizer.zero_grad()
            out = model(input_ids, attention_mask)
            loss = criterion(out, labels)
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            if scheduler: scheduler.step()

            preds = out.argmax(dim=1)
            running_total += labels.size(0)
            running_correct += (preds == labels).sum().item()
            pbar.set_postfix({"TrainAcc": f"{100.0*running_correct/running_total:.2f}%"})

        epoch_train_acc = 100.0 * running_correct / running_total
        train_accs.append(epoch_train_acc)

        # validation
        model.eval()
        v_total = 0; v_correct = 0
        val_probs, val_targets = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch["input_ids"].to(DEVICE)
                attention_mask = batch["attention_mask"].to(DEVICE)
                labels = batch["labels"].to(DEVICE)

                out = model(input_ids, attention_mask)
                probs = F.softmax(out, dim=1)
                preds = probs.argmax(dim=1)
                v_total += labels.size(0)
                v_correct += (preds == labels).sum().item()
                val_probs.append(probs.cpu().numpy()); val_targets.append(labels.cpu().numpy())

        epoch_val_acc = 100.0 * v_correct / v_total
        val_accs.append(epoch_val_acc)
        print(f"[TEXT] Epoch {epoch}/{TEXT_EPOCHS} → Train Acc: {epoch_train_acc:.2f}% | Val Acc: {epoch_val_acc:.2f}%")

        if epoch_val_acc > best_val:
            best_val = epoch_val_acc
            torch.save({'model_state': model.state_dict(), 'class_names': class_names}, TEXT_BEST_MODEL)
            print(f"[TEXT] Best model saved (Val Acc {best_val:.2f}%) -> {TEXT_BEST_MODEL}")

        all_val_probs.append(np.vstack(val_probs))
        all_val_targets.append(np.concatenate(val_targets))

    # prepare return
    y_score = np.vstack(all_val_probs) if all_val_probs else np.array([])
    y_true = np.concatenate(all_val_targets) if all_val_targets else np.array([])
    # store training curves
    pickle.dump({'train_acc': train_accs, 'val_acc': val_accs}, open("text_training_curves.pkl","wb"))
    # final metrics
    return {'model': model, 'best_val': best_val, 'y_score': y_score, 'y_true': y_true, 'classes': class_names}

# ===========================================
# ================ FACE PART ================
# ===========================================
class FaceEmotionDataset(Dataset):
    def __init__(self, image_paths, labels, transform=None):
        self.image_paths = image_paths
        self.labels = labels
        self.transform = transform

    def __len__(self): return len(self.image_paths)
    def __getitem__(self, idx):
        path = self.image_paths[idx]
        label = self.labels[idx]
        img = Image.open(path).convert("RGB")
        if self.transform:
            img_t = self.transform(img)
        else:
            img_t = transforms.ToTensor()(img)
        return img_t, label

class InceptionResNetV3_Emotion(nn.Module):
    def __init__(self, num_classes=8, pretrained=True):
        super().__init__()
        base = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1 if pretrained else None, aux_logits=True)
        in_features = base.fc.in_features
        base.fc = nn.Sequential(nn.Linear(in_features, 512), nn.ReLU(inplace=True), nn.Dropout(0.4), nn.Linear(512, num_classes))
        if hasattr(base, 'AuxLogits'):
            base.AuxLogits.fc = nn.Linear(base.AuxLogits.fc.in_features, num_classes)
        self.base = base

    def forward(self, x):
        out = self.base(x)
        if isinstance(out, tuple):
            return out[0]
        return out

def load_face_paths(root_dirs: List[str]):
    image_paths, labels = [], []
    emotions = set()
    for root in root_dirs:
        for subdir, _, files in os.walk(root):
            imgs = [f for f in files if f.lower().endswith(('.jpg','.jpeg','.png'))]
            if not imgs: continue
            emotion = os.path.basename(subdir).lower()
            emotions.add(emotion)
            for f in imgs:
                image_paths.append(os.path.join(subdir, f))
                labels.append(emotion)
    emotions = sorted(list(emotions))
    idx_map = {e:i for i,e in enumerate(emotions)}
    numeric_labels = [ idx_map[l] for l in labels ]
    return image_paths, numeric_labels, emotions

def train_face_model():
    image_paths, numeric_labels, emotions = load_face_paths(ROOT_DATA_DIRS)
    print(f"[FACE] Found {len(image_paths)} images across {len(emotions)} emotions: {emotions}")
    np.save(FACE_LABELS_NPY, emotions)

    X_train, X_val, y_train, y_val = train_test_split(image_paths, numeric_labels, test_size=0.3, random_state=SEED, stratify=numeric_labels)

    preprocess = transforms.Compose([transforms.Resize((IMG_SIZE, IMG_SIZE)), transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])])
    train_ds = FaceEmotionDataset(X_train, y_train, transform=preprocess)
    val_ds = FaceEmotionDataset(X_val, y_val, transform=preprocess)
    train_loader = DataLoader(train_ds, batch_size=FACE_BATCH, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_ds, batch_size=FACE_BATCH, shuffle=False, num_workers=NUM_WORKERS)

    model = InceptionResNetV3_Emotion(num_classes=len(emotions)).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=FACE_LR)
    criterion = nn.CrossEntropyLoss()

    best_val = -1.0
    train_acc_hist, val_acc_hist = [], []
    all_val_probs, all_val_targets = [], []

    for epoch in range(1, FACE_EPOCHS+1):
        model.train()
        running_correct, running_total = 0, 0
        pbar = tqdm(train_loader, desc=f"[FACE] Epoch {epoch}/{FACE_EPOCHS}", leave=False)
        for imgs, labels in pbar:
            imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            preds = outputs.argmax(dim=1)
            running_total += labels.size(0)
            running_correct += (preds == labels).sum().item()
            pbar.set_postfix({"TrainAcc": f"{100.0*running_correct/running_total:.2f}%"})

        epoch_train_acc = 100.0 * running_correct / running_total
        train_acc_hist.append(epoch_train_acc)

        # val
        model.eval()
        val_correct, val_total = 0, 0
        val_probs, val_targets = [], []
        with torch.no_grad():
            for imgs, labels in val_loader:
                imgs, labels = imgs.to(DEVICE), labels.to(DEVICE)
                outputs = model(imgs)
                probs = F.softmax(outputs, dim=1)
                preds = probs.argmax(dim=1)
                val_total += labels.size(0)
                val_correct += (preds == labels).sum().item()
                val_probs.append(probs.cpu().numpy()); val_targets.append(labels.cpu().numpy())

        epoch_val_acc = 100.0 * val_correct / val_total
        val_acc_hist.append(epoch_val_acc)
        print(f"[FACE] Epoch {epoch}/{FACE_EPOCHS} → Train Acc: {epoch_train_acc:.2f}% | Val Acc: {epoch_val_acc:.2f}%")

        if epoch_val_acc > best_val:
            best_val = epoch_val_acc
            torch.save({'model_state': model.state_dict(), 'classes': emotions}, FACE_MODEL_PATH)
            print(f"[FACE] Best model saved (Val Acc {best_val:.2f}%) -> {FACE_MODEL_PATH}")

        all_val_probs.append(np.vstack(val_probs))
        all_val_targets.append(np.concatenate(val_targets))

    y_score = np.vstack(all_val_probs) if all_val_probs else np.array([])
    y_true = np.concatenate(all_val_targets) if all_val_targets else np.array([])
    pickle.dump({'train_acc': train_acc_hist, 'val_acc': val_acc_hist}, open("face_training_curves.pkl","wb"))
    return {'model': model, 'best_val': best_val, 'y_score': y_score, 'y_true': y_true, 'classes': emotions}

# ===========================================
# ================ SPEECH PART ==============
# ===========================================
class TransformerLSTM(nn.Module):
    def __init__(self, num_emotions, d_model=128, nhead=4, num_layers=2):
        super().__init__()
        self.feature_proj = nn.Linear(40, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=256,
                                                   dropout=0.3, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.lstm = nn.LSTM(d_model, 128, batch_first=True)
        self.fc = nn.Linear(128, num_emotions)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        # expects [B, timesteps, feat_dim] or [B,1,feat_dim,timesteps]
        if x.dim() == 4:
            x = x.squeeze(1).permute(0,2,1)
        x = self.feature_proj(x)
        x = self.transformer(x)
        x, _ = self.lstm(x)
        x = self.dropout(x[:, -1, :])
        return self.fc(x)

def load_speech_cached(pkl_path: str):
    if not os.path.exists(pkl_path):
        raise FileNotFoundError("Speech cached features (.pkl) not found: " + pkl_path)
    with open(pkl_path, "rb") as f:
        X, y, meta = pickle.load(f)  # X: list/array of feature arrays, y: labels numeric, meta optional
    return X, y, meta

class SpeechDataset(Dataset):
    def __init__(self, X_list, y_list):
        self.X = X_list
        self.y = y_list
    def __len__(self): return len(self.X)
    def __getitem__(self, idx):
        arr = np.array(self.X[idx], dtype=np.float32)
        if arr.ndim == 1:
            # try folding into (timesteps, feat_dim) if multiple of 40
            if arr.size % 40 == 0:
                arr = arr.reshape(-1, 40)
            else:
                arr = arr.reshape(1, -1)
        # expected shape -> (timesteps, feat_dim)
        return torch.tensor(arr, dtype=torch.float32), torch.tensor(self.y[idx], dtype=torch.long)

def train_speech_model():
    X, y, meta = load_speech_cached(SPEECH_FEATURES_PKL)
    print(f"[SPEECH] Loaded {len(X)} samples.")
    # split
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.15, random_state=SEED, stratify=y)
    train_ds = SpeechDataset(X_train, y_train)
    val_ds = SpeechDataset(X_val, y_val)
    train_loader = DataLoader(train_ds, batch_size=SPEECH_BATCH, shuffle=True, collate_fn=None)
    val_loader = DataLoader(val_ds, batch_size=SPEECH_BATCH, shuffle=False, collate_fn=None)

    num_emotions = len(set(y))
    model = TransformerLSTM(num_emotions=num_emotions).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=SPEECH_LR)
    criterion = nn.CrossEntropyLoss()

    best_val = -1.0
    train_acc_hist, val_acc_hist = [], []
    all_val_probs, all_val_targets = [], []

    for epoch in range(1, SPEECH_EPOCHS+1):
        model.train()
        running_correct, running_total = 0, 0
        pbar = tqdm(train_loader, desc=f"[SPEECH] Epoch {epoch}/{SPEECH_EPOCHS}", leave=False)
        for Xb, yb in pbar:
            Xb, yb = Xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            outputs = model(Xb)
            loss = criterion(outputs, yb)
            loss.backward(); optimizer.step()
            preds = outputs.argmax(dim=1)
            running_total += yb.size(0)
            running_correct += (preds == yb).sum().item()
            pbar.set_postfix({"TrainAcc": f"{100.0*running_correct/running_total:.2f}%"})

        epoch_train_acc = 100.0 * running_correct / running_total
        train_acc_hist.append(epoch_train_acc)

        model.eval()
        val_correct, val_total = 0, 0
        val_probs, val_targets = [], []
        with torch.no_grad():
            for Xb, yb in val_loader:
                Xb, yb = Xb.to(DEVICE), yb.to(DEVICE)
                outputs = model(Xb)
                probs = F.softmax(outputs, dim=1)
                preds = probs.argmax(dim=1)
                val_total += yb.size(0)
                val_correct += (preds == yb).sum().item()
                val_probs.append(probs.cpu().numpy()); val_targets.append(yb.cpu().numpy())

        epoch_val_acc = 100.0 * val_correct / val_total
        val_acc_hist.append(epoch_val_acc)
        print(f"[SPEECH] Epoch {epoch}/{SPEECH_EPOCHS} → Train Acc: {epoch_train_acc:.2f}% | Val Acc: {epoch_val_acc:.2f}%")

        if epoch_val_acc > best_val:
            best_val = epoch_val_acc
            torch.save({'model': model.state_dict(), 'emotions': list(sorted(set(y)))}, SPEECH_MODEL_PATH)
            print(f"[SPEECH] Best model saved (Val Acc {best_val:.2f}%) -> {SPEECH_MODEL_PATH}")

        all_val_probs.append(np.vstack(val_probs))
        all_val_targets.append(np.concatenate(val_targets))

    y_score = np.vstack(all_val_probs) if all_val_probs else np.array([])
    y_true = np.concatenate(all_val_targets) if all_val_targets else np.array([])
    pickle.dump({'train_acc': train_acc_hist, 'val_acc': val_acc_hist}, open("speech_training_curves.pkl","wb"))
    return {'model': model, 'best_val': best_val, 'y_score': y_score, 'y_true': y_true, 'classes': sorted(list(set(y)))}

# ===========================================
# ================ STACKER (optional) ======
# ===========================================
def train_stacker_from_alignment(text_best_path: str, face_best_path: str, speech_best_path: str, align_csv: str):
    """
    align_csv should contain: label, content, image_path, audio_feat_path
    For each row:
     - load text -> get probs via text model
     - load image -> get probs via face model
     - load audio_feat_path (.npy) -> get probs via speech model
    Then train logistic regression on concatenated probs (stacking).
    """
    if align_csv is None:
        print("[STACKER] ALIGN_CSV is None -> skipping stacking.")
        return None

    from sklearn.linear_model import LogisticRegression
    df = pd.read_csv(align_csv)
    required = ['label','text','image_path','audio_feat_path']
    for c in required:
        if c not in df.columns:
            raise RuntimeError(f"[STACKER] Alignment CSV must contain column: {c}")

    # load models + label spaces
    text_ck = torch.load(text_best_path, map_location=DEVICE)
    text_classes = text_ck.get('class_names') or np.load(TEXT_LABELS_NPY, allow_pickle=True).tolist()
    text_model = BERTClassifier(TEXT_MODEL_NAME, num_classes=len(text_classes)).to(DEVICE)
    text_model.load_state_dict(text_ck['model_state']); text_model.eval()
    text_token = BertTokenizer.from_pretrained(TEXT_MODEL_NAME, use_fast=True)

    face_ck = torch.load(face_best_path, map_location=DEVICE)
    face_classes = face_ck.get('classes') or np.load(FACE_LABELS_NPY, allow_pickle=True).tolist()
    face_model = InceptionResNetV3_Emotion(num_classes=len(face_classes)).to(DEVICE)
    face_model.load_state_dict(face_ck['model_state']); face_model.eval()
    preprocess = transforms.Compose([transforms.Resize((IMG_SIZE,IMG_SIZE)), transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])])

    speech_ck = torch.load(speech_best_path, map_location=DEVICE)
    speech_classes = speech_ck.get('emotions')
    speech_model = TransformerLSTM(num_emotions=len(speech_classes)).to(DEVICE)
    speech_model.load_state_dict(speech_ck['model']); speech_model.eval()

    # union classes mapping (we'll require same label names across modalities in alignment CSV)
    labels = sorted(df['label'].unique().tolist())
    label_to_idx = {l:i for i,l in enumerate(labels)}
    y = df['label'].map(label_to_idx).values

    X_stack = []
    for idx, row in tqdm(df.iterrows(), total=len(df), desc="[STACKER] build features"):
        # text probs
        text_input = text_token([row['text']], padding=True, truncation=True, max_length=MAX_LEN, return_tensors='pt')
        t_in = text_input['input_ids'].to(DEVICE); t_att = text_input['attention_mask'].to(DEVICE)
        with torch.no_grad():
            t_out = text_model(t_in, t_att)
            t_prob = F.softmax(t_out, dim=1).cpu().numpy().squeeze(0)

        # image probs
        try:
            img = Image.open(row['image_path']).convert('RGB')
            img_t = preprocess(img).unsqueeze(0).to(DEVICE)
            with torch.no_grad():
                f_out = face_model(img_t)
                f_prob = F.softmax(f_out, dim=1).cpu().numpy().squeeze(0)
        except Exception:
            f_prob = np.zeros((len(face_classes),), dtype=float)

        # audio probs
        try:
            audio_np = np.load(row['audio_feat_path'], allow_pickle=True)
            arr = np.array(audio_np, dtype=np.float32)
            if arr.ndim == 1 and arr.size % 40 == 0:
                arr = arr.reshape(-1, 40)
            X_t = torch.tensor(arr, dtype=torch.float32).unsqueeze(0).to(DEVICE)
            with torch.no_grad():
                s_out = speech_model(X_t)
                s_prob = F.softmax(s_out, dim=1).cpu().numpy().squeeze(0)
        except Exception:
            s_prob = np.zeros((len(speech_classes),), dtype=float)

        # For stacking we will concatenate probs but to keep dims manageable we map each probs vector to *label set defined by alignment CSV*
        # If your class sets differ, you should remap them; here we will simply zero-pad/truncate to fit the label count of the CSV.
        # Simplifying assumption: the label names in alignment CSV correspond to the CSV-level label set.
        # Map each modality probs to length = len(labels) by simple heuristic: if class names match label names, place them; else project by sum.
        def project_probs(src_probs, src_classes, target_labels):
            out = np.zeros(len(target_labels), dtype=float)
            try:
                for i, cname in enumerate(src_classes):
                    if cname in target_labels:
                        out[target_labels.index(cname)] = src_probs[i]
                if out.sum() == 0:
                    # fallback: take argmax and place
                    out[np.argmax(src_probs) % len(target_labels)] = src_probs.max()
            except Exception:
                out[:] = 0.0
            return out

        t_proj = project_probs(t_prob, text_classes, labels) if len(text_prob:=t_prob)>0 else np.zeros(len(labels))
        f_proj = project_probs(f_prob, face_classes, labels) if len(f_prob)>0 else np.zeros(len(labels))
        s_proj = project_probs(s_prob, speech_classes, labels) if len(s_prob)>0 else np.zeros(len(labels))

        feat = np.concatenate([t_proj, f_proj, s_proj])
        X_stack.append(feat)

    X_stack = np.vstack(X_stack)
    y = np.array(y)

    # split stack data
    X_tr, X_val_st, y_tr, y_val_st = train_test_split(X_stack, y, test_size=0.2, random_state=SEED, stratify=y)

    from sklearn.linear_model import LogisticRegression
    clf = LogisticRegression(C=STACKER_C, solver=STACKER_SOLVER, max_iter=1000, multi_class='ovr')
    clf.fit(X_tr, y_tr)
    y_pred = clf.predict(X_val_st)
    precision = precision_score(y_val_st, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_val_st, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_val_st, y_pred, average='macro', zero_division=0)
    print(f"[STACKER] Validation metrics -> Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}")

    # save
    pickle.dump({'stacker':clf, 'labels':labels}, open("stacker.pkl","wb"))
    return clf

# ===========================================
# ================ RUN ALL ==================
# ===========================================
def run_all():
    t0 = time.time()
    print("=== TRAIN TEXT MODEL ===")
    text_res = train_text_model()
    print("=== TRAIN FACE MODEL ===")
    face_res = train_face_model()
    print("=== TRAIN SPEECH MODEL ===")
    speech_res = train_speech_model()
    print(f"[ALL] Finished base training in {(time.time()-t0)/60:.2f} minutes.")

    # Print per-model summary metrics if available
    for name, res in [('TEXT', text_res), ('FACE', face_res), ('SPEECH', speech_res)]:
        if res['y_score'].size != 0:
            y_true = res['y_true']
            y_score = res['y_score']
            y_pred = y_score.argmax(axis=1)
            prec = precision_score(y_true, y_pred, average='macro', zero_division=0)
            rec = recall_score(y_true, y_pred, average='macro', zero_division=0)
            f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
            print(f"[SUMMARY {name}] Val samples: {len(y_true)} | Precision: {prec:.4f} Recall: {rec:.4f} F1: {f1:.4f}")
        else:
            print(f"[SUMMARY {name}] No validation predictions available for metrics.")

    # Optional stacking
    if ALIGN_CSV:
        print("=== TRAIN STACKER FROM ALIGNMENT CSV ===")
        try:
            clf = train_stacker_from_alignment(TEXT_BEST_MODEL, FACE_MODEL_PATH, SPEECH_MODEL_PATH, ALIGN_CSV)
            print("[STACKER] Trained and saved stacker.pkl")
        except Exception as e:
            print("[STACKER] Failed:", e)
    else:
        print("[STACKER] ALIGN_CSV not provided -> skipping stacker training.")

if __name__ == "__main__":
    run_all()


[INFO] Device: cuda
=== TRAIN TEXT MODEL ===


AssertionError: CSV must have 'text' and 'label'

In [4]:
# Multimodal Fusion Training Script
# Uses your existing text, face and speech models as experts and trains a fusion head.
# - Loads CSV with columns: content, image_path, audio_feat_path, label
# - Extracts embeddings from expert models (using their penultimate layers)
# - Projects each embedding to a common dim, concatenates, and trains an MLP fusion head
# - Saves best fusion checkpoint and prints metrics

import os
import time
import copy
import random
from typing import List, Optional

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image

from transformers import BertTokenizer, BertModel
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
from sklearn.preprocessing import label_binarize
from tqdm import tqdm
import pickle

# ----------------------
# USER CONFIG (edit if needed)
# ----------------------
FUSION_CSV = r"K:\Code\Project\Research Paper\Emotion Detection\meld dataset\MELD.Raw\self\fusion_dataset.csv"
OUTPUT_DIR = r"K:\Code\Project\Research Paper\Emotion Detection\meld dataset\MELD.Raw\self\fusion_output"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Expert model checkpoints (from your code)
TEXT_BEST_MODEL = r"K:\Code\Project\Research Paper\Emotion Detection\Code\bert_emotion_text_final.pth"
TEXT_LABELS_NPY = r"K:\Code\Project\Research Paper\Emotion Detection\Code\label_classes.npy"
TEXT_MODEL_NAME = "bert-base-multilingual-cased"

FACE_BEST_MODEL = r"K:\Code\Project\Research Paper\Emotion Detection\inceptionresnetv3_face_emotion.pth"
FACE_LABELS_NPY = r"K:\Code\Project\Research Paper\Emotion Detection\emotions_face.npy"

SPEECH_BEST_MODEL = r"./best_transformer_speech_model.pth"
SPEECH_CACHED_FEATURES = r"cached_features.pkl"  # optional fallback if audio paths missing

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("[INFO] Device:", DEVICE)

# hyperparams
PROJECT_DIM = 256
FUSION_HIDDEN = 512
BATCH_SIZE = 32
EPOCHS = 12
LR = 1e-4
NUM_WORKERS = 0
RANDOM_SEED = 42

random.seed(RANDOM_SEED); np.random.seed(RANDOM_SEED); torch.manual_seed(RANDOM_SEED)
if torch.cuda.is_available(): torch.cuda.manual_seed_all(RANDOM_SEED)

# ----------------------
# Utilities
# ----------------------
def load_fusion_csv(path: str) -> pd.DataFrame:
    if not os.path.exists(path):
        raise FileNotFoundError("Fusion CSV not found: " + path)
    df = pd.read_csv(path)
    required = ['text','image_path','audio_path','label']
    for c in required:
        if c not in df.columns:
            raise RuntimeError(f"Fusion CSV must contain column: {c}")
    df = df.dropna(subset=['text','label'])
    df['text'] = df['text'].astype(str).str.strip()
    return df.reset_index(drop=True)

# ----------------------
# Expert model wrappers (extract penultimate features)
# ----------------------
# TEXT encoder (BERT pooler)
class TextEncoder(nn.Module):
    def __init__(self, model_name=TEXT_MODEL_NAME):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
    def forward(self, input_ids, attention_mask):
        out = self.bert(input_ids=input_ids, attention_mask=attention_mask, return_dict=True)
        return out.pooler_output  # [B, hidden]

# FACE encoder: use inception_v3 and return 512-d feature (after first fc in your face model)
class FaceEncoder(nn.Module):
    def __init__(self, pretrained=True):
        super().__init__()
        base = models.inception_v3(
        weights=models.Inception_V3_Weights.IMAGENET1K_V1,
        aux_logits=True   # must be True for pretrained weights
    )

        in_features = base.fc.in_features
        # replicate your face model's fc structure, but we'll keep final linear as identity to extract 512-d
        fc = nn.Sequential(nn.Linear(in_features, 512), nn.ReLU(inplace=True), nn.Dropout(0.4), nn.Identity())
        base.fc = fc
        self.base = base
    def forward(self, x):
        return self.base(x)  # [B, 512]

# SPEECH encoder: use TransformerLSTM but return 128-d LSTM hidden vector before final fc
class SpeechEncoder(nn.Module):
    def __init__(self, d_model=128, nhead=4, num_layers=2):
        super().__init__()
        self.feature_proj = nn.Linear(40, d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead, dim_feedforward=256, dropout=0.3, batch_first=True)
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.lstm = nn.LSTM(d_model, 128, batch_first=True)
        self.dropout = nn.Dropout(0.3)
        # no final fc here
    def forward(self, x):
        # x: [B, timesteps, feat_dim] or [B,1,feat_dim,timesteps]
        if x.dim() == 4:
            x = x.squeeze(1).permute(0,2,1)
        x = self.feature_proj(x)
        x = self.transformer(x)
        x, _ = self.lstm(x)
        x = self.dropout(x[:, -1, :])
        return x  # [B,128]

# ----------------------
# Fusion head
# ----------------------
class FusionHead(nn.Module):
    def __init__(self, in_dim, hidden=FUSION_HIDDEN, num_classes=7, dropout=0.3):
        super().__init__()
        self.fc1 = nn.Linear(in_dim, hidden)
        self.act = nn.ReLU()
        self.drop = nn.Dropout(dropout)
        self.fc2 = nn.Linear(hidden, num_classes)
    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        return self.fc2(x)

# ----------------------
# Fusion Dataset
# ----------------------
class FusionDataset(Dataset):
    def __init__(self, df: pd.DataFrame, tokenizer, img_transform, label_encoder: LabelEncoder):
        self.df = df.reset_index(drop=True)
        self.tokenizer = tokenizer
        self.img_transform = img_transform
        self.le = label_encoder
    def __len__(self):
        return len(self.df)
    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        text = str(row['text'])
        enc = self.tokenizer(text, padding='max_length', truncation=True, max_length=64, return_tensors='pt')
        input_ids = enc['input_ids'].squeeze(0)
        attention_mask = enc['attention_mask'].squeeze(0)
        # image
        img_path = row['image_path']
        try:
            img = Image.open(img_path).convert('RGB')
            img_t = self.img_transform(img)
        except Exception:
            # fallback to zeros
            img_t = torch.zeros(3, 299, 299)
        # audio features (.npy expected)
        audio_feat = None
        afp = row.get('audio_feat_path', None)
        if isinstance(afp, str) and os.path.exists(afp):
            try:
                arr = np.load(afp, allow_pickle=True)
                arr = np.array(arr, dtype=np.float32)
                if arr.ndim == 1 and arr.size % 40 == 0:
                    arr = arr.reshape(-1, 40)
                audio_feat = torch.tensor(arr, dtype=torch.float32)
            except Exception:
                audio_feat = torch.zeros(1,40)
        else:
            audio_feat = torch.zeros(1,40)
        label = self.le.transform([row['label']])[0]
        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'image': img_t,
            'audio': audio_feat,
            'label': torch.tensor(label, dtype=torch.long)
        }

# ----------------------
# Helpers for loading pretrained experts (and mapping)
# ----------------------
def load_text_encoder(checkpoint_path: str):
    model = TextEncoder().to(DEVICE)
    if os.path.exists(checkpoint_path):
        ck = torch.load(checkpoint_path, map_location=DEVICE)
        # checkpoint may store {'model_state':...}
        st = ck if isinstance(ck, dict) and 'model_state' in ck else ck
        try:
            model_state = st['model_state'] if isinstance(st, dict) and 'model_state' in st else st
            # We only need bert weights; if a full classifier was saved, load bert's weights via matching keys
            model.bert.load_state_dict({k.replace('bert.',''):v for k,v in model_state.items() if k.startswith('bert.')}, strict=False)
            print('[INFO] Loaded text encoder weights (partial).')
        except Exception:
            # fallback: try direct load
            try:
                model.load_state_dict(model_state)
                print('[INFO] Loaded text encoder full state.')
            except Exception:
                print('[WARN] Could not load text checkpoint fully; using base BERT weights.')
    return model


def load_face_encoder(checkpoint_path: str):
    enc = FaceEncoder(pretrained=True).to(DEVICE)
    if os.path.exists(checkpoint_path):
        ck = torch.load(checkpoint_path, map_location=DEVICE)
        st = ck if isinstance(ck, dict) and 'model_state' in ck else ck
        try:
            state = st['model_state'] if isinstance(st, dict) and 'model_state' in st else st
            # load state carefully
            enc_state = {}
            for k,v in state.items():
                # map original face model keys to our encoder where base.fc last Linear replaced by Identity
                enc_state[k] = v
            enc.load_state_dict(enc_state, strict=False)
            print('[INFO] Loaded face encoder weights (partial).')
        except Exception:
            print('[WARN] Could not load face checkpoint fully; using ImageNet init.')
    return enc


def load_speech_encoder(checkpoint_path: str):
    enc = SpeechEncoder().to(DEVICE)
    if os.path.exists(checkpoint_path):
        ck = torch.load(checkpoint_path, map_location=DEVICE)
        st = ck if isinstance(ck, dict) and ('model' in ck or 'model_state' in ck) else ck
        try:
            model_state = st.get('model', st.get('model_state', st))
            enc.load_state_dict(model_state, strict=False)
            print('[INFO] Loaded speech encoder weights (partial).')
        except Exception:
            print('[WARN] Could not load speech checkpoint fully; using random init.')
    return enc

# ----------------------
# Training & evaluation for fusion head
# ----------------------

def train_fusion(fusion_model, text_enc, face_enc, speech_enc, train_loader, val_loader, num_classes, epochs=EPOCHS):
    fusion_model.to(DEVICE)
    # freeze encoders
    text_enc.eval(); face_enc.eval(); speech_enc.eval()
    for p in text_enc.parameters(): p.requires_grad = False
    for p in face_enc.parameters(): p.requires_grad = False
    for p in speech_enc.parameters(): p.requires_grad = False

    optimizer = torch.optim.Adam(fusion_model.parameters(), lr=LR)
    criterion = nn.CrossEntropyLoss()
    best_val = -1.0
    train_accs, val_accs = [], []

    for epoch in range(1, epochs+1):
        fusion_model.train()
        running_correct = 0; running_total = 0
        pbar = tqdm(train_loader, desc=f"[FUSION] Epoch {epoch}/{epochs}", leave=False)
        for batch in pbar:
            # move inputs
            input_ids = batch['input_ids'].to(DEVICE)
            attention_mask = batch['attention_mask'].to(DEVICE)
            images = batch['image'].to(DEVICE)
            audios = batch['audio']
            # audios is list of tensors with variable timesteps; pad to max in batch
            # we expect audios: Tensor or list; ensure tensor
            if isinstance(audios, list) or audios.dim()==3 and audios.shape[0]!=input_ids.shape[0]:
                # try to stack safely
                audios = torch.stack([a if isinstance(a, torch.Tensor) else torch.tensor(a, dtype=torch.float32) for a in audios]).to(DEVICE)
            else:
                audios = audios.to(DEVICE)
            labels = batch['label'].to(DEVICE)

            with torch.no_grad():
                t_feat = text_enc(input_ids, attention_mask)  # [B,768]
                f_feat = face_enc(images)  # [B,512]
                s_feat = speech_enc(audios)  # [B,128]

            # project
            t_proj = proj_text(t_feat)
            f_proj = proj_face(f_feat)
            s_proj = proj_speech(s_feat)

            fused = torch.cat([t_proj, f_proj, s_proj], dim=1)
            logits = fusion_model(fused)
            loss = criterion(logits, labels)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            preds = logits.argmax(dim=1)
            running_total += labels.size(0)
            running_correct += (preds == labels).sum().item()
            pbar.set_postfix({'TrainAcc': f"{100.0*running_correct/running_total:.2f}%", 'Loss': f"{loss.item():.4f}"})

        train_acc = 100.0 * running_correct / running_total
        train_accs.append(train_acc)

        # Validation
        fusion_model.eval()
        v_total = 0; v_correct = 0
        all_probs, all_targets = [], []
        with torch.no_grad():
            for batch in val_loader:
                input_ids = batch['input_ids'].to(DEVICE)
                attention_mask = batch['attention_mask'].to(DEVICE)
                images = batch['image'].to(DEVICE)
                audios = batch['audio']
                if isinstance(audios, list) or (isinstance(audios, torch.Tensor) and audios.shape[0]!=input_ids.shape[0]):
                    audios = torch.stack([a if isinstance(a, torch.Tensor) else torch.tensor(a, dtype=torch.float32) for a in audios]).to(DEVICE)
                else:
                    audios = audios.to(DEVICE)
                labels = batch['label'].to(DEVICE)

                t_feat = text_enc(input_ids, attention_mask)
                f_feat = face_enc(images)
                s_feat = speech_enc(audios)

                t_proj = proj_text(t_feat)
                f_proj = proj_face(f_feat)
                s_proj = proj_speech(s_feat)
                fused = torch.cat([t_proj, f_proj, s_proj], dim=1)
                out = fusion_model(fused)
                probs = F.softmax(out, dim=1)
                preds = probs.argmax(dim=1)
                v_total += labels.size(0)
                v_correct += (preds == labels).sum().item()
                all_probs.append(probs.cpu().numpy()); all_targets.append(labels.cpu().numpy())

        val_acc = 100.0 * v_correct / v_total if v_total>0 else 0.0
        val_accs.append(val_acc)
        print(f"[FUSION] Epoch {epoch}/{epochs} → Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%")

        if val_acc > best_val:
            best_val = val_acc
            torch.save({'fusion_state': fusion_model.state_dict(), 'proj_text': proj_text.state_dict(), 'proj_face': proj_face.state_dict(), 'proj_speech': proj_speech.state_dict(), 'classes': le.classes_.tolist()}, os.path.join(OUTPUT_DIR, 'best_fusion.pth'))
            print(f"[FUSION] Best saved (Val Acc {best_val:.2f}%)")

    # final metrics
    y_score = np.vstack(all_probs) if all_probs else np.array([])
    y_true = np.concatenate(all_targets) if all_targets else np.array([])
    return {'train_accs': train_accs, 'val_accs': val_accs, 'best_val': best_val, 'y_score': y_score, 'y_true': y_true}

# ----------------------
# Main
# ----------------------
if __name__ == '__main__':
    df = load_fusion_csv(FUSION_CSV)
    # label encode
    le = LabelEncoder(); df['label_idx'] = le.fit_transform(df['label'].astype(str))
    num_classes = len(le.classes_)
    print('[INFO] Classes:', le.classes_)

    # train/val split (stratified)
    idxs = list(range(len(df)))
    from sklearn.model_selection import train_test_split
    train_idx, val_idx = train_test_split(idxs, test_size=0.2, random_state=RANDOM_SEED, stratify=df['label_idx'])
    train_df = df.iloc[train_idx].reset_index(drop=True)
    val_df = df.iloc[val_idx].reset_index(drop=True)

    # tokenizer and transforms
    tokenizer = BertTokenizer.from_pretrained(TEXT_MODEL_NAME, use_fast=True)
    img_transform = transforms.Compose([transforms.Resize((299,299)), transforms.ToTensor(), transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225])])

    train_ds = FusionDataset(train_df, tokenizer, img_transform, le)
    val_ds = FusionDataset(val_df, tokenizer, img_transform, le)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    # load encoders
    print('[INFO] Loading encoders...')
    text_enc = load_text_encoder(TEXT_BEST_MODEL)
    face_enc = load_face_encoder(FACE_BEST_MODEL)
    speech_enc = load_speech_encoder(SPEECH_BEST_MODEL)

    # projection heads
    # note: text hidden size is BERT hidden (usually 768), face encoder outputs 512, speech 128
    # create small linear proj layers
    proj_text = nn.Linear(768, PROJECT_DIM).to(DEVICE)
    proj_face = nn.Linear(512, PROJECT_DIM).to(DEVICE)
    proj_speech = nn.Linear(128, PROJECT_DIM).to(DEVICE)

    # fusion model
    fusion_in = PROJECT_DIM * 3
    fusion_model = FusionHead(fusion_in, hidden=FUSION_HIDDEN, num_classes=num_classes).to(DEVICE)

    # train fusion
    res = train_fusion(fusion_model, text_enc, face_enc, speech_enc, train_loader, val_loader, num_classes, epochs=EPOCHS)

    # plots
    if res['train_accs']:
        plt.figure(figsize=(7,4)); plt.plot(range(1,len(res['train_accs'])+1), res['train_accs'], marker='o', label='Train Acc'); plt.plot(range(1,len(res['val_accs'])+1), res['val_accs'], marker='o', label='Val Acc'); plt.xlabel('Epoch'); plt.ylabel('Accuracy (%)'); plt.title('Fusion Accuracy'); plt.legend(); plt.grid(True); plt.show()

    # final eval: confusion
    if res['y_score'].size != 0:
        y_true = res['y_true']
        y_pred = res['y_score'].argmax(axis=1)
        cm = confusion_matrix(y_true, y_pred)
        print('Confusion Matrix:')
        print(cm)
        precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
        recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
        f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
        print(f'Final Fusion Metrics → Precision: {precision:.4f}, Recall: {recall:.4f}, F1: {f1:.4f}')

    print('[DONE] Fusion training complete. Best fusion saved to', os.path.join(OUTPUT_DIR, 'best_fusion.pth'))


[INFO] Device: cuda
[INFO] Classes: ['anger' 'disgust' 'fear' 'joy' 'neutral' 'sadness' 'surprise']
[INFO] Loading encoders...
[INFO] Loaded text encoder weights (partial).
[WARN] Could not load face checkpoint fully; using ImageNet init.
[INFO] Loaded speech encoder weights (partial).


                                                                                                   

KeyboardInterrupt: 