# Phase 4.3: The First Generalist Model - Bridging the Domain Gap

**Objective:** After identifying the domain gap in Phase 2, the goal of this experiment is to solve it. 

Our strategy is to create a **"Generalist Model"** by training a single ResNet18 model on a combined, mixed dataset containing both the clean speech from **RAVDESS** and the more realistic speech from **CREMA-D**. The hypothesis is that this will force the model to learn more robust, universal features of emotion.

In [1]:
from google.colab import drive
drive.mount('/content/drive')

# Install only what's needed for training (no librosa/audio libs required here)
!pip install pandas seaborn matplotlib tqdm

Mounted at /content/drive


## Part 1: Unified Data Strategy

The core of this experiment is a new data strategy. First, we **harmonize the emotion labels** into a unified set of 6 core emotions that are common to both datasets. 

Next, we **merge the pre-computed spectrograms** from both RAVDESS and CREMA-D into a single large data pool. The model will be trained on a shuffled mix of this data, making it "domain-agnostic."

In [2]:
import torch, torch.nn as nn, torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import os, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
from torch.optim.lr_scheduler import StepLR
from torchvision import models

# --- Configuration ---
SPECTROGRAM_PATH = "/content/drive/MyDrive/ser_project/processed_spectrograms/"
LEARNING_RATE = 0.001; BATCH_SIZE = 64; EPOCHS = 30
CHECKPOINT_BEST_PATH = "/content/drive/MyDrive/ser_project/resnet_generalist_best_v2.pth"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu"); print(f"Using device: {device}")

# --- Mappings ---
unified_emotion_map = { "neutral": 0, "happy": 1, "sad": 2, "angry": 3, "fearful": 4, "disgust": 5 }
unified_emotion_labels = ["neutral", "happy", "sad", "angry", "fearful", "disgust"]
ravdess_map = { "01": "neutral", "03": "happy", "04": "sad", "05": "angry", "06": "fearful", "07": "disgust" }
crema_d_map = { "NEU": "neutral", "HAP": "happy", "SAD": "sad", "ANG": "angry", "FEA": "fearful", "DIS": "disgust" }

# --- A simpler and faster Dataset class for pre-computed files ---
class PrecomputedSpectrogramDataset(Dataset):
    def __init__(self, file_paths, labels, target_width=300):
        self.file_paths, self.labels, self.target_width = file_paths, labels, target_width
    def __len__(self): return len(self.file_paths)
    def __getitem__(self, idx):
        file_path, label = self.file_paths[idx], self.labels[idx]
        spectrogram = np.load(file_path)
        if spectrogram.shape[1] < self.target_width: spectrogram = np.pad(spectrogram, ((0, 0), (0, self.target_width - spectrogram.shape[1])), mode='constant')
        else: spectrogram = spectrogram[:, :self.target_width]
        spec_min, spec_max = spectrogram.min(), spectrogram.max()
        if spec_max > spec_min: spectrogram = (spectrogram - spec_min) / (spec_max - spec_min)
        spectrogram_3ch = np.stack([spectrogram, spectrogram, spectrogram], axis=0)
        return torch.tensor(spectrogram_3ch, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

# --- Prepare Data from pre-computed .npy files ---
print("Preparing data from pre-computed spectrograms...")
all_files = [os.path.join(SPECTROGRAM_PATH, f) for f in os.listdir(SPECTROGRAM_PATH) if f.endswith('.npy')]
all_labels_str = []
# This loop is designed to handle both RAVDESS and CREMA-D filenames
for f in all_files:
    filename = os.path.basename(f)
    try:
        if '03-01' in filename: # Heuristic for RAVDESS
            code = filename.split("-")[2]
            if code in ravdess_map: all_labels_str.append(ravdess_map[code])
        else: # Assumed to be CREMA-D
            code = filename.split("_")[2]
            if code in crema_d_map: all_labels_str.append(crema_d_map[code])
    except IndexError:
        # print(f"Could not parse filename: {filename}")
        continue
# Filter out files that couldn't be parsed
valid_indices = [i for i, lbl in enumerate(all_labels_str) if lbl]
all_files = [all_files[i] for i in valid_indices]

emotion_to_idx = {e: i for i, e in enumerate(unified_emotion_labels)}; all_labels = [emotion_to_idx[lbl] for lbl in all_labels_str]
train_files, val_files, train_labels, val_labels = train_test_split(all_files, all_labels, test_size=0.15, random_state=42, stratify=all_labels)
train_dataset = PrecomputedSpectrogramDataset(train_files, train_labels); val_dataset = PrecomputedSpectrogramDataset(val_files, val_labels)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0); val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# --- Train the Model (with Early Stopping) ---
model = models.resnet18(weights='IMAGENET1K_V1'); model.fc = nn.Linear(model.fc.in_features, len(unified_emotion_labels)); model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE); criterion = nn.CrossEntropyLoss(); scheduler = StepLR(optimizer, step_size=7, gamma=0.1)

best_val_acc = 0.0
print("Starting training...")
for epoch in range(EPOCHS):
    model.train(); running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Train]"):
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad(); outputs = model(inputs); loss = criterion(outputs, labels)
        loss.backward(); optimizer.step(); running_loss += loss.item() * inputs.size(0)
    train_loss = running_loss / len(train_dataset)

    model.eval(); val_loss = 0.0; correct = 0; total = 0
    with torch.no_grad():
        for inputs, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{EPOCHS} [Val]"):
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs); loss = criterion(outputs, labels); val_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs.data, 1); total += labels.size(0); correct += (predicted == labels).sum().item()
    val_accuracy = 100 * correct / total; val_loss /= len(val_dataset)
    print(f"Epoch {epoch+1}/{EPOCHS} | Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_accuracy:.2f}%")

    if val_accuracy > best_val_acc:
        best_val_acc = val_accuracy
        print(f"🎉 New best validation accuracy: {best_val_acc:.2f}%. Saving model...")
        torch.save({'model_state_dict': model.state_dict()}, CHECKPOINT_BEST_PATH)
    scheduler.step()

# --- Final Evaluation (using the held-out portion of the validation set as a test set) ---
print("\n--- FINAL EVALUATION ---")
# The val_files/val_labels serve as our final test set in this simplified script
test_loader_final = val_loader
print(f"Loading best model (from epoch with {best_val_acc:.2f}% validation accuracy) for final testing...")
best_checkpoint = torch.load(CHECKPOINT_BEST_PATH); model.load_state_dict(best_checkpoint['model_state_dict']); model.eval()
all_preds = []; all_true = []
with torch.no_grad():
    for inputs, labels in tqdm(test_loader_final, desc="Final Evaluation"):
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs); _, preds = torch.max(outputs, 1); all_preds.extend(preds.cpu().numpy()); all_true.extend(labels.cpu().numpy())
accuracy = accuracy_score(all_true, all_preds)
print(f"\nFinal Generalist Model Accuracy on the Test Set: {accuracy * 100:.2f}%")
print("\nClassification Report:"); print(classification_report(all_true, all_preds, target_names=unified_emotion_labels, zero_division=0))

Using device: cuda
Preparing data from pre-computed spectrograms...
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 190MB/s]


Starting training...


Epoch 1/30 [Train]: 100%|██████████| 113/113 [04:19<00:00,  2.29s/it]
Epoch 1/30 [Val]: 100%|██████████| 20/20 [00:06<00:00,  3.24it/s]


Epoch 1/30 | Train Loss: 1.4187 | Val Loss: 1.5573 | Val Acc: 36.94%
🎉 New best validation accuracy: 36.94%. Saving model...


Epoch 2/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.69it/s]
Epoch 2/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.67it/s]


Epoch 2/30 | Train Loss: 1.2295 | Val Loss: 3.0299 | Val Acc: 23.22%


Epoch 3/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.75it/s]
Epoch 3/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.55it/s]


Epoch 3/30 | Train Loss: 1.1472 | Val Loss: 1.3326 | Val Acc: 45.49%
🎉 New best validation accuracy: 45.49%. Saving model...


Epoch 4/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.64it/s]
Epoch 4/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.67it/s]


Epoch 4/30 | Train Loss: 1.0291 | Val Loss: 2.4633 | Val Acc: 32.94%


Epoch 5/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.75it/s]
Epoch 5/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s]


Epoch 5/30 | Train Loss: 0.9244 | Val Loss: 1.7096 | Val Acc: 41.88%


Epoch 6/30 [Train]: 100%|██████████| 113/113 [00:40<00:00,  2.77it/s]
Epoch 6/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.67it/s]


Epoch 6/30 | Train Loss: 0.8146 | Val Loss: 1.4930 | Val Acc: 48.00%
🎉 New best validation accuracy: 48.00%. Saving model...


Epoch 7/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.66it/s]
Epoch 7/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.62it/s]


Epoch 7/30 | Train Loss: 0.7022 | Val Loss: 1.5719 | Val Acc: 45.10%


Epoch 8/30 [Train]: 100%|██████████| 113/113 [00:40<00:00,  2.76it/s]
Epoch 8/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.63it/s]


Epoch 8/30 | Train Loss: 0.4847 | Val Loss: 1.1349 | Val Acc: 59.37%
🎉 New best validation accuracy: 59.37%. Saving model...


Epoch 9/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.66it/s]
Epoch 9/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.60it/s]


Epoch 9/30 | Train Loss: 0.3819 | Val Loss: 1.2359 | Val Acc: 58.35%


Epoch 10/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.75it/s]
Epoch 10/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.73it/s]


Epoch 10/30 | Train Loss: 0.3353 | Val Loss: 1.3499 | Val Acc: 56.63%


Epoch 11/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.76it/s]
Epoch 11/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.63it/s]


Epoch 11/30 | Train Loss: 0.3072 | Val Loss: 1.3866 | Val Acc: 57.41%


Epoch 12/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.73it/s]
Epoch 12/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.60it/s]


Epoch 12/30 | Train Loss: 0.2900 | Val Loss: 1.4753 | Val Acc: 56.71%


Epoch 13/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.73it/s]
Epoch 13/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.44it/s]


Epoch 13/30 | Train Loss: 0.2698 | Val Loss: 1.5088 | Val Acc: 56.47%


Epoch 14/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.71it/s]
Epoch 14/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.62it/s]


Epoch 14/30 | Train Loss: 0.2486 | Val Loss: 1.5624 | Val Acc: 56.94%


Epoch 15/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.68it/s]
Epoch 15/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.61it/s]


Epoch 15/30 | Train Loss: 0.2217 | Val Loss: 1.5697 | Val Acc: 57.02%


Epoch 16/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.71it/s]
Epoch 16/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.59it/s]


Epoch 16/30 | Train Loss: 0.2169 | Val Loss: 1.5843 | Val Acc: 55.92%


Epoch 17/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.70it/s]
Epoch 17/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.69it/s]


Epoch 17/30 | Train Loss: 0.2132 | Val Loss: 1.5766 | Val Acc: 56.31%


Epoch 18/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.70it/s]
Epoch 18/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s]


Epoch 18/30 | Train Loss: 0.2078 | Val Loss: 1.5968 | Val Acc: 56.47%


Epoch 19/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.71it/s]
Epoch 19/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.60it/s]


Epoch 19/30 | Train Loss: 0.2035 | Val Loss: 1.6206 | Val Acc: 55.45%


Epoch 20/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.68it/s]
Epoch 20/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.51it/s]


Epoch 20/30 | Train Loss: 0.1986 | Val Loss: 1.6114 | Val Acc: 56.16%


Epoch 21/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.71it/s]
Epoch 21/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.55it/s]


Epoch 21/30 | Train Loss: 0.1925 | Val Loss: 1.6395 | Val Acc: 56.47%


Epoch 22/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.71it/s]
Epoch 22/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.55it/s]


Epoch 22/30 | Train Loss: 0.1873 | Val Loss: 1.6574 | Val Acc: 56.24%


Epoch 23/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.68it/s]
Epoch 23/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.54it/s]


Epoch 23/30 | Train Loss: 0.1860 | Val Loss: 1.6531 | Val Acc: 55.84%


Epoch 24/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.66it/s]
Epoch 24/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.48it/s]


Epoch 24/30 | Train Loss: 0.1872 | Val Loss: 1.6723 | Val Acc: 56.00%


Epoch 25/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.70it/s]
Epoch 25/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.66it/s]


Epoch 25/30 | Train Loss: 0.1853 | Val Loss: 1.6476 | Val Acc: 56.16%


Epoch 26/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.70it/s]
Epoch 26/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s]


Epoch 26/30 | Train Loss: 0.1850 | Val Loss: 1.6505 | Val Acc: 56.00%


Epoch 27/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.68it/s]
Epoch 27/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.63it/s]


Epoch 27/30 | Train Loss: 0.1844 | Val Loss: 1.6622 | Val Acc: 56.55%


Epoch 28/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.69it/s]
Epoch 28/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.51it/s]


Epoch 28/30 | Train Loss: 0.1841 | Val Loss: 1.6464 | Val Acc: 56.16%


Epoch 29/30 [Train]: 100%|██████████| 113/113 [00:42<00:00,  2.69it/s]
Epoch 29/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.52it/s]


Epoch 29/30 | Train Loss: 0.1843 | Val Loss: 1.6671 | Val Acc: 55.69%


Epoch 30/30 [Train]: 100%|██████████| 113/113 [00:41<00:00,  2.70it/s]
Epoch 30/30 [Val]: 100%|██████████| 20/20 [00:05<00:00,  3.57it/s]


Epoch 30/30 | Train Loss: 0.1834 | Val Loss: 1.6559 | Val Acc: 55.84%

--- FINAL EVALUATION ---
Loading best model (from epoch with 59.37% validation accuracy) for final testing...


Final Evaluation: 100%|██████████| 20/20 [00:05<00:00,  3.47it/s]


Final Generalist Model Accuracy on the Test Set: 59.37%

Classification Report:
              precision    recall  f1-score   support

     neutral       0.71      0.60      0.65       178
       happy       0.64      0.58      0.61       220
         sad       0.41      0.68      0.51       219
       angry       0.71      0.61      0.66       219
     fearful       0.58      0.52      0.55       220
     disgust       0.69      0.58      0.63       219

    accuracy                           0.59      1275
   macro avg       0.62      0.59      0.60      1275
weighted avg       0.62      0.59      0.60      1275






## Part 2: The Verdict - Evaluating the Generalist

This final step is the ultimate test of our strategy. We take the single Generalist Model trained on the mixed data and evaluate its performance on the **RAVDESS and CREMA-D test sets separately**. This allows for a direct comparison against the failed Specialist Model from Phase 2, providing a definitive measure of whether the domain gap has been bridged.

In [3]:
# ===================================================================
# FINAL SCRIPT: Evaluating the Generalist Model on Separate Domains
# ===================================================================
import torch, torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import os, numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm
from torchvision import models

# --- Configuration ---
SPECTROGRAM_PATH = "/content/drive/MyDrive/ser_project/processed_spectrograms/"
CHECKPOINT_BEST_PATH = "/content/drive/MyDrive/ser_project/resnet_generalist_best_v2.pth"
BATCH_SIZE = 64
device = torch.device("cuda" if torch.cuda.is_available() else "cpu"); print(f"Using device: {device}")

# --- Mappings and Dataset Class (same as before) ---
unified_emotion_labels = ["neutral", "happy", "sad", "angry", "fearful", "disgust"]
class PrecomputedSpectrogramDataset(Dataset):
    def __init__(self, file_paths, labels, target_width=300): #... (rest of the class is the same)
        self.file_paths, self.labels, self.target_width = file_paths, labels, target_width
    def __len__(self): return len(self.file_paths)
    def __getitem__(self, idx):
        file_path, label = self.file_paths[idx], self.labels[idx]
        spectrogram = np.load(file_path)
        if spectrogram.shape[1] < self.target_width: spectrogram = np.pad(spectrogram, ((0, 0), (0, self.target_width - spectrogram.shape[1])), mode='constant')
        else: spectrogram = spectrogram[:, :self.target_width]
        spec_min, spec_max = spectrogram.min(), spectrogram.max()
        if spec_max > spec_min: spectrogram = (spectrogram - spec_min) / (spec_max - spec_min)
        spectrogram_3ch = np.stack([spectrogram, spectrogram, spectrogram], axis=0)
        return torch.tensor(spectrogram_3ch, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

# --- Load the Best Generalist Model ---
print("Loading the best generalist model...")
model = models.resnet18(); model.fc = nn.Linear(model.fc.in_features, len(unified_emotion_labels));
best_checkpoint = torch.load(CHECKPOINT_BEST_PATH); model.load_state_dict(best_checkpoint['model_state_dict']);
model = model.to(device)
model.eval()

# --- Prepare the separate Test Sets ---
# We need to recreate the exact same split to get our test set
all_files = [os.path.join(SPECTROGRAM_PATH, f) for f in os.listdir(SPECTROGRAM_PATH) if f.endswith('.npy')]
all_labels_str = []
ravdess_map = { "01": "neutral", "03": "happy", "04": "sad", "05": "angry", "06": "fearful", "07": "disgust" }
crema_d_map = { "NEU": "neutral", "HAP": "happy", "SAD": "sad", "ANG": "angry", "FEA": "fearful", "DIS": "disgust" }
for f in all_files:
    filename = os.path.basename(f)
    try:
        if '03-01' in filename:
            code = filename.split("-")[2]
            if code in ravdess_map: all_labels_str.append(ravdess_map[code])
        else:
            code = filename.split("_")[2]
            if code in crema_d_map: all_labels_str.append(crema_d_map[code])
    except IndexError: continue
valid_indices = [i for i, lbl in enumerate(all_labels_str) if lbl]
all_files = [all_files[i] for i in valid_indices]
emotion_to_idx = {e: i for i, e in enumerate(unified_emotion_labels)}; all_labels = [emotion_to_idx[lbl] for lbl in all_labels_str]
_, test_files, _, test_labels = train_test_split(all_files, all_labels, test_size=0.1, random_state=42, stratify=all_labels)

# Filter the test set for each dataset
ravdess_test_files = [f for f in test_files if 'RAVDESS' in f.upper()]
ravdess_test_labels = [l for i, l in enumerate(test_labels) if 'RAVDESS' in test_files[i].upper()]
crema_d_test_files = [f for f in test_files if 'CREMA-D' in f.upper() or '10' in os.path.basename(f)] # Heuristic for CREMA-D files
crema_d_test_labels = [l for i, l in enumerate(test_labels) if 'CREMA-D' in test_files[i].upper() or '10' in os.path.basename(test_files[i])]

# --- Run the Evaluations ---
def evaluate(files, labels, name):
    dataset = PrecomputedSpectrogramDataset(files, labels)
    loader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=False)
    all_preds, all_true = [], []
    with torch.no_grad():
        for inputs, labs in tqdm(loader, desc=f"Evaluating on {name}"):
            inputs, labs = inputs.to(device), labs.to(device)
            outputs = model(inputs); _, preds = torch.max(outputs, 1); all_preds.extend(preds.cpu().numpy()); all_true.extend(labs.cpu().numpy())
    accuracy = accuracy_score(all_true, all_preds)
    print(f"\n>>> Accuracy on {name}: {accuracy * 100:.2f}%")
    print(f"Classification Report for {name}:"); print(classification_report(all_true, all_preds, target_names=unified_emotion_labels, zero_division=0))

if ravdess_test_files: evaluate(ravdess_test_files, ravdess_test_labels, "RAVDESS Test Set")
if crema_d_test_files: evaluate(crema_d_test_files, crema_d_test_labels, "CREMA-D Test Set")

Using device: cuda
Loading the best generalist model...


Evaluating on CREMA-D Test Set: 100%|██████████| 12/12 [01:31<00:00,  7.65s/it]


>>> Accuracy on CREMA-D Test Set: 64.61%
Classification Report for CREMA-D Test Set:
              precision    recall  f1-score   support

     neutral       0.71      0.68      0.69       110
       happy       0.69      0.66      0.68       127
         sad       0.48      0.63      0.55       131
       angry       0.78      0.69      0.73       132
     fearful       0.59      0.56      0.57       133
     disgust       0.71      0.66      0.69       130

    accuracy                           0.65       763
   macro avg       0.66      0.65      0.65       763
weighted avg       0.66      0.65      0.65       763




