In [3]:
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
import torch

model_path = "../models/wav2vec2-emotion"

# Cargar procesador y modelo
processor = Wav2Vec2Processor.from_pretrained(model_path)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)

# Ejemplo: preprocesar un archivo de audio y predecir

import soundfile as sf

audio_input, sr = sf.read("../data/test_data/03-01-01-01-01-01-01.wav")

# Procesar audio (recuerda que debe coincidir la frecuencia de muestreo)
inputs = processor(audio_input, sampling_rate=sr, return_tensors="pt", padding=True)

with torch.no_grad():
    logits = model(**inputs).logits

predicted_class_id = logits.argmax().item()
predicted_label = model.config.id2label[predicted_class_id]

print(f"Predicción: {predicted_label}")


Predicción: neutral


In [4]:
import os
import pandas as pd
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import soundfile as sf

model_path = "../models/wav2vec2-emotion"
test_dir = "../data/test_data"

# Map de IDs a emociones (igual que en entrenamiento)
id2emotion = {
    '01': 'neutral',
    '02': 'calm',
    '03': 'happy',
    '04': 'sad',
    '05': 'angry',
    '06': 'fearful',
    '07': 'disgust',
    '08': 'surprise'
}

# Cargar modelo y procesador
processor = Wav2Vec2Processor.from_pretrained(model_path)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_path)
model.eval()

# Función para extraer etiqueta real del nombre del archivo
def extract_label(filename):
    parts = filename.split('-')
    emotion_id = parts[2]
    return id2emotion.get(emotion_id)

# Listar archivos test
files = [f for f in os.listdir(test_dir) if f.endswith('.wav')]

true_labels = []
pred_labels = []

for f in files:
    # Leer audio
    audio_path = os.path.join(test_dir, f)
    audio_input, sr = sf.read(audio_path)

    # Procesar entrada
    inputs = processor(audio_input, sampling_rate=sr, return_tensors="pt", padding=True)

    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    true_label = extract_label(f)

    true_labels.append(true_label)
    pred_labels.append(predicted_label)

# Calcular métricas
accuracy = accuracy_score(true_labels, pred_labels)
precision, recall, f1, _ = precision_recall_fscore_support(true_labels, pred_labels, average='weighted')

print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")


Accuracy: 0.3810
Precision: 0.3372
Recall: 0.3810
F1 Score: 0.3026


  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# Añadir ruta para importar tu script
import sys
import os
scripts_path = os.path.abspath("../scripts")
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

from cnn_baseline import CNN, RAVDESSMelDataset

import torch
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import numpy as np

# Configuraciones similares a las que usaste en entrenamiento
class Config:
    sample_rate = 16000
    n_mels = 128
    max_length = 5.0
    batch_size = 16
    cnn_save_path = "../models/cnn_model/cnn_model.pth"
    test_dir = "../data/test_data"

cfg = Config()

# Device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Cargar modelo
model = CNN(num_classes=8)
model.load_state_dict(torch.load(cfg.cnn_save_path, map_location=device))
model.to(device)
model.eval()

# Dataset y dataloader test
test_dataset = RAVDESSMelDataset(cfg.test_dir, cfg.sample_rate, cfg.n_mels, cfg.max_length)
test_loader = DataLoader(test_dataset, batch_size=cfg.batch_size, shuffle=False)

all_preds = []
all_labels = []

with torch.no_grad():
    for batch in test_loader:
        inputs = batch['input'].to(device)
        labels = batch['label'].to(device)

        outputs = model(inputs)
        preds = outputs.argmax(dim=-1)

        all_preds.extend(preds.cpu().numpy())
        all_labels.extend(labels.cpu().numpy())

# Métricas
accuracy = accuracy_score(all_labels, all_preds)
precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='weighted')

print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Precision: {precision:.4f}")
print


  model.load_state_dict(torch.load(cfg.cnn_save_path, map_location=device))


Test Accuracy: 0.2321
Test Precision: 0.1562


  _warn_prf(average, modifier, msg_start, len(result))


<function print>