In [1]:
# !pip install torch torchaudio transformers librosa pandas scikit-learn tqdm

import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchaudio.transforms as T
import librosa
from transformers import AutoModelForAudioClassification
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, recall_score

# Configuraci√≥n de Dispositivo
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"üöÄ Dispositivo detectado: {DEVICE}")

# Semilla para reproducibilidad
torch.manual_seed(42)
np.random.seed(42)

üöÄ Dispositivo detectado: cuda


In [2]:
CONFIG = {
    # --- RUTAS DE DATOS ---
    'train_csv': 'processed_combined_teacher/combined_train.csv',  # <--- AJUSTA ESTO
    'val_csv':   'processed_combined_teacher/combined_val.csv',    # <--- AJUSTA ESTO
    
    # --- MAESTRO (Teacher) ---
    # Puede ser una ruta local o un modelo de HuggingFace
    'teacher_path': 'wavlm_large_finetuned_v2', # O tu ruta: 'wavlm_finetuned_v2'
    
    # --- AUDIO PARAMS ---
    'sample_rate': 16000,
    'duration': 10.0,       # Duraci√≥n fija en segundos
    'n_mels': 64,          # Altura de la imagen (frecuencia)
    
    # --- ENTRENAMIENTO ---
    'batch_size': 16,      # Baja a 8 si te quedas sin memoria
    'epochs': 20,
    'lr': 1e-3,            # Learning Rate (un poco alto para entrenar desde cero)
    'num_classes': 4,
    
    # --- DESTILACI√ìN ---
    'temp': 4.0,           # Temperatura alta para suavizar al maestro
    'alpha': 0.5           # 50% Maestro, 50% Realidad
}

In [3]:
class DualInputDataset(Dataset):
    def __init__(self, csv_path, config):
        self.df = pd.read_csv(csv_path)
        self.config = config
        self.target_len = int(config['sample_rate'] * config['duration'])
        
        # Transformaci√≥n a Espectrograma (Para el Alumno)
        self.mel_transform = T.MelSpectrogram(
            sample_rate=config['sample_rate'],
            n_fft=1024,
            hop_length=512,
            n_mels=config['n_mels']
        )
        self.db_transform = T.AmplitudeToDB()
        
        # Mapeo de etiquetas (Ajusta seg√∫n tus datos)
        self.label_map = {'ang': 0, 'hap': 1, 'sad': 2, 'neu': 3}

    def __len__(self):
        return len(self.df)

    def _load_audio(self, path):
        # Carga con Librosa (m√°s seguro) y resampleo
        try:
            y, _ = librosa.load(path, sr=self.config['sample_rate'])
        except Exception as e:
            print(f"Error cargando {path}: {e}")
            y = np.zeros(self.target_len)
            
        # Pad o Recorte a longitud fija
        if len(y) > self.target_len:
            y = y[:self.target_len]
        else:
            y = np.pad(y, (0, self.target_len - len(y)), mode='constant')
            
        return torch.tensor(y).float()

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        wav_path = row['wav_path'] # Aseg√∫rate que tu CSV tiene esta columna
        
        # 1. Cargar Audio Crudo (1D) -> Para el Teacher
        raw_audio = self._load_audio(wav_path)
        
        # 2. Generar Espectrograma (2D) -> Para el Student
        # [n_mels, time] -> a√±adimos dimensi√≥n de canal -> [1, n_mels, time]
        mel_spec = self.mel_transform(raw_audio)
        mel_spec = self.db_transform(mel_spec)
        
        # Normalizaci√≥n simple (Instance Norm)
        mel_spec = (mel_spec - mel_spec.mean()) / (mel_spec.std() + 1e-6)
        mel_spec = mel_spec.unsqueeze(0) 

        # 3. Etiqueta
        label_str = row['emotion'] # Aseg√∫rate que tu CSV tiene esta columna
        label = self.label_map.get(label_str, 3) # 3 es neutro por defecto
        
        return raw_audio, mel_spec, torch.tensor(label, dtype=torch.long)

print("‚úÖ Dataset configurado.")

‚úÖ Dataset configurado.


In [4]:
class VanillaCNN(nn.Module):
    def __init__(self, num_classes=4):
        super().__init__()
        
        # Entrada: [Batch, 1, 64, Time]
        self.features = nn.Sequential(
            # Bloque 1
            nn.Conv2d(1, 16, 3, padding=1), nn.BatchNorm2d(16), nn.ReLU(),
            nn.MaxPool2d(2),
            # Bloque 2
            nn.Conv2d(16, 32, 3, padding=1), nn.BatchNorm2d(32), nn.ReLU(),
            nn.MaxPool2d(2),
            # Bloque 3
            nn.Conv2d(32, 64, 3, padding=1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2),
            # Bloque 4
            nn.Conv2d(64, 128, 3, padding=1), nn.BatchNorm2d(128), nn.ReLU(),
            nn.AdaptiveAvgPool2d((1, 1)) # Aplasta todo a 1x1
        )
        self.classifier = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.features(x)
        x = x.flatten(1)
        return self.classifier(x)

print("‚úÖ Modelo VanillaCNN definido.")

‚úÖ Modelo VanillaCNN definido.


In [5]:
def run_distillation_epoch(teacher, student, loader, optimizer, is_train=True):
    if is_train:
        student.train()
    else:
        student.eval()
        
    teacher.eval() # Teacher SIEMPRE congelado
    
    total_loss = 0
    all_preds = []
    all_labels = []
    
    pbar = tqdm(loader, desc="Entrenando" if is_train else "Validando", leave=False)
    
    for raw_audio, mel_spec, labels in pbar:
        raw_audio, mel_spec, labels = raw_audio.to(DEVICE), mel_spec.to(DEVICE), labels.to(DEVICE)
        
        with torch.set_grad_enabled(is_train):
            # 1. Obtener opini√≥n del Maestro (Audio Crudo)
            with torch.no_grad():
                # WavLM a veces requiere input_values, a veces raw. 
                # Si falla, prueba teacher(input_values=raw_audio)
                teacher_logits = teacher(raw_audio).logits 
            
            # 2. Obtener opini√≥n del Alumno (Espectrograma)
            student_logits = student(mel_spec)
            
            # 3. Calcular Loss (Solo si entrenamos)
            if is_train:
                T = CONFIG['temp']
                alpha = CONFIG['alpha']
                
                # Soft Loss (KL Divergence)
                soft_loss = nn.KLDivLoss(reduction="batchmean")(
                    F.log_softmax(student_logits / T, dim=1),
                    F.softmax(teacher_logits / T, dim=1)
                ) * (T**2)
                
                # Hard Loss (Cross Entropy)
                hard_loss = nn.CrossEntropyLoss()(student_logits, labels)
                
                loss = (alpha * soft_loss) + ((1 - alpha) * hard_loss)
                
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                
                total_loss += loss.item()
                pbar.set_postfix({'loss': loss.item()})
            
            # Guardar m√©tricas
            preds = torch.argmax(student_logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            
    # Calcular m√©tricas finales
    acc = accuracy_score(all_labels, all_preds)
    uar = recall_score(all_labels, all_preds, average='macro')
    avg_loss = total_loss / len(loader) if is_train else 0
    
    return avg_loss, acc, uar

In [6]:
# 1. Cargar Datos
train_ds = DualInputDataset(CONFIG['train_csv'], CONFIG)
val_ds = DualInputDataset(CONFIG['val_csv'], CONFIG)

train_loader = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True)
val_loader = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False)

# 2. Cargar Maestro
print(f"üîÑ Cargando Teacher: {CONFIG['teacher_path']}...")
try:
    teacher_model = AutoModelForAudioClassification.from_pretrained(
        CONFIG['teacher_path'], 
        num_labels=CONFIG['num_classes'],
        ignore_mismatched_sizes=True
    ).to(DEVICE)
except Exception as e:
    print(f"‚ö†Ô∏è Error cargando teacher local, intentando base: {e}")
    # Fallback a un modelo gen√©rico si la ruta falla
    teacher_model = AutoModelForAudioClassification.from_pretrained("microsoft/wavlm-base-plus", num_labels=CONFIG['num_classes']).to(DEVICE)

# 3. Inicializar Alumno
student_model = VanillaCNN(num_classes=CONFIG['num_classes']).to(DEVICE)
optimizer = optim.Adam(student_model.parameters(), lr=CONFIG['lr'])

# 4. Bucle de Entrenamiento
best_uar = 0.0

print(f"\nüöÄ Iniciando Destilaci√≥n: WavLM (Audio) -> VanillaCNN (Imagen)")
print(f"Params: Temp={CONFIG['temp']}, Alpha={CONFIG['alpha']}\n")

for epoch in range(CONFIG['epochs']):
    # Train
    train_loss, train_acc, train_uar = run_distillation_epoch(
        teacher_model, student_model, train_loader, optimizer, is_train=True
    )
    
    # Validation
    _, val_acc, val_uar = run_distillation_epoch(
        teacher_model, student_model, val_loader, optimizer, is_train=False
    )
    
    print(f"Epoch {epoch+1}/{CONFIG['epochs']}")
    print(f"  Train Loss: {train_loss:.4f} | Acc: {train_acc:.1%} | UAR: {train_uar:.1%}")
    print(f"  Val Acc:    {val_acc:.1%}   | UAR: {val_uar:.1%}")
    
    if val_uar > best_uar:
        best_uar = val_uar
        torch.save(student_model.state_dict(), "best_vanilla_student.pth")
        print("  üíæ ¬°Nuevo mejor modelo guardado!")
    print("-" * 40)

üîÑ Cargando Teacher: wavlm_large_finetuned_v2...
‚ö†Ô∏è Error cargando teacher local, intentando base: wavlm_large_finetuned_v2 is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `hf auth login` or by passing `token=<your_token>`


Some weights of WavLMForSequenceClassification were not initialized from the model checkpoint at microsoft/wavlm-base-plus and are newly initialized: ['classifier.bias', 'classifier.weight', 'projector.bias', 'projector.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



üöÄ Iniciando Destilaci√≥n: WavLM (Audio) -> VanillaCNN (Imagen)
Params: Temp=4.0, Alpha=0.5



Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 1/20
  Train Loss: 0.6789 | Acc: 35.6% | UAR: 33.8%
  Val Acc:    34.9%   | UAR: 33.1%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 2/20
  Train Loss: 0.6571 | Acc: 41.0% | UAR: 39.8%
  Val Acc:    15.9%   | UAR: 26.7%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 3/20
  Train Loss: 0.6506 | Acc: 43.2% | UAR: 42.5%
  Val Acc:    22.6%   | UAR: 33.2%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 4/20
  Train Loss: 0.6444 | Acc: 46.5% | UAR: 46.0%
  Val Acc:    39.2%   | UAR: 34.2%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 5/20
  Train Loss: 0.6381 | Acc: 48.1% | UAR: 47.9%
  Val Acc:    41.2%   | UAR: 44.9%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 6/20
  Train Loss: 0.6337 | Acc: 49.1% | UAR: 49.1%
  Val Acc:    46.8%   | UAR: 51.6%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 7/20
  Train Loss: 0.6333 | Acc: 49.1% | UAR: 49.3%
  Val Acc:    46.9%   | UAR: 44.8%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 8/20
  Train Loss: 0.6297 | Acc: 51.2% | UAR: 51.5%
  Val Acc:    46.8%   | UAR: 50.0%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 9/20
  Train Loss: 0.6219 | Acc: 53.4% | UAR: 53.7%
  Val Acc:    47.4%   | UAR: 46.5%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 10/20
  Train Loss: 0.6196 | Acc: 53.5% | UAR: 53.8%
  Val Acc:    46.0%   | UAR: 43.4%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 11/20
  Train Loss: 0.6168 | Acc: 55.5% | UAR: 55.9%
  Val Acc:    48.3%   | UAR: 47.2%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 12/20
  Train Loss: 0.6132 | Acc: 55.9% | UAR: 56.2%
  Val Acc:    53.7%   | UAR: 54.2%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 13/20
  Train Loss: 0.6099 | Acc: 57.0% | UAR: 57.4%
  Val Acc:    34.1%   | UAR: 37.6%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 14/20
  Train Loss: 0.6077 | Acc: 56.9% | UAR: 57.1%
  Val Acc:    41.1%   | UAR: 42.5%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 15/20
  Train Loss: 0.6056 | Acc: 56.9% | UAR: 57.2%
  Val Acc:    47.9%   | UAR: 50.0%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 16/20
  Train Loss: 0.6022 | Acc: 59.1% | UAR: 59.7%
  Val Acc:    53.1%   | UAR: 55.0%
  üíæ ¬°Nuevo mejor modelo guardado!
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 17/20
  Train Loss: 0.5999 | Acc: 58.7% | UAR: 59.2%
  Val Acc:    40.7%   | UAR: 39.6%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 18/20
  Train Loss: 0.5987 | Acc: 59.8% | UAR: 60.2%
  Val Acc:    30.8%   | UAR: 32.5%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 19/20
  Train Loss: 0.5920 | Acc: 61.4% | UAR: 61.8%
  Val Acc:    43.7%   | UAR: 44.7%
----------------------------------------


Entrenando:   0%|          | 0/246 [00:00<?, ?it/s]

Validando:   0%|          | 0/65 [00:00<?, ?it/s]

Epoch 20/20
  Train Loss: 0.5919 | Acc: 61.8% | UAR: 62.2%
  Val Acc:    46.7%   | UAR: 42.7%
----------------------------------------


In [None]:
def run_baseline_epoch(model, loader, optimizer, device, is_train=True):
    if is_train:
        model.train()
    else:
        model.eval()

    total_loss = 0
    all_preds = []
    all_labels = []

    # Barra de progreso
    desc = "Baseline Train" if is_train else "Baseline Val"
    pbar = tqdm(loader, desc=desc, leave=False)

    criterion = nn.CrossEntropyLoss()

    for _, mel_spec, labels in pbar: 
        # NOTA: Ignoramos 'raw_audio' aqu√≠, el alumno solo ve 'mel_spec'
        mel_spec, labels = mel_spec.to(device), labels.to(device)

        with torch.set_grad_enabled(is_train):
            # 1. Forward
            logits = model(mel_spec)

            # 2. C√°lculo de Loss (Solo contra la etiqueta real)
            loss = criterion(logits, labels)

            # 3. Backward
            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            total_loss += loss.item()
            pbar.set_postfix({'loss': loss.item()})

            # Guardar predicciones para m√©tricas
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    # M√©tricas finales del epoch
    acc = accuracy_score(all_labels, all_preds)
    uar = recall_score(all_labels, all_preds, average='macro')
    avg_loss = total_loss / len(loader)

    return avg_loss, acc, uar

In [7]:
# --- CONFIGURACI√ìN Y REINICIO ---
print(f"\nüìâ INICIANDO BASELINE: VanillaCNN Solitaria (Sin Teacher)")
print("-" * 60)

# 1. Instanciamos una VanillaCNN fresca (pesos aleatorios)
baseline_model = VanillaCNN(num_classes=CONFIG['num_classes']).to(DEVICE)

# 2. Optimizador (Mismo LR que en destilaci√≥n para ser justos)
optimizer_base = optim.Adam(baseline_model.parameters(), lr=CONFIG['lr'])

# 3. Variables para guardar el mejor
best_base_acc = 0.0
best_base_uar = 0.0

# --- BUCLE DE ENTRENAMIENTO ---
for epoch in range(CONFIG['epochs']):
    # Train
    train_loss, train_acc, train_uar = run_baseline_epoch(
        baseline_model, train_loader, optimizer_base, DEVICE, is_train=True
    )

    # Val
    _, val_acc, val_uar = run_baseline_epoch(
        baseline_model, val_loader, optimizer_base, DEVICE, is_train=False
    )

    print(f"Epoch {epoch+1}/{CONFIG['epochs']}")
    print(f"  Train Loss: {train_loss:.4f} | Acc: {train_acc:.1%}")
    print(f"  Val Acc:    {val_acc:.1%}   | UAR: {val_uar:.1%}")

    # Guardar si mejora
    if val_uar > best_base_uar:
        best_base_uar = val_uar
        best_base_acc = val_acc
        torch.save(baseline_model.state_dict(), "best_baseline_vanilla.pth")
        print("  üíæ Mejor Baseline guardado.")
    
    print("-" * 40)

print(f"\nüìä RESUMEN FINAL:")
print(f"Mejor Baseline UAR: {best_base_uar:.1%}")


üìâ INICIANDO BASELINE: VanillaCNN Solitaria (Sin Teacher)
------------------------------------------------------------


NameError: name 'run_baseline_epoch' is not defined