# 🧪 Late Fusion Evaluation - HuBERT + WavLM "Dream Team"

Valuta l'ensemble Late Fusion combinando:
- **HuBERT Large**: Best PER (8.84%)
- **WavLM Weighted**: Best AUC (0.8523)

**Formula**: `final_logits = α × logits_HuBERT + (1-α) × logits_WavLM`

In [None]:
import os, sys, zipfile, glob, shutil

def detect_environment():
    """Identify the runtime hosting this notebook.

    Returns:
        'colab' when the COLAB_GPU variable is set or the google.colab
        module is already loaded; 'kaggle' when the working directory
        contains '/kaggle' or KAGGLE_KERNEL_RUN_TYPE is set; 'local'
        otherwise. The Colab check takes precedence.
    """
    on_colab = 'COLAB_GPU' in os.environ or 'google.colab' in sys.modules
    if on_colab:
        return 'colab'

    on_kaggle = '/kaggle' in os.getcwd() or 'KAGGLE_KERNEL_RUN_TYPE' in os.environ
    if on_kaggle:
        return 'kaggle'

    return 'local'

# Detect the runtime once; the setup cells below branch on this value.
ENV = detect_environment()
print(f'üñ•Ô∏è Ambiente: {ENV.upper()}')

In [None]:
# Colab setup: mount Google Drive, unpack the project archive, then enter the project.
if ENV == 'colab':
    from google.colab import drive
    drive.mount('/content/drive')

    PROJECT_DIR = '/content/DeepLearning-Phoneme'
    MODELS_DIR = '/content/drive/MyDrive/phoneme_checkpoints'
    ZIP_PATH = '/content/drive/MyDrive/DeepLearning-Phoneme.zip'

    # Fail fast when the archive is not present on Drive.
    if not os.path.exists(ZIP_PATH):
        raise FileNotFoundError(ZIP_PATH)

    with zipfile.ZipFile(ZIP_PATH, 'r') as archive:
        archive.extractall('/content')
    print('‚úì Extracted')

    # Make the project the working directory and importable.
    os.chdir(PROJECT_DIR)
    sys.path.insert(0, PROJECT_DIR)

In [None]:
# Kaggle setup: clone the project into the working directory and list the attached inputs.
if ENV == 'kaggle':
    PROJECT_DIR = '/kaggle/working/pronuncIAtion'
    # NOTE(review): nothing visible in this notebook reassigns MODELS_DIR later;
    # use the listing printed below to adjust it (or the CONFIG paths) by hand.
    MODELS_DIR = '/kaggle/input'

    if not os.path.exists(PROJECT_DIR):
        import subprocess
        # check=True surfaces a failed clone as CalledProcessError right here,
        # instead of an unrelated FileNotFoundError from os.chdir() below.
        subprocess.run(
            ['git', 'clone', 'https://github.com/maurocarlu/pronuncIAtion.git', PROJECT_DIR],
            check=True,
        )

    # Make the project the working directory and importable.
    os.chdir(PROJECT_DIR)
    sys.path.insert(0, PROJECT_DIR)
    print(f'‚úì Kaggle ready: {PROJECT_DIR}')

    # Show the entries under /kaggle/input so model paths can be chosen.
    print('\nüì¶ Available Kaggle inputs:')
    for d in os.listdir('/kaggle/input'):
        print(f'  - {d}')

In [None]:
# Local setup: derive the project root from the current working directory.
if ENV == 'local':
    PROJECT_DIR = os.getcwd()
    # Step up one level only when launched from the notebooks/ folder itself.
    # A substring test on the whole path would also match unrelated ancestors
    # (e.g. ~/notebooks/my_project) and move the root one level too high.
    if os.path.basename(PROJECT_DIR) == 'notebooks':
        PROJECT_DIR = os.path.dirname(PROJECT_DIR)
    MODELS_DIR = f'{PROJECT_DIR}/outputs'

# The statements below run in every environment, not only for local runs.
os.chdir(PROJECT_DIR)
sys.path.insert(0, PROJECT_DIR)
print(f'üìÅ Project: {PROJECT_DIR}')
print(f'ü§ñ Models: {MODELS_DIR}')

In [None]:
import subprocess

# Install the third-party packages with the interpreter running this notebook.
pip_cmd = [
    sys.executable, '-m', 'pip', 'install', '-q',
    'transformers', 'datasets', 'evaluate', 'jiwer', 'soundfile',
    'librosa', 'scikit-learn', 'scipy', 'safetensors',
]
pip_result = subprocess.run(pip_cmd)
# Stay best-effort (packages may already be installed), but do not hide a
# failed install: report the exit code and carry on.
if pip_result.returncode != 0:
    print(f'pip install exited with code {pip_result.returncode}; '
          'continuing with the packages already installed')

import torch
print(f'üî• PyTorch {torch.__version__}, CUDA: {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'üìä GPU: {torch.cuda.get_device_name(0)}')

## ⚙️ Configuration - Model Paths

**IMPORTANTE**: Modifica i path sotto per puntare ai tuoi modelli trainati.

In [None]:
# Edit these paths so they point at your own trained models.
CONFIG = dict(
    # Path to the trained HuBERT Large model
    hubert_path=f'{MODELS_DIR}/hubert_large/final_model_hubert',
    # Path to the trained WavLM Weighted model
    wavlm_path=f'{MODELS_DIR}/final_model_weighted',
    # Weights to test (alpha for HuBERT, 1 - alpha for WavLM)
    weights=[0.3, 0.5, 0.7],
    # Output report file
    output_report=f'{PROJECT_DIR}/docs/fusion_benchmark_results.md',
)

# Echo the configuration; entries whose key contains 'path' also get an
# on-disk existence marker.
print('üìã Configuration:')
for key, value in CONFIG.items():
    if 'path' not in key:
        print(f'  ‚Ä¢ {key}: {value}')
        continue
    status = '‚úì' if os.path.exists(value) else '‚úó NOT FOUND'
    print(f'  {status} {key}: {value}')

## 🔍 Check Available Models

In [None]:
# List the directories under MODELS_DIR and mark those that contain a
# config.json, either directly or one level down.
print(f'ü§ñ Available models in {MODELS_DIR}:')

# isdir (rather than exists) keeps a plain file from reaching os.listdir().
if os.path.isdir(MODELS_DIR):
    for item in sorted(os.listdir(MODELS_DIR)):
        path = os.path.join(MODELS_DIR, item)
        if not os.path.isdir(path):
            continue

        # config.json is used here as the marker of a model directory.
        has_config = os.path.exists(os.path.join(path, 'config.json'))
        marker = '‚úì' if has_config else 'üìÇ'
        print(f'  {marker} {item}')

        # Also report sub-directories that carry their own config.json.
        # Sorted like the outer loop: os.listdir() order is arbitrary.
        for sub in sorted(os.listdir(path)):
            sub_path = os.path.join(path, sub)
            if os.path.isdir(sub_path) and os.path.exists(os.path.join(sub_path, 'config.json')):
                print(f'      ‚úì {item}/{sub}')
else:
    print(f'‚ùå Directory non esiste: {MODELS_DIR}')

## 🧪 Late Fusion Evaluation

Esegue benchmark su SpeechOcean762 con diversi pesi α:
- **TASK A**: ASR Robustness (PER su high quality)
- **TASK B**: Scoring Correlation (Pearson/Spearman)
- **TASK C**: Mispronunciation Detection (AUC-ROC)

**Record da battere**: AUC = 0.8552

In [None]:
# Weight sweep: launch the fusion evaluation script with every weight in CONFIG.
import subprocess

# Space-separated rendering of the weights.
# NOTE(review): nothing in this cell reads weights_str; kept because it is a
# module-level name that other cells could rely on.
weights_str = ' '.join(map(str, CONFIG['weights']))

# Each weight is passed as its own argument after --weights.
cmd = [
    sys.executable, 'scripts/evaluation/evaluate_hubert_fusion.py',
    '--model-hubert', CONFIG['hubert_path'],
    '--model-wavlm', CONFIG['wavlm_path'],
    '--weights',
    *map(str, CONFIG['weights']),
]

# Only the first five tokens of the command are echoed.
print(f'üöÄ Esecuzione: {" ".join(cmd[:5])}...')
print(f'   Pesi: {CONFIG["weights"]}')
print('=' * 70)

# The child's output is not captured.
result = subprocess.run(cmd, capture_output=False)

if result.returncode != 0:
    print(f'\n‚ùå Errore codice {result.returncode}')
else:
    print('\n‚úÖ Benchmark completato!')

## 🎯 Single Weight Evaluation

Per testare un singolo peso specifico:

In [None]:
# Evaluate a single fusion weight.
SINGLE_WEIGHT = 0.5  # <- edit this value

import subprocess

single_cmd = [
    sys.executable, 'scripts/evaluation/evaluate_hubert_fusion.py',
    '--model-hubert', CONFIG['hubert_path'],
    '--model-wavlm', CONFIG['wavlm_path'],
    '--weight', str(SINGLE_WEIGHT),
]
# The child's output is not captured.
result = subprocess.run(single_cmd, capture_output=False)

if result.returncode == 0:
    print('‚úÖ Done!')
else:
    print(f'‚ùå Errore {result.returncode}')

## 📊 Qualitative Analysis

Genera un report dei casi dove HuBERT e WavLM divergono.

In [None]:
# Qualitative error analysis: run the model-gap script and report the outcome.
import subprocess

gap_cmd = [
    sys.executable, 'scripts/evaluation/analyze_model_gap.py',
    '--model-hubert', CONFIG['hubert_path'],
    '--model-wavlm', CONFIG['wavlm_path'],
    '--output-report', CONFIG['output_report'],
    '--max-examples', '300',
]
# The child's output is not captured.
result = subprocess.run(gap_cmd, capture_output=False)

if result.returncode != 0:
    print(f'\n‚ùå Errore {result.returncode}')
else:
    print(f'\n‚úÖ Report salvato: {CONFIG["output_report"]}')

In [None]:
# Preview the generated report: print at most the first 3000 characters.
report_path = CONFIG['output_report']
if not os.path.exists(report_path):
    print('‚ùå Report non trovato')
else:
    with open(report_path, 'r', encoding='utf-8') as f:
        content = f.read()
    print(content[:3000])
    # Signal that the preview was cut short.
    if len(content) > 3000:
        print('\n... [truncated]')

## 💾 Download Results

In [None]:
# Hand the qualitative report to the user: browser download on Colab, copy
# into /kaggle/working/ on Kaggle. Nothing happens in other environments or
# when the report file does not exist.
report_path = CONFIG['output_report']
if os.path.exists(report_path):
    if ENV == 'colab':
        from google.colab import files
        files.download(report_path)
        print('‚úì Download avviato')
    elif ENV == 'kaggle':
        shutil.copy(report_path, '/kaggle/working/')
        print('‚úì Copiato in /kaggle/working/')

## 📈 Manual Results Entry

Usa questa cella per registrare manualmente i risultati se necessario.

In [None]:
# Manual results tracking
def format_metric(value, spec, suffix=''):
    """Render one table cell for a manually entered metric.

    Args:
        value: The metric value, or None when it has not been recorded yet.
        spec: Format specification applied to the value (e.g. '.4f').
        suffix: Text appended after the formatted number (e.g. '%').

    Returns:
        'TBD' when value is None, otherwise the formatted number plus suffix.
        Only None counts as missing, so a genuine 0.0 is shown as a number.
    """
    if value is None:
        return 'TBD'
    return f'{value:{spec}}{suffix}'


RESULTS = {
    # Format: weight -> {per, pearson, auc}; None means "not recorded yet"
    0.3: {'per': None, 'pearson': None, 'auc': None},
    0.5: {'per': None, 'pearson': None, 'auc': None},
    0.7: {'per': None, 'pearson': None, 'auc': None},
}

# Display table
print('üìä LATE FUSION RESULTS')
print('=' * 50)
print(f'{"Weight":^10} | {"PER":^10} | {"Pearson":^10} | {"AUC-ROC":^10}')
print('-' * 50)
for w, r in RESULTS.items():
    per = format_metric(r['per'], '.2f', '%')
    pear = format_metric(r['pearson'], '.4f')
    auc = format_metric(r['auc'], '.4f')
    print(f'{w:^10} | {per:^10} | {pear:^10} | {auc:^10}')
print('=' * 50)
print('\nüèÜ Record AUC da battere: 0.8552')