# RespiraHub — Trial 7: Multimodal (Audio + Clinical Anamnesis)

**Insight:** All six audio-only trials converged at 0.69–0.73 AUROC. Zambia study: audio+clinical = 0.921 vs audio-only = 0.852 (+0.069 AUROC, roughly +7 points). Time to add clinical features.

**Approach:**
1. Reuse dual embeddings from Trial 6 (Wav2Vec2 768d + HeAR 1024d = 1792d)
2. Encode clinical features from CODA TB metadata (~15-20 features)
3. Concatenate: 1792d audio + Nd clinical = combined
4. Train MLP classifier

**Available clinical features (almost zero missing!):**
- Demographics: sex, age, height, weight (→ BMI)
- Symptoms: reported_cough_dur, hemoptysis, weight_loss, fever, night_sweats
- History: tb_prior, tb_prior_Pul, tb_prior_Extrapul, smoke_lweek
- Vitals: heart_rate, temperature
- HIV: HIVstatus (3 missing)
- Context: Country

**Target:** 0.78+ (beat DREAM Challenge winner 0.743 decisively)

---
## Cell 1: Setup

In [None]:
import os, json, random
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, roc_curve
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import warnings
# Silence every library warning so the cell output stays readable.
warnings.filterwarnings('ignore')

# Seed the Python, NumPy and PyTorch generators with the same value.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(42)

# Backend preference order: Apple MPS first, then CUDA, then plain CPU.
DEVICE = torch.device(
    'mps' if torch.backends.mps.is_available()
    else 'cuda' if torch.cuda.is_available()
    else 'cpu'
)

print(f'PyTorch {torch.__version__}, Device: {DEVICE}')

---
## Cell 2: Load Audio Embeddings from Trial 6

In [None]:
# Load the pre-extracted dual embeddings produced by Trial 6.
# SECURITY NOTE: weights_only=False makes torch.load unpickle arbitrary Python
# objects, so only ever point this at a file you generated yourself.
data = torch.load('dual_embeddings.pt', weights_only=False)
audio_embeddings = data['combined_embeddings']  # expected (2216, 1792)
all_labels = data['labels']  # expected (2216,)
all_pids = data['pids']  # expected (2216,)

# Later cells index these three containers with one shared boolean mask, so a
# length mismatch would silently pair segments with the wrong label/patient.
if not (len(audio_embeddings) == len(all_labels) == len(all_pids)):
    raise ValueError(
        'dual_embeddings.pt is inconsistent: '
        f'{len(audio_embeddings)} embeddings, {len(all_labels)} labels, '
        f'{len(all_pids)} pids'
    )

AUDIO_DIM = audio_embeddings.shape[1]
print(f'Audio embeddings: {audio_embeddings.shape}')
print(f'Segments: {len(audio_embeddings)}')
print(f'Patients: {len(np.unique(all_pids))}')
print(f'Audio dim: {AUDIO_DIM}')

---
## Cell 3: Load & Engineer Clinical Features

In [None]:
CLINICAL_PATH = '/Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_Clinical_Meta_Info.csv'
ADDITIONAL_PATH = '/Users/aida/code/development/tb-datasets/data/metadata/CODA_TB_additional_variables_train.csv'

clinical = pd.read_csv(CLINICAL_PATH)
additional = pd.read_csv(ADDITIONAL_PATH)
# Left join: every row of the clinical table is kept even when the additional
# table has no entry for that participant.
meta = clinical.merge(additional, on='participant', how='left')

print(f'Clinical data: {meta.shape[0]} participants, {meta.shape[1]} columns')
print(f'TB+: {meta["tb_status"].sum()}, TB-: {(meta["tb_status"]==0).sum()}')

# === Feature Engineering ===

# Binary encode Yes/No columns: 'Yes' -> 1.0, anything else (including a
# missing value) -> 0.0.
binary_cols = ['hemoptysis', 'weight_loss', 'fever', 'night_sweats',
               'smoke_lweek', 'tb_prior', 'tb_prior_Pul', 'tb_prior_Extrapul']
for col in binary_cols:
    meta[f'{col}_bin'] = (meta[col] == 'Yes').astype(float)

# Sex: Male=1, everything else=0
meta['sex_bin'] = (meta['sex'] == 'Male').astype(float)

# HIV: Positive=1, Negative=0, missing/other=0.5 (neutral imputation)
meta['hiv_bin'] = meta['HIVstatus'].map({'Positive': 1.0, 'Negative': 0.0}).fillna(0.5)

# BMI = weight / height^2; the /100 assumes height is recorded in cm and
# weight in kg -- TODO confirm against the data dictionary.
meta['bmi'] = meta['weight'] / ((meta['height'] / 100) ** 2)
# A height of 0 produces +/-inf, which fillna would not catch; treat it as
# missing so it receives the median like any other gap.
meta['bmi'] = meta['bmi'].replace([np.inf, -np.inf], np.nan)
meta['bmi'] = meta['bmi'].fillna(meta['bmi'].median())

# Cough duration buckets (clinical relevance: >2 weeks is TB red flag).
# A missing duration compares False and therefore lands in the 0 bucket.
meta['cough_gt14d'] = (meta['reported_cough_dur'] > 14).astype(float)
meta['cough_gt30d'] = (meta['reported_cough_dur'] > 30).astype(float)

# Country one-hot (important: different TB strains, recording conditions)
country_dummies = pd.get_dummies(meta['Country'], prefix='country').astype(float)
meta = pd.concat([meta, country_dummies], axis=1)
# Take the names straight from the dummies frame instead of prefix-matching
# every column of meta, so an unrelated 'country_*' column in the source CSVs
# can never leak into the feature set.
country_cols = list(country_dummies.columns)

# Symptom count (composite score)
symptom_cols_bin = ['hemoptysis_bin', 'weight_loss_bin', 'fever_bin', 'night_sweats_bin']
meta['symptom_count'] = meta[symptom_cols_bin].sum(axis=1)

# === Define final feature set ===
# Order matters: the training cells standardise the first
# len(CONTINUOUS_FEATURES) columns of every clinical vector.
CONTINUOUS_FEATURES = ['age', 'bmi', 'heart_rate', 'temperature',
                       'reported_cough_dur', 'symptom_count']

BINARY_FEATURES = ['sex_bin', 'hiv_bin',
                   'hemoptysis_bin', 'weight_loss_bin', 'fever_bin', 'night_sweats_bin',
                   'smoke_lweek_bin', 'tb_prior_bin', 'tb_prior_Pul_bin', 'tb_prior_Extrapul_bin',
                   'cough_gt14d', 'cough_gt30d']

ALL_CLINICAL_FEATURES = CONTINUOUS_FEATURES + BINARY_FEATURES + country_cols
CLINICAL_DIM = len(ALL_CLINICAL_FEATURES)

print(f'\n=== Clinical Features ({CLINICAL_DIM} total) ===')
print(f'Continuous ({len(CONTINUOUS_FEATURES)}): {CONTINUOUS_FEATURES}')
print(f'Binary ({len(BINARY_FEATURES)}): {BINARY_FEATURES}')
print(f'Country ({len(country_cols)}): {country_cols}')

# Check missing
missing = meta[ALL_CLINICAL_FEATURES].isnull().sum()
if missing.sum() > 0:
    print(f'\nMissing values:')
    print(missing[missing > 0])
    # Only the raw continuous columns can still hold NaN here (every derived
    # column above is NaN-free by construction). A NaN input would propagate
    # through the network and turn the loss into NaN, so fill with the column
    # median, mirroring the BMI handling above.
    meta[CONTINUOUS_FEATURES] = meta[CONTINUOUS_FEATURES].fillna(
        meta[CONTINUOUS_FEATURES].median())
    print('Imputed missing continuous features with the column median.')
else:
    print(f'\nNo missing values!')

# Build participant -> clinical feature vector mapping in one vectorised
# conversion; if a participant id appears more than once the last row wins.
feature_matrix = meta[ALL_CLINICAL_FEATURES].to_numpy(dtype=np.float32)
clinical_vectors = dict(zip(meta['participant'], feature_matrix))

print(f'\nClinical vectors built for {len(clinical_vectors)} participants')
print(f'Vector dim: {CLINICAL_DIM}')

---
## Cell 4: Combine Audio + Clinical

In [None]:
# Give every segment the clinical vector of its patient (all segments of one
# patient share the same vector). Segments whose pid has no clinical record
# get a zero placeholder and are flagged so they can be dropped below.
valid_mask = [pid in clinical_vectors for pid in all_pids]
clinical_per_segment = [
    clinical_vectors[pid] if has_record else np.zeros(CLINICAL_DIM, dtype=np.float32)
    for pid, has_record in zip(all_pids, valid_mask)
]

clinical_tensor = torch.tensor(np.array(clinical_per_segment), dtype=torch.float32)
valid_mask = np.array(valid_mask)
n_missing = (~valid_mask).sum()

print(f'Segments with clinical data: {valid_mask.sum()}/{len(valid_mask)}')
print(f'Missing clinical: {n_missing} segments')

# Keep only the segments that have clinical data, applying the same mask to
# all four parallel containers so they stay aligned.
if n_missing > 0:
    audio_embeddings = audio_embeddings[valid_mask]
    clinical_tensor = clinical_tensor[valid_mask]
    all_labels = all_labels[valid_mask]
    all_pids = all_pids[valid_mask]
    print(f'After filtering: {len(audio_embeddings)} segments')

COMBINED_DIM = AUDIO_DIM + CLINICAL_DIM
print(f'\nAudio: {AUDIO_DIM}d + Clinical: {CLINICAL_DIM}d = Combined: {COMBINED_DIM}d')

---
## Cell 5: Classifier + Config

In [None]:
class MultimodalClassifier(nn.Module):
    """Late-fusion network with one encoder per modality and a shared head.

    The audio encoder maps ``audio_dim`` -> 256 -> 128, the clinical encoder
    maps ``clinical_dim`` -> 64 -> 64, and the fusion head maps the 192
    concatenated features -> 64 -> 1 raw logit.
    """

    def __init__(self, audio_dim, clinical_dim, dropout=0.3):
        super().__init__()
        audio_out, clinical_out = 128, 64

        # Audio encoder: normalise, then compress to ``audio_out`` features.
        audio_layers = [
            nn.LayerNorm(audio_dim),
            nn.Dropout(dropout),
            nn.Linear(audio_dim, 256),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(256, audio_out),
            nn.GELU(),
        ]
        self.audio_branch = nn.Sequential(*audio_layers)

        # Clinical encoder: lift the small feature vector to ``clinical_out``.
        clinical_layers = [
            nn.LayerNorm(clinical_dim),
            nn.Linear(clinical_dim, 64),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(64, clinical_out),
            nn.GELU(),
        ]
        self.clinical_branch = nn.Sequential(*clinical_layers)

        # Fusion head over the concatenation (128 + 64 = 192 features).
        fused_dim = audio_out + clinical_out
        fusion_layers = [
            nn.LayerNorm(fused_dim),
            nn.Dropout(dropout),
            nn.Linear(fused_dim, 64),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1),
        ]
        self.fusion = nn.Sequential(*fusion_layers)

    def forward(self, audio_emb, clinical_feat):
        """Return the raw logits with the trailing size-1 dimension squeezed."""
        encoded = (self.audio_branch(audio_emb), self.clinical_branch(clinical_feat))
        fused = torch.cat(encoded, dim=-1)  # (B, 192)
        logits = self.fusion(fused)
        return logits.squeeze(-1)

class MultimodalDataset(Dataset):
    """Per-segment dataset pairing audio, clinical features, label and pid.

    ``labels`` is converted to a float32 tensor; the other three containers
    are stored as given and indexed directly.
    """

    def __init__(self, audio_emb, clinical_feat, labels, pids):
        self.audio, self.clinical = audio_emb, clinical_feat
        self.pids = pids
        self.labels = torch.tensor(labels, dtype=torch.float32)

    def __len__(self):
        return len(self.audio)

    def __getitem__(self, idx):
        # One entry per field, all read at the same position.
        fields = (
            ('audio', self.audio),
            ('clinical', self.clinical),
            ('label', self.labels),
            ('pid', self.pids),
        )
        return {key: store[idx] for key, store in fields}

# Training hyperparameters shared by every fold.
BATCH_SIZE, LR = 64, 5e-4
EPOCHS, PATIENCE = 50, 7
N_FOLDS = 10

# Instantiate a throwaway model purely to count its parameters for the
# summary printed below, then discard it.
m = MultimodalClassifier(AUDIO_DIM, CLINICAL_DIM)
n_params = sum(weight.numel() for weight in m.parameters())
del m

print(f'=== Trial 7 Config ===')
print(f'Audio dim: {AUDIO_DIM}, Clinical dim: {CLINICAL_DIM}')
print(f'Architecture: Audio({AUDIO_DIM}→256→128) + Clinical({CLINICAL_DIM}→64→64) → Fusion(192→64→1)')
print(f'Total params: {n_params:,} ({n_params/1e3:.1f}K)')
print(f'Batch: {BATCH_SIZE}, LR: {LR}, Epochs: {EPOCHS}, Patience: {PATIENCE}')

---
## Cell 6: Training Function

In [None]:
def train_one_fold(fold_num, tr_audio, tr_clin, tr_lab, tr_pid,
                   va_audio, va_clin, va_lab, va_pid, scaler):
    """Train a MultimodalClassifier on one CV fold and keep its best epoch.

    Args:
        fold_num: Fold identifier; only used in the checkpoint file name.
        tr_audio, va_audio: Audio embeddings of the train / validation segments.
        tr_clin, va_clin: Clinical features as CPU torch tensors (they are
            cloned and converted with ``.numpy()``). The first
            ``len(CONTINUOUS_FEATURES)`` columns are standardised.
        tr_lab, va_lab: Segment labels (converted to float32 by the dataset).
        tr_pid, va_pid: Participant id of each segment.
        scaler: Object exposing ``fit_transform`` / ``transform``; it is fitted
            on the training rows only and then applied to the validation rows.

    Returns:
        ``(best_auroc, best_patient_logits)``: the highest patient-level
        validation AUROC seen, and a dict mapping participant id to that
        epoch's mean per-segment *probability* (sigmoid output, despite the
        historical "logits" name).

    NOTE(review): the best epoch is chosen on the same validation fold whose
    AUROC is returned, so the reported value is an optimistic estimate.
    """
    # Standardise the continuous columns (the leading n_cont columns), fitting
    # on the training rows only so no validation statistics leak in.
    n_cont = len(CONTINUOUS_FEATURES)
    tr_clin_scaled = tr_clin.clone()
    va_clin_scaled = va_clin.clone()
    tr_cont_scaled = scaler.fit_transform(tr_clin[:, :n_cont].numpy())
    va_cont_scaled = scaler.transform(va_clin[:, :n_cont].numpy())
    tr_clin_scaled[:, :n_cont] = torch.tensor(tr_cont_scaled, dtype=torch.float32)
    va_clin_scaled[:, :n_cont] = torch.tensor(va_cont_scaled, dtype=torch.float32)

    train_ds = MultimodalDataset(tr_audio, tr_clin_scaled, tr_lab, tr_pid)
    val_ds = MultimodalDataset(va_audio, va_clin_scaled, va_lab, va_pid)
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False)

    model = MultimodalClassifier(AUDIO_DIM, CLINICAL_DIM).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=0.01)
    criterion = nn.BCEWithLogitsLoss()
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS, eta_min=1e-6)

    # Create the checkpoint directory once instead of on every improvement.
    os.makedirs('checkpoints_t7', exist_ok=True)
    checkpoint_path = f'checkpoints_t7/multimodal_fold{fold_num}.pt'

    best_auroc = 0
    best_patient_logits = {}
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        train_loss = 0
        for batch in train_loader:
            audio = batch['audio'].to(DEVICE)
            clin = batch['clinical'].to(DEVICE)
            labels = batch['label'].to(DEVICE)
            optimizer.zero_grad()
            loss = criterion(model(audio, clin), labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
        train_loss /= len(train_loader)
        scheduler.step()

        # Validation: collect per-segment probabilities with their pid/label.
        model.eval()
        seg_probs, seg_labels, seg_pids = [], [], []
        with torch.no_grad():
            for batch in val_loader:
                audio = batch['audio'].to(DEVICE)
                clin = batch['clinical'].to(DEVICE)
                probs = torch.sigmoid(model(audio, clin)).cpu().numpy()
                seg_probs.extend(probs)
                seg_labels.extend(batch['label'].numpy())
                batch_pids = batch['pid']
                # Numeric pids are collated into a tensor, and tensors hash by
                # identity, which would give every segment its own dict key.
                # Plain Python scalars group correctly; string pids arrive as
                # a list and pass through unchanged.
                if torch.is_tensor(batch_pids):
                    batch_pids = batch_pids.tolist()
                seg_pids.extend(batch_pids)

        # Aggregate segments to patient level by averaging probabilities.
        pt_p, pt_l = {}, {}
        for pid, prob, lab in zip(seg_pids, seg_probs, seg_labels):
            pt_p.setdefault(pid, []).append(prob)
            pt_l[pid] = lab
        patient_probs = {pid: np.mean(v) for pid, v in pt_p.items()}

        yt = np.array([pt_l[p] for p in patient_probs])
        yp = np.array(list(patient_probs.values()))
        # AUROC is undefined when the fold holds a single class; fall back to chance.
        auroc = roc_auc_score(yt, yp) if len(np.unique(yt)) > 1 else 0.5

        improved = ''
        # The second condition guarantees the first epoch is always recorded,
        # so a fold can never return an empty prediction dict.
        if auroc > best_auroc or not best_patient_logits:
            best_auroc = auroc
            best_patient_logits = patient_probs
            torch.save(model.state_dict(), checkpoint_path)
            patience_counter = 0
            improved = ' *'
        else:
            patience_counter += 1

        if (epoch + 1) % 5 == 0 or improved or patience_counter >= PATIENCE:
            print(f'  Epoch {epoch+1}/{EPOCHS} — loss: {train_loss:.4f}, AUROC: {auroc:.4f}{improved}')

        if patience_counter >= PATIENCE:
            print(f'  Early stopping at epoch {epoch+1}')
            break

    return best_auroc, best_patient_logits

print('Training function ready.')

---
## Cell 7: Run 10-Fold CV

In [None]:
# One label per patient: the label of that patient's first segment.
unique_pids = np.unique(all_pids)
pid_labels = np.array([all_labels[all_pids == pid][0] for pid in unique_pids])

# Cap the fold count at 80% of the smallest class size so that every fold can
# contain patients from both classes.
n_folds_actual = min(N_FOLDS, int(min(Counter(pid_labels).values()) * 0.8))
skf = StratifiedKFold(n_splits=n_folds_actual, shuffle=True, random_state=42)

fold_aurocs = []
all_patient_logits = {}
all_patient_labels = {}

print(f'=== TRIAL 7: Multimodal (Audio + Clinical) ===')
# The loaded embeddings are the combined Wav2Vec2+HeAR vectors, not Wav2Vec2 alone.
print(f'Audio: Wav2Vec2+HeAR ({AUDIO_DIM}d) + Clinical ({CLINICAL_DIM}d)')
print(f'Segments: {len(audio_embeddings)} @ 2s')
print(f'Patients: {len(unique_pids)}')
print(f'Folds: {n_folds_actual}, Device: {DEVICE}')
print(f'\n--- Benchmarks ---')
print(f'DREAM winner (audio):    0.743')
print(f'Trial 6 (dual audio):    0.722')
print(f'Zambia (audio+clinical): 0.921')
print(f'\nStarting training...\n')

# Folds are split at patient level, so all segments of one patient land on the
# same side of the split.
for fold, (train_idx, val_idx) in enumerate(skf.split(unique_pids, pid_labels)):
    print(f'=== Fold {fold+1}/{n_folds_actual} ===')

    train_pids_set = set(unique_pids[train_idx])
    val_pids_set = set(unique_pids[val_idx])

    tr_mask = np.array([pid in train_pids_set for pid in all_pids])
    va_mask = ~tr_mask

    tr_audio = audio_embeddings[tr_mask]
    tr_clin = clinical_tensor[tr_mask]
    tr_lab = all_labels[tr_mask]
    tr_pid = all_pids[tr_mask]
    va_audio = audio_embeddings[va_mask]
    va_clin = clinical_tensor[va_mask]
    va_lab = all_labels[va_mask]
    va_pid = all_pids[va_mask]

    print(f'  Train: {len(tr_audio)} segments ({len(train_pids_set)} patients)')
    print(f'  Val:   {len(va_audio)} segments ({len(val_pids_set)} patients)')

    # A fresh scaler per fold, fitted inside train_one_fold on training rows only.
    scaler = StandardScaler()
    auroc, patient_logits = train_one_fold(
        fold+1, tr_audio, tr_clin, tr_lab, tr_pid,
        va_audio, va_clin, va_lab, va_pid, scaler
    )

    fold_aurocs.append(auroc)
    all_patient_logits.update(patient_logits)
    # val_idx indexes unique_pids and pid_labels in lockstep, so the labels can
    # be read directly instead of re-scanning unique_pids for every patient.
    all_patient_labels.update(zip(unique_pids[val_idx], pid_labels[val_idx]))

    print(f'  \u2705 Fold {fold+1} best AUROC: {auroc:.4f}\n')

mean_auroc = np.mean(fold_aurocs)
std_auroc = np.std(fold_aurocs)

print('=' * 60)
print(f'TRIAL 7 RESULT (Multimodal: Audio + Clinical)')
print(f'Mean AUROC: {mean_auroc:.4f} +/- {std_auroc:.4f}')
print(f'Per-fold: {[f"{a:.3f}" for a in fold_aurocs]}')
print(f'\n--- Full Comparison ---')
print(f'Trial 1 (W2V audio):       0.718')
print(f'Trial 5 (HeAR audio):      0.719')
print(f'Trial 6 (dual audio):      0.722')
print(f'Trial 7 (audio+clinical):  {mean_auroc:.4f} +/- {std_auroc:.4f}')
print(f'DREAM winner (audio):      0.743')
print(f'Zambia (audio+clinical):   0.921')
delta = mean_auroc - 0.722
print(f'\nDelta vs audio-only:  {delta:+.4f}')
beat_dream = mean_auroc > 0.743
# Build the message outside the f-string: a backslash escape inside an
# f-string replacement field is a SyntaxError before Python 3.12.
verdict = '\u2705 BEAT DREAM CHALLENGE!' if beat_dream else '\u26a0\ufe0f Did not beat DREAM Challenge'
print(verdict)

---
## Cell 8: ROC + Threshold Analysis

In [None]:
# Pool the out-of-fold patient predictions from every fold into one ROC curve.
pids_order = sorted(all_patient_logits.keys())
y_true = np.array([all_patient_labels[p] for p in pids_order])
y_prob = np.array([all_patient_logits[p] for p in pids_order])

fpr_t7, tpr_t7, _ = roc_curve(y_true, y_prob)
auroc_t7 = roc_auc_score(y_true, y_prob)

plt.figure(figsize=(8, 7))
plt.plot(fpr_t7, tpr_t7, 'r-', lw=2.5, label=f'Trial 7 Multimodal ({auroc_t7:.3f})')
plt.plot([0,1],[0,1],'k--', alpha=0.2)
# Target zone: sensitivity >= 0.90 and FPR <= 0.30 (specificity >= 0.70).
# Drawn with fill_between so the rectangle is in data coordinates; axhspan's
# xmin/xmax are axes fractions, which with the default axis margins would not
# line up with the FPR = 0.30 guide line below.
plt.fill_between([0, 0.30], 0.90, 1.0, alpha=0.08, color='green', label='WHO TPP zone')
plt.axhline(0.90, color='r', ls=':', alpha=0.3)
plt.axvline(0.30, color='g', ls=':', alpha=0.3)
plt.xlabel('FPR (1 - Specificity)', fontsize=12)
plt.ylabel('TPR (Sensitivity)', fontsize=12)
plt.title('RespiraHub Trial 7 \u2014 Audio + Clinical Anamnesis', fontsize=14)
plt.legend(fontsize=11); plt.grid(alpha=0.15)
plt.tight_layout()
plt.savefig('roc_trial7.png', dpi=150)
plt.show()

In [None]:
# Sweep decision thresholds over the pooled patient-level probabilities and
# tabulate sensitivity, specificity, PPV and NPV for each one.
print(f'{"Thresh":>7} {"Sens":>7} {"Spec":>7} {"PPV":>7} {"NPV":>7}')
print('-' * 40)

best_t, best_j = 0.5, -1
who_met = False

is_pos = (y_true == 1)
is_neg = (y_true == 0)

for t in np.arange(0.10, 0.90, 0.05):
    called_pos = y_prob >= t
    called_neg = ~called_pos

    # Confusion-matrix counts at this threshold.
    tp = np.sum(called_pos & is_pos)
    tn = np.sum(called_neg & is_neg)
    fp = np.sum(called_pos & is_neg)
    fn = np.sum(called_neg & is_pos)

    # Each ratio falls back to 0 when its denominator is empty.
    sens = tp / (tp + fn) if (tp + fn) else 0
    spec = tn / (tn + fp) if (tn + fp) else 0
    ppv = tp / (tp + fp) if (tp + fp) else 0
    npv = tn / (tn + fn) if (tn + fn) else 0

    # Row is flagged when sensitivity >= 0.90 and specificity >= 0.70.
    meets_who = sens >= 0.90 and spec >= 0.70
    flag = ' \u2705 WHO' if meets_who else ''
    if meets_who:
        who_met = True

    # Youden's J; keep the first threshold that attains the maximum.
    j = sens + spec - 1
    if j > best_j:
        best_j, best_t = j, t

    print(f'{t:>7.2f} {sens:>7.3f} {spec:>7.3f} {ppv:>7.3f} {npv:>7.3f}{flag}')

print(f'\nBest Youden threshold: {best_t:.2f}')
print('\u2705 WHO TPP achievable with multimodal!' if who_met
      else '\u26a0\ufe0f WHO TPP not yet met. Next: Indonesian data + domain adaptation.')

---
## Cell 9: Clinical-Only Baseline (important for paper)

In [None]:
# Compare: how much does audio actually contribute?
# Train same classifier but with ONLY clinical features (no audio)

class ClinicalOnlyClassifier(nn.Module):
    """MLP baseline that sees only clinical features: ``clinical_dim`` -> 64 -> 32 -> 1."""

    def __init__(self, clinical_dim, dropout=0.3):
        super().__init__()
        layers = [
            nn.LayerNorm(clinical_dim),
            nn.Linear(clinical_dim, 64),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Dropout(0.2),
            nn.Linear(32, 1),
        ]
        self.head = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw logits with the trailing size-1 dimension squeezed."""
        logits = self.head(x)
        return logits.squeeze(-1)

print('=== Clinical-Only Baseline ===')
clin_fold_aurocs = []

# Re-using skf (fixed random_state) over the same inputs walks the same
# patient-level folds as the multimodal run.
for fold, (train_idx, val_idx) in enumerate(skf.split(unique_pids, pid_labels)):
    train_pids_set = set(unique_pids[train_idx])

    tr_mask = np.array([pid in train_pids_set for pid in all_pids])
    va_mask = ~tr_mask

    tr_clin = clinical_tensor[tr_mask].clone()
    va_clin = clinical_tensor[va_mask].clone()

    # Standardise the leading continuous columns, fitting on train rows only.
    scaler = StandardScaler()
    n_cont = len(CONTINUOUS_FEATURES)
    tr_cont = scaler.fit_transform(tr_clin[:, :n_cont].numpy())
    va_cont = scaler.transform(va_clin[:, :n_cont].numpy())
    tr_clin[:, :n_cont] = torch.tensor(tr_cont, dtype=torch.float32)
    va_clin[:, :n_cont] = torch.tensor(va_cont, dtype=torch.float32)

    tr_lab, va_lab = all_labels[tr_mask], all_labels[va_mask]
    va_pid = all_pids[va_mask]

    model = ClinicalOnlyClassifier(CLINICAL_DIM).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=0.01)
    criterion = nn.BCEWithLogitsLoss()

    tr_dataset = torch.utils.data.TensorDataset(
        tr_clin, torch.tensor(tr_lab, dtype=torch.float32))
    tr_loader = DataLoader(tr_dataset, batch_size=64, shuffle=True)

    best_auroc = 0
    patience = 0  # epochs elapsed since the last AUROC improvement
    for epoch in range(100):
        model.train()
        for batch_clin, batch_lab in tr_loader:
            optimizer.zero_grad()
            batch_logits = model(batch_clin.to(DEVICE))
            loss = criterion(batch_logits, batch_lab.to(DEVICE))
            loss.backward()
            optimizer.step()

        # Score the whole validation split in a single forward pass.
        model.eval()
        with torch.no_grad():
            va_probs = torch.sigmoid(model(va_clin.to(DEVICE))).cpu().numpy()

        # Patient-level: average the segment probabilities of each patient.
        pt_p, pt_l = {}, {}
        for pid, prob, lab in zip(va_pid, va_probs, va_lab):
            pt_p.setdefault(pid, []).append(prob)
            pt_l[pid] = lab
        yt = np.array([pt_l[p] for p in pt_p])
        yp = np.array([np.mean(v) for v in pt_p.values()])
        auroc = roc_auc_score(yt, yp) if len(np.unique(yt)) > 1 else 0.5

        # Early stopping: give up after 10 consecutive epochs without improvement.
        if auroc > best_auroc:
            best_auroc, patience = auroc, 0
            continue
        patience += 1
        if patience >= 10:
            break

    clin_fold_aurocs.append(best_auroc)
    del model, optimizer

print(f'Clinical-only AUROC: {np.mean(clin_fold_aurocs):.4f} +/- {np.std(clin_fold_aurocs):.4f}')
print(f'Per-fold: {[f"{a:.3f}" for a in clin_fold_aurocs]}')
print(f'\n=== Contribution Analysis ===')
print(f'Audio-only (Trial 6):    {0.722:.4f}')
print(f'Clinical-only:           {np.mean(clin_fold_aurocs):.4f}')
print(f'Audio + Clinical (T7):   {np.mean(fold_aurocs):.4f}')
print(f'\nAudio adds:    {np.mean(fold_aurocs) - np.mean(clin_fold_aurocs):+.4f} over clinical-only')
print(f'Clinical adds: {np.mean(fold_aurocs) - 0.722:+.4f} over audio-only')

---
## Cell 10: Save

In [None]:
# Per-patient out-of-fold predictions, one row per participant.
results_df = pd.DataFrame({
    'participant': pids_order,
    'true_label': y_true,
    'predicted_prob': y_prob,
})
results_df.to_csv('patient_predictions_trial7.csv', index=False)

# Machine-readable run summary; numpy scalars are cast to plain floats so the
# json module can serialise them.
multimodal_mean = float(np.mean(fold_aurocs))
clinical_mean = float(np.mean(clin_fold_aurocs))
summary = {
    'trial': 7,
    'approach': 'Multimodal: Wav2Vec2+HeAR audio embeddings + clinical features',
    'audio_dim': AUDIO_DIM,
    'clinical_dim': CLINICAL_DIM,
    'clinical_features': ALL_CLINICAL_FEATURES,
    'n_participants': len(pids_order),
    'n_segments': len(audio_embeddings),
    'n_folds': n_folds_actual,
    'auroc_mean': round(multimodal_mean, 4),
    'auroc_std': round(float(np.std(fold_aurocs)), 4),
    'auroc_per_fold': [round(float(a), 4) for a in fold_aurocs],
    'clinical_only_auroc': round(clinical_mean, 4),
    'audio_only_auroc': 0.722,
    'best_threshold': round(float(best_t), 2),
    'device': str(DEVICE),
}
with open('training_summary_trial7.json', 'w') as f:
    f.write(json.dumps(summary, indent=2))

print('Saved:')
for artifact in ('patient_predictions_trial7.csv',
                 'training_summary_trial7.json',
                 'checkpoints_t7/multimodal_fold*.pt',
                 'roc_trial7.png'):
    print(f'  {artifact}')
print()
print('=' * 60)
print(f'TRIAL 7 COMPLETE')
print(f'Audio-only best:  0.722')
print(f'Clinical-only:    {clinical_mean:.4f}')
print(f'Multimodal:       {multimodal_mean:.4f}')
print(f'DREAM winner:     0.743')
print(f'Zambia multimodal: 0.921')