# **Reproducing Unsupervised results (Table 1)**

<a target="_blank" href="https://colab.research.google.com/github/blackswan-advitamaeternam/HVAE/blob/raph/paper_experiments/Table1_exp.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>

## **Colab setup**

In [None]:
# Mount Google Drive at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# Work from /content (outside the mounted Drive) to avoid having the data on your Drive.
%cd /content

In [None]:
# Clone the HVAE repository, enter it, switch to the `raph` branch and install
# the packages listed in its requirements.txt.
!git clone https://github.com/blackswan-advitamaeternam/HVAE.git
%cd HVAE
!git checkout raph
!pip install -r requirements.txt

Enable automatic reloading of edited modules (IPython `autoreload`)

In [None]:
# Upgrade IPython before the autoreload extension is loaded further down.
# NOTE(review): presumably the preinstalled IPython is too old for autoreload on this runtime - confirm.
!pip install --upgrade ipython

In [None]:
import sys
# If `imp` cannot be imported (it was removed from the standard library in
# Python 3.12), register an empty placeholder module under that name so that a
# later `import imp` elsewhere does not raise ImportError.
# NOTE(review): the placeholder has no attributes; presumably only the import
# itself has to succeed (e.g. for autoreload) - confirm.
try:
    import imp
except ImportError:
    import types
    sys.modules['imp'] = types.ModuleType('imp')

In [None]:
# Load IPython's autoreload extension; mode 2 reloads all modules before each
# cell is executed, so edits to the package are picked up without a restart.
%load_ext autoreload
%autoreload 2

## **Imports**

In [None]:
import sys, os
# To ensure the custom package is found
# (the guard keeps sys.path free of duplicates when the cell is re-run)
path_to_repo = "/content/HVAE"
if path_to_repo not in sys.path:
    sys.path.append(path_to_repo)

In [None]:
import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm

from svae.vae import SVAE, GaussianVAE
from svae.training import training
from paper_experiments.load_MNIST import load_mnist, ShuffledLoader

Setting device

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

In [None]:
# make splits
# Use roughly 80% of the available CPUs for data loading. os.cpu_count() returns
# None when the count cannot be determined, so fall back to a single CPU instead
# of failing on `0.8 * None`.
NUM_WORKERS = int(0.8 * (os.cpu_count() or 1))
FRAC = 1.0  # Use full dataset for Table 1
# Despite their names, the *_FRAC values below are absolute sample counts.
TRAIN_FRAC = int(50000 * FRAC)
VAL_FRAC = int(10000 * FRAC)
# NOTE(review): None presumably means "use the whole test split" - confirm in load_mnist.
TEST_FRAC = None
print(f"Using {TRAIN_FRAC} train samples, {VAL_FRAC} val samples")

# Build the train / validation / test loaders with the sizes chosen above.
# NOTE(review): `binarize=True` presumably yields binary pixel values - confirm in load_mnist.
train_loader, val_loader, test_loader = load_mnist(
    train_size=TRAIN_FRAC,
    val_size=VAL_FRAC,
    test_size=TEST_FRAC,
    batch_size=64,
    num_workers=NUM_WORKERS,
    binarize=True
)

In [None]:
def _materialize(loader):
    """Iterate `loader` once and return its batches with every element moved to DEVICE."""
    return [[item.to(DEVICE) for item in batch] for batch in loader]


# Materialize every batch in the memory of the device
train_batches = _materialize(train_loader)
val_batches = _materialize(val_loader)
test_batches = _materialize(test_loader)

# Re-wrap the in-memory batches: only the training data is shuffled
train_loader = ShuffledLoader(
    train_batches,
    shuffle_batches=True,
    shuffle_within=True,
)
val_loader = ShuffledLoader(
    val_batches,
    shuffle_batches=False,
    shuffle_within=False,
)
test_loader = ShuffledLoader(
    test_batches,
    shuffle_batches=False,
    shuffle_within=False,
)

## **Configuration**

In [None]:
# Output directory on the mounted Drive; created if it does not exist yet.
# The trailing slash matters: file names are appended with plain string concatenation.
base_path = "/content/drive/MyDrive/HVAE/Table1_results/"
os.makedirs(base_path, exist_ok=True)

In [None]:
# Hyperparameters matching paper (Section F.1)
EPOCHS = 500  # passed as `epochs` to `training`
INPUT_DIM = 784  # 784 = 28 * 28; presumably one flattened MNIST image - confirm in load_mnist
HIDDEN_DIM = 128  # Paper: [256, 128] encoder, [128, 256] decoder

PATIENCE = 50  # passed as `patience` to `training`
WARMUP = 100  # Paper: linear warm-up for 100 epochs
ONE_LAYER = False  # passed as `one_layer` to the model constructors
LR = 1e-3  # learning rate of the Adam optimizer
BETA_KL = 1.0  # passed as `beta_kl` to `training`

N_RUNS = 10  # Paper uses 10 runs
N_LL_SAMPLES = 500  # Paper uses 500 importance samples for LL estimation

# Latent dimensions swept by the experiment
LATENT_DIMS = [2, 5, 10, 20, 40]

## **Evaluation Functions**

In [None]:
def compute_metrics(model, test_tensor, N_ll_samples=500):
    """
    Evaluate a trained model on `test_tensor` and collect the Table 1 quantities.

    Args:
        model: object exposing `eval()`, `total_marginal_ll(x, N=..., reduced=...)`
            and `full_step(x, beta_kl=...)`.
        test_tensor: data handed unchanged to both model calls.
        N_ll_samples: value forwarded as `N` to `total_marginal_ll`.

    Returns:
        dict with keys 'LL', 'ELBO', 'RE', 'KL' holding Python scalars, where
        RE is the negated 'recon' part returned by `full_step`, KL is its 'kl'
        part, and ELBO = RE - KL (the paper's L[q]).
    """
    model.eval()

    # Both model calls run with gradient tracking disabled.
    with torch.no_grad():
        log_likelihood = model.total_marginal_ll(
            test_tensor, N=N_ll_samples, reduced='mean'
        )
        _, loss_parts = model.full_step(test_tensor, beta_kl=1.0)

    # Sign flip so that RE is reported the way the paper does (a negative number
    # whenever 'recon' is a positive loss).
    recon_term = -loss_parts['recon'].item()
    kl_term = loss_parts['kl'].item()

    return {
        'LL': log_likelihood.item(),
        'ELBO': recon_term - kl_term,
        'RE': recon_term,
        'KL': kl_term,
    }

In [None]:
def train_and_evaluate(mode, latent_dim, train_loader, val_loader, test_tensor, n_ll_samples):
    """
    Build one model, train it, and return its Table 1 metrics.

    Args:
        mode: 'svae' builds an SVAE; any other value (normally 'normal')
            builds a GaussianVAE.
        latent_dim: dimension of the latent space.
        train_loader: loader passed to `training` as `dataloader`.
        val_loader: loader passed to `training` as `val_dataloader`.
        test_tensor: data the trained model is evaluated on.
        n_ll_samples: number of importance samples passed to `compute_metrics`.

    Returns:
        dict with LL, ELBO, RE, KL as produced by `compute_metrics`.
    """
    is_svae = mode == "svae"
    addon = "[SVAE]" if is_svae else "[NVAE]"
    print(f"\n{addon} Training with latent_dim={latent_dim}..")

    # Both model classes take the same constructor arguments, so only the class differs.
    # NOTE(review): mode='mnist' presumably selects a BCE reconstruction loss - confirm in svae.vae.
    model_cls = SVAE if is_svae else GaussianVAE
    model = model_cls(
        input_dim=INPUT_DIM,
        hidden_dim=HIDDEN_DIM,
        latent_dim=latent_dim,
        one_layer=ONE_LAYER,
        mode='mnist',
    )
    model.to(DEVICE)

    adam = torch.optim.Adam(model.parameters(), lr=LR)

    # Only the trained model is needed afterwards; the loss history is discarded.
    model, _losses, _all_parts = training(
        dataloader=train_loader,
        val_dataloader=val_loader,
        model=model,
        optimizer=adam,
        epochs=EPOCHS,
        beta_kl=BETA_KL,
        warmup=WARMUP,
        patience=PATIENCE,
        show_loss_every=50,
    )

    print(f"{addon} Computing metrics with {n_ll_samples} importance samples..")
    metrics = compute_metrics(model, test_tensor, N_ll_samples=n_ll_samples)
    print(f"{addon} LL={metrics['LL']:.2f}, ELBO={metrics['ELBO']:.2f}, RE={metrics['RE']:.2f}, KL={metrics['KL']:.2f}")

    return metrics

In [None]:
def run_table1_experiment(latent_dims, n_runs, n_ll_samples, train_loader, val_loader, test_loader):
    """
    Train and evaluate both model families for every latent dimension.

    For each latent dimension and each of the two modes ('normal', then 'svae'),
    `train_and_evaluate` is called `n_runs` times and the resulting metrics are
    summarized by their mean and (population) standard deviation.

    Args:
        latent_dims: iterable of latent dimensions to sweep.
        n_runs: number of independent trainings per (method, dimension) pair.
        n_ll_samples: number of importance samples forwarded to the evaluation.
        train_loader: training loader forwarded to `train_and_evaluate`.
        val_loader: validation loader forwarded to `train_and_evaluate`.
        test_loader: iterable of batches; the first element of every batch is
            concatenated along dim 0 into one test tensor.

    Returns:
        DataFrame with one row per (method, dimension): 'Method', 'd', the
        formatted "mean±std" columns 'LL', 'L[q]', 'RE', 'KL', followed by the
        raw '<metric>_mean' / '<metric>_std' columns.
    """
    metric_keys = ('LL', 'ELBO', 'RE', 'KL')
    # Name of the formatted column for each metric (only ELBO is renamed).
    display_column = {'LL': 'LL', 'ELBO': 'L[q]', 'RE': 'RE', 'KL': 'KL'}
    banner = '=' * 50

    # Full test set as a single tensor on the target device
    test_tensor = torch.cat([batch[0] for batch in test_loader], dim=0).to(DEVICE)
    print(f"Test tensor shape: {test_tensor.shape}")

    rows = []

    for latent_dim in tqdm(latent_dims, desc="Latent dimensions"):
        for mode in ["normal", "svae"]:
            model_name = "S-VAE" if mode == "svae" else "N-VAE"
            collected = {key: [] for key in metric_keys}

            for run in tqdm(range(n_runs), desc=f"{model_name} d={latent_dim}", leave=False):
                print(f"\n{banner}")
                print(f"RUN {run+1}/{n_runs} | {model_name} | d={latent_dim}")
                print(banner)

                metrics = train_and_evaluate(
                    mode=mode,
                    latent_dim=latent_dim,
                    train_loader=train_loader,
                    val_loader=val_loader,
                    test_tensor=test_tensor,
                    n_ll_samples=n_ll_samples,
                )
                for key, values in collected.items():
                    values.append(metrics[key])

            # Mean / std across runs, computed once per metric
            stats = {key: (np.mean(values), np.std(values)) for key, values in collected.items()}

            row = {'Method': model_name, 'd': latent_dim}
            # Human-readable "mean±std" columns first ...
            for key in metric_keys:
                mean, std = stats[key]
                row[display_column[key]] = f"{mean:.2f}±{std:.2f}"
            # ... then the raw values for analysis
            for key in metric_keys:
                mean, std = stats[key]
                row[f"{key}_mean"] = mean
                row[f"{key}_std"] = std
            rows.append(row)

    return pd.DataFrame(rows)

## **Run Experiment**

In [None]:
# Run the full sweep: every latent dimension in LATENT_DIMS, both model
# families, N_RUNS trainings each. The summary table ends up in `results_df`.
results_df = run_table1_experiment(
    latent_dims=LATENT_DIMS,
    n_runs=N_RUNS,
    n_ll_samples=N_LL_SAMPLES,
    train_loader=train_loader,
    val_loader=val_loader,
    test_loader=test_loader
)

In [None]:
# Save results as CSV (without the index column) in the output directory
results_df.to_csv(base_path + "Table1_results.csv", index=False)
print(f"Results saved to {base_path}Table1_results.csv")

## **Display Results (Table 1 Format)**

In [None]:
# Format as paper's Table 1: keep only the formatted "mean±std" columns
# (the raw *_mean / *_std columns are left out) and show the frame as cell output.
display_df = results_df[['Method', 'd', 'LL', 'L[q]', 'RE', 'KL']].copy()
display_df

In [None]:
# Side-by-side comparison like in the paper: N-VAE and S-VAE rows per latent dimension
def format_table1_paper_style(df, latent_dims=None):
    """Print the results as a fixed-width text table laid out like the paper's Table 1.

    Args:
        df: DataFrame with a 'Method' column ('N-VAE' / 'S-VAE'), a 'd' column
            and the pre-formatted string columns 'LL', 'L[q]', 'RE' and 'KL'.
        latent_dims: optional iterable of latent dimensions to print, in order.
            Defaults to the distinct values of df['d'] in order of first
            appearance, so the function no longer depends on a global list.

    Returns:
        None; the table is written to stdout.
    """
    if latent_dims is None:
        # dict.fromkeys de-duplicates while preserving first-seen order
        latent_dims = list(dict.fromkeys(df['d'].tolist()))

    per_method = {
        name: df[df['Method'] == name].set_index('d')
        for name in ('N-VAE', 'S-VAE')
    }

    print("="*100)
    print("TABLE 1: Summary of results (mean and standard-deviation over runs) of unsupervised model on MNIST")
    print("="*100)
    # 12 characters wide so the separators line up with the "<method> d=<dim>" row prefix
    print(f"{'Method':<12} | {'LL':^20} | {'L[q]':^20} | {'RE':^20} | {'KL':^15}")
    print("-"*100)

    for d in latent_dims:
        for name, table in per_method.items():
            if d not in table.index:
                # No result for this method / dimension: skip the row instead of raising
                continue
            print(f"{name:<7} d={d:<2} | {table.loc[d, 'LL']:^20} | {table.loc[d, 'L[q]']:^20} | {table.loc[d, 'RE']:^20} | {table.loc[d, 'KL']:^15}")
        print("-"*100)

# Print the text table for the results computed above
format_table1_paper_style(results_df)

## **Generate LaTeX Table**

In [None]:
def generate_latex_table(df, n_runs=None, latent_dims=None):
    """Generate LaTeX code for Table 1.

    Args:
        df: DataFrame with a 'Method' column ('N-VAE' / 'S-VAE'), a 'd' column
            and the pre-formatted string columns 'LL', 'L[q]', 'RE' and 'KL'.
        n_runs: number of runs quoted in the caption. Defaults to the
            notebook-level N_RUNS.
        latent_dims: optional iterable of latent dimensions to include, in
            order. Defaults to the distinct values of df['d'] in order of
            first appearance.

    Returns:
        str: a complete table environment (the rules used are the booktabs ones).
        Method / dimension pairs without a row in `df` are left out.
    """
    if n_runs is None:
        n_runs = N_RUNS
    if latent_dims is None:
        # dict.fromkeys de-duplicates while preserving first-seen order
        latent_dims = list(dict.fromkeys(df['d'].tolist()))
    else:
        latent_dims = list(latent_dims)

    latex = r"""
\begin{table}[h]
\centering
\caption{Summary of results (mean and standard-deviation over """ + str(n_runs) + r""" runs) of unsupervised model on MNIST.}
\begin{tabular}{ll|cccc}
\toprule
Method & $d$ & LL & $\mathcal{L}[q]$ & RE & KL \\
\midrule
"""

    last_position = len(latent_dims) - 1
    for position, d in enumerate(latent_dims):
        for method in ['N-VAE', 'S-VAE']:
            matches = df[(df['Method'] == method) & (df['d'] == d)]
            if matches.empty:
                # No result for this method / dimension: skip instead of raising IndexError
                continue
            row = matches.iloc[0]
            prefix = r"$\mathcal{N}$" if method == 'N-VAE' else r"$\mathcal{S}$"

            latex += f"{prefix}-VAE & {d} & {row['LL']} & {row['L[q]']} & {row['RE']} & {row['KL']} \\\\\n"

        # Separate the groups of different latent dimensions, but not after the last one
        if position != last_position:
            latex += r"\midrule" + "\n"

    # The closing rule must directly follow the last row: a blank line inside
    # the tabular body starts a new cell and makes the rule fail to compile.
    latex += r"""\bottomrule
\end{tabular}
\end{table}
"""
    return latex

# Build the LaTeX source from the results and show it
latex_table = generate_latex_table(results_df)
print(latex_table)

# Save LaTeX. The formatted cells contain the non-ASCII character "±", so the
# file encoding is fixed explicitly instead of relying on the platform default.
with open(base_path + "Table1_latex.tex", 'w', encoding="utf-8") as f:
    f.write(latex_table)
print(f"\nLaTeX saved to {base_path}Table1_latex.tex")

## **Quick Comparison with Paper**

In [None]:
# Paper's Table 1 results for reference
# Layout: paper_results[method][latent_dim] -> {'LL', 'ELBO', 'RE', 'KL'} (means only).
# NOTE(review): values were entered by hand - verify against the paper before relying on them.
paper_results = {
    'N-VAE': {
        2: {'LL': -135.73, 'ELBO': -137.08, 'RE': -129.84, 'KL': 7.24},
        5: {'LL': -110.21, 'ELBO': -112.98, 'RE': -100.16, 'KL': 12.82},
        10: {'LL': -93.84, 'ELBO': -98.36, 'RE': -78.93, 'KL': 19.44},
        20: {'LL': -88.90, 'ELBO': -94.79, 'RE': -71.29, 'KL': 23.50},
        40: {'LL': -88.93, 'ELBO': -94.91, 'RE': -71.14, 'KL': 23.77}
    },
    'S-VAE': {
        2: {'LL': -132.50, 'ELBO': -133.72, 'RE': -126.43, 'KL': 7.28},
        5: {'LL': -108.43, 'ELBO': -111.19, 'RE': -97.84, 'KL': 13.35},
        10: {'LL': -93.16, 'ELBO': -97.70, 'RE': -77.03, 'KL': 20.67},
        20: {'LL': -89.02, 'ELBO': -96.15, 'RE': -67.65, 'KL': 28.50},
        40: {'LL': -90.87, 'ELBO': -101.26, 'RE': -67.75, 'KL': 33.50}
    }
}

# For every latent dimension and method, print our mean next to the paper's
# value together with the difference (ours - paper).
# Only LL and RE are compared; the ELBO and KL reference values are not printed.
print("Comparison: Our Results vs Paper")
print("="*80)
for d in LATENT_DIMS:
    for method in ['N-VAE', 'S-VAE']:
        # First (expected: only) result row for this method / dimension
        our = results_df[(results_df['Method'] == method) & (results_df['d'] == d)].iloc[0]
        paper = paper_results[method][d]

        print(f"{method} d={d}:")
        print(f"  LL:   Ours={our['LL_mean']:.2f} | Paper={paper['LL']:.2f} | Δ={our['LL_mean']-paper['LL']:.2f}")
        print(f"  RE:   Ours={our['RE_mean']:.2f} | Paper={paper['RE']:.2f} | Δ={our['RE_mean']-paper['RE']:.2f}")
    print("-"*80)