# **Reproducing M1+M2 results (Table 3)**

<a target="_blank" href="https://colab.research.google.com/github/blackswan-advitamaeternam/HVAE/blob/raph/paper_experiments/Table3_exp.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>

## **Colab setup**

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive.
drive.mount('/content/drive')
# Work from /content rather than from the mounted Drive, so that the data
# does not end up stored on your Drive.
%cd /content

In [None]:
!git clone https://github.com/blackswan-advitamaeternam/HVAE.git
%cd HVAE
!git checkout raph
!pip install -r requirements.txt

The following cells enable automatic reloading of edited modules (`autoreload`).

In [None]:
!pip install --upgrade ipython

In [None]:
import sys
# Compatibility shim: when the `imp` module cannot be imported, register an
# empty placeholder module under that name so that a later `import imp`
# does not raise ImportError.
# NOTE(review): the placeholder has no attributes, so any code that actually
# calls into `imp` would still fail — presumably only the import itself is
# needed for autoreload to load; confirm.
try:
    import imp
except ImportError:
    import types
    sys.modules['imp'] = types.ModuleType('imp')

In [None]:
%load_ext autoreload
%autoreload 2

## **Imports**

In [None]:
import os
import sys

# Register the cloned repository on the import path (once) so that the
# project's own packages can be imported from this notebook.
path_to_repo = "/content/HVAE"
already_registered = path_to_repo in sys.path
if not already_registered:
    sys.path.append(path_to_repo)

In [None]:
import numpy as np
import pandas as pd 
import torch
from tqdm.auto import tqdm
from sklearn.metrics import accuracy_score

from svae.vae import M1_M2, predict_classes_loader, cluster_acc
from svae.training import training_M1M2
from svae.utils import ShuffledLoader

from paper_experiments.load_MNIST import make_splits_loaders_MNIST

Select the compute device (GPU if CUDA is available, otherwise CPU).

In [None]:
# Use CUDA when it is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")

In [None]:
# Build the MNIST train / validation / test loaders on a subset of the data.
#
# os.cpu_count() can return None when the core count cannot be determined,
# and 80% of a single core truncates to 0: fall back to one core and always
# keep at least one worker.
# NOTE(review): prefetch_factor / persistent_workers are presumably forwarded
# to torch's DataLoader, which only accepts them when num_workers > 0 — confirm.
NUM_WORKERS = max(1, int(0.8 * (os.cpu_count() or 1)))
FRAC = 0.1
# Despite the *_FRAC names, these are absolute sample counts passed as
# train_size / val_size / test_size, not fractions.
TRAIN_FRAC = int(50000 * FRAC)
VAL_FRAC = int(10000 * FRAC)
TEST_FRAC = None # potentially still test on all test samples
print(f"Using {NUM_WORKERS} workers.")
train_loader, val_loader, test_loader = make_splits_loaders_MNIST(
    train_size=TRAIN_FRAC,
    val_size=VAL_FRAC,
    test_size=TEST_FRAC,
    batch_size=100,
    test_batch_size=100,
    num_workers=NUM_WORKERS,
    prefetch_factor=2,
    force=True,
    persistent_workers=True,
    pin_memory=True,
)

In [None]:
# Materialise every batch on DEVICE once, up front, instead of transferring
# batches at each epoch (assumes the subset fits in device memory on Colab).
def _batches_on_device(loader):
    """Return the loader's batches as a list, with each element moved to DEVICE."""
    moved = []
    for batch in loader:
        moved.append([tensor.to(DEVICE) for tensor in batch])
    return moved


train_batches = _batches_on_device(train_loader)
val_batches = _batches_on_device(val_loader)
test_batches = _batches_on_device(test_loader)

# Only the training batches are wrapped, to get shuffling across and within
# batches; validation and test batches are used as plain lists.
train_loader = ShuffledLoader(train_batches,
                              shuffle_batches=True,
                              shuffle_within_batch=True,
                              device_for_randperm=DEVICE)
val_loader, test_loader = val_batches, test_batches

We will only test on $N = 100$ samples, i.e. the first test batch (`test_batch_size=100`).

In [None]:
# Keep only the first test batch (a single-element list of batches).
test_loader = list(test_loader)[:1]

## **Configuration**

In [None]:
# Directory (on the mounted Drive) where the result CSV files are written.
# NOTE(review): this notebook reproduces Table 3 but writes under "Table2/" —
# confirm that this directory is the intended one.
base_path = "/content/drive/MyDrive/HVAE/Table2/"
# Create the directory if needed; an existing directory is not an error.
os.makedirs(base_path, exist_ok=True)

In [None]:
# Settings shared by every run in this notebook.
EPOCHS = 500  # passed as `epochs` to training_M1M2
INPUT_DIM = 784  # input dimension given to M1_M2 (presumably flattened 28x28 MNIST images — confirm)
HIDDEN_DIM = 500  # hidden dimension given to M1_M2
N_CLUSTERS = 10  # given to M1_M2 after the two latent sizes
LATENT_MODE = 'sample'  # forwarded to predict_classes_loader
WARMUP = None  # passed as `warmup` to training_M1M2
PATIENCE = 50  # passed as `patience` to training_M1M2 (presumably early stopping — confirm)
ONE_LAYER = True  # last positional argument of M1_M2
LR = 1e-3  # learning rate of the Adam optimizer

# Number of repeated train/test runs per (latent1, latent2) setting.
N_RUNS = 5

In [None]:
def run_and_test(mode1, mode2, latent1, latent2, test_dataloader):
    """Train one M1+M2 model and return its clustering accuracy on the test data.

    The model is built with ``M1_M2``, trained with ``training_M1M2`` on the
    module-level ``train_loader`` / ``val_loader``, and evaluated with
    ``predict_classes_loader`` followed by ``cluster_acc``.

    Args:
        mode1: Mode of the first model; ``"svae"`` is logged as SVAE, any
            other value (``"normal"`` in this notebook) as NVAE.
        mode2: Mode of the second model, same convention as ``mode1``.
        latent1: Latent size passed to ``M1_M2`` for the first model.
        latent2: Latent size passed to ``M1_M2`` for the second model.
        test_dataloader: Iterable of test batches given to
            ``predict_classes_loader``.

    Returns:
        The value returned by ``cluster_acc`` (printed as a percentage after
        multiplying by 100).
    """
    # Build the log tag from BOTH modes. The previous version tested `mode1`
    # twice and, because a conditional expression binds looser than `+`,
    # dropped the first half of the tag whenever mode1 was not "svae".
    label1 = "SVAE" if mode1 == "svae" else "NVAE"
    label2 = "SVAE" if mode2 == "svae" else "NVAE"
    addon = f"[{label1} + {label2}]"

    print(f"\n{addon} Instantiating SVAE and optimizer..")
    model = M1_M2(mode1,
                  mode2,
                  INPUT_DIM,
                  HIDDEN_DIM,
                  latent1,
                  latent2,
                  N_CLUSTERS,
                  ONE_LAYER,
                  )

    # To device
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    print(f"{addon} Started training..")
    # Only the trained model is needed here; the returned losses and loss
    # parts are discarded.
    model, _losses, _all_parts = training_M1M2(train_loader,
                                               val_loader,
                                               model,
                                               optimizer,
                                               epochs=EPOCHS,
                                               beta_kl=1,
                                               warmup=WARMUP,
                                               alpha=0.1,
                                               patience=PATIENCE,
                                               show_loss_every=1)

    print(f"\n{addon} Predicting classes..")
    Y, Y_hat = predict_classes_loader(model, test_dataloader, LATENT_MODE)

    test_acc = cluster_acc(Y, Y_hat)
    print(f"{addon} Test accuracy: {test_acc*100:.2f}")
    return test_acc

In [None]:
def launch_experiment(mode1, mode2, test_dataloader):
    """Run the latent-size grid for one (mode1, mode2) pair and tabulate the results.

    For every combination of ``latent1`` and ``latent2`` in ``[5, 10, 50]``,
    ``run_and_test`` is called ``N_RUNS`` times and the NaN-ignoring mean and
    standard deviation of the returned accuracies are recorded.

    Args:
        mode1: Mode of the first model, forwarded to ``run_and_test``.
        mode2: Mode of the second model, forwarded to ``run_and_test``.
        test_dataloader: Test batches, forwarded to ``run_and_test``.

    Returns:
        A ``pandas.DataFrame`` with one row per (latent1, latent2) pair and
        the columns "Latent M1", "Latent M2", "Model", "Accuracy", "Std",
        "N_test".
    """
    records = {"Latent M1": [], "Latent M2": [], "Model": [],
               "Accuracy": [], "Std": [], "N_test": []}
    for latent1 in tqdm([5, 10, 50], desc="Exploring latent1.."):
        for latent2 in tqdm([5, 10, 50], desc="Exploring latent2.."):
            accuracy_list = []
            # tqdm can size its bar from a range directly; no list needed.
            for i in tqdm(range(N_RUNS), desc="Repeated runs.."):
                # The banner previously contained a mis-encoded degree sign.
                print(f"\nSTARTING RUN n°{i+1}")
                accuracy_list.append(
                    run_and_test(mode1, mode2, latent1, latent2, test_dataloader))
            avg_acc = np.nanmean(accuracy_list)
            std_acc = np.nanstd(accuracy_list)

            records["Latent M1"].append(latent1)
            records["Latent M2"].append(latent2)
            records["Model"].append(mode1 + " + " + mode2)
            records["Accuracy"].append(avg_acc)
            records["Std"].append(std_acc)
            # NOTE(review): hard-coded; it is not derived from test_dataloader,
            # so it is only right when exactly 100 test samples are passed in.
            records["N_test"].append(100)
    return pd.DataFrame(records)

## **SVAE + SVAE**

In [None]:
# Run the latent-size grid with mode1="svae" and mode2="svae"
# (N_RUNS repeated runs per setting).
results_df_svae_svae = launch_experiment("svae", "svae", test_loader)

In [None]:
# Write the SVAE + SVAE table under base_path, then show it as the cell output.
svae_svae_csv = f"{base_path}SVAE_SVAE_M1M2_results.csv"
results_df_svae_svae.to_csv(svae_svae_csv)
results_df_svae_svae

## **NVAE + NVAE**

In [None]:
# Run the latent-size grid with mode1="normal" and mode2="normal"
# (N_RUNS repeated runs per setting).
results_df_nvae_nvae = launch_experiment("normal", "normal", test_loader)

In [None]:
# Write the NVAE + NVAE table under base_path, then show it as the cell output.
nvae_nvae_csv = f"{base_path}NVAE_NVAE_M1M2_results.csv"
results_df_nvae_nvae.to_csv(nvae_nvae_csv)
results_df_nvae_nvae

## **SVAE + NVAE**

In [None]:
# Run the latent-size grid with mode1="svae" and mode2="normal"
# (N_RUNS repeated runs per setting).
results_df_svae_nvae = launch_experiment("svae", "normal", test_loader)

In [None]:
# Write the SVAE + NVAE table under base_path, then show it as the cell output.
svae_nvae_csv = f"{base_path}SVAE_NVAE_M1M2_results.csv"
results_df_svae_nvae.to_csv(svae_nvae_csv)
results_df_svae_nvae

## **NVAE + SVAE**

In [None]:
# Run the latent-size grid with mode1="normal" and mode2="svae"
# (N_RUNS repeated runs per setting).
results_df_nvae_svae = launch_experiment("normal", "svae", test_loader)

In [None]:
# Write the NVAE + SVAE table under base_path, then show it as the cell output.
nvae_svae_csv = f"{base_path}NVAE_SVAE_M1M2_results.csv"
results_df_nvae_svae.to_csv(nvae_svae_csv)
results_df_nvae_svae