# **Reproducing M1 results (Table 2)**

<a target="_blank" href="https://colab.research.google.com/github/blackswan-advitamaeternam/HVAE/blob/main/Table2_exp.ipynb"> <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/> </a>

## **Colab setup**

In [None]:
# Mount Google Drive at /content/drive (Colab only).
from google.colab import drive
drive.mount('/content/drive')
# Work from /content, outside the Drive mount, to avoid having the data on your drive
%cd /content

In [None]:
# Fetch the project sources and make the clone the working directory.
!git clone https://github.com/blackswan-advitamaeternam/HVAE.git
%cd HVAE
# The code used below is taken from the `raph` branch.
!git checkout raph
# Install the dependencies listed in the repository's requirements.txt.
!pip install -r requirements.txt

## **Imports**

In [None]:
import os

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score
from tqdm import tqdm

from svae.vae import M1, predict_classes_loader
from svae.training import training_M1

from paper_experiments.load_MNIST import make_splits_loaders_MNIST

In [None]:
# make splits
# os.cpu_count() returns None when the CPU count cannot be determined;
# fall back to 1 so the integer division below cannot raise a TypeError.
NUM_WORKERS = (os.cpu_count() or 1) // 2
train_loader, val_loader, test_loader = make_splits_loaders_MNIST(num_workers=NUM_WORKERS, pin_memory=True)

## **Configuration**

In [None]:
# Result CSVs are written below this folder, which lives under the
# /content/drive mount point; create it if it does not exist yet.
base_path = "/content/drive/MyDrive/HVAE/Table2/"
os.makedirs(base_path, exist_ok=True)

In [None]:
# Hyperparameters shared by every run; run_and_test reads them as globals.
EPOCHS = 10  # passed to training_M1 as `epochs`
INPUT_DIM = 784  # 2nd positional argument of M1; presumably 28x28 flattened MNIST images — TODO confirm
HIDDEN_DIM = 128  # 3rd positional argument of M1
LATENT_DIM = 10  # NOTE: shadowed by run_and_test's LATENT_DIM parameter, so the sweep never reads this value
LATENT_MODE = 'sample'  # forwarded as `mode` to training_M1 and as 3rd argument to predict_classes_loader
PATIENCE = max(2,int(0.1*EPOCHS))  # 10% of EPOCHS, but never below 2; passed to training_M1 as `patience`
ONE_LAYER = False  # 5th positional argument of M1
LR = 0.001  # learning rate of the Adam optimizer

N_RUNS = 1  # repetitions per (latent size, model) pair in launch_experiment

In [None]:
def run_and_test(mode, LATENT_DIM, test_dataloader):
    """Train one M1 model and return its accuracy on ``test_dataloader``.

    The model is trained on the module-level ``train_loader`` /
    ``val_loader`` using the module-level hyperparameters (``EPOCHS``,
    ``INPUT_DIM``, ``HIDDEN_DIM``, ``ONE_LAYER``, ``LR``, ``PATIENCE``,
    ``LATENT_MODE``).

    Args:
        mode: Model type forwarded to ``M1``. ``"svae"`` is logged with the
            ``[SVAE]`` prefix, any other value with ``[NVAE]``.
        LATENT_DIM: Latent dimensionality for this run. It shadows the
            module-level constant of the same name.
        test_dataloader: Iterable of test batches handed to
            ``predict_classes_loader``.

    Returns:
        The result of ``accuracy_score(Y, Y_hat)`` for the test batches.
    """
    addon = "[SVAE]" if mode == "svae" else "[NVAE]"

    print(f"\n{addon} Instantiating model and optimizer..")
    model = M1(
        mode,
        INPUT_DIM,
        HIDDEN_DIM,
        LATENT_DIM,
        ONE_LAYER,
    )

    optimizer = torch.optim.Adam(model.parameters(), lr=LR)

    print(f"{addon} Started training..")
    # Pass the model built above (the original referenced an undefined
    # `model_svae`). The loss histories returned by training_M1 are not
    # needed here.
    model, _losses, _loss_parts = training_M1(
        train_loader,
        val_loader,
        model,
        optimizer,
        epochs=EPOCHS,
        beta_kl=1,
        patience=PATIENCE,
        show_loss_every=1,
        mode=LATENT_MODE,
    )

    print(f"\n{addon} Predicting classes..")
    # Evaluate the model returned by training_M1.
    Y, Y_hat = predict_classes_loader(model, test_dataloader, LATENT_MODE)

    test_acc = accuracy_score(Y, Y_hat)
    print(f"{addon} Test accuracy: {test_acc*100:.2f}")
    return test_acc

In [None]:
def launch_experiment(N, test_dataloader):
    """Sweep latent sizes and model types and tabulate test accuracies.

    For every latent size in ``[2, 5, 10, 20, 40]`` and every mode in
    ``["svae", "normal"]``, ``run_and_test`` is called ``N_RUNS`` times
    (module-level constant) and the accuracies are aggregated.

    Args:
        N: Value recorded in the ``N_test`` column of every row; it is used
            only as a label and does not change the computation.
        test_dataloader: Iterable of test batches forwarded to
            ``run_and_test``.

    Returns:
        A ``pandas.DataFrame`` with one row per (latent size, model) pair
        and the columns ``Latent Size``, ``Model``, ``Accuracy`` (NaN-aware
        mean over the runs), ``Std`` (NaN-aware standard deviation) and
        ``N_test``.
    """
    results = {"Latent Size": [], "Model": [], "Accuracy": [], "Std": [], "N_test": []}
    for latent in tqdm([2, 5, 10, 20, 40], desc="Exploring latent.."):
        for mode in ["svae", "normal"]:
            accuracy_list = []
            for i in tqdm(range(N_RUNS), desc="Repeated runs.."):
                print(f"\nSTARTING RUN n°{i+1}")
                accuracy_list.append(run_and_test(mode, latent, test_dataloader))

            results["Latent Size"].append(latent)
            results["Model"].append(mode)
            results["Accuracy"].append(np.nanmean(accuracy_list))
            results["Std"].append(np.nanstd(accuracy_list))
            results["N_test"].append(N)
    # The original `return results_df = pd.DataFrame(...)` was a SyntaxError.
    return pd.DataFrame(results)

## **$N = 100$**
Batch size is 100 so only test on 10 batches

In [None]:
from itertools import islice

# Keep only the first 10 test batches. islice stops pulling from the loader
# after the 10th batch instead of iterating it to the end.
# NOTE(review): 10 batches of 100 samples would be 1000 samples, not the
# N = 100 of this section — confirm the batch size used by
# make_splits_loaders_MNIST.
sub_test_loader = list(islice(test_loader, 10))

In [None]:
# Run the sweep on the 10-batch subset; 100 is passed as the N label.
results_df_100 = launch_experiment(100, sub_test_loader)

In [None]:
# Save the table under base_path, then show it as the cell output.
results_df_100.to_csv(base_path + "100_M1_results.csv")
results_df_100

## **$N = 600$**
Batch size is 100 so only test on 60 batches

In [None]:
from itertools import islice

# Keep only the first 60 test batches. islice stops pulling from the loader
# after the 60th batch instead of iterating it to the end.
# NOTE(review): 60 batches of 100 samples would be 6000 samples, not the
# N = 600 of this section — confirm the batch size used by
# make_splits_loaders_MNIST.
sub_test_loader = list(islice(test_loader, 60))

In [None]:
# Run the sweep on the 60-batch subset; 600 is passed as the N label.
results_df_600 = launch_experiment(600, sub_test_loader)

In [None]:
# Save the table under base_path, then show it as the cell output.
# base_path already ends with "/", so no "./" prefix is needed; the file
# name is now built the same way as for the N = 100 results.
results_df_600.to_csv(base_path + "600_M1_results.csv")
results_df_600

## **$N = 1000$**
Test on all batches of `test_loader`.

In [None]:
# Run the sweep on the full test_loader; 1000 is passed as the N label.
results_df_1000 = launch_experiment(1000, test_loader)

In [None]:
# Save the table under base_path, then show it as the cell output.
# base_path already ends with "/", so no "./" prefix is needed; the file
# name is now built the same way as for the N = 100 results.
results_df_1000.to_csv(base_path + "1000_M1_results.csv")
results_df_1000