# Reproduce GP-VAE
## Instructions for use:
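1. Clone our GP-VAE repo (**Add link here**) to your Google Drive
2. Place this notebook in your Google Drive
3. In the first code cell, update `DRIVE_ROOT_DIR` to the repo folder in your Google Drive, and `DRIVE_DATA_DIR` to the location of the `hmnist_full.npz` dataset file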
4. **Only for the first time you run on a colab host**: perform initial installations and fixes:
  * Uncomment the `pip install` line in code cell 2
  * Execute code cells 1 and 2
  * Comment the `pip install` line in cell 2 out again
  * Restart the host
5. Run the notebook

**Note**: The train / test outputs (model, results) are copied back into the repository clone on your Google Drive (under `outputs/<run timestamp>/`) at the end of each stage, keeping the 'model' folder layout intended by the original writers.

In [None]:
import time
from pathlib import Path
from google.colab import drive

# Google Drive linkage
drive.mount('/content/drive')
DRIVE_ROOT_DIR = Path('/content/drive/MyDrive/colab_data/aml_project/gp_vae_for_data_generation')
DRIVE_DATA_DIR = Path('/content/drive/MyDrive/colab_data/aml_project/data/hmnist_full.npz')

# copy code to colab host
!cp -R $DRIVE_ROOT_DIR/* .
!ls -la $ROOT_DIR

Mounted at /content/drive


In [None]:
# First run on a fresh Colab host only: uncomment the install line below and
# run this cell, then comment it out again and restart the host.

# !pip install -r requirements.txt

## Train model

In [None]:
# Per-run output folders, named after the moment this cell is executed.

# OUTPUTS_DIR is relative, so the same sub-path is used on the local (Colab) VM
# and, joined onto DRIVE_ROOT_DIR, on Google Drive.
datetime = time.strftime("%Y%m%d-%H%M%S")
OUTPUTS_DIR = Path('outputs') / datetime
DRIVE_OUTPUTS_DIR = DRIVE_ROOT_DIR / OUTPUTS_DIR

# Create the local folder first, then its Drive counterpart, reporting each one.
for label, folder in (('OUTPUTS_DIR', OUTPUTS_DIR), ('DRIVE_OUTPUTS_DIR', DRIVE_OUTPUTS_DIR)):
    folder.mkdir(parents=True, exist_ok=True)
    print(f'{label} = {folder}, exists = {folder.exists()}')

In [None]:
# Train on limited dataset.
# The constants below are forwarded to train.py on the command line.
SAMPLES_PER_DIGIT = 500  # passed as --train_class_number
NUM_EPOCHS = 25  # passed as --num_epochs
WHITE_FLIP_RATIO = 0.6  # passed as --white_flip_ratio
BLACK_FLIP_RATIO = 0.8  # passed as --black_flip_ratio

# Reads the dataset from DRIVE_DATA_DIR (--data_dir) and uses the local OUTPUTS_DIR as --base_dir.
!python train.py --model_type gp-vae --seed 123 --data_type hmnist --banded_covar --latent_dim 256 --encoder_sizes=256,256 --decoder_sizes=256,256,256 --window_size 3 --sigma 1 --length_scale 2 --beta 0.8 --num_epochs $NUM_EPOCHS --train_class_number $SAMPLES_PER_DIGIT --data_dir $DRIVE_DATA_DIR --base_dir $OUTPUTS_DIR --black_flip_ratio $BLACK_FLIP_RATIO --white_flip_ratio $WHITE_FLIP_RATIO

In [None]:
# Copy everything under the local OUTPUTS_DIR (trained models) back to Drive,
# then list the Drive folder to confirm the copy.
!cp -R $OUTPUTS_DIR/* $DRIVE_OUTPUTS_DIR
print(f'DRIVE_OUTPUTS_DIR = {DRIVE_OUTPUTS_DIR}')
!ls -la $DRIVE_OUTPUTS_DIR


## Generate new samples based on the dataset used in training and the trained model

In [None]:
# Generate samples with generate.py, using the same OUTPUTS_DIR as --base_dir.
# The flip ratios are re-declared here with the same values as in the training cell.
WHITE_FLIP_RATIO = 0.6
BLACK_FLIP_RATIO = 0.8

# NOTE(review): --num_epochs is hard-coded to 20 here, while the training cell
# uses NUM_EPOCHS = 25 -- confirm whether generate.py depends on this value.
!python generate.py --model_type gp-vae --data_type hmnist --seed 123 --testing --banded_covar --latent_dim 256 --encoder_sizes=256,256 --decoder_sizes=256,256,256 --window_size 3 --sigma 1 --length_scale 2 --beta 0.8 --num_epochs 20 --base_dir $OUTPUTS_DIR --black_flip_ratio $BLACK_FLIP_RATIO --white_flip_ratio $WHITE_FLIP_RATIO

In [None]:
# Copy the generated_data folder from the local OUTPUTS_DIR back to Drive,
# then list it on Drive to confirm the copy.
!cp -R $OUTPUTS_DIR/generated_data $DRIVE_OUTPUTS_DIR
print(f'DRIVE_OUTPUTS_DIR = {DRIVE_OUTPUTS_DIR}')
!ls -la $DRIVE_OUTPUTS_DIR/generated_data

