# Reproduce GP-VAE
## Instructions for use:
1. Clone our GP-VAE repo (**Add link here**) to your Google Drive
2. Place this notebook in your Google Drive
3. Update `DRIVE_ROOT_DIR` (in the first code cell) to point to the repo folder in your Google Drive
4. **Only for the first time you run on a colab host**: perform initial installations and fixes:
  * Uncomment code cell 2
  * Execute code cells 1 and 2
  * Comment code cell 2 out again
  * Restart the host
5. Run the notebook

**Note**: At the end, the train / test outputs (model, results) are copied into the repository clone on Google Drive under `outputs/<timestamp>/` (inside it, the `model` folder holds the experiment outputs, as intended by the original authors).

In [8]:
import time
from pathlib import Path
from google.colab import drive

# Google Drive linkage
drive.mount('/content/drive')
DRIVE_ROOT_DIR = Path('/content/drive/MyDrive/colab_data/aml_project/gp_vae_for_data_generation')

# Relative paths - correct both for drive and for local (Colab) VM
datetime = time.strftime("%Y%m%d-%H%M%S")
OUTPUTS_DIR = Path(f'outputs/{datetime}')
DRIVE_OUTPUTS_DIR = DRIVE_ROOT_DIR / OUTPUTS_DIR

# Create output folders on local (Colab) VM
Path(OUTPUTS_DIR).mkdir(parents=True, exist_ok=True)
print(f'OUTPUTS_DIR = {OUTPUTS_DIR}, exists = {Path.exists(OUTPUTS_DIR)}')

Path(DRIVE_OUTPUTS_DIR).mkdir(parents=True, exist_ok=True)
print(f'DRIVE_OUTPUTS_DIR = {DRIVE_OUTPUTS_DIR}, exists = {Path.exists(DRIVE_OUTPUTS_DIR)}')

# copy code to colab host
!cp -R $DRIVE_ROOT_DIR/* .

!ls -la $ROOT_DIR

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
OUTPUTS_DIR = outputs/20220812-204006, exists = True
DRIVE_OUTPUTS_DIR = /content/drive/MyDrive/colab_data/aml_project/gp_vae_for_data_generation/outputs/20220812-204006, exists = True
total 100
drwxr-xr-x 1 root root  4096 Aug 12 20:07 .
drwxr-xr-x 1 root root  4096 Aug 12 19:46 ..
drwxr-xr-x 4 root root  4096 Aug  3 20:20 .config
drwx------ 3 root root  4096 Aug 12 20:07 data
drwx------ 5 root root  4096 Aug 12 20:06 drive
drwx------ 2 root root  4096 Aug 12 20:06 figures
-rw------- 1 root root 11860 Aug 12 20:40 generate.py
drwx------ 3 root root  4096 Aug 12 20:06 lib
-rw------- 1 root root  1087 Aug 12 20:40 LICENSE
drwxr-xr-x 6 root root  4096 Aug 12 20:40 outputs
drwx------ 2 root root  4096 Aug 12 20:36 __pycache__
-rw------- 1 root root  2364 Aug 12 20:40 README.md
-rw------- 1 root root   146 Aug 12 20:40 requirements.txt
drwxr-xr-x 1 root root  409

### Run the below cell only once per new host in Colab, then comment it out.

In [9]:
# # One-time setup for a new Colab host: uncomment and run this cell once,
# # then comment it out again, restart the host, and rerun all cells.
# #---------------------------------------------------------------------------------

# # Copy the code from the Drive repo clone to the Colab host (current directory)
# !cp -R $DRIVE_ROOT_DIR/* .

# # Prepare the data download scripts: convert them with dos2unix,
# # make them executable, then list the data folder to check the result
# !sudo apt-get install dos2unix
# !dos2unix data/*.sh
# !chmod +x data/*.sh
# !ls -la data

# # Install the package versions listed in requirements.txt (old packages for old code)
# !pip install -r requirements.txt

# # Download dataset for hmnist
# !data/load_hmnist.sh


## Train model

In [None]:
#Train on limited dataset
SAMPLES_PER_DIGIT = 500
!python train.py --model_type gp-vae --data_type hmnist --exp_name reproduce_hmnist --seed 111 --testing --banded_covar --latent_dim 256 --encoder_sizes=256,256 --decoder_sizes=256,256,256 --window_size 3 --sigma 1 --length_scale 2 --beta 0.8 --num_epochs 20  --train_class_number $SAMPLES_PER_DIGIT --base_dir $OUTPUTS_DIR


Testing:  True 	 Seed: 111
Full exp name:  outputs/20220812-204006/model
tcmalloc: large alloc 1881604096 bytes == 0x6174000 @  0x7ff07ec7a1e7 0x7ff07b70fcf1 0x7ff07b7747c8 0x7ff07b7134f1 0x5947d6 0x548cc1 0x5127f1 0x549576 0x593fce 0x548ae9 0x5127f1 0x4bc98a 0x533274 0x4d3969 0x512147 0x549e0e 0x593fce 0x5118f8 0x593dd7 0x5118f8 0x549576 0x593fce 0x548ae9 0x51566f 0x549576 0x604173 0x5f5506 0x5f8c6c 0x5f9206 0x64faf2 0x64fc4e
tcmalloc: large alloc 1881604096 bytes == 0x76424000 @  0x7ff07ec7a1e7 0x7ff07b70fcf1 0x7ff07b7747c8 0x7ff07b7134f1 0x5947d6 0x548cc1 0x5127f1 0x549576 0x593fce 0x548ae9 0x5127f1 0x4bc98a 0x533274 0x4d3969 0x512147 0x549e0e 0x593fce 0x5118f8 0x593dd7 0x5118f8 0x549576 0x593fce 0x548ae9 0x51566f 0x549576 0x604173 0x5f5506 0x5f8c6c 0x5f9206 0x64faf2 0x64fc4e
tcmalloc: large alloc 1881604096 bytes == 0xe6694000 @  0x7ff07ec7a1e7 0x7ff07b70fcf1 0x7ff07b7747c8 0x7ff07b7134f1 0x5947d6 0x548cc1 0x5127f1 0x549576 0x593fce 0x548ae9 0x5127f1 0x4bc98a 0x533274 0x4d3969 0x51

In [None]:
# Copy trained models back to drive
!cp -R $OUTPUTS_DIR/* $DRIVE_OUTPUTS_DIR
print(f'DRIVE_OUTPUTS_DIR = {DRIVE_OUTPUTS_DIR}')
!ls -la $DRIVE_OUTPUTS_DIR


## Generate new samples based on the dataset used in train and the model

In [None]:
# copy code to colab host
!cp -R $DRIVE_ROOT_DIR/* .

In [None]:
#Train on limited dataset

!python generate.py --model_type gp-vae --data_type hmnist --exp_name reproduce_hmnist --seed 123 --testing --banded_covar --latent_dim 256 --encoder_sizes=256,256 --decoder_sizes=256,256,256 --window_size 3 --sigma 1 --length_scale 2 --beta 0.8 --num_epochs 20 --base_dir $OUTPUTS_DIR

In [None]:
# Copy trained models back to drive
!cp -R $OUTPUTS_DIR/generated_data $DRIVE_OUTPUTS_DIR
print(f'DRIVE_OUTPUTS_DIR = {DRIVE_OUTPUTS_DIR}')
!ls -la $DRIVE_OUTPUTS_DIR/generated_data

