<a href="https://colab.research.google.com/github/mwl10/hetvae/blob/errors/src/catalina_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
# Colab bootstrap: start in /content, fetch the 'errors' branch of the repo,
# install the versions listed in its requirements.txt, then move into src/
# before the remaining imports (presumably so the repo's local modules such
# as models / utils / my_utils / dataset resolve -- confirm).
os.chdir('/content')
! git clone --branch errors https://github.com/mwl10/hetvae
os.chdir('/content/hetvae')
! pip install -r requirements.txt
os.chdir('/content/hetvae/src')
import numpy as np
import torch
import optuna
from optuna.trial import TrialState
import torch.optim as optim
import models
from argparse import Namespace
import torch.optim as optim
import utils
import my_utils
import pandas as pd
from glob import glob
import importlib
import vae_models
import matplotlib.pyplot as plt
import sys
from dataset import DataSet
%matplotlib inline

Cloning into 'hetvae'...
remote: Enumerating objects: 3899, done.[K
remote: Counting objects: 100% (935/935), done.[K
remote: Compressing objects: 100% (797/797), done.[K
remote: Total 3899 (delta 153), reused 214 (delta 138), pack-reused 2964[K
Receiving objects: 100% (3899/3899), 36.80 MiB | 26.32 MiB/s, done.
Resolving deltas: 100% (1098/1098), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting alembic==1.7.7
  Downloading alembic-1.7.7-py3-none-any.whl (210 kB)
[K     |████████████████████████████████| 210 kB 5.1 MB/s 
Collecting autopage==0.5.0
  Downloading autopage-0.5.0-py3-none-any.whl (29 kB)
Collecting backports.functools-lru-cache==1.6.4
  Downloading backports.functools_lru_cache-1.6.4-py2.py3-none-any.whl (5.9 kB)
Collecting cliff==3.10.1
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 11.4 MB/s 
[?25hCollecting cmaes==0.8.2
  Downloading cmaes-0.8.2

In [2]:
def define_model_args(trial):
    """Assemble the model/training settings for a single Optuna trial.

    Tunable values are drawn from ``trial`` (always in the same order);
    every other setting is a fixed constant.

    Args:
        trial: object providing Optuna's ``suggest_categorical`` and
            ``suggest_float`` methods.

    Returns:
        argparse.Namespace containing the sampled and the fixed settings.
    """
    # Shared menu for the size-like hyperparameters.
    sizes = [8, 16, 32, 64, 128]

    def choose(name, options):
        return trial.suggest_categorical(name, options)

    def draw(name, low, high, **extra):
        return trial.suggest_float(name, low, high, **extra)

    # Draw every tunable value first, one per line, in a fixed sequence.
    # NOTE(review): embed_time and enc_num_heads are sampled independently;
    # if the attention layer needs embed_time % enc_num_heads == 0, pairs
    # such as 8 / 16 would fail -- confirm against the models module.
    batch_size = choose("batch_size", [8, 16, 32])
    dropout = draw("dropout", 0.0, 0.5)
    elbo_weight = draw("elbo_weight", 0.0, 2.0)
    embed_time = choose("embed_time", sizes)
    enc_num_heads = choose("enc_num_heads", [1, 2, 4, 8, 16])
    latent_dim = choose("latent_dim", sizes)
    lr = draw("lr", 1e-7, 1e-1, log=True)
    mse_weight = draw("mse_weight", 1, 6)
    num_ref_points = choose("num_ref_points", sizes)
    rec_hidden = choose("rec_hidden", sizes)
    sample_tp = draw("sample_tp", 0.1, 0.9)  # objective() forwards this to make_masks(frac=...)
    width = choose("width", [8, 16, 32, 64, 128, 256])

    return Namespace(
        batch_size=batch_size,
        bound_variance=True,
        const_var=False,
        dataset='toy',
        dropout=dropout,
        elbo_weight=elbo_weight,
        embed_time=embed_time,
        enc_num_heads=enc_num_heads,
        intensity=True,
        k_iwae=1,
        kl_annealing=False,  # fixed for now; not part of the search space
        kl_zero=False,
        latent_dim=latent_dim,
        lr=lr,
        mixing="concat_and_mix",  # fixed; "concat" was the other candidate considered
        mse_weight=mse_weight,
        net='hetvae',
        niters=1000,
        norm=True,
        normalize_input='znorm',
        num_ref_points=num_ref_points,
        rec_hidden=rec_hidden,
        recon_loss=False,
        sample_tp=sample_tp,
        save=True,
        seed=0,
        shuffle=True,
        std=0.1,
        var_per_dim=False,
        width=width,
    )

In [3]:
# Number of train/evaluate epochs run inside every Optuna trial.
EPOCHS = 100

# Input files, two directory levels below data/CAT. glob() yields matches in
# arbitrary (filesystem-dependent) order, so the list is sorted to keep the
# file order -- and presumably the positional train/validation split made in
# objective() -- identical from run to run.
# For a quick smoke test restrict the list, e.g. FILES = FILES[:50].
FILES = sorted(glob('/content/hetvae/data/CAT/*/*'))

# Passed to models.load_network() as its second positional argument.
DIM = 1

In [4]:
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    """Context manager that discards everything written to ``sys.stdout``.

    While the ``with`` body runs, ``sys.stdout`` points at the null device;
    the previous stream is put back on exit, including when the body raises.
    """
    with open(os.devnull, "w") as sink:
        # Remember whatever stream was active so it can be reinstated.
        previous_stream, sys.stdout = sys.stdout, sink
        try:
            yield
        finally:
            sys.stdout = previous_stream

In [5]:
def objective(trial):
    """Optuna objective: build, train and score one sampled configuration.

    Draws model settings via ``define_model_args`` plus a few dataset
    options, rebuilds the dataset from ``FILES``, then alternates
    ``train`` and ``my_utils.evaluate`` for ``EPOCHS`` epochs, reporting
    the evaluation value to ``trial`` after every epoch.

    Args:
        trial: the Optuna trial that supplies hyperparameter suggestions
            and receives the per-epoch reports.

    Returns:
        The value ``my_utils.evaluate`` produced on the validation loader
        after the last epoch.

    Raises:
        optuna.exceptions.TrialPruned: if ``trial.should_prune()`` is true
            after an epoch's report.
    """
    args = define_model_args(trial)

    # Seed torch, numpy and CUDA (in that order) from the fixed seed in args.
    for apply_seed in (torch.manual_seed, np.random.seed, torch.cuda.manual_seed):
        apply_seed(args.seed)
    backend = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = torch.device(backend)

    # Dataset-level hyperparameters: resampling count and normalisation flags.
    resample_count = trial.suggest_int("num_samples", 1, 10)
    scale_x = trial.suggest_categorical("x_by_range", [True, False])
    scale_y = trial.suggest_categorical("y_by_range", [True, False])

    # The preprocessing pipeline prints as it goes; silence it for the trial.
    with suppress_stdout():
        catalina = (
            DataSet()
            .add_files(FILES)
            .files_to_numpy()
            .handle_dups()
            .prune_graham()
            .resample_dataset(num_samples=resample_count)
            .normalize(y_by_range=scale_y, x_by_range=scale_x)
            .set_union_x()
            .zero_fill()
            .make_masks(frac=args.sample_tp)
        )

    # Append both masks as extra trailing channels so one array feeds the model.
    mask_channels = [
        mask[:, :, np.newaxis]
        for mask in (catalina.subsampled_mask, catalina.recon_mask)
    ]
    light_curves = np.concatenate([catalina.dataset, *mask_channels], axis=-1)

    # Positional 80/20 split.
    # NOTE(review): no shuffling happens here and the loaders below do not
    # use args.shuffle either -- confirm that this is intended.
    n_train = int(np.floor(0.8 * len(light_curves)))
    training, valid = light_curves[:n_train], light_curves[n_train:]

    train_loader, valid_loader = (
        torch.utils.data.DataLoader(part, batch_size=args.batch_size)
        for part in (training, valid)
    )

    # NOTE(review): no device is passed to load_network, so the model is
    # placed wherever that helper decides by default -- confirm it matches
    # `device` on CPU-only machines.
    net = models.load_network(args, DIM, torch.Tensor(catalina.union_x))

    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop"])
    optimizer_cls = getattr(optim, optimizer_name)
    optimizer = optimizer_cls(net.parameters(), lr=args.lr)

    for epoch in range(EPOCHS):
        train(net, optimizer, epoch, train_loader, args, device=device)
        nll_loss = my_utils.evaluate(net, valid_loader, device=device)
        # Intermediate report lets the study's pruner stop weak trials early.
        trial.report(nll_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return nll_loss

In [8]:
# Create the study (no storage is given, so it lives in memory only).
# Direction is "minimize" because objective() returns a validation loss.
study = optuna.create_study(direction="minimize")

[32m[I 2022-06-17 01:39:31,730][0m A new study created in memory with name: no-name-6031f380-efeb-400e-8f68-7517a64178ad[0m


In [None]:
# Run the search: stop after 10 trials or 600 seconds, whichever comes first.
study.optimize(objective, n_trials=10, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# study.best_trial raises ValueError when no trial has completed (e.g. every
# trial was pruned or failed, or the timeout hit during the first trial), so
# only report a best trial when there is one.
if complete_trials:
    print("Best trial:")
    trial = study.best_trial

    print("  Value: ", trial.value)

    print("  Params: ")
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))
else:
    print("Best trial: none (no trial completed)")

Iter: 0, train loss: 0.3729, avg nll: 0.5515, avg kl: 0.4081, mse: 0.047880, mae: 0.169447


In [6]:
def train(net, optimizer, epoch, train_loader, args, device="cuda", log_every=100):
    """Run one optimisation pass over ``train_loader``, updating ``net``.

    NOTE: ``objective`` calls this function, so this cell must be executed
    before the cell that runs ``study.optimize``.

    Every batch is sliced along its last axis as: column 0 -> x, column 1
    -> y, column 3 -> subsampled mask, columns 4+ -> reconstruction mask.
    Column 2 is not used here (presumably per-point errors/weights that
    could be passed on as ``sample_weight`` -- TODO confirm).

    Args:
        net: model exposing ``compute_unsupervised_loss``; the returned
            object must provide ``composite_loss``, ``loglik``, ``kl``,
            ``mse`` and ``mae``.
        optimizer: optimizer over ``net``'s parameters.
        epoch: epoch index; only used to decide whether to log.
        train_loader: iterable of 3-D batches laid out as described above.
        args: settings object; only ``args.k_iwae`` is read.
        device: device every batch is moved to.
        log_every: print the running averages when ``epoch`` is a multiple
            of this value; ``0``/``None`` disables logging.

    Returns:
        None. Progress is reported via ``print`` only.
    """
    train_loss = 0.
    train_n = 0.
    avg_loglik, avg_kl, mse, mae = 0., 0., 0., 0.
    for train_batch in train_loader:
        batch_len = train_batch.shape[0]
        train_batch = train_batch.to(device)
        x = train_batch[:, :, 0]
        y = train_batch[:, :, 1:2]
        subsampled_mask = train_batch[:, :, 3:4]
        recon_mask = train_batch[:, :, 4:]

        # Model input: y values visible under the subsampled mask, plus the mask.
        context_y = torch.cat((y * subsampled_mask, subsampled_mask), -1)
        # Target: y values visible under the reconstruction mask, plus the mask.
        recon_context_y = torch.cat((y * recon_mask, recon_mask), -1)

        loss_info = net.compute_unsupervised_loss(
            x,                 # context_x
            context_y,
            x,                 # target_x: the same x values as the context
            recon_context_y,
            num_samples=args.k_iwae,
            beta=1,
        )
        optimizer.zero_grad()
        loss_info.composite_loss.backward()
        optimizer.step()

        # Weight each batch statistic by batch size so the final division
        # yields per-sample averages even when the last batch is smaller.
        train_loss += loss_info.composite_loss.item() * batch_len
        avg_loglik += loss_info.loglik * batch_len
        avg_kl += loss_info.kl * batch_len
        mse += loss_info.mse * batch_len
        mae += loss_info.mae * batch_len
        train_n += batch_len

    # An empty loader leaves nothing to average; skip the log line instead of
    # dividing by zero.
    if train_n == 0:
        return

    if log_every and epoch % log_every == 0:
        print(
            'Iter: {}, train loss: {:.4f}, avg nll: {:.4f}, avg kl: {:.4f}, '
            'mse: {:.6f}, mae: {:.6f}'.format(
                epoch,
                train_loss / train_n,
                -avg_loglik / train_n,
                avg_kl / train_n,
                mse / train_n,
                mae / train_n
            )
        )
      

In [None]:

# Hyperparameter-importance plot for the finished study.
optuna.visualization.plot_param_importances(study)

In [None]:

# Optimization-history plot: objective values over the study's trials.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Slice plot restricted to the "optimizer" parameter (Adam vs. RMSprop).
optuna.visualization.plot_slice(study, params=["optimizer"])