<a href="https://colab.research.google.com/github/mwl10/hetvae/blob/errors/src/catalina_optuna.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import shutil
os.chdir('/content')
! git clone --branch errors https://github.com/mwl10/hetvae
os.chdir('/content/hetvae')
! pip install -r requirements.txt
os.chdir('/content/hetvae/src')
import numpy as np
import torch
import optuna
from optuna.trial import TrialState
import torch.optim as optim
import models
from argparse import Namespace
import torch.optim as optim
import utils
import my_utils
import pandas as pd
from glob import glob
import importlib
import vae_models
import matplotlib.pyplot as plt
import sys
from dataset import DataSet
%matplotlib inline

Cloning into 'hetvae'...
remote: Enumerating objects: 3957, done.[K
remote: Counting objects: 100% (993/993), done.[K
remote: Compressing objects: 100% (835/835), done.[K
remote: Total 3957 (delta 194), reused 247 (delta 158), pack-reused 2964[K
Receiving objects: 100% (3957/3957), 39.00 MiB | 3.65 MiB/s, done.
Resolving deltas: 100% (1139/1139), done.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting alembic==1.7.7
  Downloading alembic-1.7.7-py3-none-any.whl (210 kB)
[K     |████████████████████████████████| 210 kB 14.7 MB/s 
Collecting autopage==0.5.0
  Downloading autopage-0.5.0-py3-none-any.whl (29 kB)
Collecting backports.functools-lru-cache==1.6.4
  Downloading backports.functools_lru_cache-1.6.4-py2.py3-none-any.whl (5.9 kB)
Collecting cliff==3.10.1
  Downloading cliff-3.10.1-py3-none-any.whl (81 kB)
[K     |████████████████████████████████| 81 kB 8.5 MB/s 
[?25hCollecting cmaes==0.8.2
  Downloading cmaes-0.8.2-

In [2]:
def define_model_args(trial):

    args = Namespace(
        batch_size = 8, #trial.suggest_categorical("batch_size", [8,16,32]),
        bound_variance = True,
        const_var = False,
        dataset='toy',
        dropout = trial.suggest_float("dropout", 0.0,0.5),
        elbo_weight = trial.suggest_float("elbo_weight", 0.0, 2.0),
        embed_time = trial.suggest_categorical("embed_time", [8,16,32,64]),
        enc_num_heads=4,#trial.suggest_categorical("enc_num_heads", [1,2,4,8]),
        intensity=True,
        k_iwae=1,
        kl_annealing=False,#trial.suggest_categorical("kl_annealing",False),
        kl_zero=False, 
        latent_dim=128,#trial.suggest_categorical("latent_dim", [8,16,32,64,128]),
        lr=trial.suggest_float("lr", 1e-7, 1e-1, log=True),
        mixing="concat_and_mix",#trial.suggest_categorical("mixing", ["concat", "concat_and_mix"]),#"separate", "interp_only", "na"]),
        mse_weight=trial.suggest_float("mse_weight",1,6),
        #n=trial.suggest_categorical("n", [8,16,32,64,128]),
        net='hetvae', 
        niters=1000, 
        norm=True, 
        normalize_input='znorm', 
        num_ref_points=trial.suggest_categorical("num_ref_points", [64]),
        rec_hidden=trial.suggest_categorical("rec_hidden", [16,32,64]),
        recon_loss=False, 
        sample_tp= 0.5, #trial.suggest_float("sample_tp", 0.1,0.9), # will be ignored
        save=True, 
        seed=0, 
        shuffle=True, 
        std=0.1, 
        var_per_dim=False, 
        width=trial.suggest_categorical("width", [32,64,128])
    )

    return args

In [3]:
EPOCHS = 100
FILES = glob('/content/hetvae/data/CAT/*/*')
#FILES = glob('/content/hetvae/data/CAT/*/*')[:50]


DIM = 1

In [4]:
from contextlib import contextmanager
import sys, os

@contextmanager
def suppress_stdout():
    with open(os.devnull, "w") as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:  
            yield
        finally:
            sys.stdout = old_stdout

In [5]:
agn_df = pd.read_csv('/content/hetvae/data/AGN_1H2106-099/1H2106-099_latest_lcs_pyroa.csv')
# handle formatting for new AGN
lcs = []
for lc_df in agn_df.groupby('Filter'):
    lc = lc_df[1][['MJD', 'Flux', 'Error']].to_numpy()
    lcs.append(lc)
# lcs[0] = lcs[0][lcs[0][:,0].argsort()]
# lcs[0] = np.concatenate((lcs[0][:10], lcs[0][410:]))
lcs = [lcs[0]]


In [6]:
def objective(trial):

    args = define_model_args(trial)

    seed = args.seed
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.cuda.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # dataset hyperparams
    num_samples=trial.suggest_int("num_samples", 1,6)
    # normalize choices for optuna
    x_by_range = trial.suggest_categorical("x_by_range", [True, False])
    y_by_range = False#trial.suggest_categorical("y_by_range", [True, False])
    #normalize_y = trial.suggest_categorical("normalize_y", ["all", "individual"])

    with suppress_stdout():
        # AGN = DataSet().add_files(FILES).files_to_numpy().handle_dups().prune_graham().resample_dataset(num_samples=num_samples) \
        #     .normalize(y_by_range=y_by_range, x_by_range=x_by_range).set_union_x().zero_fill().make_masks(frac=args.sample_tp)
        AGN = DataSet()
        AGN.dataset = lcs
        AGN = AGN.handle_dups().prune_outliers().resample_dataset(num_samples=num_samples) \
             .normalize(y_by_range=y_by_range, x_by_range=x_by_range).set_union_x().zero_fill().make_masks(frac=args.sample_tp)



    LightCurves = AGN.dataset

    training, valid = np.split(LightCurves, [int(np.floor(0.8*len(LightCurves)))])# shuffle?
    
    
    train_loader = torch.utils.data.DataLoader(training, batch_size=args.batch_size)
    valid_loader = torch.utils.data.DataLoader(valid, batch_size=args.batch_size)
    
    
    net = models.load_network(args, DIM, torch.Tensor(AGN.union_x)) # , device="cuda"
    

    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "Adadelta"])
    optimizer = getattr(optim, optimizer_name)(net.parameters(), lr=args.lr)
    frac = trial.suggest_float("sample_tp", 0.1,0.9)
    for epoch in range(EPOCHS):
        nll_loss, mse = my_utils.train(net, optimizer, epoch, train_loader, args, device=device, frac=frac)
        #nll_loss = my_utils.evaluate(net, valid_loader, device=device)
        trial.report(nll_loss, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return nll_loss

In [7]:
study = optuna.create_study(direction="minimize")

[32m[I 2022-06-17 17:13:06,140][0m A new study created in memory with name: no-name-3328574b-bb12-4d3a-ae6a-a420bd38545e[0m


In [8]:
study.optimize(objective, n_trials=10, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Iter: 0, train loss: 4.8142, avg nll: 1.6219, avg kl: 1.6879, mse: 1.042513, mae: 0.855909


[32m[I 2022-06-17 17:13:27,650][0m Trial 0 finished with value: 1.4127589464187622 and parameters: {'dropout': 0.3599044428458503, 'elbo_weight': 1.9123338076707597, 'embed_time': 64, 'lr': 0.00023158498431509624, 'mse_weight': 1.333118335595446, 'num_ref_points': 128, 'rec_hidden': 64, 'width': 64, 'num_samples': 4, 'x_by_range': False, 'optimizer': 'Adam', 'sample_tp': 0.8876687004243902}. Best is trial 0 with value: 1.4127589464187622.[0m


Iter: 0, train loss: 2.7874, avg nll: 1.5374, avg kl: 0.6205, mse: 1.013716, mae: 0.846237


[32m[I 2022-06-17 17:13:50,377][0m Trial 1 finished with value: 1.4736545085906982 and parameters: {'dropout': 0.42880324848285345, 'elbo_weight': 0.32179563504461317, 'embed_time': 8, 'lr': 0.000575385430744067, 'mse_weight': 2.2419644037783217, 'num_ref_points': 64, 'rec_hidden': 16, 'width': 128, 'num_samples': 5, 'x_by_range': True, 'optimizer': 'Adadelta', 'sample_tp': 0.1391064782851509}. Best is trial 0 with value: 1.4127589464187622.[0m


Iter: 0, train loss: 3.5249, avg nll: 1.5516, avg kl: 0.7760, mse: 0.998715, mae: 0.838576


[32m[I 2022-06-17 17:16:30,538][0m Trial 2 finished with value: 1.5498145818710327 and parameters: {'dropout': 0.07953097325055608, 'elbo_weight': 0.6518073566103277, 'embed_time': 64, 'lr': 3.4757442786340244e-06, 'mse_weight': 2.4661865419478164, 'num_ref_points': 64, 'rec_hidden': 16, 'width': 128, 'num_samples': 6, 'x_by_range': False, 'optimizer': 'Adadelta', 'sample_tp': 0.5645767100115142}. Best is trial 0 with value: 1.4127589464187622.[0m
[33m[W 2022-06-17 17:16:31,278][0m Trial 3 failed because of the following error: RuntimeError('CUDA out of memory. Tried to allocate 2.42 GiB (GPU 0; 15.78 GiB total capacity; 9.69 GiB already allocated; 2.28 GiB free; 12.10 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF')[0m
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packages/optuna/study/_optimize.py", li

RuntimeError: ignored

In [None]:
# [I 2022-06-17 16:55:13,210] Trial 3 finished with value: 0.09719855338335037 and parameters: {'dropout': 0.02418752335062102, 'elbo_weight': 1.1061943368326204, 'embed_time': 16, 'lr': 4.212141191372822e-05, 'mse_weight': 5.108416562407145, 'rec_hidden': 128, 'num_samples': 6, 'x_by_range': True, 'optimizer': 'Adam', 'sample_tp': 0.1979453313080004}. Best is trial 3 with value: 0.09719855338335037.

optuna.visualization.plot_param_importances(study)

In [None]:

optuna.visualization.plot_optimization_history(study)

In [None]:
optuna.visualization.plot_slice(study, params=["optimizer"])