In [1]:
import torch
import numpy as np
from torch.utils.data import DataLoader
from vae.datasets import VolSurfaceExFeatsDataSet
from vae.cond_conv_vae_with_mem import CVAEConv2DMem
from vae.utils import *
import flaml
from ray import tune

In [2]:
# Open the .npz archive once and pull out the two arrays used by the rest of
# the notebook. `data` is kept as a module-level name.
# NOTE(review): both arrays are later sliced with the same row indices, so they
# are presumably aligned along axis 0 - confirm against the data export.
data = np.load("data/vol_surface_with_ret.npz")
vol_surf_data, ret_data = data["surface"], data["ret"]

In [3]:
# perform model evaluation in terms of the accuracy and f1 score.
def model_eval_new(model: BaseVAE, dataloader):
    model.eval() # switch to eval model, will turn off randomness like dropout
    eval_loss = 0
    num_batches = 0
    for step, batch in enumerate(dataloader):
        try:
            batch.to(model.device)
        except:
            pass

        losses = model.test_step(batch)

        eval_loss += losses["loss"].item()
        num_batches += 1

    return eval_loss / num_batches

def train_new(model: "BaseVAE", train_dataloader: DataLoader, valid_dataloader: DataLoader, 
          lr=1e-5, epochs=100, 
          model_dir="./", file_name="vanilla.pt"):
    """Train ``model`` epoch by epoch, yielding the losses after every epoch.

    This is a generator: nothing runs until it is iterated. After each epoch
    the model is evaluated with ``model_eval_new`` and, whenever the validation
    loss improves, ``model.save_weights(optimizer, model_dir, file_prefix)`` is
    called. Progress is written to ``{model_dir}/{file_prefix}-{epochs}-log.txt``.

    Args:
        model: Object exposing ``train()``, ``parameters()``, ``config``
            (JSON-serialisable), ``device``, ``train_step(batch, optimizer)``
            and ``save_weights(...)``.
        train_dataloader: Iterable of training batches.
        valid_dataloader: Iterable of validation batches.
        lr: Learning rate passed to ``AdamW``.
        epochs: Number of epochs to run.
        model_dir: Directory for the log file and saved weights; created if
            missing.
        file_name: Checkpoint name; the text before the first ``"."`` is used
            as the file prefix.

    Yields:
        ``(train_loss, dev_loss)`` for each completed epoch.

    Returns:
        Via ``StopIteration.value``: the last ``(train_loss, dev_loss)``, or
        ``(None, None)`` when ``epochs`` is 0.

    Raises:
        ValueError: If ``train_dataloader`` yields no batches.
    """
    model.train()
    optimizer = opt.AdamW(model.parameters(), lr)
    best_dev_loss = np.inf
    # Defined up front so the final `return` is valid even when epochs == 0.
    train_loss = dev_loss = None

    # exist_ok avoids the check-then-create race when several trials start
    # at the same time and share one model_dir.
    os.makedirs(model_dir, exist_ok=True)
    # split() returns the whole string when there is no ".", so no branch is needed.
    file_prefix = file_name.split(".")[0]

    # The context manager closes (and flushes) the log even if the consumer
    # abandons this generator early or a training step raises.
    with open(f"{model_dir}/{file_prefix}-{epochs}-log.txt", "w", encoding="utf-8") as log_file:
        print("Model config: ", file=log_file)
        print(json.dumps(model.config, indent=True), file=log_file)
        print(f"LR: {lr}", file=log_file)
        print(f"Epochs: {epochs}", file=log_file)
        print("", file=log_file)
        start_time = time.time()

        ## run for the specified number of epochs
        for epoch in range(epochs):
            epoch_start_time = time.time()
            model.train()  # model_eval_new() leaves the model in eval mode
            running_loss = 0.0
            num_batches = 0
            for batch in train_dataloader:
                # Best-effort device move; batches without `.to()` (dicts,
                # lists, ...) are passed through unchanged. Keep the result:
                # `.to()` on a tensor returns a new tensor.
                try:
                    moved = batch.to(model.device)
                except AttributeError:
                    moved = None
                if moved is not None:
                    batch = moved

                losses = model.train_step(batch, optimizer)

                running_loss += losses["loss"].item()
                num_batches += 1

            if num_batches == 0:
                raise ValueError("train_new: train_dataloader produced no batches")
            train_loss = running_loss / num_batches

            dev_loss = model_eval_new(model, valid_dataloader)

            # Checkpoint only on improvement of the validation loss.
            if dev_loss < best_dev_loss:
                best_dev_loss = dev_loss
                model.save_weights(optimizer, model_dir, file_prefix)

            # Flush so the log is readable while a long run is in progress.
            print(f"epoch {epoch}: train loss :: {train_loss :.3f}, dev loss :: {dev_loss :.3f}, time elapsed :: {time.time() - epoch_start_time}", file=log_file, flush=True)
            yield train_loss, dev_loss

        print(f"training finished, total time :: {time.time() - start_time}", file=log_file)
    return train_loss, dev_loss

In [8]:
def train_wrapper(config):
    """Trainable for the hyper-parameter search: build, train and report one model.

    Builds a ``CVAEConv2DMem`` from the sampled ``config``, trains it with
    ``train_new`` on the first 4000 rows of the module-level ``vol_surf_data``
    / ``ret_data`` arrays, validates on rows 4000-5000, and calls
    ``tune.report`` after every epoch.

    Args:
        config: Mapping with the keys ``seq_ctx_len`` (indexable;
            ``[0]`` = sequence length, ``[1]`` = context length),
            ``latent_dim``, ``kl_weight``, ``re_feat_weight``,
            ``surface_hidden``, ``ctx_surface_hidden``, ``mem_hidden``,
            ``mem_layers``, ``mem_dropout``, ``batch_size``, ``lr`` and
            ``num_epochs``.
    """
    seq_ctx_len = config["seq_ctx_len"]
    seq_len = seq_ctx_len[0]
    ctx_len = seq_ctx_len[1]

    # Row boundaries of the train / validation split.
    train_end, valid_end = 4000, 5000

    model_config = {
        "seq_len": seq_len, 
        "feat_dim": (5, 5),
        "latent_dim": config["latent_dim"],
        "device": "cuda",
        "kl_weight": config["kl_weight"],
        "re_feat_weight": config["re_feat_weight"],
        # The samplers return tuples; the model config is given lists.
        "surface_hidden": list(config["surface_hidden"]),
        "ex_feats_dim": 1,
        "ex_feats_hidden": None,
        "mem_type": "lstm",
        "mem_hidden": config["mem_hidden"],
        "mem_layers": config["mem_layers"],
        "mem_dropout": config["mem_dropout"],
        "ctx_len": ctx_len, 
        "ctx_surface_hidden": list(config["ctx_surface_hidden"]), 
        "ctx_ex_feats_hidden": None,
    }

    # The held-out rows after `valid_end` are deliberately not loaded here:
    # the search never evaluates on them, so building that dataset for every
    # trial was wasted work.
    train_dataset = VolSurfaceExFeatsDataSet(vol_surf_data[:train_end], ret_data[:train_end], seq_len)
    train_dataloader = DataLoader(train_dataset, shuffle=False, batch_size=config["batch_size"])
    valid_dataset = VolSurfaceExFeatsDataSet(vol_surf_data[train_end:valid_end], ret_data[train_end:valid_end], seq_len)
    valid_dataloader = DataLoader(valid_dataset, shuffle=False, batch_size=config["batch_size"])

    model = CVAEConv2DMem(model_config)
    # NOTE(review): every trial writes to the same "models/cond_conv2d_lstm"
    # prefix, so concurrent or successive trials overwrite each other's
    # checkpoint and log - consider a per-trial directory.
    # num_epochs may arrive as a float from the searcher, hence int(round(...)).
    for train_loss, dev_loss in train_new(model, train_dataloader, valid_dataloader, 
                                 config["lr"], int(round(config["num_epochs"])), 
                                 "models", "cond_conv2d_lstm.pt"):
        tune.report(train_loss=train_loss, dev_loss=dev_loss)

In [19]:
def surface_hidden_size_sampler():
    """Draw a random stack of hidden-layer sizes.

    Takes no arguments; it is handed to ``tune.sample_from`` in the search
    space, so the signature should stay parameter-free.

    Returns:
        A tuple of 1 to 5 Python ints, each between 1 and 10 inclusive, drawn
        from NumPy's global random state.
    """
    # randint's upper bound is exclusive: depth is 1..5.
    depth = np.random.randint(1, 6)
    # One draw per layer (sizes 1..10), converted to plain ints.
    return tuple(int(np.random.randint(1, 11)) for _ in range(depth))

def seq_len_sampler():
    """Draw a ``(seq_len, ctx_len)`` pair with ``1 <= ctx_len < seq_len``.

    Takes no arguments; it is handed to ``tune.sample_from`` in the search
    space, so the signature should stay parameter-free.

    Returns:
        A tuple of two Python ints: a sequence length picked from a fixed
        list, and a context length drawn uniformly from ``1 .. seq_len - 1``.
    """
    # modify this for different choices
    candidate_lengths = [2, 5, 7, 10, 30, 90, 180, 252, 365]
    total_len = int(np.random.choice(candidate_lengths))
    # Upper bound is exclusive, so the context is always shorter than the sequence.
    context_len = int(np.random.randint(1, total_len))
    return (total_len, context_len)

In [20]:
# Hyper-parameter search space consumed by train_wrapper().
config = dict(
    # optimisation
    lr=tune.loguniform(1e-5, 1e-3),
    num_epochs=tune.choice([10, 25, 50, 100]),
    batch_size=tune.choice([16, 32, 64]),
    # (seq_len, ctx_len) pair drawn by seq_len_sampler
    seq_ctx_len=tune.sample_from(seq_len_sampler),
    # model size and loss weights
    latent_dim=tune.randint(1, 1001),
    re_feat_weight=tune.loguniform(1, 1000),
    kl_weight=tune.uniform(0, 1),
    surface_hidden=tune.sample_from(surface_hidden_size_sampler),
    # memory settings (train_wrapper fixes mem_type to "lstm")
    mem_hidden=tune.randint(25, 101),
    mem_layers=tune.randint(1, 11),
    mem_dropout=tune.uniform(0, 1),
    ctx_surface_hidden=tune.sample_from(surface_hidden_size_sampler),
)

In [21]:
# Launch the search: minimise the "dev_loss" value that train_wrapper reports
# after every epoch.
result = flaml.tune.run(
    tune.with_parameters(train_wrapper),
    config=config,
    # objective
    metric="dev_loss",
    mode="min",
    # search budget and starting point
    num_samples=1,
    low_cost_partial_config={"num_epochs": 10},
    # "asha" performs early stopping based on the intermediate results reported
    # (max_resource is left unset)
    scheduler="asha",
    # execution backend and per-trial resources
    use_ray=True,
    resources_per_trial={"cpu": 1, "gpu": 1},
    local_dir="test/logs/",
)

Using CFO for search. To use BlendSearch, run: pip install flaml[blendsearch]


0,1
Current time:,2023-05-07 15:16:06
Running for:,00:06:34.91
Memory:,15.5/63.7 GiB

Trial name,status,loc,batch_size,ctx_surface_hidden,kl_weight,latent_dim,lr,mem_dropout,mem_hidden,mem_layers,num_epochs,re_feat_weight,seq_ctx_len,surface_hidden,iter,total time (s),train_loss,dev_loss
train_wrapper_9565f865,TERMINATED,127.0.0.1:13716,16,"(4, 7, 2, 4, 5)",0.368659,648,1.56626e-05,0.266392,72,8,10,12.2582,"(252, 67)","(6, 7)",10,391.893,0.12013,0.077612


Trial name,dev_loss,train_loss
train_wrapper_9565f865,0.077612,0.12013


2023-05-07 15:16:06,962	INFO tensorboardx.py:269 -- Removed the following hyperparameter values when logging to tensorboard: {'seq_ctx_len': (252, 67), 'surface_hidden': (6, 7), 'ctx_surface_hidden': (4, 7, 2, 4, 5)}
2023-05-07 15:16:06,988	INFO tune.py:945 -- Total run time: 394.95 seconds (394.91 seconds for the tuning loop).




In [22]:
print(f"#trials={len(result.trials)}")
# get_best_trial takes (metric, mode, scope). The earlier positional call
# passed "loss" as the metric and "dev_loss" as the mode, which is not the
# objective the search was run with. Use the same metric and mode as
# flaml.tune.run.
best_trial = result.get_best_trial(metric="dev_loss", mode="min", scope="all")
if best_trial is None:
    # NOTE(review): presumably returned when no trial reported a usable
    # "dev_loss" - confirm against the analysis API.
    print("Best trial config: no trial reported a valid dev_loss")
else:
    print("Best trial config: {}".format(best_trial.config))

#trials=1
Best trial config: {'num_epochs': 10, 'lr': 1.5662610420278313e-05, 'batch_size': 16, 'seq_ctx_len': (252, 67), 'latent_dim': 648, 're_feat_weight': 12.258227733927924, 'kl_weight': 0.36865945026811975, 'surface_hidden': (6, 7), 'mem_hidden': 72, 'mem_layers': 8, 'mem_dropout': 0.26639242043080236, 'ctx_surface_hidden': (4, 7, 2, 4, 5)}
