In [1]:
%load_ext autoreload
%autoreload 2
import torch
import eq
import wandb
from tqdm.notebook import trange
import numpy as np
import warnings
from itertools import product
warnings.filterwarnings('ignore')

In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)`.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Earthquake catalog loaded with mag_completeness=4.5; its `.train`, `.val`
# and `.test` attributes provide the dataloaders consumed by `train`.
catalog = eq.catalogs.ANSS_MultiCatalog(mag_completeness=4.5)

Loading existing catalog from /home/zekai/repos/recast/data/ANSS_MultiCatalog.


In [3]:
def _build_model(config):
    """Construct a RecurrentTPP from the hyperparameters of one sweep configuration.

    Both the model that is trained and the model that receives the best
    weights are created here, so they always share every constructor
    argument (including ``tau_mean``).
    """
    return eq.models.RecurrentTPP(context_size=config.context_size,
                                  num_components=config.num_components,
                                  rnn_type=config.rnn_type,
                                  dropout_proba=config.dropout_proba,
                                  learning_rate=config.lr,
                                  tau_mean=config.tau_mean)


def _mean_nll(model, dataloader):
    """Return the mean over batches of ``model.nll_loss(batch).mean()`` in eval mode, without gradients."""
    model.eval()
    batch_losses = []
    with torch.no_grad():
        for data in dataloader:
            data = data.to(device)
            batch_losses.append(model.nll_loss(data).mean().item())
    return np.mean(batch_losses)


def train(config=None):
    """Run one sweep trial: train a RecurrentTPP, keep the best weights, report test NLL.

    The model is trained for a fixed number of epochs with AdamW. After every
    epoch the average training and validation NLL are logged to wandb, and the
    weights with the lowest validation NLL seen so far are snapshotted. At the
    end, those weights are loaded into a freshly built model and the average
    test NLL is logged as ``avg_test_loss``.

    Any exception raised during the trial (e.g. the distribution-parameter
    validation error that appears when training diverges to NaN) is printed
    and the trial logs ``avg_test_loss = nan`` so the sweep can continue.

    Parameters
    ----------
    config : dict or None
        Forwarded to ``wandb.init``. Hyperparameters are then read from
        ``wandb.config`` (``batch_size``, ``context_size``, ``num_components``,
        ``rnn_type``, ``dropout_proba``, ``lr``, ``betas``, ``weight_decay``,
        ``tau_mean``). ``wandb.agent`` calls this function without arguments.

    Returns
    -------
    None
        Results are reported only through ``wandb.log``.
    """
    import copy  # local import: only needed to snapshot the best weights

    with wandb.init(config=config):
        config = wandb.config
        try:
            dl_train = catalog.train.get_dataloader(batch_size=config.batch_size)
            dl_val = catalog.val.get_dataloader(batch_size=1)
            dl_test = catalog.test.get_dataloader(batch_size=1)

            model = _build_model(config).to(device)

            epochs = 200

            optimizer = torch.optim.AdamW(model.parameters(),
                                          lr=config.lr,
                                          betas=config.betas,
                                          weight_decay=config.weight_decay)

            # Best weights are kept in memory rather than in a fixed-name file:
            # a shared path can be overwritten by another agent, or a stale
            # checkpoint from an earlier run can be picked up by mistake.
            best_state = None
            best_val_loss = float('inf')

            for epoch in trange(epochs):
                running_train_loss = []
                model.train()
                for data in dl_train:
                    data = data.to(device)
                    optimizer.zero_grad()
                    nll = model.nll_loss(data).mean()
                    nll.backward()
                    optimizer.step()
                    running_train_loss.append(nll.item())

                avg_val_loss = _mean_nll(model, dl_val)
                avg_train_loss = np.mean(running_train_loss)
                wandb.log({"avg_train_loss": avg_train_loss,
                           "avg_val_loss": avg_val_loss,
                           "epoch": epoch})

                # A NaN validation loss compares False here, so diverged
                # epochs never replace the snapshot.
                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    # state_dict() returns references to the live parameters;
                    # deepcopy freezes their current values.
                    best_state = copy.deepcopy(model.state_dict())

            if best_state is None:
                raise RuntimeError("validation loss never improved; no checkpoint to evaluate")

            # Rebuild with the exact same constructor arguments as the trained
            # model. NOTE(review): presumably tau_mean is not stored in the
            # state dict, in which case omitting it here would evaluate the
            # test set under a different setting than validation — confirm.
            best_model = _build_model(config)
            best_model.load_state_dict(best_state)
            best_model = best_model.to(device)
            avg_test_loss = _mean_nll(best_model, dl_test)

        except Exception as e:
            # Deliberate best-effort: a failed trial must not abort the sweep.
            print(e)
            avg_test_loss = float("nan")

        wandb.log({"avg_test_loss": avg_test_loss})


In [4]:
import os

# Tell wandb which notebook this is via its WANDB_NOTEBOOK_NAME environment
# variable, then authenticate. The call is left as the last expression so the
# cell displays its return value.
os.environ.update(WANDB_NOTEBOOK_NAME="sweep_updated.ipynb")
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mzekai-wang[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [5]:
# Quantity the sweep ranks runs by, and in which direction.
# NOTE(review): this is the *test* loss; ranking hyperparameters by it leaks
# the test set into model selection. Consider "avg_val_loss" — confirm intent.
metric = {"name": "avg_test_loss", "goal": "minimize"}

# Discrete search space. Each entry lists candidate values ("values") or pins
# a single one ("value"); the keys are the attributes `train` reads from
# `wandb.config`.
parameter_dict = {
    "context_size": {"values": [8, 16, 32, 64, 128]},
    "num_components": {"values": [8, 16, 32, 64, 128]},
    "rnn_type": {"value": "LSTM"},
    "dropout_proba": {"values": [0, 0.1, 0.2, 0.3, 0.4, 0.5]},
    "lr": {"values": [1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2]},
    # Every (beta1, beta2) pairing of the two candidate lists.
    "betas": {"values": list(product([0.9, 0.99], [0.99, 0.999, 0.9999]))},
    "weight_decay": {"values": [0, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2]},
    "batch_size": {"values": [8, 16, 32, 64, 128]},
    "tau_mean": {"values": [0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0]},
}

# Full sweep definition: configurations are drawn with the "random" method.
sweep_config = {
    "method": "random",
    "metric": metric,
    "parameters": parameter_dict,
}

In [6]:
# Register the sweep described by `sweep_config` under the given W&B project
# and keep the returned ID so an agent can be attached to it.
sweep_id = wandb.sweep(sweep_config, project="RecurrentTPP on ANSS Nov 6")

Create sweep with ID: odgh4jcm
Sweep URL: https://wandb.ai/zekai-wang/RecurrentTPP%20on%20ANSS%20Nov%206/sweeps/odgh4jcm


In [7]:
# Start a sweep agent in this kernel: it calls `train` once for each
# configuration it receives from the sweep, for at most 200 runs.
wandb.agent(sweep_id, function=train, count=200)

[34m[1mwandb[0m: Agent Starting Run: 85ed9gsd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▄▂▂▂▁▂▂▁▂▁▂▁▁▁▂▁▂▂▁▁▁▂▁▂▁▂▁▂▂▂▂▁▁▂▁▁▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18843
avg_train_loss,-0.22566
avg_val_loss,-2.10463
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: ghdivi06 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15649
avg_train_loss,-0.21273
avg_val_loss,-2.13752
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: emuysfra with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16786
avg_train_loss,-0.19802
avg_val_loss,-2.10756
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: w9f569b4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18489
avg_train_loss,-0.18953
avg_val_loss,-2.02861
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: mx21uywi with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (1, 2329, 128)) of distribution Weibull(scale: torch.Size([1, 2329, 128]), shape: torch.Size([1, 2329, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.3503, 1.1123, 0.5038,  ..., 1.1671, 2.1863, 1.3151],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]]],
       device='cuda:0')


0,1
avg_train_loss,█▅▃▃▃▂▂▂▂▂▁▁▂▂▂▂▁▁▂▂▁▂▁▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▃
avg_val_loss,▆▁▄▆▂▁▁▆▃▁▄▅▅▅█▄▅█▅▄▆▆█▄▆▇█▆▆▅█▄▅▅▄▄▅▅▅▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.20205
avg_val_loss,-2.0023
epoch,142.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 8kup7x8l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.754184…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16483
avg_train_loss,-0.21868
avg_val_loss,-2.09874
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 604d7d7k with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16029
avg_train_loss,-0.2234
avg_val_loss,-2.14512
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 1gpbklwg with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁
avg_val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▃▃▃▃▄▄▄▅▃▄▄▅
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16702
avg_train_loss,-0.3998
avg_val_loss,-1.78583
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 02nh5lpa with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16221
avg_train_loss,-0.23213
avg_val_loss,-2.15736
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: svs8i5rg with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16559
avg_train_loss,-0.23983
avg_val_loss,-2.15471
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pa46i283 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17359
avg_train_loss,-0.19809
avg_val_loss,-2.10795
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: f8vucim9 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.231969…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17769
avg_train_loss,-0.20594
avg_val_loss,-2.11173
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kzjwrmyx with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▃▃▂▂▂▂▃▂▂▂▂▂▂▂▂▂▁▂▂▁▁▂▁▁▁▁▁▁▂▁▁▁▂▁
avg_val_loss,█▄▃▂▂▁▃▂▁▁▁▁▂▁▁▂▁▁▁▂▁▂▂▃▂▂▂▂▂▃▂▂▂▂▃▃▂▃▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16293
avg_train_loss,-0.27381
avg_val_loss,-2.11949
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 93hk7kxu with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▇▇▇▇▇▆▆▆▅▅▅▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁
avg_val_loss,▂▁▁▁▁▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▅▅▅▆▆▆▆▇▇▇▇▇▇▇██▇███
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16977
avg_train_loss,-0.98069
avg_val_loss,-0.14501
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: uoesnjhc with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
avg_val_loss,█▃▂▂▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▆▅▅▅▅▆▅▅▇▆▇▇▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16753
avg_train_loss,-0.43626
avg_val_loss,-1.90641
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8qleii6x with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▂▃▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▃▂▂▁▁▁▁▃▁▂▃▁▁▁▁▁▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18376
avg_train_loss,-0.23204
avg_val_loss,-2.10257
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: w0ukeiu8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.743009…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17441
avg_train_loss,-0.19829
avg_val_loss,-2.11519
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jzv5bwrg with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179469…

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▄▁▁▁▁▂▁▁▆▆▅▅▅▅▅▄▄▄▄▄▄▄
avg_val_loss,▆▃▂▂▄▁▁▁▂▁▂▃▃▄▄▄▄▅▄▅▅▅▆▅▅▆█▄▃▂▂▂▁▂▁▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16852
avg_train_loss,-0.28257
avg_val_loss,-2.10835
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 50mfvdcm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1724
avg_train_loss,-0.20062
avg_val_loss,-2.0975
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jchh608q with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.008 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.377910…

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20157
avg_train_loss,-0.13279
avg_val_loss,-1.93035
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 6bs3g7po with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18097
avg_train_loss,-0.18198
avg_val_loss,-2.07897
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: z14ibpqd with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17415
avg_train_loss,-0.23876
avg_val_loss,-2.10873
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: ysyrm7zr with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172294…

0,1
avg_test_loss,▁
avg_train_loss,█▆▄▄▃▃▃▃▃▃▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▄▂▂▂▃▁▂▁▂▂▁▂▅▂▂▂▂▃▂▄▂▄▂▃▃▃▃▂▄▃▃▆▃▅▃▄▃▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16465
avg_train_loss,-0.29541
avg_val_loss,-2.05228
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 1a0jv7an with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16371
avg_train_loss,-0.21703
avg_val_loss,-2.16225
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: etwlub1v with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1539, 128)) of distribution Weibull(scale: torch.Size([16, 1539, 128]), shape: torch.Size([16, 1539, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[ 6.8737,  0.1452, 13.6023,  ...,  1.5941,  3.8222,  0.8020],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

        [[ 6.8737,  0.1452, 13.6023,  ...,  1.5941,  3.8222,  0.8020],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     

0,1
avg_train_loss,█▅▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▃▂▁▁▁▂▁▁▁▁▁
avg_val_loss,▅▅▂▂▂▂▁▁▂▁▂▂▁▂▂▄▄▄▄▃▄▅▄▄▄▅▅▅▄▅▅▆▆▅▅▇▇██▆
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.52301
avg_val_loss,-1.80094
epoch,66.0


[34m[1mwandb[0m: Agent Starting Run: 9vuroixh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16382
avg_train_loss,-0.22458
avg_val_loss,-2.13371
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xxwa4rap with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▆▆▅▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁
avg_val_loss,▄▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▅▅▅▆▆▇▆▇▇██
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16113
avg_train_loss,-0.63276
avg_val_loss,-0.96655
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 9jx3zkic with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.21957
avg_train_loss,-0.12015
avg_val_loss,-1.955
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 177quqgy with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▅▃▃▄▂▃▂▂▃▃▂▁▄▂▁▁▂▂▁▁▁▁▁▂▁▁▂▂▁▁▃▂▂▁▁▂▁▁
avg_val_loss,█▄▄▃▃▃▃▃▂▂▃▅▃▃▃▂▂▁▂▂▁▁▂▁▁▂▂▅▃▂▂▂▁▄▂▁▁▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15618
avg_train_loss,-0.2386
avg_val_loss,-2.15682
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: i2oszt6s with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18475
avg_train_loss,-0.21402
avg_val_loss,-2.10041
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xdbq550j with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16601
avg_train_loss,-0.28484
avg_val_loss,-2.10251
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: k3zlfis9 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1586
avg_train_loss,-0.24861
avg_val_loss,-2.16465
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: yza5ljrk with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16867
avg_train_loss,-0.21915
avg_val_loss,-2.13094
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: bznebl92 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▄▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18175
avg_train_loss,-0.18776
avg_val_loss,-2.05963
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 676obiyg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17017
avg_train_loss,-0.2453
avg_val_loss,-2.14142
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: soxjfi6a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16131
avg_train_loss,-0.24471
avg_val_loss,-2.16479
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 10u2tiaz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16833
avg_train_loss,-0.20625
avg_val_loss,-2.09968
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: d3qducz0 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17774
avg_train_loss,-0.20242
avg_val_loss,-2.08845
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: cxmofqyn with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,▇▂▁▁▂▂▃▄▄▄▄▄▃▄▃▄▄▄▅▄▄▄▄▅▄▅▄▄▄▅█▄▄▆▄▅▅▅▅▅
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20609
avg_train_loss,-0.20543
avg_val_loss,-1.74793
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: j53dmpyh with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 498, 32)) of distribution Weibull(scale: torch.Size([8, 498, 32]), shape: torch.Size([8, 498, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.1416, 1.1636, 1.0642,  ..., 1.4332, 1.0723, 0.9171],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.1416, 1.1636, 1.0642,  ..., 1.4332, 1.0723, 0.9171],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 1gy0k2f3 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16344
avg_train_loss,-0.25204
avg_val_loss,-2.1272
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 2oqiaa4l with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16461
avg_train_loss,-0.21981
avg_val_loss,-2.15387
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: zkxrz76h with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16604
avg_train_loss,-0.21706
avg_val_loss,-2.11907
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: tj9in4yg with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
avg_val_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16362
avg_train_loss,-0.43007
avg_val_loss,-1.69756
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: ds5sqzlr with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.002


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112388766681155, max=1.0…

  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15959
avg_train_loss,-0.26169
avg_val_loss,-2.14213
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ykal3029 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172628…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17047
avg_train_loss,-0.29571
avg_val_loss,-1.96537
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hr1lnnvn with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16077
avg_train_loss,-0.29086
avg_val_loss,-2.10877
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: acc0rvgg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172394…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▄▃▃▂▂▂
avg_val_loss,█▃▂▂▂▂▁▁▁▁▁▁▂▁▁▁▂▁▁▂▁▂▁▂▃▃▂▂▂▃▃▂▃▃▃▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16812
avg_train_loss,-0.26602
avg_val_loss,-2.1061
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xom79cor with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.754058…

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17986
avg_train_loss,-0.16407
avg_val_loss,-2.0966
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: gm0so7wl with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172225…

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16154
avg_train_loss,-0.19506
avg_val_loss,-2.12818
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kacequze with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.19603
avg_train_loss,-0.21483
avg_val_loss,-2.12523
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: iigcn379 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.212749…

0,1
avg_test_loss,▁
avg_train_loss,█▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16012
avg_train_loss,-0.2666
avg_val_loss,-2.14812
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: qnqhmnio with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16989
avg_train_loss,-0.32256
avg_val_loss,-2.04667
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 5zhjv2xj with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.008 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.377800…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15848
avg_train_loss,-0.27228
avg_val_loss,-2.15031
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: plum60a9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (64, 1076, 64)) of distribution Weibull(scale: torch.Size([64, 1076, 64]), shape: torch.Size([64, 1076, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.3005, 0.3074, 0.8676,  ..., 0.8147, 0.6586, 3.7891],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.3005, 0.3074, 0.8676,  ..., 0.8147, 0.6586, 3.7891],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_train_loss,█▅▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▃▂▂▂▁▁▁▂▁▂▁▁▁▁
avg_val_loss,█▃▃▂▂▂▁▂▁▂▂▁▁▂▁▂▃▁▂▂▂▃▂▁▂▃▁▂▃▂▃▃▃▃▃▂▃▂▃▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.27968
avg_val_loss,-2.03213
epoch,161.0


[34m[1mwandb[0m: Agent Starting Run: fzwn1lqz with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁
avg_val_loss,█▆▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▃▃▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16484
avg_train_loss,-0.40584
avg_val_loss,-1.9123
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xa8xo8kj with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 2851, 32)) of distribution Weibull(scale: torch.Size([8, 2851, 32]), shape: torch.Size([8, 2851, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[3.3527, 0.8307, 0.8063,  ..., 0.2336, 0.4745, 4.1115],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[3.3527, 0.8307, 0.8063,  ..., 0.2336, 0.4745, 4.1115],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_train_loss,█▆▅▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
avg_val_loss,▆▃▁▁▁▁▂▂▃▃▄▄▆█▅▅▆▇▂▂▂▂▂▂▁▂▂▁▁▂▂▁▂▂▃▂▂▂▂▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.28373
avg_val_loss,-2.00974
epoch,118.0


[34m[1mwandb[0m: Agent Starting Run: rhleq575 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.017 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.827916…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16102
avg_train_loss,-0.23027
avg_val_loss,-2.14698
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q6t8v9za with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▄▄▄▄▄▄▃▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▂▁▁
avg_val_loss,█▃▂▁▂▁▁▁▁▁▂▂▁▂▂▃▃▂▃▄▃▅▄▄▄▅▃▄▄▄▅▄▅▄▄▄▅▆▅▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16014
avg_train_loss,-0.33726
avg_val_loss,-2.00964
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xtc49a76 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃▃▃▂▂▃▃▃▃▃▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.19207
avg_train_loss,-0.21978
avg_val_loss,-1.93599
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 9o8bjffu with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▁▁▂▂▂▂▁▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16078
avg_train_loss,-0.26435
avg_val_loss,-2.11584
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xdz5iqii with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (1, 2329, 32)) of distribution Weibull(scale: torch.Size([1, 2329, 32]), shape: torch.Size([1, 2329, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[ 2.2675,  3.6793,  3.1626,  ...,  0.8176, 10.6699,  0.5936],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan]]],
       device='cuda:0')


0,1
avg_train_loss,█▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
avg_val_loss,▆▃▃▁▂▁▁▁▁▂▂▂▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇█▇█
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.57069
avg_val_loss,-1.63431
epoch,67.0


[34m[1mwandb[0m: Agent Starting Run: x6or5xz6 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▂▂▄▂▂▂▂▂▂▂▂▂▁▁▁▁▂▁▂▁▁▁▂▁▂▂▁▁▁▂▁▂▁▁
avg_val_loss,██▅▄▄▄▆▄▃▂▂▂▂▂▂▂▂▂▂▂▁▃▄▂▃▂▁▁▁▁▁▂▃▂▄▄▂▃▃▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15843
avg_train_loss,-0.24756
avg_val_loss,-2.14997
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xaxhdgau with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▃▃▂▂▂▁▁▁▁▁▁▁▁▁▂▂▂▁▁▁▂▂▂▁▁▁▂▂▁▁▁▂▂▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16875
avg_train_loss,-0.21574
avg_val_loss,-2.12926
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: mqfu2jo0 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▂▁▁▂▁▁▁▂▂▁▁▁▄▃▂▂
avg_val_loss,▄▁▂▂▁▁▁▃▂▂▂▃▂▄▃▃▃▄▄▄▄▅▄▃▃▃▃█▃▄▄▄▃▃▄▃▂▂▂▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16456
avg_train_loss,-0.32249
avg_val_loss,-1.98465
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: fbb043a5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16899
avg_train_loss,-0.22369
avg_val_loss,-2.14238
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: tlhji7o5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (64, 1076, 8)) of distribution Weibull(scale: torch.Size([64, 1076, 8]), shape: torch.Size([64, 1076, 8])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[    nan,  2.0103,  1.0089,  ...,  0.2130,  0.5530, 10.1431],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan]],

        [[    nan,  2.0103,  1.0089,  ...,  0.2130,  0.5530, 10.1431],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         ...,
         [    nan,     nan,     nan,  ...,     nan,     nan,     nan],
         [    nan,     nan,  

0,1
avg_train_loss,█▄▄▃▃▃▃▂▃▂▂▂▂▁▁▁▁▁█▇▇▇▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆
avg_val_loss,▇▆▃▃▂▂▂▁▁▁▁▁▁▂▃▃▄▄█▆█▆▆▅▆▆▇▆▆▆▆▆▆▆▅▆▅▅▆▅
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.05249
avg_val_loss,-1.95083
epoch,183.0


[34m[1mwandb[0m: Agent Starting Run: qubgdryp with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▂▂▂▁▂▂▂▁▂▂▂▂▂▂▂▂▂▂▁▁▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18365
avg_train_loss,-0.23377
avg_val_loss,-2.12461
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 79j3wb2l with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172146…

0,1
avg_test_loss,▁
avg_train_loss,█▆▆▅▅▅▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▂▃▂▂▂▂▁▄▂▁
avg_val_loss,▆▃▂▂▂▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▃▃▃▄▄▃▄▄▄▃▅▇▆▆▇▄▇█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15984
avg_train_loss,-0.45179
avg_val_loss,-1.69988
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 095p60fv with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172355…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▂▂▂▂▂▂▂▁▂▁▁▂▂▂▂▂▁▁▂▂▂▂▁▁▂▁▂▂▂▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.19959
avg_train_loss,-0.23144
avg_val_loss,-2.10845
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kv0dxdea with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▂▁▂▁▂▁▁▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16667
avg_train_loss,-0.24624
avg_val_loss,-2.13195
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 78bqifgn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17431
avg_train_loss,-0.16172
avg_val_loss,-2.07679
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8laj85vp with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (128, 3025, 32)) of distribution Weibull(scale: torch.Size([128, 3025, 32]), shape: torch.Size([128, 3025, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.2053, 0.6255, 1.0528,  ..., 0.4612, 1.0553, 0.8271],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.2053, 0.6255, 1.0528,  ..., 0.4612, 1.0553, 0.8271],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan, 

0,1
avg_train_loss,█▄▄▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▂▂
avg_val_loss,█▄▂▂▃▁▁▁▂▁▁▁▁▁▂▂▁▂▂▂▂▂▂▂▂▂▂▂▃▃▃▂▃▃▃▃▄▃▂▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.3225
avg_val_loss,-2.00322
epoch,194.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: tleuhtl9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1968
avg_train_loss,-0.10279
avg_val_loss,-1.98592
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y4zr1lry with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179363…

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁
avg_val_loss,█▃▃▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▃▂▃▃▃▃▄▄▄▄▄▅▅▅▆▅▆▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17083
avg_train_loss,-0.77733
avg_val_loss,-1.34895
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8ljsr1i6 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▂▁▁▂▁▂▁▁▂▁▁▂▁▁▂▂▁▁▃▂▂▂▂▂▂▂▂▂▃▂▂▃▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15918
avg_train_loss,-0.28935
avg_val_loss,-2.10543
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 6z6yxgkt with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179460…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20532
avg_train_loss,-0.16355
avg_val_loss,-1.94751
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kbha8cdg with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.008 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.377975…

0,1
avg_test_loss,▁
avg_train_loss,█▇▆▅▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18912
avg_train_loss,-0.18102
avg_val_loss,-2.01225
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 03i16r9n with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16818
avg_train_loss,-0.19845
avg_val_loss,-2.13334
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: g8v1856w with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▆▅▅▅▅▄▄▄▃▃▃▂▃▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▂▂▂▁▁▂▂▂▁▁
avg_val_loss,▄▂▃▁▁▁▂▂▂▄▃▃▄▅▄▄▄▄▅▅▅▅▆▅█▅▆▆▅▆▅▄▅▅▅▆▆▇▇▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16016
avg_train_loss,-0.43865
avg_val_loss,-1.91404
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 7d0iemrk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.743100…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15695
avg_train_loss,-0.24592
avg_val_loss,-2.16233
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: tuicuu1f with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 676, 64)) of distribution Weibull(scale: torch.Size([16, 676, 64]), shape: torch.Size([16, 676, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.3663, 0.5719, 2.5202,  ..., 0.5385, 0.7803, 0.7521],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.3663, 0.5719, 2.5202,  ..., 0.5385, 0.7803, 0.7521],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_train_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▃▂▃▂▃▃▃▃▃▃▃▃▃▃▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.22607
avg_val_loss,-1.97313
epoch,150.0


[34m[1mwandb[0m: Agent Starting Run: ys0oifjy with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15779
avg_train_loss,-0.23043
avg_val_loss,-2.16178
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 3sdn0q91 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16211
avg_train_loss,-0.18668
avg_val_loss,-2.12446
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 790azak6 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁
avg_val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▂▂▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▄▄▄▄▄▄▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15779
avg_train_loss,-0.34414
avg_val_loss,-2.02
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 4gon7jnk with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16441
avg_train_loss,-0.23753
avg_val_loss,-2.15131
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 9pykwnu5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1792
avg_train_loss,-0.14047
avg_val_loss,-2.075
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: sufi98it with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16295
avg_train_loss,-0.25387
avg_val_loss,-2.15966
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: sopscrun with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20321
avg_train_loss,-0.12162
avg_val_loss,-1.9469
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 9hg4zsf3 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17971
avg_train_loss,-0.2066
avg_val_loss,-2.05233
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 2y05fl3k with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.743021…

0,1
avg_test_loss,▁
avg_train_loss,█▇▆▆▆▆▆▆▅▅▅▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁
avg_val_loss,▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇██
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15936
avg_train_loss,-0.66167
avg_val_loss,-1.02563
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: mfu2nti2 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▄▄▄▄▄▃▃▃▂▂▂▂▂▁▂▂▄▂▅▄▄▃▃▂▂▂▂▂▂▁▁▁▁▁▂
avg_val_loss,▇▅▂▁▁▁▃▁▃▂▃▃▃▃▄▄▅▅▅▇▅▇▇▄▃▃▄▄▅▆▆▆▆▇██▇▆▇▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17532
avg_train_loss,-0.34518
avg_val_loss,-1.96587
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: uicnlfu7 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▇▆▆▆▆▆▅▅▅▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▇▆▆▅
avg_val_loss,▃▁▁▁▁▁▁▁▂▂▂▂▃▃▄▄▄▅▅▅▆▆▆▆▆▇▇▇█▇█▇█▇█▆▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17343
avg_train_loss,-0.33531
avg_val_loss,-1.96694
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: mzsplxb0 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15793
avg_train_loss,-0.28971
avg_val_loss,-2.12359
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kb73o11g with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▃▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▃▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16127
avg_train_loss,-0.21982
avg_val_loss,-2.06989
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hq8gkxav with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16265
avg_train_loss,-0.20244
avg_val_loss,-2.15038
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: rrklre4w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.19844
avg_train_loss,-0.10247
avg_val_loss,-1.97617
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: c3y9zd2v with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16136
avg_train_loss,-0.26134
avg_val_loss,-2.09456
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: f8r6nj50 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18474
avg_train_loss,-0.20803
avg_val_loss,-2.05735
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 9n5xiapu with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1539, 32)) of distribution Weibull(scale: torch.Size([16, 1539, 32]), shape: torch.Size([16, 1539, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.4950, 0.6969, 0.7239,  ..., 1.0732, 0.5511, 0.4937],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.4950, 0.6969, 0.7239,  ..., 1.0732, 0.5511, 0.4937],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_train_loss,█▂▂▁▁▁▁▁▁▁▁▁▁▂▄
avg_val_loss,▅▅▄▂▂▂▂▁▁▁▁▁▂▂█
epoch,▁▁▂▃▃▃▄▅▅▅▆▇▇▇█

0,1
avg_test_loss,
avg_train_loss,0.17381
avg_val_loss,-1.22137
epoch,14.0


[34m[1mwandb[0m: Agent Starting Run: 631tmcqp with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▅▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▅▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20032
avg_train_loss,-0.09329
avg_val_loss,-1.96438
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: wqiysxu7 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▅▄▄▄▄▄▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▂▁▃▂▂▁▁▁▁▁▁▂
avg_val_loss,▅▂▁▅▁▃▂▂▅▃▄▄▅▄▄▃▄▄▅▄▄▅▄▅▃▄▄▄▄▅▃▃▃▄▄▆▄▅█▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15959
avg_train_loss,-0.30748
avg_val_loss,-2.0762
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: wgrurn2w with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16669
avg_train_loss,-0.23791
avg_val_loss,-2.0689
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: twbl3sdn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (32, 1539, 16)) of distribution Weibull(scale: torch.Size([32, 1539, 16]), shape: torch.Size([32, 1539, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7794, 0.8007, 0.6437,  ..., 0.9459, 0.9205, 0.6922],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7794, 0.8007, 0.6437,  ..., 0.9459, 0.9205, 0.6922],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_train_loss,█▅▂▂▂▂▂▁▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▃▂▂▂▂▂▂▃▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▂▁▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,0.0643
avg_val_loss,-1.63472
epoch,117.0


[34m[1mwandb[0m: Agent Starting Run: rh5mmupr with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.182
avg_train_loss,-0.17409
avg_val_loss,-2.04061
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: oiy1ks4p with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17169
avg_train_loss,-0.19821
avg_val_loss,-2.15754
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: utpcxjyn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.231776…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16554
avg_train_loss,-0.17616
avg_val_loss,-2.10478
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: y06708ti with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.231873…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16747
avg_train_loss,-0.23652
avg_val_loss,-2.1295
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hnvyea9i with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18205
avg_train_loss,-0.21279
avg_val_loss,-2.07217
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: nr4hwtd2 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▁▁▁▁▂▁▂▁▁▁▂▁▁▁▁▁▂▁▂▂▁▁▂▂▁▂▂▁▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17562
avg_train_loss,-0.24373
avg_val_loss,-2.10395
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8e36q3ew with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16486
avg_train_loss,-0.17843
avg_val_loss,-2.12205
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hvnukdzr with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 676, 128)) of distribution Weibull(scale: torch.Size([8, 676, 128]), shape: torch.Size([8, 676, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.4805, 4.7190, 2.4240,  ..., 5.3375, 0.2755, 1.1754],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.4805, 4.7190, 2.4240,  ..., 5.3375, 0.2755, 1.1754],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_train_loss,█▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▂▁▁▂▁▁▁▂▁▂▁▂▂▁▂
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.24056
avg_val_loss,-2.07934
epoch,120.0


[34m[1mwandb[0m: Agent Starting Run: 07w6aa99 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▄▄▄▃▃▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▂▁▁▁▂
avg_val_loss,█▂▂▁▂▂▂▃▃▃▄▄▄▄▄▄▄▄▄▅▅▇▅▇▅▆▇▆▅▆▇▆▆▅▆▅▆▅▆▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16612
avg_train_loss,-0.37603
avg_val_loss,-1.9304
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8q6i6lqt with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1624
avg_train_loss,-0.178
avg_val_loss,-2.12622
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: uq4oznfn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16639
avg_train_loss,-0.24569
avg_val_loss,-2.15303
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kxb0pl6r with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16529
avg_train_loss,-0.21848
avg_val_loss,-2.14983
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: a7lal2iv with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▇▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17524
avg_train_loss,-0.19763
avg_val_loss,-2.08257
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 664j99hb with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 2872, 8)) of distribution Weibull(scale: torch.Size([16, 2872, 8]), shape: torch.Size([16, 2872, 8])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7174, 0.6633, 0.7655,  ..., 0.7083, 0.7459, 0.7195],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7174, 0.6633, 0.7655,  ..., 0.7083, 0.7459, 0.7195],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 9jd02b08 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172425…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17522
avg_train_loss,-0.15357
avg_val_loss,-2.08913
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: u2rn4kf7 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172155…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▁▃▂▁▁▁▂▂▂▂▂▃▂▃▂▃▃▃▄▄▃▄▄▄▄▄▄▃▄▄▄▄▄▄▄▄▃▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.21233
avg_train_loss,-0.20557
avg_val_loss,-1.59872
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: tkat8j89 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.208
avg_train_loss,-0.19033
avg_val_loss,-1.80541
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: zd9d4kxa with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 676, 128)) of distribution Weibull(scale: torch.Size([8, 676, 128]), shape: torch.Size([8, 676, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7452, 1.0888, 1.6148,  ..., 1.1200, 1.3023, 1.0452],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7452, 1.0888, 1.6148,  ..., 1.1200, 1.3023, 1.0452],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁
avg_val_loss,█▆▅▂▂▃▁▁▁▂▂▁▃▃▂▃▂▂▄▄▃▂▃▁▂▂▁▂▂▁▂▁▁▂▂▂▁▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.25914
avg_val_loss,-2.14851
epoch,70.0


[34m[1mwandb[0m: Agent Starting Run: frb8p0w3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15995
avg_train_loss,-0.24532
avg_val_loss,-2.16991
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: a36xgcf9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112394644361404, max=1.0…

  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16619
avg_train_loss,-0.18929
avg_val_loss,-2.12392
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: y39dml1u with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172278…

0,1
avg_test_loss,▁
avg_train_loss,█▄▄▃▄▅▄▄▃▃▃▃▃▂▂▂▂▂▃▃▃▂▃▂▃▃▂▂▃▂▂▂▂▂▁▁▁▁▂▁
avg_val_loss,▇▃▆▂▆█▃▆▁▁▂▁▁▂▂▂▂▃▄▂▄▅▅▂▆▅▃▃▆▄▃▂▃▄▃▃▂▅▅▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15929
avg_train_loss,-0.25873
avg_val_loss,-2.1066
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: q2qnmot9 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1539, 64)) of distribution Weibull(scale: torch.Size([16, 1539, 64]), shape: torch.Size([16, 1539, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7010, 1.3578, 2.0406,  ..., 0.0631, 0.5274, 1.0015],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7010, 1.3578, 2.0406,  ..., 0.0631, 0.5274, 1.0015],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_train_loss,▄▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▅▆▅▅▅▅
avg_val_loss,▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▄▅▄▆▄▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.02731
avg_val_loss,-1.75951
epoch,151.0


[34m[1mwandb[0m: Agent Starting Run: mxsbljyw with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▃
avg_val_loss,█▅▃▂▂▂▂▁▁▁▁▁▂▂▂▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▂▂▂▂▂▂▂▃▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16169
avg_train_loss,-0.21327
avg_val_loss,-2.12033
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: n18qc51l with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16921
avg_train_loss,-0.18027
avg_val_loss,-2.12376
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: b3ubg2l1 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179380…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16736
avg_train_loss,-0.20908
avg_val_loss,-2.12283
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 482xse6c with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16649
avg_train_loss,-0.22675
avg_val_loss,-2.13685
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: b3smnx01 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▄▃▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15924
avg_train_loss,-0.26799
avg_val_loss,-2.14683
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: bg4qqxbw with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.020 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▆▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
avg_val_loss,▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▆▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16726
avg_train_loss,-0.06754
avg_val_loss,-1.83326
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jz3gpirp with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.020 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▃▃▃▂▄▄▃▃▂▂▂▂▃▃▂▂▂▂▂▁▁▁▁▁
avg_val_loss,▇▃▂▂▁▁▁▁▁▂▂▂▃▃▃▃▄▅▅▅▃▃▄▅▅▅▆▆▅▄▆▆▇▇▇▇██▇█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16241
avg_train_loss,-0.42948
avg_val_loss,-1.85852
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: k2ltc9v6 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16557
avg_train_loss,-0.26717
avg_val_loss,-2.12395
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 49os720v with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16797
avg_train_loss,-0.19689
avg_val_loss,-2.11784
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: fb0ingox with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▂▂▂▂▄▃▄▄▄▄▄▄▅▅▅▅▅▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16017
avg_train_loss,-0.48231
avg_val_loss,-1.73987
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: aczhizr3 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15872
avg_train_loss,-0.24394
avg_val_loss,-2.14909
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: i1z7wwqt with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179452…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16036
avg_train_loss,-0.2352
avg_val_loss,-2.1529
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 18ovn8j6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172286…

0,1
avg_test_loss,▁
avg_train_loss,█▆▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▆▄▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18436
avg_train_loss,-0.12882
avg_val_loss,-2.03415
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: nnc66t3m with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172194…

0,1
avg_test_loss,▁
avg_train_loss,█▇▇▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁
avg_val_loss,▃▁▁▁▂▃▄▅▅▅▆▆▅▆▆▇▆▇▇▇▇█▇▇▆▇▇▇▇▇▇▇▇▇███▇█▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17412
avg_train_loss,-0.8602
avg_val_loss,-1.13484
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: d9tuk4e9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.020 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15639
avg_train_loss,-0.27273
avg_val_loss,-2.11087
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 0lgp1cnj with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▇▇▇▆▆▆▅▅▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▁▂▁▂▂▂▁▁▁▁▁▁▁▁
avg_val_loss,▃▂▁▁▂▁▁▂▂▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇█▇▇▇█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16819
avg_train_loss,-0.84833
avg_val_loss,-1.14007
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 2fosqn3n with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179434…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▄▃▃▂▂▂▁▁▁▆▇▇▆▆▆▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃
avg_val_loss,▄▂▁▁▁▁▁▁▃▂▃▂▃█▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18228
avg_train_loss,-0.27388
avg_val_loss,-2.12852
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: sarmwjnr with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.180100…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.20594
avg_train_loss,-0.18511
avg_val_loss,-1.94422
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: heh7uhf4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▂▁▁▁▁▁▁▂▁▁▁▁▁▂▁▁▁▂▂▁▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17038
avg_train_loss,-0.21456
avg_val_loss,-2.10737
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: kk0ho96k with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16208
avg_train_loss,-0.28717
avg_val_loss,-2.1251
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 91pul1c6 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▅▅▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂▂▃▄▃▄▄▄▄▅▅▆▆▆▆▇▇▇▇▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16676
avg_train_loss,-0.51804
avg_val_loss,-1.55098
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 3hxmh5md with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.02


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112391500015898, max=1.0…

  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16602
avg_train_loss,-0.29826
avg_val_loss,-2.09368
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hnnmfstw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16192
avg_train_loss,-0.23467
avg_val_loss,-2.13863
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: r06vphjr with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▃▃▂▁▃▂▂▂▂▂▅▃▃▃▂▂▂▂▁▁▂▂▁▁▂▁▁▃▂▂▂▂
avg_val_loss,█▃▁▁▁▂▁▂▂▂▂▂▁▁▁▃▂▄▂▁▁▁▂▁▂▂▁▂▂▂▁▂▂▂▂▂▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1788
avg_train_loss,-0.23876
avg_val_loss,-2.12445
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: frccxwmc with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.183021…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16223
avg_train_loss,-0.19983
avg_val_loss,-2.13448
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 45c0irwt with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16702
avg_train_loss,-0.21881
avg_val_loss,-2.1022
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: xkiefooi with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16354
avg_train_loss,-0.21903
avg_val_loss,-2.13573
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: x8bwz7m3 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16388
avg_train_loss,-0.27063
avg_val_loss,-2.08682
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: qy0zi289 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▁▂▁▂▁▁▁▁▃▃▂▂▂▂▄▃▂▃▃▄▃▂▄▃▄▃▅▄▄▄▄▄▄▃▅▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16593
avg_train_loss,-0.36823
avg_val_loss,-2.01094
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 188h0g9p with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▂▂▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▁▂▁▁▂▁▃▁▃▁▃▁▃▁▂▂▁▁▁▄▁▂▁▁▂▂▂▃▁▃▂▁▃▂▂▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16849
avg_train_loss,-0.27482
avg_val_loss,-2.06658
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: s28kgqdm with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18969
avg_train_loss,-0.18495
avg_val_loss,-1.99301
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9btb1rh9 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.754012…

0,1
avg_test_loss,▁
avg_train_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16335
avg_train_loss,-0.2237
avg_val_loss,-2.1228
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: woi5qafo with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂
avg_val_loss,█▄▃▄▃▁▁▂▁▂▁▁▁▁▂▂▂▁▂▂▂▁▁▃▂▂▃▂▂▂▂▂▂▃▂▂▃▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16697
avg_train_loss,-0.20143
avg_val_loss,-2.12787
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: nfn9s3g6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁
avg_val_loss,█▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17149
avg_train_loss,-0.26003
avg_val_loss,-2.1021
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: vgc03ov3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16451
avg_train_loss,-0.20283
avg_val_loss,-2.12647
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: cdbjid2n with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16277
avg_train_loss,-0.24007
avg_val_loss,-2.12926
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: llwhaahh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16479
avg_train_loss,-0.22688
avg_val_loss,-2.15389
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: chsypugj with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.15951
avg_train_loss,-0.29094
avg_val_loss,-2.08263
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: oc8hxnav with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18947
avg_train_loss,-0.15357
avg_val_loss,-2.03265
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 35jkbxpt with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.015 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.228335…

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▄▄▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▂▂▃▁▁▁▁▁
avg_val_loss,█▄▄▂▁▁▁▁▁▁▁▂▂▃▂▃▃▄▄▄▅▅▅▅▆▅▅▇▇▇▅▇▇▆▅▇▇██▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16874
avg_train_loss,-0.4071
avg_val_loss,-1.87766
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: uu0klgm8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18837
avg_train_loss,-0.21082
avg_val_loss,-2.09209
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 44xbrzh4 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18983
avg_train_loss,-0.15859
avg_val_loss,-2.01283
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: q6tmwrlf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,██▇▇▆▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,██▇▆▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.21988
avg_train_loss,-0.10619
avg_val_loss,-1.88785
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: g340y0qw with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.742961…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16296
avg_train_loss,-0.29653
avg_val_loss,-2.0613
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: o5otozks with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▂▁▂▁▁▁▂▂▂▂▂▂▂▃
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16072
avg_train_loss,-0.22759
avg_val_loss,-2.05514
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 78vuw3qh with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17534
avg_train_loss,-0.19658
avg_val_loss,-2.09856
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 7y9vw5ft with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (24, 587, 64)) of distribution Weibull(scale: torch.Size([24, 587, 64]), shape: torch.Size([24, 587, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.5533, 0.4975, 0.5320,  ..., 0.8352, 0.7125, 0.5985],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.5533, 0.4975, 0.5320,  ..., 0.8352, 0.7125, 0.5985],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_train_loss,█▆▅▅▅▅▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁
avg_val_loss,█▃▄▂▁▁▁▁▁▁▁▂▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▅▆▆▆
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.46951
avg_val_loss,-1.73411
epoch,192.0


[34m[1mwandb[0m: Agent Starting Run: mo9cbgba with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17829
avg_train_loss,-0.19057
avg_val_loss,-2.08542
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jnafus2u with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▆▆▅▅▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁
avg_val_loss,▇▂▂▁▁▁▁▁▂▁▁▂▂▂▃▃▃▃▃▄▄▄▇▄▆▅▇█▇▅▅▆█▇▇▆▇█▇▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16929
avg_train_loss,-0.60122
avg_val_loss,-1.49896
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: ugozypet with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17639
avg_train_loss,-0.15092
avg_val_loss,-2.07904
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: dmehu3t8 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.179754…

0,1
avg_test_loss,▁
avg_train_loss,█▄▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16093
avg_train_loss,-0.26424
avg_val_loss,-2.1337
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 005zs8sa with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16124
avg_train_loss,-0.19289
avg_val_loss,-2.14903
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: clugd8dq with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▄▄▄▄▃▃▃▃▂▃▂▂▃▂▂▃▂▁▁▂▂▂▁▁▂▁▂▁▁▁▁▁▂▁
avg_val_loss,▆▂▁▂▂▃▂▃▆▃▄▃▇▅▅▄▅█▆▅▅▅▆▅▆▆▇▅▅▇▅▆▆██▇▆▇▇▇
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16896
avg_train_loss,-0.42736
avg_val_loss,-1.93203
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: qr3zqeb1 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▆▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▃▃▂▂▂▂▃▃▂▂▁▁▁▁▁▂▂▁▁
avg_val_loss,▆▃▂▂▂▁▁▁▂▂▃▃▂▃▃▄▄▃▃▃▄▃▄▄▅▅▅▃█▄▇▅▄▅▄▄▆▄▄▄
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16812
avg_train_loss,-0.35658
avg_val_loss,-2.06013
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: vs0ylwn4 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17148
avg_train_loss,-0.17728
avg_val_loss,-2.12153
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: hlkxvgkl with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1711
avg_train_loss,-0.16655
avg_val_loss,-2.10125
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: wpgft6t3 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16966
avg_train_loss,-0.18282
avg_val_loss,-2.12414
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jsceu9rq with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172417…

0,1
avg_test_loss,▁
avg_train_loss,█▅▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17217
avg_train_loss,-0.20793
avg_val_loss,-2.1038
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: j9lwnwqa with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▃▃▃▂▂▂▂▃▂▂▂▂▂▂▂▂▂▁▁▂▂▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17453
avg_train_loss,-0.19226
avg_val_loss,-2.1063
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2rq90774 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.99]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▁▂▂▁▁▁▁▁▁▂▁▂▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▂▁▂▂▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16308
avg_train_loss,-0.23411
avg_val_loss,-2.1305
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: i5jj95wh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▃▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▁
avg_val_loss,▆▃▂▂▁▁▁▁▁▂▂▂▂▃▄▃▄▅▅▄▅▆▅▁▁▂▂▂▄▄▅▂▄▅▅▆▇▇▇█
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17131
avg_train_loss,-0.44734
avg_val_loss,-1.75977
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: yn54vae5 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.5
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.172239…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂▁▂▁▁▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16023
avg_train_loss,-0.27985
avg_val_loss,-2.07747
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: jc72tsyy with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.743006…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16224
avg_train_loss,-0.25637
avg_val_loss,-2.14172
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: g9s4nvmi with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▆▅▅▅▅▅▅▄▄▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▃▂▂▂▂▂▂▁▃▁▄▂▂▁▁
avg_val_loss,█▄▃▂▂▁▁▁▁▁▁▂▂▂▂▃▃▃▄▃▄▄▅▅▅▃▄▅▅▆▆▆▇▆▆▄▅▅▆▆
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16854
avg_train_loss,-0.41538
avg_val_loss,-1.93924
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: qawvguyo with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.9999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.18142
avg_train_loss,-0.1976
avg_val_loss,-2.03539
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: p5d6tfdg with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.2
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▇▇▆▆▅▅▄▄▄▃▃▄▃▃▃▅▃▂▂▂▂▂▂▂▁▂▁▁▂▁▂▁▁▁▂▂▁▁
avg_val_loss,▂▁▁▁▂▂▃▃▄▃▄▅▄▄▄▅▅▄▅▆▆▆▇▇▇▇▇▇▇█▇█▇▇██▇███
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17505
avg_train_loss,-0.67449
avg_val_loss,-1.41926
epoch,199.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ldhofyf5 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▅▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16795
avg_train_loss,-0.22069
avg_val_loss,-2.11821
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: k52glf65 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.004 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁
avg_train_loss,█▅▄▄▃▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▅▄▄▄▃▃▃▃▃▃▃▃
avg_val_loss,█▄▂▂▂▁▁▁▂▂▁▁▂▂▂▃▂▂▂▂▂▂▂▄▂▂▃▂▃▃▂▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17496
avg_train_loss,-0.24506
avg_val_loss,-2.14474
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 0wy1c6x1 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.01
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.1654
avg_train_loss,-0.18041
avg_val_loss,-2.1233
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: 8j3epb5s with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.1
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16294
avg_train_loss,-0.24659
avg_val_loss,-2.14268
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: o9oz1r2g with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.9999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.05
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁
avg_train_loss,█▇▆▆▆▆▆▆▆▆▅▅▅▅▅▅▄▄▄▅▅▄▄▃▃▃▂▂▂▁▅▅▄▃▃▂▂▂▁▁
avg_val_loss,▇▃▂▂▁▁▁▁▁▁▁▁▁▂▂▃▃▃▄▃▂▃▄▄▅▅▆▆▇█▂▂▃▄▄▆▇▇██
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16532
avg_train_loss,-0.58713
avg_val_loss,-1.75131
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: l05hq69p with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.99, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 1
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.754045…

0,1
avg_test_loss,▁
avg_train_loss,█▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.16748
avg_train_loss,-0.2517
avg_val_loss,-2.13126
epoch,199.0


[34m[1mwandb[0m: Agent Starting Run: r7iaw1vr with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 2949, 8)) of distribution Weibull(scale: torch.Size([8, 2949, 8]), shape: torch.Size([8, 2949, 8])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.6123, 0.4727, 1.5123,  ..., 0.7554, 1.0979, 0.4845],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.6123, 0.4727, 1.5123,  ..., 0.7554, 1.0979, 0.4845],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_train_loss,█▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▂
avg_val_loss,▆▃▃▃▁▃▂▁▃▂▁▂▂▂▁▁▃▁▁▁▁▂▁▁▁▃▁▂▁▂▁▂▂▁▂▂▃▁▁█
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,
avg_train_loss,-0.21221
avg_val_loss,-1.433
epoch,43.0


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: e5a5v549 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.99]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	tau_mean: 0.02
[34m[1mwandb[0m: 	weight_decay: 0.02


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111861422250513, max=1.0…

  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.015 MB of 0.020 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.742970…

0,1
avg_test_loss,▁
avg_train_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
avg_val_loss,█▄▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███

0,1
avg_test_loss,0.17205
avg_train_loss,-0.16545
avg_val_loss,-2.1123
epoch,199.0
