In [10]:
%load_ext autoreload
%autoreload 2
import torch
import eq
import wandb
from tqdm.notebook import trange
import numpy as np
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [11]:
device = "cuda"
catalog = eq.catalogs.ANSS_MultiCatalog(mag_completeness=4.5)

Loading existing catalog from /home/zekai/repos/recast/data/ANSS_MultiCatalog.


In [12]:
def train(config=None):
    with wandb.init(config=config):
        config = wandb.config
        try:
            dl_train = catalog.train.get_dataloader(batch_size=config.batch_size)
            dl_val = catalog.val.get_dataloader(batch_size=1)
            dl_test = catalog.test.get_dataloader(batch_size=1)

            model = eq.models.RecurrentTPP(context_size=config.context_size,
                                           num_components=config.num_components,
                                           rnn_type=config.rnn_type,
                                           dropout_proba=config.dropout_proba,
                                           learning_rate=config.lr)
            model = model.to(device)

            epochs = 200
            avg_train_loss_list = []
            avg_val_loss_list = []

            optimizer = torch.optim.AdamW(model.parameters(), 
                                          lr=config.lr, 
                                          betas=config.betas, 
                                          weight_decay=config.weight_decay)
            
            best_model_path = "temp_best_model"
            best_val_loss = float('inf')

            for epoch in trange(epochs):
                running_training_loss = []
                model.train()
                for i, data in enumerate(dl_train):
                    data = data.to(device)
                    optimizer.zero_grad()
                    nll = model.nll_loss(data).mean()
                    nll.backward()
                    optimizer.step()
                    running_training_loss.append(nll.item())
                
                model.eval()
                with torch.no_grad():
                    running_val_loss = []
                    for i, data in enumerate(dl_val):
                        data = data.to(device)
                        nll = model.nll_loss(data).mean()
                        running_val_loss.append(nll.item())

                avg_val_loss = np.mean(running_val_loss)

                avg_train_loss_list.append(np.mean(running_training_loss))
                avg_val_loss_list.append(avg_val_loss)

                if avg_val_loss < best_val_loss:
                    best_val_loss = avg_val_loss
                    torch.save(model.state_dict(), best_model_path)

            best_model = eq.models.RecurrentTPP(context_size=config.context_size,
                                                num_components=config.num_components,
                                                rnn_type=config.rnn_type,
                                                dropout_proba=config.dropout_proba,
                                                learning_rate=config.lr)
            best_model.load_state_dict(torch.load(best_model_path))
            best_model = best_model.to(device)
            best_model.eval()
            with torch.no_grad():
                running_test_loss = []
                for i, data in enumerate(dl_test):
                    data = data.to(device)
                    nll = best_model.nll_loss(data).mean()
                    running_test_loss.append(nll.item())
            avg_test_loss = np.mean(running_test_loss)

        except Exception as e:
            print(e)
            avg_test_loss = float("nan")

        wandb.log({"avg_test_loss": avg_test_loss})


In [13]:
import os
os.environ["WANDB_NOTEBOOK_NAME"] = "sweep.ipynb"
wandb.login()



True

In [14]:
sweep_config = {'method': "random"}
metric = {"name": "avg_test_loss",
          "goal": "minimize"}
sweep_config["metric"] = metric

parameter_dict = {"context_size": {'values': [8, 16, 32, 64, 128]},
                  "num_components": {"values": [8, 16, 32, 64, 128]},
                  "rnn_type": {"values": ["RNN", "GRU", "LSTM"]},
                  "dropout_proba": {"values": [0, 0.1, 0.2, 0.3, 0.4, 0.5]},
                  "lr": {"values": [1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2]},
                  "betas": {"value": (0.9, 0.999)},
                  "weight_decay": {"values": [0, 1e-3, 2e-3, 5e-3, 1e-2, 2e-2, 5e-2]},
                  "batch_size": {"values": [8, 16, 32, 64, 128]}}

sweep_config["parameters"] = parameter_dict

In [15]:
sweep_id = wandb.sweep(sweep_config, project="RecurrentTPP on ANSS Nov 5")

Create sweep with ID: 27ca0onv
Sweep URL: https://wandb.ai/zekai-wang/RecurrentTPP%20on%20ANSS%20Nov%205/sweeps/27ca0onv


In [16]:
wandb.agent(sweep_id, function=train, count=100)



<IPython.core.display.HTML object>


wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)
wandb: ERROR Dropped streaming file chunk (see wandb/debug-internal.log)


<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[34m[1mwandb[0m: Agent Starting Run: tdq51qjk with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

Exception in thread Thread-20 (_run_job):
Traceback (most recent call last):
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/site-packages/wandb/agents/pyagent.py", line 298, in _run_job
    self._function()
  File "/tmp/ipykernel_15643/1366785928.py", line 2, in train
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 3120, in __exit__
    self._finish(exit_code=exit_code)
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 1938, in _finish
    hook.call()
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/site-packages/wandb/sdk/wandb_init.py", line 464, in _jupyter_teardown
    ipython.display_pub.publish = ipython.display_pub._orig_publish
                                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'ZMQDisplayPublisher' object has no attribute '_orig_publish'

During handling of the above exception, another exception occurred:

Traceback (most recent call la

<IPython.core.display.HTML object>
VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max=1.0)))
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[34m[1mwandb[0m: Agent Starting Run: exm3zv6a with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

Exception in thread Exception in thread ChkStopThr:
NetStatThr:
Traceback (most recent call last):
Exception in thread Traceback (most recent call last):
IntMsgThr  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
:
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
Traceback (most recent call last):
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 1038, in _bootstrap_inner
        self.run()self.run()
    
self.run()  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 975, in run

  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 975, in run
  File "/home/zekai/miniconda3/envs/eq/lib/python3.11/threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
          File "/home/zekai/miniconda3/envs/eq/lib/python3.11/site-packages/wandb/sdk/wandb_run.py", line 267, in check_network_status
self._target(*se

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16265


[34m[1mwandb[0m: Agent Starting Run: hqe7mqof with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 462, 32)) of distribution Weibull(scale: torch.Size([8, 462, 32]), shape: torch.Size([8, 462, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[   nan, 0.7629, 0.7606,  ..., 0.7818, 0.8243, 0.8388],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[   nan, 0.7629, 0.7606,  ..., 0.7818, 0.8243, 0.8388],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 3f23ig8i with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18022


[34m[1mwandb[0m: Agent Starting Run: mozsxwej with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.184864…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17367


[34m[1mwandb[0m: Agent Starting Run: u688d0ul with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.246308…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16217


[34m[1mwandb[0m: Agent Starting Run: j0h7zq2i with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17515


[34m[1mwandb[0m: Agent Starting Run: 21wsc8e7 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 582, 8)) of distribution Weibull(scale: torch.Size([8, 582, 8]), shape: torch.Size([8, 582, 8])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.8988, 3.6720, 0.7561,  ..., 1.1632, 8.1114, 2.0297],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.8988, 3.6720, 0.7561,  ..., 1.1632, 8.1114, 2.0297],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    n

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: sw1p9aoi with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 767, 16)) of distribution Weibull(scale: torch.Size([8, 767, 16]), shape: torch.Size([8, 767, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.6533, 0.7147, 0.8922,  ..., 0.6525, 0.9216, 0.9456],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.6533, 0.7147, 0.8922,  ..., 0.6525, 0.9216, 0.9456],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: e5tlwvam with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.757529…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18658


[34m[1mwandb[0m: Agent Starting Run: 3d69ednq with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18031


[34m[1mwandb[0m: Agent Starting Run: j9ifcthf with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16392


[34m[1mwandb[0m: Agent Starting Run: y9wv6zrt with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.22648


[34m[1mwandb[0m: Agent Starting Run: 257wzujr with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17032


[34m[1mwandb[0m: Agent Starting Run: 7i8chsrd with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.184621…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17476


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: q18o6yyc with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16572


[34m[1mwandb[0m: Agent Starting Run: e8dapfta with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17972


[34m[1mwandb[0m: Agent Starting Run: e2f7bwjk with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.192881…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18373


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: wbmunmus with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.004 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.192978…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1621


[34m[1mwandb[0m: Agent Starting Run: 8g9gzltt with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.243329…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16696


[34m[1mwandb[0m: Agent Starting Run: e0qxo8kd with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16191


[34m[1mwandb[0m: Agent Starting Run: magsdlci with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 777, 16)) of distribution Weibull(scale: torch.Size([16, 777, 16]), shape: torch.Size([16, 777, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.4928, 0.7050, 0.7236,  ..., 0.5647, 0.5160, 0.6274],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.4928, 0.7050, 0.7236,  ..., 0.5647, 0.5160, 0.6274],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: q33obwz6 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17879


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 285wxp5u with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.19858


[34m[1mwandb[0m: Agent Starting Run: a63fxrk7 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16507


[34m[1mwandb[0m: Agent Starting Run: p0z13sye with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 909, 64)) of distribution Weibull(scale: torch.Size([8, 909, 64]), shape: torch.Size([8, 909, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.5295, 0.5086, 0.6147,  ..., 0.7090, 0.8108, 0.6590],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.5295, 0.5086, 0.6147,  ..., 0.7090, 0.8108, 0.6590],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 1f6s6pyl with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 2935, 128)) of distribution Weibull(scale: torch.Size([16, 2935, 128]), shape: torch.Size([16, 2935, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.3437, 0.5140, 0.4770,  ..., 0.0596, 0.0463, 1.0146],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.3437, 0.5140, 0.4770,  ..., 0.0596, 0.0463, 1.0146],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan, 

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 978kaz97 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1928


[34m[1mwandb[0m: Agent Starting Run: ax13ohnn with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1076, 32)) of distribution Weibull(scale: torch.Size([16, 1076, 32]), shape: torch.Size([16, 1076, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[5.7141, 1.5932, 2.4905,  ..., 0.6599, 0.7428, 1.3132],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[5.7141, 1.5932, 2.4905,  ..., 0.6599, 0.7428, 1.3132],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: da1t41ed with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.192629…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17188


[34m[1mwandb[0m: Agent Starting Run: xd43yx3g with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16582


[34m[1mwandb[0m: Agent Starting Run: hsyf2nip with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18377


[34m[1mwandb[0m: Agent Starting Run: y47dxakf with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20111


[34m[1mwandb[0m: Agent Starting Run: qqvuf2fq with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.750453…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17617


[34m[1mwandb[0m: Agent Starting Run: htvq44lb with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 3025, 64)) of distribution Weibull(scale: torch.Size([16, 3025, 64]), shape: torch.Size([16, 3025, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.9346, 0.9200, 0.7018,  ..., 0.5013, 0.9179, 0.1993],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.9346, 0.9200, 0.7018,  ..., 0.5013, 0.9179, 0.1993],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 4cwbpw6e with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (32, 1539, 16)) of distribution Weibull(scale: torch.Size([32, 1539, 16]), shape: torch.Size([32, 1539, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7473, 0.7565, 0.5851,  ..., 0.6824, 0.9054, 0.7894],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7473, 0.7565, 0.5851,  ..., 0.6824, 0.9054, 0.7894],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: h8c9yvez with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.05


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111233204444741, max=1.0)…

  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (24, 587, 8)) of distribution Weibull(scale: torch.Size([24, 587, 8]), shape: torch.Size([24, 587, 8])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[   nan, 0.6734, 0.7205,  ..., 0.6174, 0.6912, 0.3787],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[   nan, 0.6734, 0.7205,  ..., 0.6174, 0.6912, 0.3787],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: qtvne1dj with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16446


[34m[1mwandb[0m: Agent Starting Run: tay9ad36 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16904


[34m[1mwandb[0m: Agent Starting Run: fdhl65z7 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.192756…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1782


[34m[1mwandb[0m: Agent Starting Run: sb3drlqw with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1539, 64)) of distribution Weibull(scale: torch.Size([16, 1539, 64]), shape: torch.Size([16, 1539, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.1582, 2.9449, 0.2962,  ..., 0.4086, 0.4235, 0.1571],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.1582, 2.9449, 0.2962,  ..., 0.4086, 0.4235, 0.1571],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: wfb0gnkh with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17601


[34m[1mwandb[0m: Agent Starting Run: cmibrula with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.15735


[34m[1mwandb[0m: Agent Starting Run: xbnoikwn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (64, 2935, 64)) of distribution Weibull(scale: torch.Size([64, 2935, 64]), shape: torch.Size([64, 2935, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7034, 0.8726, 0.8387,  ..., 0.7350, 0.6769, 0.8187],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7034, 0.8726, 0.8387,  ..., 0.7350, 0.6769, 0.8187],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: euw5br61 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17486


[34m[1mwandb[0m: Agent Starting Run: clhwfk4h with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16567


[34m[1mwandb[0m: Agent Starting Run: smkr3n9o with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16302


[34m[1mwandb[0m: Agent Starting Run: ojyvgp68 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.21251


[34m[1mwandb[0m: Agent Starting Run: fym91ruh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16782


[34m[1mwandb[0m: Agent Starting Run: o6f2rhjm with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1722


[34m[1mwandb[0m: Agent Starting Run: ksnhj8wm with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17094


[34m[1mwandb[0m: Agent Starting Run: e2bu8q1b with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 2935, 32)) of distribution Weibull(scale: torch.Size([16, 2935, 32]), shape: torch.Size([16, 2935, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.8626, 0.9293, 0.8911,  ..., 1.3364, 1.3425, 2.0519],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.8626, 0.9293, 0.8911,  ..., 1.3364, 1.3425, 2.0519],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: bwkunbmj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16801


[34m[1mwandb[0m: Agent Starting Run: 01jldty5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.21786


[34m[1mwandb[0m: Agent Starting Run: d30arbq6 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17425


[34m[1mwandb[0m: Agent Starting Run: cnbop7dm with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 643, 128)) of distribution Weibull(scale: torch.Size([8, 643, 128]), shape: torch.Size([8, 643, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.8074, 0.5551, 0.4743,  ..., 0.7552, 0.6703, 1.6492],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.8074, 0.5551, 0.4743,  ..., 0.7552, 0.6703, 1.6492],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: ezuae5jm with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16574


[34m[1mwandb[0m: Agent Starting Run: yj0x8u64 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16338


[34m[1mwandb[0m: Agent Starting Run: iq68nn2z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20307


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 4i6g08fg with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16533


[34m[1mwandb[0m: Agent Starting Run: k8d6jy9q with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20021


[34m[1mwandb[0m: Agent Starting Run: o8433rik with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.750554…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18612


[34m[1mwandb[0m: Agent Starting Run: xdhh77fu with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16902


[34m[1mwandb[0m: Agent Starting Run: w10itsx7 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.019 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20303


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ssem0b9q with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20087


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: qljupd3g with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 909, 64)) of distribution Weibull(scale: torch.Size([8, 909, 64]), shape: torch.Size([8, 909, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.2155, 0.7746, 1.8103,  ..., 2.1707, 1.5183, 0.7188],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.2155, 0.7746, 1.8103,  ..., 2.1707, 1.5183, 0.7188],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: od2ihz98 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.01


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112390255584614, max=1.0…

  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 498, 16)) of distribution Weibull(scale: torch.Size([8, 498, 16]), shape: torch.Size([8, 498, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.8190, 0.6556, 0.5395,  ..., 0.5743, 0.5324, 0.8049],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.8190, 0.6556, 0.5395,  ..., 0.5743, 0.5324, 0.8049],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 9o25i4hs with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.757595…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16853


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: gpnr89i9 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (64, 3025, 32)) of distribution Weibull(scale: torch.Size([64, 3025, 32]), shape: torch.Size([64, 3025, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7264, 0.1865, 0.9606,  ..., 1.1829, 1.0140, 0.7858],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7264, 0.1865, 0.9606,  ..., 1.1829, 1.0140, 0.7858],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: scb9rhnz with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1708


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: jtnm76hf with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.014 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.757711…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.22122


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: vvp0uoqx with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 851, 16)) of distribution Weibull(scale: torch.Size([16, 851, 16]), shape: torch.Size([16, 851, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.0230, 0.9667, 0.9398,  ..., 1.9668, 6.0034, 0.4380],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.0230, 0.9667, 0.9398,  ..., 1.9668, 6.0034, 0.4380],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: sh7j8125 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1539, 32)) of distribution Weibull(scale: torch.Size([16, 1539, 32]), shape: torch.Size([16, 1539, 32])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7011, 0.9131, 0.5155,  ..., 0.5511, 0.3479, 0.8231],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7011, 0.9131, 0.5155,  ..., 0.5511, 0.3479, 0.8231],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: b6gmi060 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17552


[34m[1mwandb[0m: Agent Starting Run: fvqsappg with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 498, 16)) of distribution Weibull(scale: torch.Size([8, 498, 16]), shape: torch.Size([8, 498, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.6898, 0.8705, 0.6010,  ..., 0.4162, 0.6519, 1.2935],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.6898, 0.8705, 0.6010,  ..., 0.4162, 0.6519, 1.2935],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: iqee9up9 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16555


[34m[1mwandb[0m: Agent Starting Run: dkb3ha3m with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17043


[34m[1mwandb[0m: Agent Starting Run: tkb8jh8x with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.184703…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16199


[34m[1mwandb[0m: Agent Starting Run: g4sl7ptn with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16567


[34m[1mwandb[0m: Agent Starting Run: j1vrthf5 with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.018 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.192279…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.17714


[34m[1mwandb[0m: Agent Starting Run: bllonq5z with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.01


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16883


[34m[1mwandb[0m: Agent Starting Run: 784mp7dk with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16852


[34m[1mwandb[0m: Agent Starting Run: jt9uih09 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.019 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.184530…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16786


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: d4qh81br with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (32, 828, 64)) of distribution Weibull(scale: torch.Size([32, 828, 64]), shape: torch.Size([32, 828, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[2.5099, 7.9285, 2.3087,  ..., 2.2299, 1.6062, 2.3115],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[2.5099, 7.9285, 2.3087,  ..., 2.2299, 1.6062, 2.3115],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 7ngkp9iu with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.21478


[34m[1mwandb[0m: Agent Starting Run: ljjgo0sr with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (16, 1076, 16)) of distribution Weibull(scale: torch.Size([16, 1076, 16]), shape: torch.Size([16, 1076, 16])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.8850, 0.7245, 1.1577,  ..., 0.7717, 0.3603, 0.6934],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.8850, 0.7245, 1.1577,  ..., 0.7717, 0.3603, 0.6934],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: wymuseci with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.001
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.003 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1784


[34m[1mwandb[0m: Agent Starting Run: 7w0s7aqd with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (32, 2935, 128)) of distribution Weibull(scale: torch.Size([32, 2935, 128]), shape: torch.Size([32, 2935, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.7005, 2.0533, 0.3462,  ..., 1.6893, 2.1679, 0.4387],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.7005, 2.0533, 0.3462,  ..., 1.6893, 2.1679, 0.4387],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan, 

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: qagt2a0r with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.2
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.1675


[34m[1mwandb[0m: Agent Starting Run: 05a2wrb0 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 32
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16737


[34m[1mwandb[0m: Agent Starting Run: ulynxvxk with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 16
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.001


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.21121


[34m[1mwandb[0m: Agent Starting Run: qai6jklj with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.005


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18282


[34m[1mwandb[0m: Agent Starting Run: fu5mjuao with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.002
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.05


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.16446


[34m[1mwandb[0m: Agent Starting Run: e9xa1txo with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 32
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.02
[34m[1mwandb[0m: 	num_components: 128
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 498, 128)) of distribution Weibull(scale: torch.Size([8, 498, 128]), shape: torch.Size([8, 498, 128])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[1.0279, 0.5652, 0.6934,  ..., 0.4409, 0.2124, 1.2281],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[1.0279, 0.5652, 0.6934,  ..., 0.4409, 0.2124, 1.2281],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 1bb0sd4b with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.18028


[34m[1mwandb[0m: Agent Starting Run: f66fkv4r with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 128
[34m[1mwandb[0m: 	dropout_proba: 0.4
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: GRU
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.243631…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.24103


[34m[1mwandb[0m: Agent Starting Run: azxhywq8 with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.3
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0.02


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.20769


[34m[1mwandb[0m: Agent Starting Run: 2xg0c8ml with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0.1
[34m[1mwandb[0m: 	lr: 0.05
[34m[1mwandb[0m: 	num_components: 16
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.177


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: cy4hg6ln with config:
[34m[1mwandb[0m: 	batch_size: 8
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 64
[34m[1mwandb[0m: 	dropout_proba: 0
[34m[1mwandb[0m: 	lr: 0.01
[34m[1mwandb[0m: 	num_components: 64
[34m[1mwandb[0m: 	rnn_type: RNN
[34m[1mwandb[0m: 	weight_decay: 0


  0%|          | 0/200 [00:00<?, ?it/s]

Expected parameter scale (Tensor of shape (8, 857, 64)) of distribution Weibull(scale: torch.Size([8, 857, 64]), shape: torch.Size([8, 857, 64])) to satisfy the constraint GreaterThan(lower_bound=0.0), but found invalid values:
tensor([[[0.9195, 0.6618, 0.8896,  ..., 0.3535, 0.2973, 0.6411],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan]],

        [[0.9195, 0.6618, 0.8896,  ..., 0.3535, 0.2973, 0.6411],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         ...,
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,    nan,  ...,    nan,    nan,    nan],
         [   nan,    nan,  

0,1
avg_test_loss,


[34m[1mwandb[0m: Agent Starting Run: 7882uf3t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	betas: [0.9, 0.999]
[34m[1mwandb[0m: 	context_size: 8
[34m[1mwandb[0m: 	dropout_proba: 0.5
[34m[1mwandb[0m: 	lr: 0.005
[34m[1mwandb[0m: 	num_components: 8
[34m[1mwandb[0m: 	rnn_type: LSTM
[34m[1mwandb[0m: 	weight_decay: 0.002


  0%|          | 0/200 [00:00<?, ?it/s]

VBox(children=(Label(value='0.003 MB of 0.014 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.243681…

0,1
avg_test_loss,▁

0,1
avg_test_loss,0.19077
