In [1]:
cd /home/tvangraft/tudelft/thesis/metaengineering

/home/tvangraft/tudelft/thesis/metaengineering


In [5]:
from collections import defaultdict
from typing import DefaultDict, List, Hashable, Dict, Any

from src.utils.utils import get_generator, get_project_root
from src.utils.test_result_store import TestResultStore

from src.pipeline.config import DataLoaderConfig, TaskLoaderConfig
from src.pipeline.taskloader import TaskLoader, TaskFrame
from src.pipeline.dataloader import DataLoader

from src.orchestrator.trainer import Trainer

from src.settings.tier import Tier
from src.settings.strategy import Strategy
from src.settings.metabolites import ENZYMES, METABOLITES, PRECURSOR_METABOLITES, PRECURSOR_METABOLITES_NO_TRANSFORM

from src.gnn.data_augmentation import DataAugmentation
from src.gnn.embeddings import generate_embedding
from src.gnn.graph_builder import get_samples_hetero_graph, get_graph_fc

import pandas as pd
import numpy as np

import cobra
from cobra.util import create_stoichiometric_matrix
from cobra.core import Reaction

import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

from more_itertools import flatten

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import mean_absolute_error

from scipy.stats import pearsonr

In [3]:
path = "/home/tvangraft/tudelft/thesis/metaengineering/data"
model = cobra.io.read_sbml_model(f'{path}/iMM904.xml')

Scaling...
 A: min|aij| =  1.000e+00  max|aij| =  1.000e+00  ratio =  1.000e+00
Problem data seem to be well scaled


In [6]:
edge_list_df_unfiltered = pd.read_csv('./data/training/edge_list_unfiltered.csv')
graph_fc_unfiltered = get_graph_fc(edge_list_df_unfiltered, PRECURSOR_METABOLITES_NO_TRANSFORM)

edge_list_df_strict = pd.read_csv('./data/training/edge_list_strict.csv')
graph_fc_strict = get_graph_fc(edge_list_df_strict, PRECURSOR_METABOLITES)

edge_list_df_all = pd.read_csv('./data/training/edge_list_all.csv')
graph_fc_all = get_graph_fc(edge_list_df_all, METABOLITES)

KeyError: '2pg'

In [15]:
graph_fc_df = get_graph_fc_df(edge_list_df_unfiltered, METABOLITES)
graph_fc_df.std()

YBL015W    1.005305
YBR026C    1.005305
YBR221C    1.005305
YCR005C    1.005305
YDL040C    1.005305
             ...   
e4p        0.764922
f6p        1.005305
oaa        0.437595
pyr        1.005305
r5p        1.005305
Length: 77, dtype: float64

In [16]:
split_kwargs = dict(
    stratify='metabolite_id',
    shuffle=True
)
X_train, X_test, y_train, y_test = trainer.do_train_test_split(tf, Strategy.ALL, **split_kwargs)


In [17]:
h = np.array(X_test['KO_ORF'].unique())
k = np.array(X_test[X_test['metabolite_id'] == 'pyr']['KO_ORF'].unique())

np.argwhere(np.isin(h, k)).flatten().shape

(29,)

# Modeling

In [18]:
import os
from functools import partial

from tqdm import tqdm

import torch
from torch.nn import BatchNorm1d, ModuleList
import torch.nn.functional as F

from torch_geometric.data import Data, HeteroData
from torch_geometric.utils import from_networkx, to_networkx
from torch_geometric.loader import DataLoader as GeoDataLoader
from torch_geometric.nn import GAT, GCNConv, to_hetero, SAGEConv, GATConv, HeteroLinear, Linear, Node2Vec
import torch_geometric.transforms as T
from torch_geometric.nn.conv import HeteroConv

import mlflow.pytorch

from config import HYPERPARAMETERS, BEST_PARAMETERS

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.air import session, RunConfig
from ray.tune.integration.mlflow import mlflow_mixin
from ray.tune.integration.mlflow import MLflowLoggerCallback

# device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
mlflow.set_tracking_uri("http://localhost:5000")
device = torch.device("cpu")
torch.manual_seed(42)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7fb0d0c7f410>

## Data preparation

## Model prep

In [None]:
def count_parameters(model):
    # for p in model.parameters():
    #     print(p)

    return sum(p.numel() for p in model.parameters() if p.requires_grad)

def log_metrics(all_preds, all_ground_truth, all_knockout_ids, epoch, type: str, debug=False):
    mae = mean_absolute_error(all_ground_truth, all_preds)
    r2 = pearsonr(all_preds, all_ground_truth)[0]
    # all_knockout_ids = [data_augmentation_get_knockout_label(knockout_id) for knockout_id in all_knockout_ids]
    
    # k = np.array(X_test[X_test['metabolite_id'] == 'pyr']['KO_ORF'].unique())
    # mask_idx = np.argwhere(np.isin(all_knockout_ids, k)).flatten()
    # masked_mae = mean_absolute_error(all_ground_truth[mask_idx], all_preds[mask_idx])
    # masked_r2 = pearsonr(all_preds[mask_idx], all_ground_truth[mask_idx])[0]
    
    if debug:
        print(f"{mae=}")
        print(f"{r2=}")
    mlflow.log_metric(key="Mean absolute error", value=float(mae), step=epoch)
    mlflow.log_metric(key="R2 score", value=float(r2), step=epoch)
    # mlflow.log_metric(key="Masked Mean absolute error", value=float(masked_mae), step=epoch)
    # mlflow.log_metric(key="Masked R2 score", value=float(masked_r2), step=epoch)

In [None]:
def train_one_epoch(epoch, model, train_loader, optimizer, loss_fn):
    # Enumerate over the data
    running_loss = 0.0
    step = 0
    for _, batch in enumerate(train_loader):
        # Use GPU
        batch.to(device)
        # Reset gradients
        optimizer.zero_grad() 
        # Passing the node features and the connection info
        pred = model.forward(
            batch.x_dict, 
            batch.edge_index_dict,
            # batch.edge_attr.float(),
            # batch.batch
        ) 
        # Calculating the loss and gradients
        train_mask = batch['metabolites'].train_mask.bool()
        loss = loss_fn(
            torch.squeeze(pred['metabolites']), 
            batch['metabolites'].y.float()
        )

        loss.backward()  
        optimizer.step()  
        # Update tracking
        running_loss += loss.item()
        step += 1
    
    with tune.checkpoint_dir(epoch) as checkpoint_dir:
        path = os.path.join(checkpoint_dir, "checkpoint")
        torch.save((model.state_dict(), optimizer.state_dict()), path)

    tune.report(loss=(running_loss/step))

    return running_loss/step
    
@mlflow_mixin
def test(epoch, model, test_loader, loss_fn, debug=False):
    all_preds_raw = []
    all_labels = []
    all_knockout_ids = []
    running_loss = 0.0
    step = 0
    for batch in test_loader:
        batch.to(device)
        # print(batch)  
        pred = model(
            batch.x_dict, 
            batch.edge_index_dict,
        )
        test_mask = batch['metabolites'].test_mask.bool()

        if debug:
            print(f"{test_mask.sum()=}")
            print(
                f"{pred['metabolites'].shape=} \n"
                f"{pred['metabolites'][test_mask].mean()=} \n"
                f"{pred['metabolites'][test_mask].max()=} \n"
                f"{pred['metabolites'][test_mask].min()=} \n"
                f"{pred['metabolites'][test_mask].shape=} \n"
            ) 
        
        loss = loss_fn(
            torch.squeeze(pred['metabolites'][test_mask]), 
            torch.squeeze(batch['metabolites'].y.float()[test_mask])
        )

         # Update tracking
        running_loss += loss.item()
        step += 1
        all_preds_raw.append(pred['metabolites'][test_mask].cpu().detach().numpy())
        all_labels.append(batch['metabolites'].y[test_mask].cpu().detach().numpy())
        # all_knockout_ids.append(batch['enzymes'].knockout_label_id.cpu().detach().numpy())
    
    all_preds_raw = np.concatenate(all_preds_raw).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    # all_knockout_ids = np.concatenate(all_knockout_ids).ravel()
    log_metrics(all_preds_raw, all_labels, all_knockout_ids, epoch, "test")
    return running_loss/step

## Model construction

In [None]:
class HeteroGCNModel(torch.nn.Module):
    def __init__(self, model_config) -> None:
        super(HeteroGCNModel, self).__init__()
        embedding_size = model_config["model_embedding_size"]
        n_heads = model_config["model_attention_heads"]
        self.n_layers = model_config["model_layers"]
        
        self.conv_layers = ModuleList([])
        self.transf_layers = ModuleList([])
        self.pooling_layers = ModuleList([])
        self.bn_layers = ModuleList([])
        
        self.conv1 = GATConv(
            (-1, -1), out_channels=embedding_size, heads=n_heads, add_self_loops=False, bias=False
        )
        self.transf1 = Linear(
            in_channels=embedding_size*n_heads, 
            out_channels=embedding_size, 
        )

        for i in range(self.n_layers):
            self.conv_layers.append(
                GATConv(
                    (-1, -1), 
                    out_channels=embedding_size, 
                    heads=n_heads, 
                    add_self_loops=False
                )
            )
            self.transf_layers.append(
                Linear(
                    embedding_size*n_heads, 
                    embedding_size
                )
            )

        self.conv2 = GATConv(
            (-1, -1), out_channels=1, add_self_loops=False, bias=False
        )

        self.linear1 = Linear(n_heads * embedding_size, embedding_size)
        self.linear2 = Linear(embedding_size, 1)
    
    def forward(self, x, edge_index):
        # random weights for metabolite nodes should cancel out their contribution
        # metabolite_fc = torch.rand(x.shape[0], device=device)
        
        x = self.conv1(x, edge_index)
        # print(x)
        # x = torch.relu(self.transf1(x))

        for i in range(self.n_layers):
            x = self.conv_layers[i](x, edge_index)
            x = torch.relu(self.transf_layers[i](x))

        x = self.conv2(x, edge_index)
        
        # x = torch.relu(self.linear1(x))
        # x = F.dropout(x, p=0.8, training=self.training)
        # x = self.linear2(x)
        return x 

## Model running

In [None]:
model_config = {
    "model_embedding_size": 64,
    "model_attention_heads": 3,
    "model_layers": 5,
    "batch_size": 4,
    "learning_rate": 0.01,
    "sgd_momentum": 0.8,
    "scheduler_gamma": 1,
}

@mlflow_mixin
def run_one_training(model_config, metabolite_id, checkpoint_dir):
    print(f"creating model {model_config=}")
    samples = get_samples_hetero_graph(metabolite_id)
    params = model_config
    run_id = model_config['mlflow']['tags']['mlflow.parentRunId']
    with mlflow.start_run(nested=True) as run:
        mlflow.set_tag("mlflow.parentRunId", run_id)
        # Logging params
        for key in params.keys():
            mlflow.log_param(key, params[key])

        # Preparing training
        train_loader = GeoDataLoader(samples, batch_size=params['batch_size'])
        test_loader = GeoDataLoader(samples, batch_size=1)
        
        # Loading the model
        print("Loading model...")
        model = HeteroGCNModel(model_config=params)
        model = to_hetero(model, samples[0].metadata(), aggr='mean')
        model = model.to(device)

        if checkpoint_dir:
            model_state, optimizer_state = torch.load(
                os.path.join(checkpoint_dir, "checkpoint"))
            model.load_state_dict(model_state)
            optimizer.load_state_dict(optimizer_state)

        # < 1 increases precision, > 1 recall
        # loss_fn = torch.nn.MSELoss(reduction='none')
        loss_fn = torch.nn.MSELoss()
        # we need to keep the lr quite low since otherwise the weights explode
        optimizer = torch.optim.SGD(
            model.parameters(), 
            lr=params['learning_rate'],
            momentum=params['sgd_momentum'],
            # weight_decay=5e-4
        )
        scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params['scheduler_gamma'])
        
        # Start training
        best_loss = 1000
        early_stopping_counter = 0
        max_epochs = 1
        for epoch in tqdm(range(max_epochs)): 
            if early_stopping_counter <= 10: # = x * 5 
                # Training
                model.train()
                loss = train_one_epoch(epoch, model, train_loader, optimizer, loss_fn)
                # print(f"Epoch {epoch} | Train Loss {loss}")
                mlflow.log_metric(key="Train loss", value=float(loss), step=epoch)

                # Testing
                model.eval()
                if epoch % 5 == 0 or epoch == max_epochs - 1:
                    loss = test(epoch, model, test_loader, loss_fn)
                    # print(f"Epoch {epoch} | Test Loss {loss}")
                    mlflow.log_metric(key="Test loss", value=float(loss), step=epoch)
                    
                    # Update best loss
                    if float(loss) < best_loss:
                        best_loss = loss
                        # Save the currently best model 
                        # mlflow.pytorch.log_model(model, "model", signature=SIGNATURE)
                        early_stopping_counter = 0
                    else:
                        early_stopping_counter += 1

                scheduler.step()
                mlflow.log_metric(key="Learning rate", value=float(scheduler.get_last_lr()[0]), step=epoch)
            else:
                print("Early stopping due to no improvement.")
                session.report({
                    "loss": best_loss
                })
                return {"loss": best_loss}
    print(f"Finishing training with best test loss: {best_loss}")

    with torch.no_grad():
        sample = samples[0].to(device)
        model.forward(sample.x_dict, sample.edge_index_dict)
        print(f"Number of parameters: {count_parameters(model)}")

    session.report({
        "loss": best_loss
    })
    return {"loss": best_loss}

In [40]:
@mlflow_mixin
def run_one_training(model_config, metabolite_id, checkpoint_dir):
    print(f"creating model {model_config=}")

    samples = get_samples_hetero_graph(metabolite_id)
    params = model_config
    run_id = model_config['mlflow']['tags']['mlflow.parentRunId']
    mlflow.set_tag("mlflow.parentRunId", run_id)
    # Logging params
    for key in params.keys():
        mlflow.log_param(key, params[key])

    # Preparing training
    train_loader = GeoDataLoader(samples, batch_size=params['batch_size'])
    test_loader = GeoDataLoader(samples, batch_size=1)
    
    # Loading the model
    print("Loading model...")
    model = HeteroGCNModel(model_config=params)
    model = to_hetero(model, samples[0].metadata(), aggr='mean')
    model = model.to(device)

    if checkpoint_dir:
        model_state, optimizer_state = torch.load(
            os.path.join(checkpoint_dir, "checkpoint"))
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # < 1 increases precision, > 1 recall
    # loss_fn = torch.nn.MSELoss(reduction='none')
    loss_fn = torch.nn.MSELoss()
    # we need to keep the lr quite low since otherwise the weights explode
    optimizer = torch.optim.SGD(
        model.parameters(), 
        lr=params['learning_rate'],
        momentum=params['sgd_momentum'],
        # weight_decay=5e-4
    )
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=params['scheduler_gamma'])
    
    # Start training
    best_loss = 1000
    early_stopping_counter = 0
    max_epochs = 300
    for epoch in tqdm(range(max_epochs)): 
        if early_stopping_counter <= 25: # = x * 5 
            # Training
            model.train()
            loss = train_one_epoch(epoch, model, train_loader, optimizer, loss_fn)
            # print(f"Epoch {epoch} | Train Loss {loss}")
            mlflow.log_metric(key="Train loss", value=float(loss), step=epoch)
            
            # Testing
            model.eval()
            if epoch % 5 == 0 or epoch == max_epochs - 1:
                loss = test(epoch, model, test_loader, loss_fn)
                # print(f"Epoch {epoch} | Test Loss {loss}")
                mlflow.log_metric(key="Test loss", value=float(loss), step=epoch)
                
                # Update best loss
                if float(loss) < best_loss:
                    best_loss = loss
                    # Save the currently best model 
                    # mlflow.pytorch.log_model(model, "model", signature=SIGNATURE)
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1

            scheduler.step()
            mlflow.log_metric(key="Learning rate", value=float(scheduler.get_last_lr()[0]), step=epoch)
            
        else:
            print("Early stopping due to no improvement.")
            session.report({
                "loss": best_loss
            })
            return {"loss": best_loss}
    print(f"Finishing training with best test loss: {best_loss}")

    with torch.no_grad():
        sample = samples[0].to(device)
        model.forward(sample.x_dict, sample.edge_index_dict)
        print(f"Number of parameters: {count_parameters(model)}")

    session.report({
        "loss": best_loss
    })

    mlflow.end_run()

    return {"loss": best_loss}

In [36]:
HYPERPARAMETERS = {
    "batch_size": tune.choice([2, 4, 8]),
    "learning_rate": tune.choice([0.1, 0.05, 0.01, 0.001]),
    "sgd_momentum": tune.choice([0.9, 0.8, 0.5]),
    "scheduler_gamma": tune.choice([0.995, 0.9, 0.8, 0.5, 1]),
    "model_embedding_size": tune.choice([8, 16, 32, 64, 128]),
    "model_attention_heads": tune.choice([1, 2, 3, 4]),
    "model_layers": tune.choice([1, 3, 5, 7]),
}

In [43]:
def tune_metabolite_hyper_parameters(metabolite_id, experiment_name, num_samples=10):
    # mlflow.set_tracking_uri(mlflow.get_tracking_uri())
    # mlflow.set_experiment(experiment_name=experiment_name)

    parent_id = mlflow.active_run().info.run_id if mlflow.active_run() else None

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=300,
        grace_period=10,
        reduction_factor=2
    )
    reporter = CLIReporter(
        metric_columns=["loss", "training_iteration"]
    )

    trainable = tune.with_parameters(
        run_one_training, 
        metabolite_id=metabolite_id,
        checkpoint_dir=None,
    )

    result = tune.run(
        trainable,
        # resources_per_trial={"cpu": 1, "gpu": 1},
        config={
            **HYPERPARAMETERS,
            "mlflow": {
                "experiment_name": experiment_name,
                "tracking_uri": mlflow.get_tracking_uri(),
                "save_artifacts": True,
                "tags": {
                    "mlflow.parentRunId": parent_id
                }
            }
        },
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter,
    )
    return result

# tune_metabolite_hyper_parameters("pyr")

In [45]:
run_name = "model_gat_node_embeddings"
experiment_name = 'metabolite_gnn_sweep_full'
mlflow.set_experiment(experiment_name)
with mlflow.start_run(run_name=run_name) as run:
    for metabolite_id in PRECURSOR_METABOLITES:
        with mlflow.start_run(run_name=f"model_{metabolite_id}", nested=True):
            result = tune_metabolite_hyper_parameters(metabolite_id, experiment_name, num_samples=16)
            print(result)

== Status ==
Current time: 2022-12-11 18:34:34 (running for 00:00:01.17)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (15 PENDING, 1 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |
|                              |          |                    |              |                 |                      s |     

  0%|          | 0/300 [00:00<?, ?it/s]0m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_136e7_00000,2022-12-11_18-35-25,True,,33c6689ea163442ca46f502cea599b39,VGI-DEKSPTOP-TIJMEn,40,0.691313,172.29.86.26,26578,True,47.687,0.644296,47.687,1670780125,0,,40,136e7_00000,0.124693
run_one_training_136e7_00001,2022-12-11_18-35-05,True,,8c379741491d4b0da3f0197a8787f06b,VGI-DEKSPTOP-TIJMEn,20,0.703326,172.29.86.26,26689,True,22.3516,0.58685,22.3516,1670780105,0,,20,136e7_00001,0.252936
run_one_training_136e7_00002,2022-12-11_18-34-52,True,,9a332a3f9fab4c649ca68d9e04a0bc96,VGI-DEKSPTOP-TIJMEn,10,0.704342,172.29.86.26,26691,True,9.32165,0.667562,9.32165,1670780092,0,,10,136e7_00002,0.2263
run_one_training_136e7_00003,2022-12-11_18-34-52,True,,f9aa2418ace5448cbb31744b81fd76cd,VGI-DEKSPTOP-TIJMEn,10,0.70405,172.29.86.26,26693,True,9.23457,0.63072,9.23457,1670780092,0,,10,136e7_00003,0.210002
run_one_training_136e7_00004,2022-12-11_18-35-19,True,,7a3fd7948a0b46fbbdac1a253d8d2e05,VGI-DEKSPTOP-TIJMEn,40,0.693243,172.29.86.26,26695,True,36.3528,0.560302,36.3528,1670780119,0,,40,136e7_00004,0.20078
run_one_training_136e7_00005,2022-12-11_18-35-14,True,,a656105661434dffbb10d7de2e7fe45e,VGI-DEKSPTOP-TIJMEn,10,0.703734,172.29.86.26,26697,True,31.3902,2.07448,31.3902,1670780114,0,,10,136e7_00005,0.225905
run_one_training_136e7_00006,2022-12-11_18-34-53,True,,74c3bc93e7bd4e839ea19b40efb29a75,VGI-DEKSPTOP-TIJMEn,10,0.704402,172.29.86.26,26699,True,10.4462,0.529096,10.4462,1670780093,0,,10,136e7_00006,0.292018
run_one_training_136e7_00007,2022-12-11_18-35-03,True,,a2ac2ecb4070484085363b80edd90e1a,VGI-DEKSPTOP-TIJMEn,10,0.705958,172.29.86.26,26701,True,20.2915,1.31262,20.2915,1670780103,0,,10,136e7_00007,0.22197
run_one_training_136e7_00008,2022-12-11_18-35-04,True,,d1ebbb18434a4d4fa3aec636ab13cb94,VGI-DEKSPTOP-TIJMEn,20,0.703275,172.29.86.26,26703,True,21.246,0.55011,21.246,1670780104,0,,20,136e7_00008,0.231165
run_one_training_136e7_00009,2022-12-11_18-34-59,True,,929ebbe38b7749a28031ea0ca62fe7a9,VGI-DEKSPTOP-TIJMEn,10,0.703586,172.29.86.26,26706,True,16.4653,1.18474,16.4653,1670780099,0,,10,136e7_00009,0.270522


  0%|          | 1/300 [00:03<15:34,  3.12s/it]


[2m[36m(run_one_training pid=26689)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.01, 'sgd_momentum': 0.5, 'scheduler_gamma': 0.5, 'model_embedding_size': 64, 'model_attention_heads': 3, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '90a5008d4e44448ea3188234b4f04f36'}}}
[2m[36m(run_one_training pid=26708)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.01, 'sgd_momentum': 0.5, 'scheduler_gamma': 1, 'model_embedding_size': 64, 'model_attention_heads': 2, 'model_layers': 5, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '90a5008d4e44448ea3188234b4f04f36'}}}
[2m[36m(run_one_training pid=26716)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.01, 'sgd_momentum': 0.5, 'scheduler_gamma': 0.8

  1%|          | 2/300 [00:05<11:58,  2.41s/it]
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=26710)[0m Loading model...
[2m[36m(run_one_training pid=26699)[0m Loading model...
[2m[36m(run_one_training pid=26706)[0m Loading model...
[2m[36m(run_one_training pid=26718)[0m Loading model...
[2m[36m(run_one_training pid=26703)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  1%|          | 3/300 [00:06<10:00,  2.02s/it]
  0%|          | 1/300 [00:01<07:15,  1.46s/it]
  0%|          | 1/300 [00:01<09:25,  1.89s/it]
  0%|          | 1/300 [00:01<09:49,  1.97s/it]
  0%|          | 1/300 [00:02<10:08,  2.04s/it]
  0%|          | 1/300 [00:02<11:57,  2.40s/it]
  1%|          | 2/300 [00:02<04:46,  1.04it/s]
  0%|          | 1/300 [00:02<12:35,  2.53s/it]
  0%|          | 1/300 [00:02<12:18,  2.47s/it]
  1%|          | 2/300 [00:02<05:47,  1.17s/it]
  1%|          | 2/300 [00:02<05:45,  1.16s/it]
  1%|          | 2/300 [00:02<05:56,  1.20s/it]
  1%|▏         | 4/300 [00:07<08:32,  1.73s/it]
  0%|          | 1/300 [00:03<15:25,  3.10s/it]
  0%|          | 1/300 [00:02<13:21,  2.68s/it]
  1%|          | 3/300 [00:02<03:53,  1.27it/s]
  1%|          | 2/300 [00:03<06:50,  1.38s/it]
  0%|       

== Status ==
Current time: 2022-12-11 18:34:47 (running for 00:00:13.63)
Memory usage on this node: 9.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |     

  2%|▏         | 5/300 [00:09<07:54,  1.61s/it]
  1%|          | 3/300 [00:04<06:02,  1.22s/it]
  2%|▏         | 5/300 [00:04<03:12,  1.53it/s]
  2%|▏         | 5/300 [00:03<03:23,  1.45it/s]
  2%|▏         | 5/300 [00:04<03:38,  1.35it/s]
  1%|          | 3/300 [00:04<06:27,  1.31s/it]
  2%|▏         | 5/300 [00:04<03:27,  1.42it/s]
  2%|▏         | 5/300 [00:04<03:42,  1.33it/s]
  1%|▏         | 4/300 [00:04<04:10,  1.18it/s]
  1%|          | 3/300 [00:04<06:08,  1.24s/it]
  1%|          | 2/300 [00:04<11:24,  2.30s/it]
  0%|          | 1/300 [00:04<23:22,  4.69s/it]
  1%|          | 3/300 [00:05<07:15,  1.47s/it]
  1%|          | 2/300 [00:04<10:38,  2.14s/it]
  1%|▏         | 4/300 [00:05<05:26,  1.10s/it]
  2%|▏         | 5/300 [00:04<03:38,  1.35it/s]
  1%|▏         | 4/300 [00:05<05:11,  1.05s/it]
  0%|          | 1/300 [00:05<27:46,  5.57s/it]
  1%|▏         | 4/300 [00:05<06:01,  1.22s/it]
  2%|▏         | 6/300 [00:05<04:31,  1.08it/s]
  2%|▏         | 6/300 [00:05<04:53,  1.

== Status ==
Current time: 2022-12-11 18:34:52 (running for 00:00:18.69)
Memory usage on this node: 8.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.7036604667082429
Resources requested: 13.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (13 RUNNING, 3 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |    

  2%|▏         | 5/300 [00:09<08:36,  1.75s/it]
  2%|▏         | 7/300 [00:09<06:43,  1.38s/it]
  3%|▎         | 8/300 [00:14<08:09,  1.68s/it]
  3%|▎         | 9/300 [00:09<04:10,  1.16it/s]
  4%|▎         | 11/300 [00:09<04:36,  1.04it/s]
  2%|▏         | 7/300 [00:09<06:21,  1.30s/it]
  1%|▏         | 4/300 [00:09<10:21,  2.10s/it]
  1%|          | 2/300 [00:09<22:52,  4.61s/it]
  2%|▏         | 7/300 [00:09<06:43,  1.38s/it]
  2%|▏         | 6/300 [00:10<08:54,  1.82s/it]
  4%|▍         | 12/300 [00:10<03:59,  1.20it/s]
  3%|▎         | 8/300 [00:10<05:54,  1.21s/it]
  3%|▎         | 8/300 [00:10<05:26,  1.12s/it]
  3%|▎         | 8/300 [00:10<05:54,  1.21s/it]
  4%|▍         | 13/300 [00:10<03:32,  1.35it/s]
  3%|▎         | 9/300 [00:16<07:24,  1.53s/it]
  2%|▏         | 7/300 [00:11<07:29,  1.53s/it]
  3%|▎         | 9/300 [00:10<04:44,  1.02it/s]
  2%|▏         | 5/300 [00:11<08:57,  1.82s/it]
  3%|▎         | 9/300 [00:11<05:14,  1.08s/it]
  5%|▍         | 14/300 [00:11<03:13,

== Status ==
Current time: 2022-12-11 18:34:57 (running for 00:00:24.03)
Memory usage on this node: 7.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.70333566951255
Resources requested: 10.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (10 RUNNING, 6 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |      

  2%|▏         | 6/300 [00:14<11:37,  2.37s/it]
  3%|▎         | 8/300 [00:14<08:07,  1.67s/it]
  6%|▋         | 19/300 [00:14<03:01,  1.55it/s]
  4%|▍         | 12/300 [00:14<05:34,  1.16s/it]
  1%|▏         | 4/300 [00:15<16:17,  3.30s/it]
  4%|▍         | 12/300 [00:15<05:36,  1.17s/it]
  3%|▎         | 9/300 [00:14<06:36,  1.36s/it]
  7%|▋         | 20/300 [00:15<02:57,  1.57it/s]
  4%|▍         | 13/300 [00:15<04:45,  1.00it/s]
  4%|▍         | 12/300 [00:20<07:39,  1.60s/it]
  4%|▍         | 13/300 [00:15<05:14,  1.10s/it]
  2%|▏         | 7/300 [00:16<10:10,  2.08s/it]
  4%|▍         | 13/300 [00:16<05:18,  1.11s/it]
  5%|▍         | 14/300 [00:15<04:24,  1.08it/s]
  3%|▎         | 9/300 [00:16<07:57,  1.64s/it]
  5%|▍         | 14/300 [00:16<04:51,  1.02s/it]
  7%|▋         | 21/300 [00:16<03:51,  1.21it/s]
  5%|▌         | 15/300 [00:16<04:01,  1.18it/s]
  4%|▍         | 13/300 [00:22<06:59,  1.46s/it]
  5%|▍         | 14/300 [00:17<04:54,  1.03s/it]
  2%|▏         | 5/300 [00

== Status ==
Current time: 2022-12-11 18:35:02 (running for 00:00:29.19)
Memory usage on this node: 7.2/23.5 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6870126067660749 | Iter 10.000: -0.70333566951255
Resources requested: 9.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (9 RUNNING, 7 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                        

  9%|▊         | 26/300 [00:19<03:32,  1.29it/s]
  6%|▌         | 17/300 [00:20<05:10,  1.10s/it]
  4%|▎         | 11/300 [00:20<08:46,  1.82s/it]
  6%|▌         | 18/300 [00:19<04:22,  1.07it/s]
  6%|▌         | 17/300 [00:20<05:04,  1.07s/it]
  9%|▉         | 27/300 [00:20<03:03,  1.49it/s]
  6%|▋         | 19/300 [00:20<03:44,  1.25it/s]
  6%|▌         | 18/300 [00:20<04:25,  1.06it/s]
  9%|▉         | 28/300 [00:20<02:36,  1.73it/s]
  6%|▌         | 18/300 [00:20<04:29,  1.05it/s]
 10%|▉         | 29/300 [00:20<02:26,  1.85it/s]
  4%|▍         | 12/300 [00:21<07:43,  1.61s/it]
  5%|▌         | 16/300 [00:26<07:18,  1.55s/it]
  6%|▋         | 19/300 [00:21<04:01,  1.17it/s]
  2%|▏         | 6/300 [00:21<17:06,  3.49s/it]
  6%|▋         | 19/300 [00:21<04:00,  1.17it/s]
 10%|█         | 30/300 [00:21<02:14,  2.00it/s]
  6%|▌         | 17/300 [00:27<06:12,  1.32s/it]
  7%|▋         | 20/300 [00:22<03:37,  1.29it/s]
  4%|▍         | 13/300 [00:22<06:51,  1.43s/it]
 10%|█         | 31/3

== Status ==
Current time: 2022-12-11 18:35:08 (running for 00:00:34.49)
Memory usage on this node: 6.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6933295226966342 | Iter 10.000: -0.7034606620048482
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                    

 12%|█▏        | 37/300 [00:24<02:10,  2.02it/s]
  8%|▊         | 24/300 [00:25<03:11,  1.44it/s]
  3%|▎         | 8/300 [00:25<13:04,  2.69s/it]
 13%|█▎        | 38/300 [00:25<01:58,  2.20it/s]
  8%|▊         | 25/300 [00:25<03:02,  1.51it/s]
 13%|█▎        | 39/300 [00:25<01:49,  2.38it/s]
 13%|█▎        | 40/300 [00:25<01:40,  2.58it/s]
  7%|▋         | 21/300 [00:31<05:32,  1.19s/it]
  5%|▌         | 16/300 [00:26<06:48,  1.44s/it]
  3%|▎         | 8/300 [00:26<13:37,  2.80s/it]
 14%|█▎        | 41/300 [00:26<02:27,  1.76it/s]
  7%|▋         | 22/300 [00:32<05:08,  1.11s/it]
  9%|▊         | 26/300 [00:27<04:06,  1.11it/s]
  3%|▎         | 9/300 [00:27<12:10,  2.51s/it]
  6%|▌         | 17/300 [00:27<06:30,  1.38s/it]
 14%|█▍        | 42/300 [00:27<02:17,  1.88it/s]
  9%|▉         | 27/300 [00:27<03:50,  1.19it/s]
 14%|█▍        | 43/300 [00:27<02:08,  2.00it/s]
  8%|▊         | 23/300 [00:33<04:57,  1.07s/it]
 15%|█▍        | 44/300 [00:28<02:00,  2.12it/s]
  9%|▉         | 28/300

== Status ==
Current time: 2022-12-11 18:35:13 (running for 00:00:39.76)
Memory usage on this node: 6.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6807634524690608 | Iter 20.000: -0.6933295226966342 | Iter 10.000: -0.70333566951255
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

 16%|█▌        | 48/300 [00:30<01:58,  2.12it/s]
 16%|█▋        | 49/300 [00:30<01:50,  2.27it/s]
  7%|▋         | 20/300 [00:30<05:32,  1.19s/it]
 10%|█         | 31/300 [00:30<03:49,  1.17it/s]
 17%|█▋        | 50/300 [00:30<01:45,  2.37it/s]
  9%|▊         | 26/300 [00:36<05:13,  1.14s/it]
 11%|█         | 32/300 [00:31<03:22,  1.32it/s]
 17%|█▋        | 51/300 [00:31<02:04,  2.00it/s]
  9%|▉         | 27/300 [00:37<04:32,  1.00it/s]
 11%|█         | 33/300 [00:31<03:00,  1.48it/s]
 17%|█▋        | 52/300 [00:31<01:48,  2.29it/s]
  7%|▋         | 21/300 [00:32<05:54,  1.27s/it]
 18%|█▊        | 53/300 [00:32<01:35,  2.58it/s]
 11%|█▏        | 34/300 [00:32<02:41,  1.65it/s]
  9%|▉         | 28/300 [00:37<03:59,  1.14it/s]
  4%|▎         | 11/300 [00:32<12:23,  2.57s/it]
 18%|█▊        | 54/300 [00:32<01:30,  2.72it/s]
 12%|█▏        | 35/300 [00:32<02:30,  1.76it/s]
 18%|█▊        | 55/300 [00:32<01:28,  2.77it/s]
  7%|▋         | 22/300 [00:33<05:26,  1.17s/it]
 10%|▉         | 29/

== Status ==
Current time: 2022-12-11 18:35:18 (running for 00:00:44.94)
Memory usage on this node: 5.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6807634524690608 | Iter 20.000: -0.6923210147457819 | Iter 10.000: -0.7034606620048482
Resources requested: 5.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (5 RUNNING, 11 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|     

 10%|█         | 31/300 [00:40<04:20,  1.03it/s]
 13%|█▎        | 39/300 [00:35<02:30,  1.74it/s]
 20%|██        | 61/300 [00:35<01:49,  2.18it/s]
  8%|▊         | 25/300 [00:36<04:40,  1.02s/it]
 21%|██        | 62/300 [00:35<01:39,  2.39it/s]
  4%|▍         | 13/300 [00:36<10:14,  2.14s/it]
 11%|█         | 32/300 [00:41<04:02,  1.11it/s]
 21%|██        | 63/300 [00:35<01:29,  2.64it/s]
 21%|██▏       | 64/300 [00:36<01:24,  2.79it/s]
 11%|█         | 33/300 [00:41<03:38,  1.22it/s]
 22%|██▏       | 65/300 [00:36<01:18,  2.98it/s]
 11%|█▏        | 34/300 [00:42<03:22,  1.32it/s]
  9%|▊         | 26/300 [00:37<05:14,  1.15s/it]
 22%|██▏       | 66/300 [00:37<01:39,  2.34it/s]
  5%|▍         | 14/300 [00:37<09:26,  1.98s/it]
 22%|██▏       | 67/300 [00:37<01:32,  2.53it/s]
 12%|█▏        | 35/300 [00:43<03:16,  1.35it/s]
 23%|██▎       | 68/300 [00:37<01:27,  2.64it/s]
  9%|▉         | 27/300 [00:38<04:53,  1.08s/it]
 23%|██▎       | 69/300 [00:38<01:21,  2.82it/s]
 23%|██▎       | 70/

== Status ==
Current time: 2022-12-11 18:35:23 (running for 00:00:50.10)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6870032488368452 | Iter 20.000: -0.6923210147457819 | Iter 10.000: -0.7034606620048482
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|     

 25%|██▌       | 75/300 [00:40<01:17,  2.92it/s]
 13%|█▎        | 38/300 [00:45<03:26,  1.27it/s]
 10%|█         | 30/300 [00:41<04:16,  1.05it/s]
 25%|██▌       | 76/300 [00:41<01:39,  2.26it/s]
 13%|█▎        | 39/300 [00:46<03:16,  1.33it/s]
 26%|██▌       | 77/300 [00:41<01:29,  2.50it/s]
 26%|██▌       | 78/300 [00:41<01:21,  2.72it/s]
  5%|▌         | 16/300 [00:42<10:02,  2.12s/it]
 26%|██▋       | 79/300 [00:41<01:14,  2.95it/s]
 10%|█         | 31/300 [00:42<04:49,  1.08s/it]
 27%|██▋       | 80/300 [00:42<01:09,  3.16it/s]
 27%|██▋       | 81/300 [00:42<01:27,  2.50it/s]
 11%|█         | 32/300 [00:43<04:25,  1.01it/s]
 27%|██▋       | 82/300 [00:43<01:18,  2.79it/s]
  6%|▌         | 17/300 [00:43<09:03,  1.92s/it]
 28%|██▊       | 83/300 [00:43<01:11,  3.04it/s]
 28%|██▊       | 84/300 [00:43<01:06,  3.23it/s]
 11%|█         | 33/300 [00:44<04:07,  1.08it/s]
 28%|██▊       | 85/300 [00:43<01:05,  3.29it/s]
 11%|█▏        | 34/300 [00:44<03:56,  1.12it/s]
 29%|██▊       | 86/

== Status ==
Current time: 2022-12-11 18:35:29 (running for 00:00:55.28)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6923210147457819 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 30%|███       | 90/300 [00:45<01:02,  3.33it/s]
  6%|▋         | 19/300 [00:46<07:56,  1.70s/it]
 30%|███       | 91/300 [00:46<01:20,  2.59it/s]
 31%|███       | 92/300 [00:46<01:13,  2.84it/s]
 12%|█▏        | 36/300 [00:47<04:22,  1.00it/s]
 31%|███       | 93/300 [00:46<01:06,  3.12it/s]
 31%|███▏      | 94/300 [00:46<01:01,  3.33it/s]
 32%|███▏      | 95/300 [00:47<00:59,  3.44it/s]
 12%|█▏        | 37/300 [00:47<04:07,  1.06it/s]
  7%|▋         | 20/300 [00:47<07:36,  1.63s/it]
 32%|███▏      | 96/300 [00:47<01:19,  2.57it/s]
 32%|███▏      | 97/300 [00:48<01:12,  2.80it/s]
 13%|█▎        | 38/300 [00:48<03:56,  1.11it/s]
 33%|███▎      | 98/300 [00:48<01:07,  3.00it/s]
 33%|███▎      | 99/300 [00:48<01:04,  3.13it/s]
 33%|███▎      | 100/300 [00:48<01:01,  3.26it/s]
 13%|█▎        | 39/300 [00:49<03:47,  1.15it/s]
 34%|███▎      | 101/300 [00:49<01:19,  2.50it/s]
 13%|█▎        | 40/300 [00:50<03:41,  1.17it/s]
 34%|███▍      | 102/300 [00:49<01:11,  2.78it/s]
 34%|███▍      | 

== Status ==
Current time: 2022-12-11 18:35:34 (running for 00:01:00.36)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 35%|███▌      | 105/300 [00:50<00:59,  3.27it/s]
 14%|█▎        | 41/300 [00:51<04:21,  1.01s/it]
 35%|███▌      | 106/300 [00:51<01:16,  2.54it/s]
 36%|███▌      | 107/300 [00:51<01:09,  2.78it/s]
  7%|▋         | 22/300 [00:52<08:17,  1.79s/it]
 36%|███▌      | 108/300 [00:51<01:04,  3.00it/s]
 14%|█▍        | 42/300 [00:52<04:07,  1.04it/s]
 36%|███▋      | 109/300 [00:52<01:00,  3.17it/s]
 37%|███▋      | 110/300 [00:52<00:56,  3.34it/s]
 14%|█▍        | 43/300 [00:53<03:52,  1.11it/s]
 37%|███▋      | 111/300 [00:52<01:11,  2.63it/s]
  8%|▊         | 23/300 [00:53<07:48,  1.69s/it]
 37%|███▋      | 112/300 [00:53<01:04,  2.89it/s]
 38%|███▊      | 113/300 [00:53<00:59,  3.15it/s]
 15%|█▍        | 44/300 [00:54<03:40,  1.16it/s]
 38%|███▊      | 114/300 [00:53<00:55,  3.33it/s]
 38%|███▊      | 115/300 [00:53<00:53,  3.48it/s]
 15%|█▌        | 45/300 [00:54<03:33,  1.19it/s]
 39%|███▊      | 116/300 [00:54<01:09,  2.64it/s]
  8%|▊         | 24/300 [00:54<07:28,  1.62s/it]
 39%|███

== Status ==
Current time: 2022-12-11 18:35:39 (running for 00:01:05.51)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

  8%|▊         | 25/300 [00:56<07:11,  1.57s/it]
 40%|████      | 121/300 [00:56<01:06,  2.68it/s]
 41%|████      | 122/300 [00:56<00:59,  2.98it/s]
 16%|█▌        | 47/300 [00:56<03:52,  1.09it/s]
 41%|████      | 123/300 [00:56<00:55,  3.17it/s]
 41%|████▏     | 124/300 [00:56<00:52,  3.36it/s]
 16%|█▌        | 48/300 [00:57<03:40,  1.14it/s]
 42%|████▏     | 125/300 [00:57<00:50,  3.46it/s]
 42%|████▏     | 126/300 [00:57<01:06,  2.61it/s]
 16%|█▋        | 49/300 [00:58<03:36,  1.16it/s]
 42%|████▏     | 127/300 [00:58<01:00,  2.84it/s]
 43%|████▎     | 128/300 [00:58<00:57,  3.00it/s]
  9%|▊         | 26/300 [00:58<08:31,  1.87s/it]
 43%|████▎     | 129/300 [00:58<00:56,  3.05it/s]
 17%|█▋        | 50/300 [00:59<03:37,  1.15it/s]
 43%|████▎     | 130/300 [00:58<00:53,  3.16it/s]
 44%|████▎     | 131/300 [00:59<01:09,  2.45it/s]
 44%|████▍     | 132/300 [00:59<01:01,  2.74it/s]
 44%|████▍     | 133/300 [01:00<00:55,  2.99it/s]
  9%|▉         | 27/300 [01:00<08:04,  1.78s/it]
 17%|█▋

== Status ==
Current time: 2022-12-11 18:35:44 (running for 00:01:10.67)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 17%|█▋        | 52/300 [01:01<03:58,  1.04it/s]
 45%|████▌     | 136/300 [01:01<01:06,  2.47it/s]
 46%|████▌     | 137/300 [01:01<00:59,  2.73it/s]
  9%|▉         | 28/300 [01:02<07:45,  1.71s/it]
 46%|████▌     | 138/300 [01:01<00:54,  2.97it/s]
 18%|█▊        | 53/300 [01:02<03:48,  1.08it/s]
 46%|████▋     | 139/300 [01:02<00:52,  3.09it/s]
 47%|████▋     | 140/300 [01:02<00:49,  3.22it/s]
 18%|█▊        | 54/300 [01:03<03:39,  1.12it/s]
 47%|████▋     | 141/300 [01:02<01:02,  2.55it/s]
 10%|▉         | 29/300 [01:03<07:24,  1.64s/it]
 47%|████▋     | 142/300 [01:03<00:55,  2.83it/s]
 48%|████▊     | 143/300 [01:03<00:50,  3.08it/s]
 18%|█▊        | 55/300 [01:03<03:30,  1.16it/s]
 48%|████▊     | 144/300 [01:03<00:48,  3.24it/s]
 48%|████▊     | 145/300 [01:04<00:46,  3.34it/s]
 49%|████▊     | 146/300 [01:04<00:58,  2.63it/s]
 10%|█         | 30/300 [01:05<07:07,  1.58s/it]
 19%|█▊        | 56/300 [01:05<03:59,  1.02it/s]
 49%|████▉     | 147/300 [01:04<00:52,  2.91it/s]
 49%|███

== Status ==
Current time: 2022-12-11 18:35:49 (running for 00:01:15.90)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 50%|█████     | 151/300 [01:06<00:56,  2.66it/s]
 19%|█▉        | 58/300 [01:06<03:31,  1.14it/s]
 51%|█████     | 152/300 [01:06<00:50,  2.94it/s]
 51%|█████     | 153/300 [01:06<00:46,  3.18it/s]
 51%|█████▏    | 154/300 [01:06<00:43,  3.36it/s]
 10%|█         | 31/300 [01:07<08:12,  1.83s/it]
 20%|█▉        | 59/300 [01:07<03:22,  1.19it/s]
 52%|█████▏    | 155/300 [01:07<00:41,  3.47it/s]
 20%|██        | 60/300 [01:08<03:20,  1.20it/s]
 52%|█████▏    | 156/300 [01:07<00:55,  2.58it/s]
 52%|█████▏    | 157/300 [01:08<00:50,  2.84it/s]
 53%|█████▎    | 158/300 [01:08<00:47,  2.99it/s]
 11%|█         | 32/300 [01:08<07:46,  1.74s/it]
 53%|█████▎    | 159/300 [01:08<00:44,  3.13it/s]
 53%|█████▎    | 160/300 [01:08<00:42,  3.30it/s]
 20%|██        | 61/300 [01:09<03:53,  1.02it/s]
 54%|█████▎    | 161/300 [01:09<00:54,  2.53it/s]
 54%|█████▍    | 162/300 [01:09<00:49,  2.78it/s]
 21%|██        | 62/300 [01:10<03:39,  1.08it/s]
 11%|█         | 33/300 [01:10<07:25,  1.67s/it]
 54%|███

== Status ==
Current time: 2022-12-11 18:35:55 (running for 00:01:21.35)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 56%|█████▌    | 167/300 [01:11<00:48,  2.73it/s]
 11%|█▏        | 34/300 [01:12<07:17,  1.64s/it]
 21%|██▏       | 64/300 [01:12<03:26,  1.14it/s]
 56%|█████▌    | 168/300 [01:11<00:44,  2.96it/s]
 56%|█████▋    | 169/300 [01:12<00:42,  3.09it/s]
 57%|█████▋    | 170/300 [01:12<00:39,  3.26it/s]
 22%|██▏       | 65/300 [01:12<03:20,  1.17it/s]
 57%|█████▋    | 171/300 [01:13<00:50,  2.55it/s]
 12%|█▏        | 35/300 [01:13<07:02,  1.60s/it]
 57%|█████▋    | 172/300 [01:13<00:45,  2.79it/s]
 58%|█████▊    | 173/300 [01:13<00:42,  3.02it/s]
 22%|██▏       | 66/300 [01:14<03:49,  1.02it/s]
 58%|█████▊    | 174/300 [01:13<00:38,  3.25it/s]
 58%|█████▊    | 175/300 [01:14<00:37,  3.36it/s]
 22%|██▏       | 67/300 [01:15<03:35,  1.08it/s]
 59%|█████▊    | 176/300 [01:14<00:49,  2.51it/s]
 59%|█████▉    | 177/300 [01:14<00:43,  2.80it/s]
 59%|█████▉    | 178/300 [01:15<00:41,  2.97it/s]
 23%|██▎       | 68/300 [01:15<03:26,  1.12it/s]
 60%|█████▉    | 179/300 [01:15<00:38,  3.11it/s]
 12%|█▏

== Status ==
Current time: 2022-12-11 18:36:00 (running for 00:01:26.47)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6860379796319951 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 61%|██████    | 182/300 [01:16<00:42,  2.76it/s]
 23%|██▎       | 70/300 [01:17<03:15,  1.18it/s]
 61%|██████    | 183/300 [01:16<00:38,  3.02it/s]
 12%|█▏        | 37/300 [01:17<07:42,  1.76s/it]
 61%|██████▏   | 184/300 [01:17<00:36,  3.20it/s]
 62%|██████▏   | 185/300 [01:17<00:35,  3.24it/s]
 62%|██████▏   | 186/300 [01:18<00:45,  2.51it/s]
 24%|██▎       | 71/300 [01:18<03:48,  1.00it/s]
 62%|██████▏   | 187/300 [01:18<00:40,  2.82it/s]
 13%|█▎        | 38/300 [01:19<07:19,  1.68s/it]
 63%|██████▎   | 188/300 [01:18<00:36,  3.05it/s]
 63%|██████▎   | 189/300 [01:18<00:34,  3.24it/s]
 24%|██▍       | 72/300 [01:19<03:32,  1.07it/s]
 63%|██████▎   | 190/300 [01:19<00:32,  3.37it/s]
 64%|██████▎   | 191/300 [01:19<00:42,  2.57it/s]
 24%|██▍       | 73/300 [01:20<03:21,  1.13it/s]
 64%|██████▍   | 192/300 [01:20<00:37,  2.87it/s]
 13%|█▎        | 39/300 [01:20<07:00,  1.61s/it]
 64%|██████▍   | 193/300 [01:20<00:34,  3.10it/s]
 65%|██████▍   | 194/300 [01:20<00:32,  3.29it/s]
 25%|██

== Status ==
Current time: 2022-12-11 18:36:05 (running for 00:01:31.70)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.6592410439625382 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 66%|██████▌   | 198/300 [01:21<00:32,  3.17it/s]
 66%|██████▋   | 199/300 [01:22<00:30,  3.36it/s]
 67%|██████▋   | 200/300 [01:22<00:27,  3.62it/s]
 25%|██▌       | 76/300 [01:23<03:31,  1.06it/s]
 67%|██████▋   | 201/300 [01:22<00:35,  2.81it/s]
 67%|██████▋   | 202/300 [01:23<00:31,  3.11it/s]
 26%|██▌       | 77/300 [01:23<03:14,  1.14it/s]
 68%|██████▊   | 203/300 [01:23<00:28,  3.36it/s]
 68%|██████▊   | 204/300 [01:23<00:27,  3.49it/s]
 68%|██████▊   | 205/300 [01:23<00:26,  3.62it/s]
 26%|██▌       | 78/300 [01:24<03:04,  1.20it/s]
 69%|██████▊   | 206/300 [01:24<00:33,  2.83it/s]
 69%|██████▉   | 207/300 [01:24<00:29,  3.13it/s]
 26%|██▋       | 79/300 [01:25<02:56,  1.25it/s]
 69%|██████▉   | 208/300 [01:24<00:27,  3.39it/s]
 70%|██████▉   | 209/300 [01:25<00:25,  3.53it/s]
 70%|███████   | 210/300 [01:25<00:24,  3.69it/s]
 70%|███████   | 211/300 [01:25<00:30,  2.96it/s]
 71%|███████   | 212/300 [01:26<00:26,  3.27it/s]
 71%|███████   | 213/300 [01:26<00:24,  3.57it/s]
 71%

== Status ==
Current time: 2022-12-11 18:36:10 (running for 00:01:36.85)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss | 

 72%|███████▏  | 216/300 [01:27<00:27,  3.08it/s]
 72%|███████▏  | 217/300 [01:27<00:24,  3.40it/s]
 73%|███████▎  | 218/300 [01:27<00:22,  3.67it/s]
 73%|███████▎  | 219/300 [01:28<00:20,  3.92it/s]
 73%|███████▎  | 220/300 [01:28<00:19,  4.11it/s]
 74%|███████▎  | 221/300 [01:28<00:24,  3.16it/s]
 74%|███████▍  | 222/300 [01:28<00:22,  3.50it/s]
 74%|███████▍  | 223/300 [01:29<00:20,  3.76it/s]
 75%|███████▍  | 224/300 [01:29<00:19,  3.98it/s]
 75%|███████▌  | 225/300 [01:29<00:18,  4.12it/s]
 75%|███████▌  | 226/300 [01:30<00:23,  3.20it/s]
 76%|███████▌  | 227/300 [01:30<00:20,  3.50it/s]
 76%|███████▌  | 228/300 [01:30<00:19,  3.72it/s]
 76%|███████▋  | 229/300 [01:30<00:18,  3.94it/s]
 77%|███████▋  | 230/300 [01:30<00:17,  4.10it/s]
 77%|███████▋  | 231/300 [01:31<00:21,  3.18it/s]
 77%|███████▋  | 232/300 [01:31<00:19,  3.48it/s]
 78%|███████▊  | 233/300 [01:31<00:18,  3.72it/s]


== Status ==
Current time: 2022-12-11 18:36:15 (running for 00:01:41.88)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss | 

 78%|███████▊  | 234/300 [01:32<00:16,  3.89it/s]
 78%|███████▊  | 235/300 [01:32<00:16,  3.95it/s]
 79%|███████▊  | 236/300 [01:32<00:20,  3.08it/s]
 79%|███████▉  | 237/300 [01:33<00:18,  3.40it/s]
 79%|███████▉  | 238/300 [01:33<00:16,  3.66it/s]
 80%|███████▉  | 239/300 [01:33<00:15,  3.88it/s]
 80%|████████  | 240/300 [01:33<00:14,  4.05it/s]
 80%|████████  | 241/300 [01:34<00:19,  3.08it/s]
 81%|████████  | 242/300 [01:34<00:17,  3.40it/s]
 81%|████████  | 243/300 [01:34<00:15,  3.66it/s]
 81%|████████▏ | 244/300 [01:34<00:14,  3.88it/s]
 82%|████████▏ | 245/300 [01:35<00:13,  4.05it/s]
 82%|████████▏ | 246/300 [01:35<00:17,  3.14it/s]
 82%|████████▏ | 247/300 [01:35<00:15,  3.47it/s]
 83%|████████▎ | 248/300 [01:36<00:13,  3.73it/s]
 83%|████████▎ | 249/300 [01:36<00:13,  3.86it/s]
 83%|████████▎ | 250/300 [01:36<00:12,  4.03it/s]
 84%|████████▎ | 251/300 [01:37<00:15,  3.16it/s]


== Status ==
Current time: 2022-12-11 18:36:20 (running for 00:01:47.00)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss | 

 84%|████████▍ | 252/300 [01:37<00:13,  3.44it/s]
 84%|████████▍ | 253/300 [01:37<00:12,  3.67it/s]
 85%|████████▍ | 254/300 [01:37<00:11,  3.88it/s]
 85%|████████▌ | 255/300 [01:37<00:11,  4.06it/s]
 85%|████████▌ | 256/300 [01:38<00:13,  3.16it/s]
 86%|████████▌ | 257/300 [01:38<00:12,  3.49it/s]
 86%|████████▌ | 258/300 [01:38<00:11,  3.78it/s]
 86%|████████▋ | 259/300 [01:39<00:10,  4.01it/s]
 87%|████████▋ | 260/300 [01:39<00:09,  4.18it/s]
 87%|████████▋ | 261/300 [01:39<00:12,  3.22it/s]
 87%|████████▋ | 262/300 [01:39<00:10,  3.52it/s]
 88%|████████▊ | 263/300 [01:40<00:09,  3.80it/s]
 88%|████████▊ | 264/300 [01:40<00:08,  4.03it/s]
 88%|████████▊ | 265/300 [01:40<00:08,  4.17it/s]
 89%|████████▊ | 266/300 [01:41<00:10,  3.22it/s]
 89%|████████▉ | 267/300 [01:41<00:09,  3.54it/s]
 89%|████████▉ | 268/300 [01:41<00:08,  3.82it/s]
 90%|████████▉ | 269/300 [01:41<00:07,  4.03it/s]
 90%|█████████ | 270/300 [01:41<00:07,  4.21it/s]
 90%|█████████ | 271/300 [01:42<00:08,  3.26it/s]


== Status ==
Current time: 2022-12-11 18:36:26 (running for 00:01:52.41)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss | 

 91%|█████████ | 272/300 [01:42<00:08,  3.46it/s]
 91%|█████████ | 273/300 [01:42<00:07,  3.66it/s]
 91%|█████████▏| 274/300 [01:43<00:06,  3.89it/s]
 92%|█████████▏| 275/300 [01:43<00:06,  4.07it/s]
 92%|█████████▏| 276/300 [01:43<00:07,  3.16it/s]
 92%|█████████▏| 277/300 [01:44<00:06,  3.50it/s]
 93%|█████████▎| 278/300 [01:44<00:06,  3.57it/s]
 93%|█████████▎| 279/300 [01:44<00:06,  3.40it/s]
 93%|█████████▎| 280/300 [01:44<00:06,  3.26it/s]
 94%|█████████▎| 281/300 [01:45<00:07,  2.45it/s]
 94%|█████████▍| 282/300 [01:45<00:06,  2.84it/s]
 94%|█████████▍| 283/300 [01:46<00:05,  3.22it/s]
 95%|█████████▍| 284/300 [01:46<00:04,  3.54it/s]
 95%|█████████▌| 285/300 [01:46<00:03,  3.75it/s]
 95%|█████████▌| 286/300 [01:46<00:04,  3.03it/s]
 96%|█████████▌| 287/300 [01:47<00:03,  3.37it/s]
 96%|█████████▌| 288/300 [01:47<00:03,  3.65it/s]
 96%|█████████▋| 289/300 [01:47<00:02,  3.89it/s]


== Status ==
Current time: 2022-12-11 18:36:31 (running for 00:01:57.59)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss | 

 97%|█████████▋| 290/300 [01:47<00:02,  4.06it/s]
 97%|█████████▋| 291/300 [01:48<00:02,  3.17it/s]
 97%|█████████▋| 292/300 [01:48<00:02,  3.49it/s]
 98%|█████████▊| 293/300 [01:48<00:01,  3.77it/s]
 98%|█████████▊| 294/300 [01:48<00:01,  4.00it/s]
 98%|█████████▊| 295/300 [01:49<00:01,  4.16it/s]
 99%|█████████▊| 296/300 [01:49<00:01,  3.19it/s]
 99%|█████████▉| 297/300 [01:49<00:00,  3.51it/s]
 99%|█████████▉| 298/300 [01:50<00:00,  3.78it/s]
100%|█████████▉| 299/300 [01:50<00:00,  3.98it/s]
2022-12-11 18:36:34,172	INFO tune.py:777 -- Total run time: 120.46 seconds (120.30 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:36:34 (running for 00:02:00.32)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 160.000: -0.6343298937038829 | Iter 80.000: -0.660967141428652 | Iter 40.000: -0.6913125067949295 | Iter 20.000: -0.6915521629465123 | Iter 10.000: -0.7034606620048482
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-34-33
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_it

  0%|          | 0/300 [00:00<?, ?it/s]0m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_5b46f_00000,2022-12-11_18-37-34,True,,3a683b3e25ee4e69b6c18a53e8ae4737,VGI-DEKSPTOP-TIJMEn,40,0.691313,172.29.86.26,29031,True,57.5582,0.88472,57.5582,1670780254,0,,40,5b46f_00000,0.109583
run_one_training_5b46f_00001,2022-12-11_18-36-53,True,,b84073c92c374f3f8ae5bff9cbb27db1,VGI-DEKSPTOP-TIJMEn,10,0.703314,172.29.86.26,29094,True,10.4872,0.661259,10.4872,1670780213,0,,10,5b46f_00001,0.290506
run_one_training_5b46f_00002,2022-12-11_18-37-07,True,,69971149d5c8481591c1d9fa97ee22ed,VGI-DEKSPTOP-TIJMEn,10,0.70462,172.29.86.26,29096,True,24.5485,1.63075,24.5485,1670780227,0,,10,5b46f_00002,0.323279
run_one_training_5b46f_00003,2022-12-11_18-39-19,True,,9b02af513c194cda95ba8a8a9b8e53eb,VGI-DEKSPTOP-TIJMEn,300,0.00240468,172.29.86.26,29098,True,156.547,0.394311,156.547,1670780359,0,,300,5b46f_00003,0.295656
run_one_training_5b46f_00004,2022-12-11_18-37-34,True,,744a31f1cb5b432cbcebf37d60f67ed1,VGI-DEKSPTOP-TIJMEn,40,0.691081,172.29.86.26,29100,True,51.6423,0.917559,51.6423,1670780254,0,,40,5b46f_00004,0.356307
run_one_training_5b46f_00005,2022-12-11_18-36-52,True,,daede450429c435999b7f36dec0c5465,VGI-DEKSPTOP-TIJMEn,10,0.70395,172.29.86.26,29102,True,9.57627,0.519464,9.57627,1670780212,0,,10,5b46f_00005,0.3282
run_one_training_5b46f_00006,2022-12-11_18-36-52,True,,bd948607c28846c58e85551e9edecd70,VGI-DEKSPTOP-TIJMEn,10,0.704667,172.29.86.26,29104,True,9.75562,0.54667,9.75562,1670780212,0,,10,5b46f_00006,0.289625
run_one_training_5b46f_00007,2022-12-11_18-37-28,True,,3cb7d2658adb4d228f2b5c21807143bb,VGI-DEKSPTOP-TIJMEn,40,0.691404,172.29.86.26,29106,True,45.4163,0.802779,45.4163,1670780248,0,,40,5b46f_00007,0.315832
run_one_training_5b46f_00008,2022-12-11_18-36-56,True,,c72dda79e0824c5c8677812af0e10889,VGI-DEKSPTOP-TIJMEn,10,0.704649,172.29.86.26,29108,True,13.3357,0.759133,13.3357,1670780216,0,,10,5b46f_00008,0.277964
run_one_training_5b46f_00009,2022-12-11_18-37-57,True,,ea9b5bb1784447508fbd4b5ec69f2d9f,VGI-DEKSPTOP-TIJMEn,300,0.0660503,172.29.86.26,29110,True,74.4202,0.11617,74.4202,1670780277,0,,300,5b46f_00009,0.398357


  0%|          | 1/300 [00:03<18:14,  3.66s/it]


[2m[36m(run_one_training pid=29119)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.01, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.5, 'model_embedding_size': 8, 'model_attention_heads': 4, 'model_layers': 1, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'a56e056b155d4fc98a92a31581249ddf'}}}
[2m[36m(run_one_training pid=29114)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.001, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.5, 'model_embedding_size': 16, 'model_attention_heads': 1, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'a56e056b155d4fc98a92a31581249ddf'}}}
[2m[36m(run_one_training pid=29098)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.01, 'sgd_momentum': 0.9, 'scheduler_gamma': 0

  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  1%|          | 2/300 [00:05<12:49,  2.58s/it]


== Status ==
Current time: 2022-12-11 18:36:43 (running for 00:00:08.75)
Memory usage on this node: 8.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |     

  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=29110)[0m Loading model...
[2m[36m(run_one_training pid=29100)[0m Loading model...
[2m[36m(run_one_training pid=29116)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 1/300 [00:01<06:37,  1.33s/it]
  0%|          | 1/300 [00:01<06:32,  1.31s/it]
  0%|          | 1/300 [00:01<07:57,  1.60s/it]
  1%|          | 2/300 [00:01<03:44,  1.32it/s]
  0%|          | 1/300 [00:01<06:07,  1.23s/it]
  1%|          | 2/300 [00:01<03:44,  1.33it/s]
  1%|          | 3/300 [00:07<10:49,  2.19s/it]
  0%|          | 1/300 [00:01<09:25,  1.89s/it]
  1%|          | 3/300 [00:02<02:50,  1.74it/s]
  1%|          | 2/300 [00:01<04:25,  1.12it/s]
  1%|          | 2/300 [00:01<03:16,  1.51it/s]
  1%|          | 3/300 [00:02<02:48,  1.77it/s]
  1%|          | 3/300 [00:01<02:28,  1.99it/s]
  1%|▏         | 4/300 [00:02<02:24,  2.05it/s]
  1%|          | 3/300 [00:02<03:16,  1.52it/s]
  1%|▏         | 4/300 [00:02<02:24,  2.04it/s]
  1%|▏         | 4/300 [00:02<02:07,  2.32it/s]
  0%|          | 1/300 [00:02<12:25,  2.49s/it]
  0%|  

== Status ==
Current time: 2022-12-11 18:36:48 (running for 00:00:13.77)
Memory usage on this node: 8.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |     

  1%|▏         | 4/300 [00:05<05:08,  1.04s/it]
  3%|▎         | 10/300 [00:04<01:54,  2.53it/s]
  2%|▏         | 5/300 [00:04<03:42,  1.33it/s]
  3%|▎         | 8/300 [00:05<02:45,  1.76it/s]
  3%|▎         | 10/300 [00:05<02:19,  2.08it/s]
  1%|          | 3/300 [00:05<07:24,  1.50s/it]
  2%|▏         | 5/300 [00:05<04:16,  1.15it/s]
  1%|          | 2/300 [00:05<11:41,  2.36s/it]
  2%|▏         | 5/300 [00:10<09:48,  1.99s/it]
  3%|▎         | 9/300 [00:05<02:24,  2.01it/s]
  1%|▏         | 4/300 [00:05<06:06,  1.24s/it]
  2%|▏         | 5/300 [00:05<05:09,  1.05s/it]
  2%|▏         | 5/300 [00:05<04:39,  1.06it/s]
  0%|          | 1/300 [00:06<30:26,  6.11s/it]
  1%|          | 3/300 [00:05<08:48,  1.78s/it]
  1%|▏         | 4/300 [00:06<06:03,  1.23s/it]
  4%|▎         | 11/300 [00:05<02:54,  1.66it/s]
  4%|▍         | 12/300 [00:06<02:25,  1.98it/s]
  4%|▎         | 11/300 [00:06<03:15,  1.48it/s]
  2%|▏         | 5/300 [00:06<05:40,  1.16s/it]
  4%|▍         | 13/300 [00:06<02:0

== Status ==
Current time: 2022-12-11 18:36:53 (running for 00:00:18.77)
Memory usage on this node: 7.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6963020525872707 | Iter 10.000: -0.7030476263413827
Resources requested: 12.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (12 RUNNING, 4 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                    

  3%|▎         | 8/300 [00:09<05:10,  1.06s/it]
  6%|▌         | 18/300 [00:10<02:21,  1.99it/s]
  2%|▏         | 7/300 [00:09<06:38,  1.36s/it]
  2%|▏         | 6/300 [00:09<08:24,  1.72s/it]
  3%|▎         | 9/300 [00:10<04:59,  1.03s/it]
  2%|▏         | 5/300 [00:09<08:33,  1.74s/it]
  6%|▋         | 19/300 [00:10<02:07,  2.21it/s]
  3%|▎         | 9/300 [00:10<04:33,  1.07it/s]
  1%|          | 3/300 [00:10<15:09,  3.06s/it]
  7%|▋         | 21/300 [00:10<02:31,  1.85it/s]
  2%|▏         | 7/300 [00:10<06:56,  1.42s/it]
  3%|▎         | 8/300 [00:10<05:53,  1.21s/it]
  7%|▋         | 22/300 [00:10<02:04,  2.24it/s]
  2%|▏         | 7/300 [00:16<11:07,  2.28s/it]
  1%|          | 2/300 [00:10<24:16,  4.89s/it]
  3%|▎         | 10/300 [00:11<04:36,  1.05it/s]
  8%|▊         | 23/300 [00:10<01:46,  2.60it/s]
  8%|▊         | 24/300 [00:10<01:34,  2.91it/s]
  2%|▏         | 6/300 [00:11<09:19,  1.90s/it]
  8%|▊         | 25/300 [00:11<01:22,  3.35it/s]
  3%|▎         | 9/300 [00:11<05

== Status ==
Current time: 2022-12-11 18:36:58 (running for 00:00:23.80)
Memory usage on this node: 6.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.697956998211642 | Iter 10.000: -0.7027296094844739
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                       

  3%|▎         | 8/300 [00:14<07:30,  1.54s/it]
  5%|▌         | 15/300 [00:15<03:41,  1.29it/s]
  4%|▍         | 13/300 [00:15<04:24,  1.09it/s]
  3%|▎         | 10/300 [00:15<05:39,  1.17s/it]
 12%|█▏        | 36/300 [00:15<01:47,  2.46it/s]
 12%|█▏        | 37/300 [00:15<01:30,  2.91it/s]
 13%|█▎        | 38/300 [00:15<01:16,  3.43it/s]
  5%|▍         | 14/300 [00:15<04:03,  1.17it/s]
 13%|█▎        | 39/300 [00:15<01:07,  3.88it/s]
  3%|▎         | 9/300 [00:15<06:51,  1.41s/it]
 13%|█▎        | 40/300 [00:15<01:01,  4.25it/s]
  5%|▌         | 16/300 [00:16<04:20,  1.09it/s]
  5%|▌         | 15/300 [00:16<03:42,  1.28it/s]
  1%|▏         | 4/300 [00:16<17:20,  3.52s/it]
 14%|█▎        | 41/300 [00:16<01:33,  2.76it/s]
  6%|▌         | 17/300 [00:17<03:57,  1.19it/s]
 14%|█▍        | 42/300 [00:16<01:19,  3.23it/s]
  4%|▎         | 11/300 [00:22<08:47,  1.82s/it]
  3%|▎         | 10/300 [00:16<06:19,  1.31s/it]
  4%|▎         | 11/300 [00:17<06:39,  1.38s/it]
 14%|█▍        | 43/300

== Status ==
Current time: 2022-12-11 18:37:03 (running for 00:00:28.87)
Memory usage on this node: 6.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6733859653274218 | Iter 20.000: -0.6963020525872707 | Iter 10.000: -0.7015750880042713
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

  6%|▋         | 19/300 [00:20<03:37,  1.29it/s]
 17%|█▋        | 52/300 [00:19<01:21,  3.04it/s]
  5%|▍         | 14/300 [00:19<05:10,  1.09s/it]
  7%|▋         | 21/300 [00:20<04:01,  1.16it/s]
 18%|█▊        | 53/300 [00:19<01:12,  3.42it/s]
  4%|▍         | 12/300 [00:19<06:48,  1.42s/it]
 18%|█▊        | 54/300 [00:20<01:04,  3.79it/s]
 18%|█▊        | 55/300 [00:20<00:59,  4.15it/s]
  5%|▍         | 14/300 [00:26<06:40,  1.40s/it]
  7%|▋         | 20/300 [00:20<03:31,  1.32it/s]
  3%|▎         | 8/300 [00:20<10:23,  2.13s/it]
  7%|▋         | 22/300 [00:20<03:44,  1.24it/s]
  5%|▌         | 15/300 [00:20<04:57,  1.04s/it]
 19%|█▊        | 56/300 [00:20<01:34,  2.59it/s]
  8%|▊         | 23/300 [00:21<03:32,  1.30it/s]
 19%|█▉        | 57/300 [00:21<01:19,  3.07it/s]
  4%|▍         | 13/300 [00:21<06:23,  1.34s/it]
 19%|█▉        | 58/300 [00:21<01:10,  3.44it/s]
  5%|▌         | 15/300 [00:27<06:19,  1.33s/it]
 20%|█▉        | 59/300 [00:21<01:04,  3.74it/s]
  7%|▋         | 21/3

== Status ==
Current time: 2022-12-11 18:37:08 (running for 00:00:34.00)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6733859653274218 | Iter 20.000: -0.6938088935179014 | Iter 10.000: -0.7021454498171806
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

  6%|▌         | 17/300 [00:30<06:55,  1.47s/it]
  9%|▉         | 28/300 [00:25<03:14,  1.40it/s]
  6%|▋         | 19/300 [00:25<04:45,  1.02s/it]
 24%|██▎       | 71/300 [00:25<01:18,  2.91it/s]
  5%|▌         | 16/300 [00:25<06:40,  1.41s/it]
 24%|██▍       | 72/300 [00:25<01:04,  3.52it/s]
 10%|▉         | 29/300 [00:25<03:05,  1.46it/s]
 24%|██▍       | 73/300 [00:25<00:57,  3.98it/s]
  9%|▊         | 26/300 [00:25<04:00,  1.14it/s]
 25%|██▍       | 74/300 [00:25<00:50,  4.43it/s]
 25%|██▌       | 75/300 [00:25<00:46,  4.80it/s]
  6%|▌         | 18/300 [00:31<06:18,  1.34s/it]
  2%|▏         | 7/300 [00:25<15:55,  3.26s/it]
  7%|▋         | 20/300 [00:26<04:30,  1.03it/s]
 10%|█         | 30/300 [00:26<02:54,  1.55it/s]
  9%|▉         | 27/300 [00:26<03:36,  1.26it/s]
  6%|▌         | 17/300 [00:26<06:04,  1.29s/it]
 25%|██▌       | 76/300 [00:26<01:13,  3.05it/s]
 26%|██▌       | 77/300 [00:26<01:02,  3.58it/s]
 26%|██▌       | 78/300 [00:26<00:54,  4.07it/s]
  9%|▉         | 28/3

== Status ==
Current time: 2022-12-11 18:37:13 (running for 00:00:39.05)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.6733859653274218 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7021454498171806
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 30%|███       | 90/300 [00:29<00:43,  4.78it/s]
 11%|█         | 32/300 [00:30<03:28,  1.28it/s]
  8%|▊         | 24/300 [00:30<04:15,  1.08it/s]
  7%|▋         | 21/300 [00:35<06:48,  1.47s/it]
 30%|███       | 91/300 [00:30<01:06,  3.14it/s]
 11%|█         | 33/300 [00:30<03:10,  1.40it/s]
 12%|█▏        | 36/300 [00:31<03:20,  1.32it/s]
 31%|███       | 92/300 [00:30<00:56,  3.65it/s]
  3%|▎         | 9/300 [00:30<13:27,  2.78s/it]
 31%|███       | 93/300 [00:30<00:50,  4.14it/s]
 31%|███▏      | 94/300 [00:30<00:44,  4.60it/s]
  8%|▊         | 25/300 [00:31<04:04,  1.13it/s]
 11%|█▏        | 34/300 [00:31<03:00,  1.47it/s]
  7%|▋         | 22/300 [00:36<06:08,  1.32s/it]
 12%|█▏        | 37/300 [00:31<03:03,  1.44it/s]
 32%|███▏      | 95/300 [00:31<00:42,  4.84it/s]
  7%|▋         | 21/300 [00:31<06:11,  1.33s/it]
 12%|█▏        | 35/300 [00:31<02:51,  1.55it/s]
 13%|█▎        | 38/300 [00:32<02:52,  1.52it/s]
 32%|███▏      | 96/300 [00:31<01:09,  2.95it/s]
 32%|███▏      | 97/3

== Status ==
Current time: 2022-12-11 18:37:18 (running for 00:00:44.07)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.5159526181329662 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7015750880042713
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 36%|███▋      | 109/300 [00:34<00:43,  4.40it/s]
 10%|▉         | 29/300 [00:35<04:08,  1.09it/s]
 14%|█▍        | 43/300 [00:35<02:47,  1.53it/s]
 37%|███▋      | 110/300 [00:35<00:40,  4.74it/s]
  8%|▊         | 25/300 [00:35<04:58,  1.09s/it]
 15%|█▍        | 44/300 [00:36<02:38,  1.61it/s]
 37%|███▋      | 111/300 [00:35<01:02,  3.04it/s]
 10%|█         | 30/300 [00:36<03:51,  1.16it/s]
 37%|███▋      | 112/300 [00:35<00:52,  3.58it/s]
 38%|███▊      | 113/300 [00:35<00:46,  4.02it/s]
 15%|█▌        | 45/300 [00:36<02:30,  1.69it/s]
 38%|███▊      | 114/300 [00:36<00:40,  4.55it/s]
  9%|▊         | 26/300 [00:42<06:25,  1.41s/it]
 38%|███▊      | 115/300 [00:36<00:36,  5.02it/s]
 39%|███▊      | 116/300 [00:36<00:55,  3.31it/s]
  9%|▊         | 26/300 [00:36<05:43,  1.25s/it]
  4%|▎         | 11/300 [00:36<14:32,  3.02s/it]
 39%|███▉      | 117/300 [00:36<00:47,  3.89it/s]
 15%|█▌        | 46/300 [00:37<02:56,  1.44it/s]
 39%|███▉      | 118/300 [00:37<00:41,  4.36it/s]
 10%|█    

== Status ==
Current time: 2022-12-11 18:37:23 (running for 00:00:49.08)
Memory usage on this node: 6.2/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.6733859653274218 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7015750880042713
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 43%|████▎     | 130/300 [00:39<00:32,  5.21it/s]
 12%|█▏        | 35/300 [00:40<03:35,  1.23it/s]
 10%|█         | 30/300 [00:45<04:50,  1.07s/it]
 17%|█▋        | 51/300 [00:40<02:57,  1.41it/s]
 44%|████▎     | 131/300 [00:40<00:54,  3.12it/s]
 10%|█         | 30/300 [00:40<04:30,  1.00s/it]
 44%|████▍     | 132/300 [00:40<00:45,  3.67it/s]
 17%|█▋        | 52/300 [00:41<02:42,  1.53it/s]
 44%|████▍     | 133/300 [00:40<00:39,  4.24it/s]
 45%|████▍     | 134/300 [00:40<00:35,  4.74it/s]
 45%|████▌     | 135/300 [00:41<00:33,  4.96it/s]
  4%|▍         | 13/300 [00:41<12:26,  2.60s/it]
 18%|█▊        | 53/300 [00:41<02:34,  1.60it/s]
 45%|████▌     | 136/300 [00:41<00:51,  3.19it/s]
 12%|█▏        | 36/300 [00:41<04:31,  1.03s/it]
 18%|█▊        | 54/300 [00:42<02:24,  1.71it/s]
 46%|████▌     | 137/300 [00:41<00:43,  3.72it/s]
 46%|████▌     | 138/300 [00:42<00:37,  4.29it/s]
 10%|█         | 31/300 [00:47<05:58,  1.33s/it]
 46%|████▋     | 139/300 [00:42<00:35,  4.55it/s]
 10%|█    

== Status ==
Current time: 2022-12-11 18:37:28 (running for 00:00:54.17)
Memory usage on this node: 5.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 160.000: None | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.6823508430582781 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7015750880042713
Resources requested: 5.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (5 RUNNING, 11 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 20%|██        | 60/300 [00:45<02:18,  1.74it/s]
 50%|█████     | 151/300 [00:45<00:47,  3.14it/s]
 51%|█████     | 152/300 [00:45<00:42,  3.46it/s]
  5%|▌         | 15/300 [00:45<11:25,  2.41s/it]
 51%|█████     | 153/300 [00:45<00:37,  3.89it/s]
 12%|█▏        | 35/300 [00:51<04:39,  1.05s/it]
 51%|█████▏    | 154/300 [00:45<00:32,  4.50it/s]
 12%|█▏        | 35/300 [00:45<04:28,  1.01s/it]
 52%|█████▏    | 155/300 [00:46<00:29,  4.86it/s]
 20%|██        | 61/300 [00:47<02:52,  1.39it/s]
 52%|█████▏    | 156/300 [00:46<00:46,  3.11it/s]
 52%|█████▏    | 157/300 [00:46<00:39,  3.62it/s]
 53%|█████▎    | 158/300 [00:47<00:35,  4.00it/s]
 21%|██        | 62/300 [00:47<02:38,  1.50it/s]
 53%|█████▎    | 159/300 [00:47<00:31,  4.41it/s]
 53%|█████▎    | 160/300 [00:47<00:28,  4.91it/s]
 21%|██        | 63/300 [00:48<02:26,  1.62it/s]
 12%|█▏        | 36/300 [00:47<05:19,  1.21s/it]
 12%|█▏        | 36/300 [00:53<05:41,  1.29s/it]
 54%|█████▎    | 161/300 [00:47<00:43,  3.23it/s]
 21%|██▏ 

== Status ==
Current time: 2022-12-11 18:37:33 (running for 00:00:59.31)
Memory usage on this node: 5.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.6823508430582781 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7015750880042713
Resources requested: 5.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (5 RUNNING, 11 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss 

 22%|██▏       | 67/300 [00:50<02:26,  1.59it/s]
 57%|█████▋    | 171/300 [00:50<00:37,  3.42it/s]
 57%|█████▋    | 172/300 [00:50<00:32,  3.91it/s]
 13%|█▎        | 39/300 [00:50<04:25,  1.02s/it]
 13%|█▎        | 39/300 [00:56<04:35,  1.06s/it]
 23%|██▎       | 68/300 [00:51<02:19,  1.66it/s]
 58%|█████▊    | 173/300 [00:50<00:29,  4.34it/s]
 58%|█████▊    | 174/300 [00:50<00:25,  4.89it/s]
 58%|█████▊    | 175/300 [00:50<00:23,  5.31it/s]
 23%|██▎       | 69/300 [00:51<02:13,  1.73it/s]
 59%|█████▊    | 176/300 [00:51<00:36,  3.39it/s]
  6%|▌         | 17/300 [00:51<12:06,  2.57s/it]
 59%|█████▉    | 177/300 [00:51<00:30,  4.05it/s]
 23%|██▎       | 70/300 [00:52<02:04,  1.84it/s]
 59%|█████▉    | 178/300 [00:51<00:25,  4.71it/s]
 60%|█████▉    | 179/300 [00:51<00:22,  5.38it/s]
 60%|██████    | 180/300 [00:51<00:20,  5.98it/s]
 24%|██▎       | 71/300 [00:52<02:22,  1.61it/s]
 60%|██████    | 181/300 [00:52<00:29,  4.02it/s]
 61%|██████    | 182/300 [00:52<00:26,  4.50it/s]
 61%|███

== Status ==
Current time: 2022-12-11 18:37:38 (running for 00:01:04.66)
Memory usage on this node: 5.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.5675915057460467 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913157344485322 | Iter 10.000: -0.7015750880042713
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss 

 66%|██████▌   | 198/300 [00:55<00:18,  5.40it/s]
 66%|██████▋   | 199/300 [00:55<00:16,  5.94it/s]
 67%|██████▋   | 200/300 [00:55<00:15,  6.42it/s]
 26%|██▌       | 78/300 [00:56<01:54,  1.94it/s]
 67%|██████▋   | 201/300 [00:56<00:24,  4.03it/s]
 26%|██▋       | 79/300 [00:56<01:49,  2.03it/s]
 67%|██████▋   | 202/300 [00:56<00:20,  4.73it/s]
 68%|██████▊   | 203/300 [00:56<00:18,  5.35it/s]
 68%|██████▊   | 204/300 [00:56<00:16,  5.94it/s]
 27%|██▋       | 80/300 [00:57<01:44,  2.11it/s]
 68%|██████▊   | 205/300 [00:56<00:14,  6.42it/s]
 69%|██████▊   | 206/300 [00:57<00:22,  4.27it/s]
 69%|██████▉   | 207/300 [00:57<00:18,  4.99it/s]
 27%|██▋       | 81/300 [00:57<02:01,  1.80it/s]
 69%|██████▉   | 208/300 [00:57<00:16,  5.73it/s]
 70%|██████▉   | 209/300 [00:57<00:14,  6.26it/s]
 70%|███████   | 210/300 [00:57<00:13,  6.79it/s]
 27%|██▋       | 82/300 [00:58<01:50,  1.97it/s]
 70%|███████   | 211/300 [00:58<00:20,  4.40it/s]
 71%|███████   | 212/300 [00:58<00:17,  5.10it/s]
 28%|

== Status ==
Current time: 2022-12-11 18:37:44 (running for 00:01:09.77)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss 

 29%|██▉       | 88/300 [01:01<01:38,  2.14it/s]
 75%|███████▌  | 226/300 [01:00<00:17,  4.15it/s]
 76%|███████▌  | 227/300 [01:01<00:15,  4.85it/s]
 30%|██▉       | 89/300 [01:01<01:36,  2.19it/s]
 76%|███████▌  | 228/300 [01:01<00:13,  5.50it/s]
 76%|███████▋  | 229/300 [01:01<00:11,  6.06it/s]
 77%|███████▋  | 230/300 [01:01<00:10,  6.58it/s]
 30%|███       | 90/300 [01:02<01:33,  2.25it/s]
 77%|███████▋  | 231/300 [01:01<00:16,  4.29it/s]
 77%|███████▋  | 232/300 [01:01<00:13,  5.02it/s]
 78%|███████▊  | 233/300 [01:02<00:11,  5.74it/s]
 78%|███████▊  | 234/300 [01:02<00:10,  6.42it/s]
 30%|███       | 91/300 [01:02<01:51,  1.88it/s]
 78%|███████▊  | 235/300 [01:02<00:09,  6.90it/s]
 31%|███       | 92/300 [01:03<01:43,  2.01it/s]
 79%|███████▊  | 236/300 [01:02<00:14,  4.39it/s]
 79%|███████▉  | 237/300 [01:02<00:12,  5.09it/s]
 79%|███████▉  | 238/300 [01:02<00:10,  5.80it/s]
 80%|███████▉  | 239/300 [01:03<00:09,  6.43it/s]
 31%|███       | 93/300 [01:03<01:37,  2.13it/s]
 80%|█

== Status ==
Current time: 2022-12-11 18:37:49 (running for 00:01:14.86)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 84%|████████▍ | 253/300 [01:05<00:08,  5.74it/s]
 85%|████████▍ | 254/300 [01:05<00:07,  6.29it/s]
 33%|███▎      | 99/300 [01:06<01:29,  2.25it/s]
 85%|████████▌ | 255/300 [01:05<00:06,  6.80it/s]
 33%|███▎      | 100/300 [01:06<01:25,  2.34it/s]
 85%|████████▌ | 256/300 [01:06<00:10,  4.35it/s]
 86%|████████▌ | 257/300 [01:06<00:08,  5.05it/s]
 86%|████████▌ | 258/300 [01:06<00:07,  5.72it/s]
 86%|████████▋ | 259/300 [01:06<00:06,  6.37it/s]
 87%|████████▋ | 260/300 [01:06<00:05,  6.94it/s]
 34%|███▎      | 101/300 [01:07<01:44,  1.91it/s]
 87%|████████▋ | 261/300 [01:07<00:08,  4.43it/s]
 87%|████████▋ | 262/300 [01:07<00:07,  5.17it/s]
 34%|███▍      | 102/300 [01:07<01:36,  2.06it/s]
 88%|████████▊ | 263/300 [01:07<00:06,  5.86it/s]
 88%|████████▊ | 264/300 [01:07<00:05,  6.42it/s]
 88%|████████▊ | 265/300 [01:07<00:05,  6.93it/s]
 34%|███▍      | 103/300 [01:08<01:30,  2.18it/s]
 35%|███▍      | 104/300 [01:08<01:26,  2.28it/s]
 89%|████████▊ | 266/300 [01:08<00:07,  4.38it/s]
 

== Status ==
Current time: 2022-12-11 18:37:54 (running for 00:01:20.14)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 94%|█████████▍| 283/300 [01:11<00:02,  5.77it/s]
 95%|█████████▍| 284/300 [01:11<00:02,  6.34it/s]
 95%|█████████▌| 285/300 [01:11<00:02,  6.77it/s]
 37%|███▋      | 111/300 [01:12<01:38,  1.92it/s]
 95%|█████████▌| 286/300 [01:11<00:03,  4.39it/s]
 96%|█████████▌| 287/300 [01:11<00:02,  5.08it/s]
 96%|█████████▌| 288/300 [01:11<00:02,  5.77it/s]
 37%|███▋      | 112/300 [01:12<01:30,  2.07it/s]
 96%|█████████▋| 289/300 [01:12<00:01,  6.38it/s]
 97%|█████████▋| 290/300 [01:12<00:01,  6.83it/s]
 38%|███▊      | 113/300 [01:12<01:25,  2.18it/s]
 97%|█████████▋| 291/300 [01:12<00:02,  4.22it/s]
 97%|█████████▋| 292/300 [01:12<00:01,  4.80it/s]
 38%|███▊      | 114/300 [01:13<01:21,  2.27it/s]
 98%|█████████▊| 293/300 [01:12<00:01,  5.52it/s]
 98%|█████████▊| 294/300 [01:13<00:00,  6.14it/s]
 98%|█████████▊| 295/300 [01:13<00:00,  6.67it/s]
 38%|███▊      | 115/300 [01:13<01:18,  2.34it/s]
 99%|█████████▊| 296/300 [01:13<00:00,  4.19it/s]
 99%|█████████▉| 297/300 [01:13<00:00,  4.88it/s]


== Status ==
Current time: 2022-12-11 18:38:00 (running for 00:01:25.82)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 41%|████      | 122/300 [01:17<01:22,  2.16it/s]
 41%|████      | 123/300 [01:17<01:17,  2.28it/s]
 41%|████▏     | 124/300 [01:17<01:13,  2.38it/s]
 42%|████▏     | 125/300 [01:18<01:10,  2.46it/s]
 42%|████▏     | 126/300 [01:18<01:25,  2.04it/s]
 42%|████▏     | 127/300 [01:19<01:18,  2.19it/s]
 43%|████▎     | 128/300 [01:19<01:14,  2.31it/s]
 43%|████▎     | 129/300 [01:20<01:11,  2.40it/s]
 43%|████▎     | 130/300 [01:20<01:08,  2.48it/s]
 44%|████▎     | 131/300 [01:21<01:23,  2.03it/s]
 44%|████▍     | 132/300 [01:21<01:16,  2.19it/s]
 44%|████▍     | 133/300 [01:21<01:12,  2.30it/s]


== Status ==
Current time: 2022-12-11 18:38:05 (running for 00:01:30.99)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 45%|████▍     | 134/300 [01:22<01:09,  2.37it/s]
 45%|████▌     | 135/300 [01:22<01:07,  2.45it/s]
 45%|████▌     | 136/300 [01:23<01:21,  2.02it/s]
 46%|████▌     | 137/300 [01:23<01:16,  2.14it/s]
 46%|████▌     | 138/300 [01:24<01:11,  2.27it/s]
 46%|████▋     | 139/300 [01:24<01:07,  2.37it/s]
 47%|████▋     | 140/300 [01:24<01:05,  2.46it/s]
 47%|████▋     | 141/300 [01:25<01:18,  2.02it/s]
 47%|████▋     | 142/300 [01:25<01:13,  2.16it/s]
 48%|████▊     | 143/300 [01:26<01:08,  2.28it/s]
 48%|████▊     | 144/300 [01:26<01:05,  2.38it/s]
 48%|████▊     | 145/300 [01:27<01:03,  2.44it/s]


== Status ==
Current time: 2022-12-11 18:38:10 (running for 00:01:36.21)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 49%|████▊     | 146/300 [01:27<01:17,  1.99it/s]
 49%|████▉     | 147/300 [01:28<01:11,  2.14it/s]
 49%|████▉     | 148/300 [01:28<01:07,  2.26it/s]
 50%|████▉     | 149/300 [01:28<01:03,  2.36it/s]
 50%|█████     | 150/300 [01:29<01:02,  2.40it/s]
 50%|█████     | 151/300 [01:30<01:15,  1.98it/s]
 51%|█████     | 152/300 [01:30<01:09,  2.14it/s]
 51%|█████     | 153/300 [01:30<01:04,  2.27it/s]
 51%|█████▏    | 154/300 [01:31<01:02,  2.35it/s]
 52%|█████▏    | 155/300 [01:31<00:59,  2.43it/s]
 52%|█████▏    | 156/300 [01:32<01:12,  1.98it/s]


== Status ==
Current time: 2022-12-11 18:38:15 (running for 00:01:41.44)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.21886255157490572 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 52%|█████▏    | 157/300 [01:32<01:07,  2.13it/s]
 53%|█████▎    | 158/300 [01:33<01:03,  2.25it/s]
 53%|█████▎    | 159/300 [01:33<01:00,  2.34it/s]
 53%|█████▎    | 160/300 [01:33<00:58,  2.39it/s]
 54%|█████▎    | 161/300 [01:34<01:10,  1.97it/s]
 54%|█████▍    | 162/300 [01:34<01:05,  2.11it/s]
 54%|█████▍    | 163/300 [01:35<01:01,  2.24it/s]
 55%|█████▍    | 164/300 [01:35<00:58,  2.33it/s]
 55%|█████▌    | 165/300 [01:36<00:56,  2.40it/s]
 55%|█████▌    | 166/300 [01:36<01:07,  2.00it/s]
 56%|█████▌    | 167/300 [01:37<01:02,  2.13it/s]
 56%|█████▌    | 168/300 [01:37<00:58,  2.27it/s]


== Status ==
Current time: 2022-12-11 18:38:21 (running for 00:01:46.73)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 56%|█████▋    | 169/300 [01:37<00:55,  2.36it/s]
 57%|█████▋    | 170/300 [01:38<00:53,  2.44it/s]
 57%|█████▋    | 171/300 [01:39<01:04,  2.00it/s]
 57%|█████▋    | 172/300 [01:39<00:59,  2.15it/s]
 58%|█████▊    | 173/300 [01:39<00:56,  2.27it/s]
 58%|█████▊    | 174/300 [01:40<00:53,  2.35it/s]
 58%|█████▊    | 175/300 [01:40<00:52,  2.40it/s]
 59%|█████▊    | 176/300 [01:41<01:03,  1.95it/s]
 59%|█████▉    | 177/300 [01:41<00:58,  2.11it/s]
 59%|█████▉    | 178/300 [01:42<00:54,  2.24it/s]
 60%|█████▉    | 179/300 [01:42<00:51,  2.35it/s]
 60%|██████    | 180/300 [01:42<00:49,  2.43it/s]


== Status ==
Current time: 2022-12-11 18:38:26 (running for 00:01:52.02)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 60%|██████    | 181/300 [01:43<00:59,  2.00it/s]
 61%|██████    | 182/300 [01:43<00:55,  2.12it/s]
 61%|██████    | 183/300 [01:44<00:52,  2.22it/s]
 61%|██████▏   | 184/300 [01:44<00:49,  2.33it/s]
 62%|██████▏   | 185/300 [01:45<00:47,  2.40it/s]
 62%|██████▏   | 186/300 [01:45<00:57,  1.99it/s]
 62%|██████▏   | 187/300 [01:46<00:52,  2.15it/s]
 63%|██████▎   | 188/300 [01:46<00:49,  2.28it/s]
 63%|██████▎   | 189/300 [01:47<00:46,  2.37it/s]
 63%|██████▎   | 190/300 [01:47<00:45,  2.41it/s]
 64%|██████▎   | 191/300 [01:48<00:54,  2.00it/s]


== Status ==
Current time: 2022-12-11 18:38:31 (running for 00:01:57.23)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 64%|██████▍   | 192/300 [01:48<00:49,  2.16it/s]
 64%|██████▍   | 193/300 [01:48<00:46,  2.29it/s]
 65%|██████▍   | 194/300 [01:49<00:44,  2.38it/s]
 65%|██████▌   | 195/300 [01:49<00:43,  2.43it/s]
 65%|██████▌   | 196/300 [01:50<00:54,  1.92it/s]
 66%|██████▌   | 197/300 [01:50<00:48,  2.10it/s]
 66%|██████▌   | 198/300 [01:51<00:45,  2.25it/s]
 66%|██████▋   | 199/300 [01:51<00:42,  2.36it/s]
 67%|██████▋   | 200/300 [01:51<00:40,  2.46it/s]
 67%|██████▋   | 201/300 [01:52<00:48,  2.03it/s]
 67%|██████▋   | 202/300 [01:52<00:44,  2.19it/s]
 68%|██████▊   | 203/300 [01:53<00:41,  2.32it/s]


== Status ==
Current time: 2022-12-11 18:38:36 (running for 00:02:02.46)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 68%|██████▊   | 204/300 [01:53<00:39,  2.41it/s]
 68%|██████▊   | 205/300 [01:54<00:38,  2.44it/s]
 69%|██████▊   | 206/300 [01:54<00:46,  2.03it/s]
 69%|██████▉   | 207/300 [01:55<00:42,  2.19it/s]
 69%|██████▉   | 208/300 [01:55<00:39,  2.31it/s]
 70%|██████▉   | 209/300 [01:55<00:37,  2.42it/s]
 70%|███████   | 210/300 [01:56<00:36,  2.48it/s]
 70%|███████   | 211/300 [01:56<00:43,  2.04it/s]
 71%|███████   | 212/300 [01:57<00:40,  2.18it/s]
 71%|███████   | 213/300 [01:57<00:38,  2.29it/s]
 71%|███████▏  | 214/300 [01:58<00:36,  2.37it/s]
 72%|███████▏  | 215/300 [01:58<00:34,  2.46it/s]


== Status ==
Current time: 2022-12-11 18:38:41 (running for 00:02:07.65)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 72%|███████▏  | 216/300 [01:59<00:42,  1.99it/s]
 72%|███████▏  | 217/300 [01:59<00:38,  2.14it/s]
 73%|███████▎  | 218/300 [02:00<00:36,  2.24it/s]
 73%|███████▎  | 219/300 [02:00<00:34,  2.34it/s]
 73%|███████▎  | 220/300 [02:00<00:33,  2.41it/s]
 74%|███████▎  | 221/300 [02:01<00:39,  1.98it/s]
 74%|███████▍  | 222/300 [02:01<00:36,  2.12it/s]
 74%|███████▍  | 223/300 [02:02<00:34,  2.25it/s]
 75%|███████▍  | 224/300 [02:02<00:32,  2.35it/s]
 75%|███████▌  | 225/300 [02:03<00:30,  2.43it/s]
 75%|███████▌  | 226/300 [02:03<00:36,  2.02it/s]


== Status ==
Current time: 2022-12-11 18:38:47 (running for 00:02:12.85)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 76%|███████▌  | 227/300 [02:04<00:33,  2.17it/s]
 76%|███████▌  | 228/300 [02:04<00:31,  2.28it/s]
 76%|███████▋  | 229/300 [02:04<00:29,  2.38it/s]
 77%|███████▋  | 230/300 [02:05<00:28,  2.45it/s]
 77%|███████▋  | 231/300 [02:05<00:33,  2.03it/s]
 77%|███████▋  | 232/300 [02:06<00:31,  2.16it/s]
 78%|███████▊  | 233/300 [02:06<00:29,  2.28it/s]
 78%|███████▊  | 234/300 [02:07<00:27,  2.37it/s]
 78%|███████▊  | 235/300 [02:07<00:26,  2.45it/s]
 79%|███████▊  | 236/300 [02:08<00:31,  2.01it/s]
 79%|███████▉  | 237/300 [02:08<00:28,  2.17it/s]
 79%|███████▉  | 238/300 [02:08<00:26,  2.30it/s]


== Status ==
Current time: 2022-12-11 18:38:52 (running for 00:02:18.05)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 80%|███████▉  | 239/300 [02:09<00:25,  2.40it/s]
 80%|████████  | 240/300 [02:09<00:24,  2.47it/s]
 80%|████████  | 241/300 [02:10<00:28,  2.04it/s]
 81%|████████  | 242/300 [02:10<00:26,  2.20it/s]
 81%|████████  | 243/300 [02:11<00:24,  2.32it/s]
 81%|████████▏ | 244/300 [02:11<00:23,  2.42it/s]
 82%|████████▏ | 245/300 [02:11<00:21,  2.51it/s]
 82%|████████▏ | 246/300 [02:12<00:26,  2.05it/s]
 82%|████████▏ | 247/300 [02:12<00:24,  2.20it/s]
 83%|████████▎ | 248/300 [02:13<00:22,  2.32it/s]
 83%|████████▎ | 249/300 [02:13<00:21,  2.42it/s]
 83%|████████▎ | 250/300 [02:14<00:20,  2.48it/s]


== Status ==
Current time: 2022-12-11 18:38:57 (running for 00:02:23.19)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 84%|████████▎ | 251/300 [02:14<00:24,  2.00it/s]
 84%|████████▍ | 252/300 [02:15<00:22,  2.14it/s]
 84%|████████▍ | 253/300 [02:15<00:20,  2.29it/s]
 85%|████████▍ | 254/300 [02:15<00:19,  2.40it/s]
 85%|████████▌ | 255/300 [02:16<00:18,  2.48it/s]
 85%|████████▌ | 256/300 [02:16<00:21,  2.04it/s]
 86%|████████▌ | 257/300 [02:17<00:19,  2.20it/s]
 86%|████████▌ | 258/300 [02:17<00:18,  2.33it/s]
 86%|████████▋ | 259/300 [02:18<00:17,  2.41it/s]
 87%|████████▋ | 260/300 [02:18<00:16,  2.46it/s]
 87%|████████▋ | 261/300 [02:19<00:19,  2.04it/s]


== Status ==
Current time: 2022-12-11 18:39:02 (running for 00:02:28.29)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 87%|████████▋ | 262/300 [02:19<00:17,  2.19it/s]
 88%|████████▊ | 263/300 [02:19<00:15,  2.32it/s]
 88%|████████▊ | 264/300 [02:20<00:15,  2.39it/s]
 88%|████████▊ | 265/300 [02:20<00:14,  2.47it/s]
 89%|████████▊ | 266/300 [02:21<00:16,  2.03it/s]
 89%|████████▉ | 267/300 [02:21<00:15,  2.17it/s]
 89%|████████▉ | 268/300 [02:22<00:13,  2.30it/s]
 90%|████████▉ | 269/300 [02:22<00:12,  2.40it/s]
 90%|█████████ | 270/300 [02:22<00:12,  2.48it/s]
 90%|█████████ | 271/300 [02:23<00:14,  2.01it/s]
 91%|█████████ | 272/300 [02:23<00:12,  2.17it/s]
 91%|█████████ | 273/300 [02:24<00:11,  2.30it/s]


== Status ==
Current time: 2022-12-11 18:39:07 (running for 00:02:33.48)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 91%|█████████▏| 274/300 [02:24<00:10,  2.39it/s]
 92%|█████████▏| 275/300 [02:25<00:10,  2.47it/s]
 92%|█████████▏| 276/300 [02:25<00:11,  2.02it/s]
 92%|█████████▏| 277/300 [02:26<00:10,  2.18it/s]
 93%|█████████▎| 278/300 [02:26<00:09,  2.31it/s]
 93%|█████████▎| 279/300 [02:26<00:08,  2.41it/s]
 93%|█████████▎| 280/300 [02:27<00:08,  2.47it/s]
 94%|█████████▎| 281/300 [02:28<00:09,  2.03it/s]
 94%|█████████▍| 282/300 [02:28<00:08,  2.18it/s]
 94%|█████████▍| 283/300 [02:28<00:07,  2.31it/s]
 95%|█████████▍| 284/300 [02:29<00:06,  2.40it/s]
 95%|█████████▌| 285/300 [02:29<00:06,  2.46it/s]


== Status ==
Current time: 2022-12-11 18:39:12 (running for 00:02:38.66)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 95%|█████████▌| 286/300 [02:30<00:07,  1.99it/s]
 96%|█████████▌| 287/300 [02:30<00:06,  2.14it/s]
 96%|█████████▌| 288/300 [02:31<00:05,  2.26it/s]
 96%|█████████▋| 289/300 [02:31<00:04,  2.37it/s]
 97%|█████████▋| 290/300 [02:31<00:04,  2.45it/s]
 97%|█████████▋| 291/300 [02:32<00:04,  2.01it/s]
 97%|█████████▋| 292/300 [02:32<00:03,  2.16it/s]
 98%|█████████▊| 293/300 [02:33<00:03,  2.27it/s]
 98%|█████████▊| 294/300 [02:33<00:02,  2.36it/s]
 98%|█████████▊| 295/300 [02:34<00:02,  2.43it/s]
 99%|█████████▊| 296/300 [02:34<00:01,  2.00it/s]


== Status ==
Current time: 2022-12-11 18:39:18 (running for 00:02:43.85)
Memory usage on this node: 4.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       l

 99%|█████████▉| 297/300 [02:35<00:01,  2.15it/s]
 99%|█████████▉| 298/300 [02:35<00:00,  2.27it/s]
100%|█████████▉| 299/300 [02:35<00:00,  2.33it/s]
2022-12-11 18:39:19,427	INFO tune.py:777 -- Total run time: 165.17 seconds (165.03 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:39:19 (running for 00:02:45.04)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 160.000: -0.11908148629542363 | Iter 80.000: -0.3395597636893702 | Iter 40.000: -0.6911968553904444 | Iter 20.000: -0.6913561472514023 | Iter 10.000: -0.7015750880042713
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-36-34
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       loss |   train

  0%|          | 0/300 [00:00<?, ?it/s]0m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_bdc55_00000,2022-12-11_18-40-52,True,,83e76f4f462e42068ae5f98360ac182a,VGI-DEKSPTOP-TIJMEn,300,0.0474397,172.29.86.26,31922,True,89.4587,0.17756,89.4587,1670780452,0,,300,bdc55_00000,0.109134
run_one_training_bdc55_00001,2022-12-11_18-39-36,True,,8ff21dd8e950437f941aec46eaae798a,VGI-DEKSPTOP-TIJMEn,10,0.691313,172.29.86.26,32036,True,8.08735,0.742269,8.08735,1670780376,0,,10,bdc55_00001,0.291803
run_one_training_bdc55_00002,2022-12-11_18-40-02,True,,b540d84b4c1248f7b834c7920c3cf13e,VGI-DEKSPTOP-TIJMEn,20,0.691313,172.29.86.26,32038,True,34.1565,1.02326,34.1565,1670780402,0,,20,bdc55_00002,0.265248
run_one_training_bdc55_00003,2022-12-11_18-39-37,True,,8aba00280d4b4f358b1889e2c7da918f,VGI-DEKSPTOP-TIJMEn,10,0.704938,172.29.86.26,32040,True,9.33396,0.550206,9.33396,1670780377,0,,10,bdc55_00003,0.287354
run_one_training_bdc55_00004,2022-12-11_18-39-42,True,,96ac569bbd914afaadc5aedd76380647,VGI-DEKSPTOP-TIJMEn,20,0.671811,172.29.86.26,32042,True,14.4832,0.436772,14.4832,1670780382,0,,20,bdc55_00004,0.298306
run_one_training_bdc55_00005,2022-12-11_18-39-48,True,,20f072389f8440b6bf0e77607e7219a3,VGI-DEKSPTOP-TIJMEn,20,0.691313,172.29.86.26,32044,True,20.5218,0.563529,20.5218,1670780388,0,,20,bdc55_00005,0.269269
run_one_training_bdc55_00006,2022-12-11_18-39-36,True,,472f7bb31af748b2a5ae47637156c4cc,VGI-DEKSPTOP-TIJMEn,10,0.703402,172.29.86.26,32046,True,8.28097,0.566328,8.28097,1670780376,0,,10,bdc55_00006,0.3359
run_one_training_bdc55_00007,2022-12-11_18-39-48,True,,b6f89d183b204ea2bc3f79b31eca8262,VGI-DEKSPTOP-TIJMEn,10,0.707438,172.29.86.26,32048,True,19.9986,1.24899,19.9986,1670780388,0,,10,bdc55_00007,0.282241
run_one_training_bdc55_00008,2022-12-11_18-39-40,True,,58e897fdaef6488dae62597d7f78142e,VGI-DEKSPTOP-TIJMEn,20,0.674547,172.29.86.26,32050,True,11.8076,0.388264,11.8076,1670780380,0,,20,bdc55_00008,0.285393
run_one_training_bdc55_00009,2022-12-11_18-39-40,True,,506dfefb7cf34c0ea7dfbd54f82c57ee,VGI-DEKSPTOP-TIJMEn,10,0.691703,172.29.86.26,32052,True,12.2399,0.745662,12.2399,1670780380,0,,10,bdc55_00009,0.28984


  0%|          | 1/300 [00:01<06:34,  1.32s/it]
  1%|          | 2/300 [00:01<04:28,  1.11it/s]
  1%|          | 3/300 [00:02<03:10,  1.56it/s]


[2m[36m(run_one_training pid=32060)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.05, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.9, 'model_embedding_size': 8, 'model_attention_heads': 4, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'f588130f795148ed94e4b7035ee04419'}}}
[2m[36m(run_one_training pid=32050)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.01, 'sgd_momentum': 0.8, 'scheduler_gamma': 1, 'model_embedding_size': 32, 'model_attention_heads': 3, 'model_layers': 1, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'f588130f795148ed94e4b7035ee04419'}}}
[2m[36m(run_one_training pid=32036)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.05, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.9,

  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  1%|▏         | 4/300 [00:05<08:44,  1.77s/it]
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=32060)[0m Loading model...
[2m[36m(run_one_training pid=32050)[0m Loading model...
[2m[36m(run_one_training pid=32036)[0m Loading model...
[2m[36m(run_one_training pid=32056)[0m Loading model...
[2m[36m(run_one_training pid=32063)[0m Loading model...
[2m[36m(run_one_training pid=32054)[0m Loading model...
[2m[36m(run_one_training pid=32052)[0m Loading model...
[2m[36m(run_one_training pid=32048)[0m Loading model...
== Status ==
Current time: 2022-12-11 18:39:25 (running for 00:00:06.03)
Memory usage on this node: 7.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+-----

  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=32058)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  2%|▏         | 5/300 [00:06<06:19,  1.29s/it]
  0%|          | 1/300 [00:01<06:51,  1.38s/it]
  0%|          | 1/300 [00:01<06:51,  1.38s/it]
  0%|          | 1/300 [00:01<07:51,  1.58s/it]
  2%|▏         | 6/300 [00:07<06:17,  1.29s/it]
  0%|          | 1/300 [00:01<07:52,  1.58s/it]
  1%|          | 2/300 [00:01<04:02,  1.23it/s]


== Status ==
Current time: 2022-12-11 18:39:30 (running for 00:00:11.06)
Memory usage on this node: 8.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |     

  1%|          | 2/300 [00:01<04:12,  1.18it/s]
  0%|          | 1/300 [00:01<09:07,  1.83s/it]
  1%|          | 2/300 [00:02<04:54,  1.01it/s]
  0%|          | 1/300 [00:02<11:05,  2.23s/it]
  1%|          | 3/300 [00:02<03:07,  1.58it/s]
  2%|▏         | 7/300 [00:07<04:57,  1.02s/it]
  1%|          | 3/300 [00:02<03:03,  1.62it/s]
  1%|          | 2/300 [00:02<05:25,  1.09s/it]
  1%|▏         | 4/300 [00:02<02:23,  2.07it/s]
  1%|          | 2/300 [00:02<05:23,  1.08s/it]
  0%|          | 1/300 [00:02<11:55,  2.39s/it]
  0%|          | 1/300 [00:02<11:52,  2.38s/it]
  0%|          | 1/300 [00:02<12:05,  2.43s/it]
  1%|          | 3/300 [00:02<04:00,  1.23it/s]
  1%|▏         | 4/300 [00:02<02:38,  1.87it/s]
  3%|▎         | 8/300 [00:08<04:07,  1.18it/s]
  0%|          | 1/300 [00:02<13:11,  2.65s/it]
  2%|▏         | 5/300 [00:02<02:05,  2.36it/s]
  1%|          | 3/300 [00:02<04:15,  1.16it/s]
  2%|▏         | 5/300 [00:03<02:26,  2.02it/s]
  1%|          | 2/300 [00:02<06:27,  1.

== Status ==
Current time: 2022-12-11 18:39:35 (running for 00:00:16.13)
Memory usage on this node: 8.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.6882527756194273
Resources requested: 15.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (15 RUNNING, 1 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |    

  3%|▎         | 9/300 [00:07<03:29,  1.39it/s]
  4%|▎         | 11/300 [00:07<03:30,  1.37it/s]
  3%|▎         | 9/300 [00:07<03:25,  1.42it/s]
  2%|▏         | 7/300 [00:07<04:54,  1.00s/it]
  2%|▏         | 6/300 [00:07<06:17,  1.28s/it]
  4%|▍         | 12/300 [00:07<03:04,  1.56it/s]
  3%|▎         | 9/300 [00:07<03:24,  1.42it/s]
  5%|▌         | 16/300 [00:13<03:35,  1.32it/s]
  3%|▎         | 10/300 [00:07<03:15,  1.48it/s]
  4%|▍         | 13/300 [00:08<02:46,  1.73it/s]
  1%|          | 3/300 [00:07<11:46,  2.38s/it]
  3%|▎         | 8/300 [00:07<04:13,  1.15it/s]
  2%|▏         | 7/300 [00:07<05:14,  1.07s/it]
  1%|▏         | 4/300 [00:07<08:30,  1.73s/it]
  1%|          | 3/300 [00:07<12:12,  2.47s/it]
  6%|▌         | 17/300 [00:13<03:07,  1.51it/s]
  2%|▏         | 6/300 [00:08<07:07,  1.46s/it]
  5%|▍         | 14/300 [00:08<02:34,  1.86it/s]
  3%|▎         | 9/300 [00:08<03:47,  1.28it/s]
  1%|▏         | 4/300 [00:08<09:41,  1.96s/it]
  6%|▌         | 18/300 [00:14<02

== Status ==
Current time: 2022-12-11 18:39:40 (running for 00:00:21.20)
Memory usage on this node: 7.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6437319410033524 | Iter 10.000: -0.6915081432865311
Resources requested: 9.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (9 RUNNING, 7 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                      

  8%|▊         | 25/300 [00:17<02:01,  2.26it/s]
  2%|▏         | 6/300 [00:11<09:41,  1.98s/it]
  3%|▎         | 10/300 [00:12<04:46,  1.01it/s]
  5%|▌         | 16/300 [00:12<03:38,  1.30it/s]
  2%|▏         | 6/300 [00:12<10:03,  2.05s/it]
  6%|▌         | 17/300 [00:12<03:07,  1.51it/s]
  9%|▊         | 26/300 [00:18<02:40,  1.70it/s]
  2%|▏         | 7/300 [00:12<08:12,  1.68s/it]
  9%|▉         | 27/300 [00:19<02:19,  1.96it/s]
  4%|▎         | 11/300 [00:13<05:35,  1.16s/it]
  6%|▌         | 18/300 [00:13<02:47,  1.69it/s]
  9%|▉         | 28/300 [00:19<02:02,  2.22it/s]
  4%|▎         | 11/300 [00:13<05:32,  1.15s/it]
  2%|▏         | 7/300 [00:13<08:38,  1.77s/it]
  6%|▋         | 19/300 [00:13<02:31,  1.85it/s]
 10%|▉         | 29/300 [00:19<01:48,  2.49it/s]
  4%|▍         | 12/300 [00:14<04:53,  1.02s/it]
  1%|▏         | 4/300 [00:13<14:27,  2.93s/it]
  3%|▎         | 8/300 [00:13<07:09,  1.47s/it]
 10%|█         | 30/300 [00:20<01:42,  2.64it/s]
  2%|▏         | 6/300 [00

== Status ==
Current time: 2022-12-11 18:39:45 (running for 00:00:26.20)
Memory usage on this node: 6.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.671811425825581 | Iter 10.000: -0.6913127761799842
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                       

 12%|█▏        | 36/300 [00:22<02:11,  2.00it/s]
  3%|▎         | 8/300 [00:17<08:43,  1.79s/it]
 12%|█▏        | 37/300 [00:23<01:55,  2.28it/s]
  5%|▌         | 16/300 [00:17<04:36,  1.03it/s]
  3%|▎         | 10/300 [00:17<06:42,  1.39s/it]
  3%|▎         | 8/300 [00:17<09:10,  1.89s/it]
 13%|█▎        | 38/300 [00:23<01:43,  2.52it/s]
  5%|▌         | 16/300 [00:17<04:36,  1.03it/s]
 13%|█▎        | 39/300 [00:23<01:36,  2.70it/s]
  6%|▌         | 17/300 [00:18<04:06,  1.15it/s]
  4%|▎         | 11/300 [00:17<07:11,  1.49s/it]
 13%|█▎        | 40/300 [00:24<01:31,  2.85it/s]
  3%|▎         | 9/300 [00:18<07:58,  1.64s/it]
  6%|▌         | 17/300 [00:18<04:11,  1.13it/s]
  6%|▌         | 18/300 [00:18<03:43,  1.26it/s]
 14%|█▎        | 41/300 [00:24<02:01,  2.13it/s]
  3%|▎         | 9/300 [00:18<08:32,  1.76s/it]
  6%|▌         | 18/300 [00:19<03:45,  1.25it/s]
  6%|▋         | 19/300 [00:19<03:25,  1.37it/s]
  4%|▍         | 12/300 [00:18<06:26,  1.34s/it]
 14%|█▍        | 42/300 

== Status ==
Current time: 2022-12-11 18:39:50 (running for 00:00:31.26)
Memory usage on this node: 6.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6745471029231945 | Iter 10.000: -0.691312626004219
Resources requested: 5.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (5 RUNNING, 11 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|      

  5%|▍         | 14/300 [00:22<05:42,  1.20s/it]
 17%|█▋        | 51/300 [00:28<01:48,  2.30it/s]
  4%|▎         | 11/300 [00:22<08:51,  1.84s/it]
 17%|█▋        | 52/300 [00:28<01:32,  2.69it/s]
 18%|█▊        | 53/300 [00:28<01:23,  2.97it/s]
 18%|█▊        | 54/300 [00:29<01:17,  3.19it/s]
  5%|▌         | 15/300 [00:23<05:19,  1.12s/it]
  5%|▌         | 16/300 [00:23<05:42,  1.21s/it]
 18%|█▊        | 55/300 [00:29<01:10,  3.45it/s]
  3%|▎         | 8/300 [00:23<11:32,  2.37s/it]
  4%|▍         | 12/300 [00:23<07:57,  1.66s/it]
 19%|█▊        | 56/300 [00:30<01:42,  2.38it/s]
  6%|▌         | 17/300 [00:23<05:11,  1.10s/it]
 19%|█▉        | 57/300 [00:30<01:30,  2.68it/s]
 19%|█▉        | 58/300 [00:30<01:22,  2.95it/s]
 20%|█▉        | 59/300 [00:30<01:14,  3.23it/s]
  6%|▌         | 18/300 [00:24<04:52,  1.04s/it]
 20%|██        | 60/300 [00:31<01:10,  3.40it/s]
  5%|▌         | 16/300 [00:25<06:16,  1.33s/it]
  4%|▍         | 13/300 [00:25<07:29,  1.57s/it]
  3%|▎         | 9/30

== Status ==
Current time: 2022-12-11 18:39:55 (running for 00:00:36.36)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6829297145983825 | Iter 10.000: -0.6913127761799842
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|     

  6%|▌         | 18/300 [00:27<05:28,  1.16s/it]
 22%|██▏       | 66/300 [00:33<01:29,  2.63it/s]
 22%|██▏       | 67/300 [00:33<01:27,  2.67it/s]
  5%|▌         | 15/300 [00:27<06:47,  1.43s/it]
 23%|██▎       | 68/300 [00:33<01:20,  2.89it/s]
  6%|▋         | 19/300 [00:28<05:19,  1.14s/it]
 23%|██▎       | 69/300 [00:34<01:10,  3.28it/s]
 23%|██▎       | 70/300 [00:34<01:03,  3.63it/s]
 24%|██▎       | 71/300 [00:34<01:20,  2.84it/s]
 24%|██▍       | 72/300 [00:35<01:09,  3.29it/s]
 24%|██▍       | 73/300 [00:35<01:00,  3.72it/s]
 25%|██▍       | 74/300 [00:35<00:54,  4.12it/s]
  5%|▌         | 16/300 [00:29<07:23,  1.56s/it]
 25%|██▌       | 75/300 [00:35<00:51,  4.40it/s]
 25%|██▌       | 76/300 [00:36<01:10,  3.18it/s]
 26%|██▌       | 77/300 [00:36<01:01,  3.61it/s]
 26%|██▌       | 78/300 [00:36<00:55,  3.99it/s]
  6%|▌         | 17/300 [00:30<06:36,  1.40s/it]
 26%|██▋       | 79/300 [00:36<00:51,  4.29it/s]
 27%|██▋       | 80/300 [00:36<00:49,  4.47it/s]
 27%|██▋       | 81/

== Status ==
Current time: 2022-12-11 18:40:00 (running for 00:00:41.45)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123262735704 | Iter 10.000: -0.6913127761799842
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 28%|██▊       | 84/300 [00:38<00:51,  4.21it/s]
 28%|██▊       | 85/300 [00:38<00:48,  4.47it/s]
  6%|▋         | 19/300 [00:32<05:42,  1.22s/it]
 29%|██▊       | 86/300 [00:38<01:06,  3.21it/s]
 29%|██▉       | 87/300 [00:38<00:58,  3.64it/s]
 29%|██▉       | 88/300 [00:39<00:52,  4.03it/s]
 30%|██▉       | 89/300 [00:39<00:48,  4.37it/s]
 30%|███       | 90/300 [00:39<00:46,  4.56it/s]
 30%|███       | 91/300 [00:40<01:02,  3.33it/s]
 31%|███       | 92/300 [00:40<00:54,  3.79it/s]
 31%|███       | 93/300 [00:40<00:49,  4.20it/s]
 31%|███▏      | 94/300 [00:40<00:45,  4.56it/s]
 32%|███▏      | 95/300 [00:40<00:42,  4.84it/s]
 32%|███▏      | 96/300 [00:41<00:58,  3.47it/s]
 32%|███▏      | 97/300 [00:41<00:52,  3.89it/s]
 33%|███▎      | 98/300 [00:41<00:47,  4.29it/s]
 33%|███▎      | 99/300 [00:41<00:43,  4.58it/s]
 33%|███▎      | 100/300 [00:41<00:41,  4.84it/s]
 34%|███▎      | 101/300 [00:42<00:57,  3.46it/s]
 34%|███▍      | 102/300 [00:42<00:51,  3.88it/s]
 34%|███▍      | 

== Status ==
Current time: 2022-12-11 18:40:06 (running for 00:00:46.52)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: None | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 35%|███▌      | 105/300 [00:43<00:40,  4.83it/s]
 35%|███▌      | 106/300 [00:43<00:55,  3.51it/s]
 36%|███▌      | 107/300 [00:43<00:48,  3.98it/s]
 36%|███▌      | 108/300 [00:43<00:43,  4.38it/s]
 36%|███▋      | 109/300 [00:44<00:40,  4.69it/s]
 37%|███▋      | 110/300 [00:44<00:38,  4.97it/s]
 37%|███▋      | 111/300 [00:44<00:53,  3.55it/s]
 37%|███▋      | 112/300 [00:44<00:46,  4.02it/s]
 38%|███▊      | 113/300 [00:45<00:42,  4.37it/s]
 38%|███▊      | 114/300 [00:45<00:40,  4.59it/s]
 38%|███▊      | 115/300 [00:45<00:38,  4.85it/s]
 39%|███▊      | 116/300 [00:45<00:51,  3.55it/s]
 39%|███▉      | 117/300 [00:46<00:45,  3.98it/s]
 39%|███▉      | 118/300 [00:46<00:41,  4.38it/s]
 40%|███▉      | 119/300 [00:46<00:38,  4.73it/s]
 40%|████      | 120/300 [00:46<00:36,  4.99it/s]
 40%|████      | 121/300 [00:47<00:49,  3.60it/s]
 41%|████      | 122/300 [00:47<00:43,  4.07it/s]
 41%|████      | 123/300 [00:47<00:39,  4.46it/s]
 41%|████▏     | 124/300 [00:47<00:36,  4.76it/s]


== Status ==
Current time: 2022-12-11 18:40:11 (running for 00:00:51.85)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: None | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 42%|████▏     | 127/300 [00:48<00:43,  3.96it/s]
 43%|████▎     | 128/300 [00:48<00:39,  4.37it/s]
 43%|████▎     | 129/300 [00:48<00:36,  4.66it/s]
 43%|████▎     | 130/300 [00:49<00:34,  4.92it/s]
 44%|████▎     | 131/300 [00:49<00:47,  3.55it/s]
 44%|████▍     | 132/300 [00:49<00:41,  4.00it/s]
 44%|████▍     | 133/300 [00:49<00:38,  4.38it/s]
 45%|████▍     | 134/300 [00:50<00:35,  4.70it/s]
 45%|████▌     | 135/300 [00:50<00:33,  4.93it/s]
 45%|████▌     | 136/300 [00:50<00:46,  3.53it/s]
 46%|████▌     | 137/300 [00:50<00:41,  3.97it/s]
 46%|████▌     | 138/300 [00:51<00:37,  4.32it/s]
 46%|████▋     | 139/300 [00:51<00:34,  4.60it/s]
 47%|████▋     | 140/300 [00:51<00:32,  4.86it/s]
 47%|████▋     | 141/300 [00:51<00:45,  3.51it/s]
 47%|████▋     | 142/300 [00:52<00:39,  3.97it/s]
 48%|████▊     | 143/300 [00:52<00:37,  4.16it/s]
 48%|████▊     | 144/300 [00:52<00:34,  4.47it/s]
 48%|████▊     | 145/300 [00:52<00:32,  4.72it/s]
 49%|████▊     | 146/300 [00:53<00:45,  3.37it/s]


== Status ==
Current time: 2022-12-11 18:40:16 (running for 00:00:57.01)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: None | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 50%|████▉     | 149/300 [00:53<00:32,  4.59it/s]
 50%|█████     | 150/300 [00:53<00:31,  4.84it/s]
 50%|█████     | 151/300 [00:54<00:42,  3.47it/s]
 51%|█████     | 152/300 [00:54<00:37,  3.93it/s]
 51%|█████     | 153/300 [00:54<00:33,  4.33it/s]
 51%|█████▏    | 154/300 [00:54<00:31,  4.65it/s]
 52%|█████▏    | 155/300 [00:54<00:29,  4.90it/s]
 52%|█████▏    | 156/300 [00:55<00:40,  3.52it/s]
 52%|█████▏    | 157/300 [00:55<00:36,  3.97it/s]
 53%|█████▎    | 158/300 [00:55<00:32,  4.36it/s]
 53%|█████▎    | 159/300 [00:55<00:30,  4.69it/s]
 53%|█████▎    | 160/300 [00:56<00:28,  4.93it/s]
 54%|█████▎    | 161/300 [00:56<00:38,  3.57it/s]
 54%|█████▍    | 162/300 [00:56<00:34,  4.00it/s]
 54%|█████▍    | 163/300 [00:56<00:31,  4.36it/s]
 55%|█████▍    | 164/300 [00:57<00:29,  4.63it/s]
 55%|█████▌    | 165/300 [00:57<00:27,  4.87it/s]
 55%|█████▌    | 166/300 [00:57<00:38,  3.52it/s]
 56%|█████▌    | 167/300 [00:58<00:33,  3.96it/s]
 56%|█████▌    | 168/300 [00:58<00:30,  4.33it/s]


== Status ==
Current time: 2022-12-11 18:40:21 (running for 00:01:02.11)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss 

 57%|█████▋    | 171/300 [00:59<00:37,  3.44it/s]
 57%|█████▋    | 172/300 [00:59<00:33,  3.88it/s]
 58%|█████▊    | 173/300 [00:59<00:29,  4.26it/s]
 58%|█████▊    | 174/300 [00:59<00:27,  4.59it/s]
 58%|█████▊    | 175/300 [00:59<00:25,  4.84it/s]
 59%|█████▊    | 176/300 [01:00<00:35,  3.47it/s]
 59%|█████▉    | 177/300 [01:00<00:31,  3.93it/s]
 59%|█████▉    | 178/300 [01:00<00:28,  4.32it/s]
 60%|█████▉    | 179/300 [01:00<00:26,  4.63it/s]
 60%|██████    | 180/300 [01:00<00:24,  4.88it/s]
 60%|██████    | 181/300 [01:01<00:33,  3.53it/s]
 61%|██████    | 182/300 [01:01<00:29,  3.99it/s]
 61%|██████    | 183/300 [01:01<00:26,  4.39it/s]
 61%|██████▏   | 184/300 [01:01<00:24,  4.73it/s]
 62%|██████▏   | 185/300 [01:02<00:23,  4.98it/s]
 62%|██████▏   | 186/300 [01:02<00:31,  3.59it/s]
 62%|██████▏   | 187/300 [01:02<00:27,  4.04it/s]
 63%|██████▎   | 188/300 [01:02<00:25,  4.42it/s]
 63%|██████▎   | 189/300 [01:03<00:23,  4.72it/s]
 63%|██████▎   | 190/300 [01:03<00:22,  4.96it/s]


== Status ==
Current time: 2022-12-11 18:40:26 (running for 00:01:07.32)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 64%|██████▍   | 192/300 [01:03<00:27,  3.95it/s]
 64%|██████▍   | 193/300 [01:04<00:24,  4.33it/s]
 65%|██████▍   | 194/300 [01:04<00:22,  4.65it/s]
 65%|██████▌   | 195/300 [01:04<00:21,  4.92it/s]
 65%|██████▌   | 196/300 [01:04<00:29,  3.57it/s]
 66%|██████▌   | 197/300 [01:05<00:25,  4.03it/s]
 66%|██████▌   | 198/300 [01:05<00:23,  4.38it/s]
 66%|██████▋   | 199/300 [01:05<00:21,  4.60it/s]
 67%|██████▋   | 200/300 [01:05<00:20,  4.90it/s]
 67%|██████▋   | 201/300 [01:06<00:27,  3.59it/s]
 67%|██████▋   | 202/300 [01:06<00:24,  4.04it/s]
 68%|██████▊   | 203/300 [01:06<00:21,  4.42it/s]
 68%|██████▊   | 204/300 [01:06<00:20,  4.76it/s]
 68%|██████▊   | 205/300 [01:06<00:18,  5.01it/s]
 69%|██████▊   | 206/300 [01:07<00:26,  3.58it/s]
 69%|██████▉   | 207/300 [01:07<00:23,  4.03it/s]
 69%|██████▉   | 208/300 [01:07<00:20,  4.42it/s]
 70%|██████▉   | 209/300 [01:07<00:19,  4.74it/s]
 70%|███████   | 210/300 [01:07<00:17,  5.02it/s]
 70%|███████   | 211/300 [01:08<00:25,  3.53it/s]


== Status ==
Current time: 2022-12-11 18:40:31 (running for 00:01:12.37)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 72%|███████▏  | 215/300 [01:09<00:17,  4.84it/s]
 72%|███████▏  | 216/300 [01:09<00:24,  3.45it/s]
 72%|███████▏  | 217/300 [01:09<00:21,  3.92it/s]
 73%|███████▎  | 218/300 [01:10<00:18,  4.32it/s]
 73%|███████▎  | 219/300 [01:10<00:17,  4.67it/s]
 73%|███████▎  | 220/300 [01:10<00:16,  4.95it/s]
 74%|███████▎  | 221/300 [01:10<00:22,  3.56it/s]
 74%|███████▍  | 222/300 [01:10<00:19,  4.01it/s]
 74%|███████▍  | 223/300 [01:11<00:17,  4.38it/s]
 75%|███████▍  | 224/300 [01:11<00:16,  4.68it/s]
 75%|███████▌  | 225/300 [01:11<00:15,  4.97it/s]
 75%|███████▌  | 226/300 [01:11<00:20,  3.59it/s]
 76%|███████▌  | 227/300 [01:12<00:18,  4.05it/s]
 76%|███████▌  | 228/300 [01:12<00:16,  4.45it/s]
 76%|███████▋  | 229/300 [01:12<00:14,  4.76it/s]
 77%|███████▋  | 230/300 [01:12<00:13,  5.04it/s]
 77%|███████▋  | 231/300 [01:13<00:19,  3.60it/s]
 77%|███████▋  | 232/300 [01:13<00:16,  4.03it/s]
 78%|███████▊  | 233/300 [01:13<00:15,  4.41it/s]
 78%|███████▊  | 234/300 [01:13<00:13,  4.75it/s]


== Status ==
Current time: 2022-12-11 18:40:36 (running for 00:01:17.40)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 79%|███████▊  | 236/300 [01:14<00:18,  3.55it/s]
 79%|███████▉  | 237/300 [01:14<00:15,  4.00it/s]
 79%|███████▉  | 238/300 [01:14<00:14,  4.38it/s]
 80%|███████▉  | 239/300 [01:14<00:13,  4.69it/s]
 80%|████████  | 240/300 [01:15<00:12,  4.92it/s]
 80%|████████  | 241/300 [01:15<00:16,  3.56it/s]
 81%|████████  | 242/300 [01:15<00:14,  4.03it/s]
 81%|████████  | 243/300 [01:15<00:12,  4.44it/s]
 81%|████████▏ | 244/300 [01:16<00:11,  4.77it/s]
 82%|████████▏ | 245/300 [01:16<00:11,  4.95it/s]
 82%|████████▏ | 246/300 [01:16<00:15,  3.56it/s]
 82%|████████▏ | 247/300 [01:16<00:13,  4.02it/s]
 83%|████████▎ | 248/300 [01:17<00:11,  4.39it/s]
 83%|████████▎ | 249/300 [01:17<00:10,  4.67it/s]
 83%|████████▎ | 250/300 [01:17<00:10,  4.94it/s]
 84%|████████▎ | 251/300 [01:17<00:13,  3.56it/s]
 84%|████████▍ | 252/300 [01:18<00:11,  4.01it/s]
 84%|████████▍ | 253/300 [01:18<00:10,  4.40it/s]
 85%|████████▍ | 254/300 [01:18<00:09,  4.70it/s]
 85%|████████▌ | 255/300 [01:18<00:09,  4.96it/s]


== Status ==
Current time: 2022-12-11 18:40:42 (running for 00:01:22.56)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 86%|████████▌ | 257/300 [01:19<00:10,  3.99it/s]
 86%|████████▌ | 258/300 [01:19<00:09,  4.39it/s]
 86%|████████▋ | 259/300 [01:19<00:08,  4.66it/s]
 87%|████████▋ | 260/300 [01:19<00:08,  4.90it/s]
 87%|████████▋ | 261/300 [01:20<00:10,  3.55it/s]
 87%|████████▋ | 262/300 [01:20<00:09,  3.99it/s]
 88%|████████▊ | 263/300 [01:20<00:08,  4.37it/s]
 88%|████████▊ | 264/300 [01:20<00:07,  4.72it/s]
 88%|████████▊ | 265/300 [01:20<00:07,  4.98it/s]
 89%|████████▊ | 266/300 [01:21<00:09,  3.59it/s]
 89%|████████▉ | 267/300 [01:21<00:08,  4.01it/s]
 89%|████████▉ | 268/300 [01:21<00:07,  4.35it/s]
 90%|████████▉ | 269/300 [01:21<00:06,  4.62it/s]
 90%|█████████ | 270/300 [01:22<00:06,  4.94it/s]
 90%|█████████ | 271/300 [01:22<00:08,  3.54it/s]
 91%|█████████ | 272/300 [01:22<00:06,  4.01it/s]
 91%|█████████ | 273/300 [01:22<00:06,  4.39it/s]
 91%|█████████▏| 274/300 [01:23<00:05,  4.70it/s]
 92%|█████████▏| 275/300 [01:23<00:05,  4.91it/s]
 92%|█████████▏| 276/300 [01:23<00:06,  3.52it/s]


== Status ==
Current time: 2022-12-11 18:40:47 (running for 00:01:27.63)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      los

 93%|█████████▎| 279/300 [01:24<00:04,  4.67it/s]
 93%|█████████▎| 280/300 [01:24<00:04,  4.92it/s]
 94%|█████████▎| 281/300 [01:24<00:05,  3.55it/s]
 94%|█████████▍| 282/300 [01:25<00:04,  4.01it/s]
 94%|█████████▍| 283/300 [01:25<00:03,  4.36it/s]
 95%|█████████▍| 284/300 [01:25<00:03,  4.68it/s]
 95%|█████████▌| 285/300 [01:25<00:03,  4.92it/s]
 95%|█████████▌| 286/300 [01:26<00:03,  3.57it/s]
 96%|█████████▌| 287/300 [01:26<00:03,  4.03it/s]
 96%|█████████▌| 288/300 [01:26<00:02,  4.37it/s]
 96%|█████████▋| 289/300 [01:26<00:02,  4.71it/s]
 97%|█████████▋| 290/300 [01:26<00:01,  5.01it/s]
 97%|█████████▋| 291/300 [01:27<00:02,  3.56it/s]
 97%|█████████▋| 292/300 [01:27<00:01,  4.02it/s]
 98%|█████████▊| 293/300 [01:27<00:01,  4.42it/s]
 98%|█████████▊| 294/300 [01:27<00:01,  4.76it/s]
 98%|█████████▊| 295/300 [01:27<00:00,  5.02it/s]
 99%|█████████▊| 296/300 [01:28<00:01,  3.58it/s]
 99%|█████████▉| 297/300 [01:28<00:00,  4.01it/s]
 99%|█████████▉| 298/300 [01:28<00:00,  4.29it/s]


== Status ==
Current time: 2022-12-11 18:40:52 (running for 00:01:32.54)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 160.000: -0.11070627253502607 | Iter 80.000: -0.3032785852750142 | Iter 40.000: -0.5187963880598545 | Iter 20.000: -0.6913123547565192 | Iter 10.000: -0.6913127761799842
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-39-19
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      loss |   trainin

  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=1732)[0m Loading model...


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_f50ab_00000,2022-12-11_18-41-07,True,,51ad40112f0a41c7b0e6439555f91b93,VGI-DEKSPTOP-TIJMEn,10,0.703317,172.29.86.26,1732,True,12.1106,0.754628,12.1106,1670780467,0,,10,f50ab_00000,0.104368
run_one_training_f50ab_00001,2022-12-11_18-42-46,True,,9496b0fa70c341ce9c68de71c1098521,VGI-DEKSPTOP-TIJMEn,80,0.691302,172.29.86.26,1801,True,106.554,1.04987,106.554,1670780566,0,,80,f50ab_00001,0.284396
run_one_training_f50ab_00002,2022-12-11_18-41-14,True,,c38604e176b44d2e8d6d8dcdeb452486,VGI-DEKSPTOP-TIJMEn,20,0.70352,172.29.86.26,1803,True,13.8133,0.391076,13.8133,1670780474,0,,20,f50ab_00002,0.265903
run_one_training_f50ab_00003,2022-12-11_18-41-19,True,,2830a0f19ba74df1890eb981613b6d92,VGI-DEKSPTOP-TIJMEn,10,0.703814,172.29.86.26,1805,True,19.7035,1.32102,19.7035,1670780479,0,,10,f50ab_00003,0.274543
run_one_training_f50ab_00004,2022-12-11_18-41-07,True,,e1c02e4997b94ff59c525b67c147e856,VGI-DEKSPTOP-TIJMEn,10,0.707553,172.29.86.26,1807,True,7.49089,0.436489,7.49089,1670780467,0,,10,f50ab_00004,0.245041
run_one_training_f50ab_00005,2022-12-11_18-41-07,True,,04e42ea60da54d7c97d538861f334b92,VGI-DEKSPTOP-TIJMEn,10,0.704798,172.29.86.26,1809,True,7.47202,0.476495,7.47202,1670780467,0,,10,f50ab_00005,0.252511
run_one_training_f50ab_00006,2022-12-11_18-41-09,True,,b340f34911ae4be1b2ed6294c7baaf47,VGI-DEKSPTOP-TIJMEn,10,0.707985,172.29.86.26,1811,True,9.31719,0.580267,9.31719,1670780469,0,,10,f50ab_00006,0.290431
run_one_training_f50ab_00007,2022-12-11_18-41-07,True,,133529a3268b476483fee8d3c2ca3caa,VGI-DEKSPTOP-TIJMEn,10,0.704152,172.29.86.26,1812,True,7.55159,0.601483,7.55159,1670780467,0,,10,f50ab_00007,0.3073
run_one_training_f50ab_00008,2022-12-11_18-41-32,True,,db5596aabc9f4c9b9ba45f61f35cd7c8,VGI-DEKSPTOP-TIJMEn,40,0.691445,172.29.86.26,1815,True,31.7156,0.467274,31.7156,1670780492,0,,40,f50ab_00008,0.315434
run_one_training_f50ab_00009,2022-12-11_18-41-31,True,,7761d4d752dd44e6905caa281a722129,VGI-DEKSPTOP-TIJMEn,40,0.703275,172.29.86.26,1817,True,31.5203,0.413066,31.5203,1670780491,0,,40,f50ab_00009,0.308341


  0%|          | 1/300 [00:02<10:34,  2.12s/it]


[2m[36m(run_one_training pid=1801)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.05, 'sgd_momentum': 0.5, 'scheduler_gamma': 0.9, 'model_embedding_size': 64, 'model_attention_heads': 3, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '0f773ce66ea64bc7a7c4b25d5aefe5ec'}}}
[2m[36m(run_one_training pid=1817)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.1, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.8, 'model_embedding_size': 8, 'model_attention_heads': 1, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '0f773ce66ea64bc7a7c4b25d5aefe5ec'}}}
[2m[36m(run_one_training pid=1811)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.01, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.995,

  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=1812)[0m Loading model...
[2m[36m(run_one_training pid=1819)[0m Loading model...
[2m[36m(run_one_training pid=1807)[0m Loading model...
[2m[36m(run_one_training pid=1809)[0m Loading model...
[2m[36m(run_one_training pid=1805)[0m Loading model...
[2m[36m(run_one_training pid=1803)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.995, 'model_embedding_size': 16, 'model_attention_heads': 2, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '0f773ce66ea64bc7a7c4b25d5aefe5ec'}}}
[2m[36m(run_one_training pid=1821)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.05, 'sgd_momentum': 0.5, 'scheduler_gamma': 0.9, 'model_embedding_size': 64, 'model_attention_heads': 1, 'model_layers': 5, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_fu

  1%|          | 2/300 [00:04<11:46,  2.37s/it]
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=1803)[0m Loading model...
[2m[36m(run_one_training pid=1821)[0m Loading model...
[2m[36m(run_one_training pid=1830)[0m Loading model...
[2m[36m(run_one_training pid=1815)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  1%|          | 3/300 [00:05<08:14,  1.67s/it]
  0%|          | 1/300 [00:01<07:02,  1.41s/it]
  0%|          | 1/300 [00:01<06:14,  1.25s/it]
  1%|▏         | 4/300 [00:06<06:16,  1.27s/it]
  1%|          | 2/300 [00:01<03:51,  1.29it/s]
  0%|          | 1/300 [00:01<09:30,  1.91s/it]
  1%|          | 2/300 [00:01<04:26,  1.12it/s]
  0%|          | 1/300 [00:02<10:26,  2.09s/it]
  0%|          | 1/300 [00:01<09:49,  1.97s/it]
  0%|          | 1/300 [00:01<08:19,  1.67s/it]
  0%|          | 1/300 [00:01<08:10,  1.64s/it]
  1%|          | 3/300 [00:02<02:55,  1.69it/s]
  2%|▏         | 5/300 [00:06<05:12,  1.06s/it]
  1%|          | 2/300 [00:02<05:06,  1.03s/it]
  1%|          | 2/300 [00:02<05:31,  1.11s/it]
  1%|▏         | 4/300 [00:02<02:21,  2.10it/s]
  1%|          | 3/300 [00:02<03:44,  1.32it/s]
  0%|          |

== Status ==
Current time: 2022-12-11 18:41:03 (running for 00:00:11.71)
Memory usage on this node: 8.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                   |        

  2%|▏         | 5/300 [00:03<02:41,  1.82it/s]
  1%|▏         | 4/300 [00:03<03:22,  1.46it/s]
  1%|          | 2/300 [00:03<06:59,  1.41s/it]
  2%|▏         | 5/300 [00:03<03:05,  1.59it/s]
  2%|▏         | 5/300 [00:03<02:43,  1.80it/s]
  0%|          | 1/300 [00:03<18:17,  3.67s/it]
  1%|▏         | 4/300 [00:03<03:25,  1.44it/s]
  2%|▏         | 6/300 [00:03<03:03,  1.60it/s]
  1%|          | 2/300 [00:04<09:27,  1.90s/it]
  1%|▏         | 4/300 [00:04<04:22,  1.13it/s]
  2%|▏         | 5/300 [00:03<02:59,  1.65it/s]
  2%|▏         | 6/300 [00:08<06:29,  1.33s/it]
  0%|          | 1/300 [00:04<21:39,  4.35s/it]
  2%|▏         | 5/300 [00:03<03:05,  1.59it/s]
  1%|          | 3/300 [00:03<05:27,  1.10s/it]
  1%|          | 3/300 [00:04<05:57,  1.20s/it]
  0%|          | 1/300 [00:04<21:14,  4.26s/it]
  2%|▏         | 7/300 [00:04<02:43,  1.79it/s]
  1%|          | 2/300 [00:04<10:18,  2.08s/it]
  2%|▏         | 5/300 [00:04<03:52,  1.27it/s]
  3%|▎         | 8/300 [00:04<02:27,  1.

== Status ==
Current time: 2022-12-11 18:41:09 (running for 00:00:16.79)
Memory usage on this node: 7.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.7036930434405804
Resources requested: 12.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (12 RUNNING, 4 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |      

  2%|▏         | 6/300 [00:07<06:36,  1.35s/it]
  1%|          | 3/300 [00:08<13:15,  2.68s/it]
  1%|▏         | 4/300 [00:08<09:23,  1.90s/it]
  5%|▌         | 16/300 [00:08<02:35,  1.83it/s]
  2%|▏         | 7/300 [00:08<05:38,  1.16s/it]
  2%|▏         | 5/300 [00:08<07:31,  1.53s/it]
  2%|▏         | 7/300 [00:09<05:59,  1.23s/it]
  6%|▌         | 17/300 [00:08<02:13,  2.13it/s]
  4%|▎         | 11/300 [00:08<04:03,  1.19it/s]
  6%|▌         | 18/300 [00:09<01:55,  2.45it/s]
  3%|▎         | 8/300 [00:09<04:52,  1.00s/it]
  2%|▏         | 7/300 [00:08<05:43,  1.17s/it]
  6%|▋         | 19/300 [00:09<01:44,  2.69it/s]
  4%|▎         | 11/300 [00:09<04:05,  1.18it/s]
  4%|▍         | 12/300 [00:09<03:24,  1.41it/s]
  3%|▎         | 8/300 [00:09<05:06,  1.05s/it]
  1%|▏         | 4/300 [00:09<10:20,  2.10s/it]
  7%|▋         | 20/300 [00:09<01:35,  2.92it/s]
  4%|▍         | 12/300 [00:09<03:26,  1.39it/s]
  3%|▎         | 9/300 [00:09<04:20,  1.12it/s]
  4%|▍         | 13/300 [00:09<

== Status ==
Current time: 2022-12-11 18:41:14 (running for 00:00:21.92)
Memory usage on this node: 7.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.690137439717849 | Iter 10.000: -0.7036930434405804
Resources requested: 9.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (9 RUNNING, 7 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                         

 10%|█         | 30/300 [00:13<01:21,  3.31it/s]
  4%|▍         | 12/300 [00:13<04:25,  1.08it/s]
  4%|▍         | 13/300 [00:13<04:03,  1.18it/s]
  3%|▎         | 10/300 [00:14<05:21,  1.11s/it]
  2%|▏         | 7/300 [00:13<08:38,  1.77s/it]
  2%|▏         | 6/300 [00:14<11:04,  2.26s/it]
 10%|█         | 31/300 [00:14<01:49,  2.45it/s]
  4%|▍         | 13/300 [00:13<03:58,  1.20it/s]
  5%|▍         | 14/300 [00:14<03:31,  1.36it/s]
 11%|█         | 32/300 [00:14<01:34,  2.84it/s]
  3%|▎         | 9/300 [00:14<06:22,  1.31s/it]
 11%|█         | 33/300 [00:14<01:21,  3.27it/s]
 11%|█▏        | 34/300 [00:14<01:14,  3.55it/s]
  5%|▍         | 14/300 [00:14<03:39,  1.30it/s]
  5%|▌         | 15/300 [00:14<03:10,  1.49it/s]
  2%|▏         | 6/300 [00:15<11:36,  2.37s/it]
  3%|▎         | 8/300 [00:14<07:29,  1.54s/it]
 12%|█▏        | 35/300 [00:14<01:08,  3.87it/s]
  3%|▎         | 10/300 [00:15<05:52,  1.21s/it]
  5%|▌         | 15/300 [00:14<03:25,  1.39it/s]
  2%|▏         | 7/300 [0

== Status ==
Current time: 2022-12-11 18:41:19 (running for 00:00:27.10)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6720682053516308 | Iter 20.000: -0.7032965036729971 | Iter 10.000: -0.7034627677251895
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|         

 15%|█▌        | 46/300 [00:18<01:37,  2.59it/s]
  5%|▌         | 15/300 [00:19<04:28,  1.06it/s]
 16%|█▌        | 47/300 [00:18<01:23,  3.02it/s]
  7%|▋         | 20/300 [00:18<03:05,  1.51it/s]
  4%|▍         | 13/300 [00:18<05:44,  1.20s/it]
 16%|█▌        | 48/300 [00:19<01:14,  3.37it/s]
  3%|▎         | 9/300 [00:19<08:27,  1.74s/it]
  4%|▎         | 11/300 [00:19<07:35,  1.58s/it]
 16%|█▋        | 49/300 [00:19<01:08,  3.65it/s]
 17%|█▋        | 50/300 [00:19<01:03,  3.93it/s]
  7%|▋         | 21/300 [00:19<03:57,  1.17it/s]
  5%|▍         | 14/300 [00:19<05:19,  1.12s/it]
  7%|▋         | 21/300 [00:19<03:44,  1.24it/s]
 17%|█▋        | 51/300 [00:20<01:28,  2.80it/s]
  7%|▋         | 22/300 [00:20<03:23,  1.37it/s]
  4%|▍         | 12/300 [00:20<06:41,  1.39s/it]
 17%|█▋        | 52/300 [00:20<01:16,  3.25it/s]
  5%|▌         | 16/300 [00:20<05:22,  1.13s/it]
 18%|█▊        | 53/300 [00:20<01:06,  3.73it/s]
  3%|▎         | 10/300 [00:20<07:47,  1.61s/it]
  7%|▋         | 22/3

== Status ==
Current time: 2022-12-11 18:41:24 (running for 00:00:32.16)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6720682053516308 | Iter 20.000: -0.6974383973671744 | Iter 10.000: -0.7034627677251895
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|         

  9%|▉         | 28/300 [00:24<02:50,  1.60it/s]
  4%|▍         | 12/300 [00:24<08:04,  1.68s/it]
 22%|██▏       | 66/300 [00:24<01:21,  2.86it/s]
  6%|▌         | 18/300 [00:24<04:58,  1.06s/it]
  9%|▉         | 28/300 [00:23<02:50,  1.59it/s]
 10%|▉         | 29/300 [00:24<02:37,  1.72it/s]
 22%|██▏       | 67/300 [00:24<01:11,  3.28it/s]
 23%|██▎       | 68/300 [00:24<01:01,  3.75it/s]
 10%|█         | 30/300 [00:24<02:25,  1.86it/s]
 23%|██▎       | 69/300 [00:24<00:57,  4.02it/s]
 10%|▉         | 29/300 [00:24<02:45,  1.64it/s]
  7%|▋         | 21/300 [00:25<04:48,  1.03s/it]
 23%|██▎       | 70/300 [00:25<00:51,  4.49it/s]
  5%|▌         | 16/300 [00:25<06:30,  1.37s/it]
  6%|▋         | 19/300 [00:25<04:42,  1.00s/it]
 10%|█         | 30/300 [00:24<02:38,  1.70it/s]
  4%|▍         | 13/300 [00:25<07:25,  1.55s/it]
 24%|██▎       | 71/300 [00:25<01:14,  3.08it/s]
  7%|▋         | 22/300 [00:25<04:22,  1.06it/s]
 24%|██▍       | 72/300 [00:25<01:05,  3.46it/s]
 24%|██▍       | 73/

== Status ==
Current time: 2022-12-11 18:41:29 (running for 00:00:37.26)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6720682053516308 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iterati

 28%|██▊       | 84/300 [00:28<00:52,  4.08it/s]
 28%|██▊       | 85/300 [00:29<00:49,  4.32it/s]
 12%|█▏        | 36/300 [00:29<03:29,  1.26it/s]
  9%|▊         | 26/300 [00:29<04:41,  1.03s/it]
  8%|▊         | 23/300 [00:29<04:48,  1.04s/it]
 29%|██▊       | 86/300 [00:29<01:09,  3.10it/s]
 12%|█▏        | 36/300 [00:29<03:13,  1.36it/s]
 12%|█▏        | 37/300 [00:29<02:58,  1.47it/s]
 29%|██▉       | 87/300 [00:29<00:59,  3.58it/s]
 29%|██▉       | 88/300 [00:30<00:51,  4.08it/s]
  9%|▉         | 27/300 [00:30<04:12,  1.08it/s]
 13%|█▎        | 38/300 [00:30<02:39,  1.64it/s]
 30%|██▉       | 89/300 [00:30<00:47,  4.48it/s]
 12%|█▏        | 37/300 [00:29<02:54,  1.51it/s]
 30%|███       | 90/300 [00:30<00:43,  4.84it/s]
  8%|▊         | 24/300 [00:30<04:32,  1.01it/s]
  5%|▌         | 16/300 [00:30<08:03,  1.70s/it]
 13%|█▎        | 39/300 [00:30<02:23,  1.82it/s]
 13%|█▎        | 38/300 [00:30<02:38,  1.65it/s]
  9%|▉         | 28/300 [00:30<03:52,  1.17it/s]
 30%|███       | 91/

== Status ==
Current time: 2022-12-11 18:41:34 (running for 00:00:42.32)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 160.000: None | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6914449345010022 | Iter 20.000: -0.691631869180128 | Iter 10.000: -0.7034627677251895
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iterat

  6%|▋         | 19/300 [00:34<06:05,  1.30s/it]
 35%|███▌      | 105/300 [00:33<00:36,  5.32it/s]
 11%|█         | 32/300 [00:34<03:39,  1.22it/s]
  9%|▉         | 28/300 [00:34<04:12,  1.08it/s]
 35%|███▌      | 106/300 [00:34<00:54,  3.55it/s]
 36%|███▌      | 107/300 [00:34<00:47,  4.05it/s]
 11%|█         | 33/300 [00:34<03:23,  1.31it/s]
 36%|███▌      | 108/300 [00:34<00:41,  4.59it/s]
  7%|▋         | 20/300 [00:35<05:44,  1.23s/it]
 36%|███▋      | 109/300 [00:34<00:38,  4.92it/s]
 37%|███▋      | 110/300 [00:35<00:36,  5.19it/s]
 10%|▉         | 29/300 [00:34<03:56,  1.15it/s]
 11%|█▏        | 34/300 [00:35<03:09,  1.41it/s]
 37%|███▋      | 111/300 [00:35<00:53,  3.53it/s]
 37%|███▋      | 112/300 [00:35<00:47,  3.95it/s]
 10%|█         | 30/300 [00:35<03:48,  1.18it/s]
 38%|███▊      | 113/300 [00:35<00:42,  4.44it/s]
 12%|█▏        | 35/300 [00:36<03:01,  1.46it/s]
 38%|███▊      | 114/300 [00:36<00:38,  4.84it/s]
 38%|███▊      | 115/300 [00:36<00:35,  5.21it/s]
 39%|███▊

== Status ==
Current time: 2022-12-11 18:41:39 (running for 00:00:47.42)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 160.000: None | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6914449345010022 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 42%|████▏     | 127/300 [00:39<00:43,  3.98it/s]
 43%|████▎     | 128/300 [00:39<00:38,  4.44it/s]
 11%|█▏        | 34/300 [00:39<03:42,  1.19it/s]
 43%|████▎     | 129/300 [00:39<00:35,  4.78it/s]
 43%|████▎     | 130/300 [00:39<00:33,  5.03it/s]
  8%|▊         | 24/300 [00:40<05:29,  1.19s/it]
 44%|████▎     | 131/300 [00:40<00:46,  3.61it/s]
 12%|█▏        | 35/300 [00:39<03:35,  1.23it/s]
 44%|████▍     | 132/300 [00:40<00:40,  4.15it/s]
 44%|████▍     | 133/300 [00:40<00:35,  4.67it/s]
 45%|████▍     | 134/300 [00:40<00:32,  5.14it/s]
 45%|████▌     | 135/300 [00:40<00:30,  5.35it/s]
  8%|▊         | 25/300 [00:41<05:12,  1.14s/it]
 45%|████▌     | 136/300 [00:41<00:44,  3.66it/s]
 46%|████▌     | 137/300 [00:41<00:39,  4.09it/s]
 12%|█▏        | 36/300 [00:41<04:16,  1.03it/s]
 46%|████▌     | 138/300 [00:41<00:35,  4.54it/s]
 46%|████▋     | 139/300 [00:41<00:32,  4.96it/s]
 47%|████▋     | 140/300 [00:41<00:29,  5.37it/s]
 12%|█▏        | 37/300 [00:42<03:56,  1.11it/s]
 47%|█

== Status ==
Current time: 2022-12-11 18:41:44 (running for 00:00:52.51)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6915233250086505 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 13%|█▎        | 40/300 [00:44<03:26,  1.26it/s]
 50%|█████     | 151/300 [00:44<00:40,  3.66it/s]
 51%|█████     | 152/300 [00:44<00:35,  4.15it/s]
 51%|█████     | 153/300 [00:44<00:31,  4.65it/s]
  9%|▉         | 28/300 [00:45<05:22,  1.19s/it]
 51%|█████▏    | 154/300 [00:44<00:28,  5.16it/s]
 52%|█████▏    | 155/300 [00:45<00:26,  5.47it/s]
 52%|█████▏    | 156/300 [00:45<00:39,  3.68it/s]
 52%|█████▏    | 157/300 [00:45<00:33,  4.25it/s]
 14%|█▎        | 41/300 [00:45<04:06,  1.05it/s]
 10%|▉         | 29/300 [00:46<05:06,  1.13s/it]
 53%|█████▎    | 158/300 [00:45<00:29,  4.76it/s]
 53%|█████▎    | 159/300 [00:46<00:27,  5.17it/s]
 53%|█████▎    | 160/300 [00:46<00:25,  5.50it/s]
 14%|█▍        | 42/300 [00:46<03:48,  1.13it/s]
 54%|█████▎    | 161/300 [00:46<00:37,  3.69it/s]
 54%|█████▍    | 162/300 [00:46<00:32,  4.25it/s]
 10%|█         | 30/300 [00:47<04:56,  1.10s/it]
 54%|█████▍    | 163/300 [00:46<00:28,  4.76it/s]
 55%|█████▍    | 164/300 [00:47<00:26,  5.18it/s]
 14%|█

== Status ==
Current time: 2022-12-11 18:41:49 (running for 00:00:57.59)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6914449345010022 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 58%|█████▊    | 174/300 [00:49<00:23,  5.28it/s]
 58%|█████▊    | 175/300 [00:49<00:21,  5.70it/s]
 11%|█         | 32/300 [00:50<05:29,  1.23s/it]
 59%|█████▊    | 176/300 [00:49<00:31,  3.92it/s]
 15%|█▌        | 46/300 [00:49<03:57,  1.07it/s]
 59%|█████▉    | 177/300 [00:50<00:27,  4.51it/s]
 59%|█████▉    | 178/300 [00:50<00:24,  5.03it/s]
 60%|█████▉    | 179/300 [00:50<00:22,  5.29it/s]
 60%|██████    | 180/300 [00:50<00:21,  5.67it/s]
 16%|█▌        | 47/300 [00:50<03:42,  1.14it/s]
 11%|█         | 33/300 [00:51<05:12,  1.17s/it]
 60%|██████    | 181/300 [00:50<00:31,  3.75it/s]
 61%|██████    | 182/300 [00:51<00:27,  4.27it/s]
 61%|██████    | 183/300 [00:51<00:24,  4.78it/s]
 61%|██████▏   | 184/300 [00:51<00:22,  5.21it/s]
 16%|█▌        | 48/300 [00:51<03:30,  1.20it/s]
 62%|██████▏   | 185/300 [00:51<00:20,  5.57it/s]
 11%|█▏        | 34/300 [00:52<04:59,  1.13s/it]
 62%|██████▏   | 186/300 [00:52<00:29,  3.84it/s]
 62%|██████▏   | 187/300 [00:52<00:25,  4.42it/s]
 16%|█

== Status ==
Current time: 2022-12-11 18:41:54 (running for 00:01:02.63)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6914449345010022 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 66%|██████▌   | 197/300 [00:54<00:23,  4.36it/s]
 66%|██████▌   | 198/300 [00:54<00:20,  4.88it/s]
 66%|██████▋   | 199/300 [00:54<00:19,  5.27it/s]
 12%|█▏        | 36/300 [00:55<05:51,  1.33s/it]
 67%|██████▋   | 200/300 [00:54<00:17,  5.64it/s]
 17%|█▋        | 52/300 [00:54<03:37,  1.14it/s]
 67%|██████▋   | 201/300 [00:55<00:26,  3.77it/s]
 67%|██████▋   | 202/300 [00:55<00:22,  4.36it/s]
 68%|██████▊   | 203/300 [00:55<00:19,  4.90it/s]
 18%|█▊        | 53/300 [00:55<03:25,  1.20it/s]
 68%|██████▊   | 204/300 [00:55<00:18,  5.30it/s]
 12%|█▏        | 37/300 [00:56<05:26,  1.24s/it]
 68%|██████▊   | 205/300 [00:55<00:16,  5.65it/s]
 69%|██████▊   | 206/300 [00:56<00:24,  3.84it/s]
 18%|█▊        | 54/300 [00:56<03:18,  1.24it/s]
 69%|██████▉   | 207/300 [00:56<00:21,  4.39it/s]
 69%|██████▉   | 208/300 [00:56<00:19,  4.80it/s]
 70%|██████▉   | 209/300 [00:56<00:17,  5.13it/s]
 13%|█▎        | 38/300 [00:57<05:12,  1.19s/it]
 70%|███████   | 210/300 [00:56<00:16,  5.32it/s]
 18%|█

== Status ==
Current time: 2022-12-11 18:42:00 (running for 00:01:08.11)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 74%|███████▍  | 223/300 [00:59<00:16,  4.54it/s]
 19%|█▉        | 58/300 [00:59<03:28,  1.16it/s]
 75%|███████▍  | 224/300 [01:00<00:15,  4.78it/s]
 75%|███████▌  | 225/300 [01:00<00:14,  5.14it/s]
 75%|███████▌  | 226/300 [01:00<00:20,  3.53it/s]
 20%|█▉        | 59/300 [01:00<03:23,  1.19it/s]
 76%|███████▌  | 227/300 [01:00<00:17,  4.13it/s]
 14%|█▎        | 41/300 [01:01<06:01,  1.40s/it]
 76%|███████▌  | 228/300 [01:01<00:15,  4.70it/s]
 76%|███████▋  | 229/300 [01:01<00:13,  5.08it/s]
 77%|███████▋  | 230/300 [01:01<00:12,  5.44it/s]
 20%|██        | 60/300 [01:01<03:16,  1.22it/s]
 77%|███████▋  | 231/300 [01:01<00:19,  3.60it/s]
 14%|█▍        | 42/300 [01:02<05:35,  1.30s/it]
 77%|███████▋  | 232/300 [01:02<00:16,  4.04it/s]
 78%|███████▊  | 233/300 [01:02<00:14,  4.55it/s]
 78%|███████▊  | 234/300 [01:02<00:13,  5.03it/s]
 78%|███████▊  | 235/300 [01:02<00:12,  5.33it/s]
 20%|██        | 61/300 [01:02<03:53,  1.02it/s]
 79%|███████▊  | 236/300 [01:03<00:17,  3.67it/s]
 14%|█

== Status ==
Current time: 2022-12-11 18:42:05 (running for 00:01:13.19)
Memory usage on this node: 5.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 21%|██▏       | 64/300 [01:05<03:14,  1.22it/s]
 15%|█▌        | 45/300 [01:05<04:45,  1.12s/it]
 82%|████████▏ | 246/300 [01:05<00:14,  3.66it/s]
 82%|████████▏ | 247/300 [01:05<00:12,  4.18it/s]
 83%|████████▎ | 248/300 [01:05<00:11,  4.60it/s]
 83%|████████▎ | 249/300 [01:05<00:10,  5.02it/s]
 83%|████████▎ | 250/300 [01:05<00:09,  5.38it/s]
 22%|██▏       | 65/300 [01:05<03:08,  1.24it/s]
 84%|████████▎ | 251/300 [01:06<00:13,  3.72it/s]
 84%|████████▍ | 252/300 [01:06<00:11,  4.30it/s]
 84%|████████▍ | 253/300 [01:06<00:09,  4.79it/s]
 85%|████████▍ | 254/300 [01:06<00:08,  5.16it/s]
 85%|████████▌ | 255/300 [01:06<00:08,  5.59it/s]
 15%|█▌        | 46/300 [01:07<05:46,  1.36s/it]
 22%|██▏       | 66/300 [01:07<03:42,  1.05it/s]
 85%|████████▌ | 256/300 [01:07<00:11,  3.73it/s]
 86%|████████▌ | 257/300 [01:07<00:10,  4.21it/s]
 86%|████████▌ | 258/300 [01:07<00:09,  4.61it/s]
 86%|████████▋ | 259/300 [01:07<00:08,  5.07it/s]
 22%|██▏       | 67/300 [01:07<03:27,  1.12it/s]
 87%|█

[2m[36m(run_one_training pid=1828)[0m Early stopping due to no improvement.
== Status ==
Current time: 2022-12-11 18:42:10 (running for 00:01:18.41)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embe

 23%|██▎       | 70/300 [01:10<02:58,  1.29it/s]
 16%|█▋        | 49/300 [01:10<04:43,  1.13s/it]
 17%|█▋        | 50/300 [01:11<04:30,  1.08s/it]
 24%|██▎       | 71/300 [01:11<03:30,  1.09it/s]
 24%|██▍       | 72/300 [01:11<03:16,  1.16it/s]
 24%|██▍       | 73/300 [01:12<03:09,  1.20it/s]
 17%|█▋        | 51/300 [01:13<05:31,  1.33s/it]
 25%|██▍       | 74/300 [01:13<03:00,  1.25it/s]
 17%|█▋        | 52/300 [01:14<05:07,  1.24s/it]
 25%|██▌       | 75/300 [01:14<02:53,  1.30it/s]


== Status ==
Current time: 2022-12-11 18:42:15 (running for 00:01:23.41)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 18%|█▊        | 53/300 [01:15<04:49,  1.17s/it]
 25%|██▌       | 76/300 [01:15<03:30,  1.07it/s]
 18%|█▊        | 54/300 [01:16<04:35,  1.12s/it]
 26%|██▌       | 77/300 [01:16<03:13,  1.16it/s]
 18%|█▊        | 55/300 [01:17<04:22,  1.07s/it]
 26%|██▌       | 78/300 [01:16<03:01,  1.22it/s]
 26%|██▋       | 79/300 [01:17<02:53,  1.28it/s]
 27%|██▋       | 80/300 [01:18<02:47,  1.31it/s]
 19%|█▊        | 56/300 [01:19<05:16,  1.30s/it]
 27%|██▋       | 81/300 [01:19<03:20,  1.09it/s]
 19%|█▉        | 57/300 [01:20<04:50,  1.20s/it]


== Status ==
Current time: 2022-12-11 18:42:20 (running for 00:01:28.73)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.670258996852984 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 27%|██▋       | 82/300 [01:20<03:07,  1.16it/s]
 19%|█▉        | 58/300 [01:21<04:34,  1.13s/it]
 28%|██▊       | 83/300 [01:21<02:56,  1.23it/s]
 20%|█▉        | 59/300 [01:22<04:21,  1.09s/it]
 28%|██▊       | 84/300 [01:21<02:49,  1.28it/s]
 28%|██▊       | 85/300 [01:22<02:42,  1.33it/s]
 20%|██        | 60/300 [01:23<04:13,  1.05s/it]
 29%|██▊       | 86/300 [01:23<03:11,  1.12it/s]
 29%|██▉       | 87/300 [01:24<02:57,  1.20it/s]
 20%|██        | 61/300 [01:24<05:04,  1.27s/it]
 29%|██▉       | 88/300 [01:25<02:47,  1.26it/s]


== Status ==
Current time: 2022-12-11 18:42:26 (running for 00:01:33.87)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.670258996852984 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 21%|██        | 62/300 [01:25<04:43,  1.19s/it]
 30%|██▉       | 89/300 [01:25<02:41,  1.31it/s]
 30%|███       | 90/300 [01:26<02:38,  1.33it/s]
 21%|██        | 63/300 [01:26<04:29,  1.14s/it]
 21%|██▏       | 64/300 [01:27<04:15,  1.08s/it]
 30%|███       | 91/300 [01:27<03:06,  1.12it/s]
 22%|██▏       | 65/300 [01:28<04:09,  1.06s/it]
 31%|███       | 92/300 [01:28<02:56,  1.18it/s]
 31%|███       | 93/300 [01:29<02:46,  1.24it/s]
 31%|███▏      | 94/300 [01:29<02:39,  1.29it/s]
 22%|██▏       | 66/300 [01:30<05:01,  1.29s/it]


== Status ==
Current time: 2022-12-11 18:42:31 (running for 00:01:38.95)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.670258996852984 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 32%|███▏      | 95/300 [01:30<02:34,  1.33it/s]
 22%|██▏       | 67/300 [01:31<04:37,  1.19s/it]
 32%|███▏      | 96/300 [01:31<03:03,  1.11it/s]
 23%|██▎       | 68/300 [01:32<04:22,  1.13s/it]
 32%|███▏      | 97/300 [01:32<02:51,  1.18it/s]
 23%|██▎       | 69/300 [01:33<04:10,  1.08s/it]
 33%|███▎      | 98/300 [01:33<02:42,  1.25it/s]
 33%|███▎      | 99/300 [01:33<02:35,  1.30it/s]
 23%|██▎       | 70/300 [01:34<04:02,  1.05s/it]
 33%|███▎      | 100/300 [01:34<02:30,  1.33it/s]
 34%|███▎      | 101/300 [01:35<02:58,  1.12it/s]
 24%|██▎       | 71/300 [01:36<04:52,  1.28s/it]


== Status ==
Current time: 2022-12-11 18:42:37 (running for 00:01:44.97)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.670258996852984 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 34%|███▍      | 102/300 [01:36<02:47,  1.18it/s]
 24%|██▍       | 72/300 [01:37<04:32,  1.19s/it]
 34%|███▍      | 103/300 [01:37<02:38,  1.25it/s]
 24%|██▍       | 73/300 [01:38<04:16,  1.13s/it]
 35%|███▍      | 104/300 [01:37<02:31,  1.30it/s]
 35%|███▌      | 105/300 [01:38<02:26,  1.33it/s]
 25%|██▍       | 74/300 [01:39<04:05,  1.08s/it]
 25%|██▌       | 75/300 [01:40<03:55,  1.05s/it]
 35%|███▌      | 106/300 [01:39<02:53,  1.12it/s]
 36%|███▌      | 107/300 [01:40<02:42,  1.19it/s]
 36%|███▌      | 108/300 [01:41<02:32,  1.26it/s]
 25%|██▌       | 76/300 [01:42<04:45,  1.27s/it]


== Status ==
Current time: 2022-12-11 18:42:42 (running for 00:01:50.41)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.670258996852984 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 36%|███▋      | 109/300 [01:42<02:26,  1.31it/s]
 26%|██▌       | 77/300 [01:43<04:26,  1.19s/it]
 37%|███▋      | 110/300 [01:42<02:22,  1.33it/s]
 26%|██▌       | 78/300 [01:44<04:15,  1.15s/it]
 37%|███▋      | 111/300 [01:44<02:51,  1.10it/s]
 37%|███▋      | 112/300 [01:44<02:42,  1.16it/s]
 26%|██▋       | 79/300 [01:45<04:05,  1.11s/it]
 38%|███▊      | 113/300 [01:45<02:33,  1.22it/s]
 38%|███▊      | 114/300 [01:46<02:26,  1.27it/s]
 38%|███▊      | 115/300 [01:46<02:19,  1.32it/s]


== Status ==
Current time: 2022-12-11 18:42:48 (running for 00:01:55.95)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 39%|███▊      | 116/300 [01:48<02:44,  1.12it/s]
 39%|███▉      | 117/300 [01:48<02:32,  1.20it/s]
 39%|███▉      | 118/300 [01:49<02:22,  1.28it/s]
 40%|███▉      | 119/300 [01:50<02:17,  1.31it/s]
 40%|████      | 120/300 [01:50<02:12,  1.36it/s]
 40%|████      | 121/300 [01:52<02:35,  1.15it/s]


== Status ==
Current time: 2022-12-11 18:42:53 (running for 00:02:01.11)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 41%|████      | 122/300 [01:52<02:25,  1.22it/s]
 41%|████      | 123/300 [01:53<02:17,  1.29it/s]
 41%|████▏     | 124/300 [01:54<02:11,  1.34it/s]
 42%|████▏     | 125/300 [01:54<02:06,  1.38it/s]
 42%|████▏     | 126/300 [01:55<02:31,  1.15it/s]
 42%|████▏     | 127/300 [01:56<02:20,  1.23it/s]
 43%|████▎     | 128/300 [01:57<02:11,  1.31it/s]


== Status ==
Current time: 2022-12-11 18:42:58 (running for 00:02:06.35)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 43%|████▎     | 129/300 [01:57<02:05,  1.36it/s]
 43%|████▎     | 130/300 [01:58<02:02,  1.38it/s]
 44%|████▎     | 131/300 [01:59<02:25,  1.16it/s]
 44%|████▍     | 132/300 [02:00<02:17,  1.22it/s]
 44%|████▍     | 133/300 [02:01<02:09,  1.29it/s]
 45%|████▍     | 134/300 [02:01<02:02,  1.35it/s]
 45%|████▌     | 135/300 [02:02<01:58,  1.39it/s]


== Status ==
Current time: 2022-12-11 18:43:03 (running for 00:02:11.63)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 45%|████▌     | 136/300 [02:03<02:21,  1.16it/s]
 46%|████▌     | 137/300 [02:04<02:12,  1.23it/s]
 46%|████▌     | 138/300 [02:05<02:04,  1.30it/s]
 46%|████▋     | 139/300 [02:05<01:59,  1.35it/s]
 47%|████▋     | 140/300 [02:06<01:55,  1.39it/s]
 47%|████▋     | 141/300 [02:07<02:16,  1.16it/s]


== Status ==
Current time: 2022-12-11 18:43:09 (running for 00:02:16.80)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 47%|████▋     | 142/300 [02:08<02:10,  1.21it/s]
 48%|████▊     | 143/300 [02:09<02:04,  1.26it/s]
 48%|████▊     | 144/300 [02:09<01:59,  1.31it/s]
 48%|████▊     | 145/300 [02:10<01:54,  1.35it/s]
 49%|████▊     | 146/300 [02:11<02:15,  1.14it/s]
 49%|████▉     | 147/300 [02:12<02:04,  1.23it/s]
 49%|████▉     | 148/300 [02:13<01:57,  1.30it/s]


== Status ==
Current time: 2022-12-11 18:43:14 (running for 00:02:22.11)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 50%|████▉     | 149/300 [02:13<01:52,  1.35it/s]
 50%|█████     | 150/300 [02:14<01:48,  1.38it/s]
 50%|█████     | 151/300 [02:15<02:08,  1.16it/s]
 51%|█████     | 152/300 [02:16<02:00,  1.23it/s]
 51%|█████     | 153/300 [02:16<01:52,  1.30it/s]
 51%|█████▏    | 154/300 [02:17<01:47,  1.36it/s]
 52%|█████▏    | 155/300 [02:18<01:43,  1.40it/s]


== Status ==
Current time: 2022-12-11 18:43:19 (running for 00:02:27.33)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6713856992622217 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 52%|█████▏    | 156/300 [02:19<02:03,  1.17it/s]
 52%|█████▏    | 157/300 [02:20<01:53,  1.26it/s]
 53%|█████▎    | 158/300 [02:20<01:47,  1.32it/s]
 53%|█████▎    | 159/300 [02:21<01:43,  1.37it/s]
 53%|█████▎    | 160/300 [02:22<01:39,  1.41it/s]
 54%|█████▎    | 161/300 [02:23<01:57,  1.18it/s]


== Status ==
Current time: 2022-12-11 18:43:24 (running for 00:02:32.36)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 54%|█████▍    | 162/300 [02:23<01:50,  1.24it/s]
 54%|█████▍    | 163/300 [02:24<01:44,  1.31it/s]
 55%|█████▍    | 164/300 [02:25<01:39,  1.36it/s]
 55%|█████▌    | 165/300 [02:25<01:36,  1.40it/s]
 55%|█████▌    | 166/300 [02:27<01:55,  1.17it/s]
 56%|█████▌    | 167/300 [02:27<01:46,  1.25it/s]
 56%|█████▌    | 168/300 [02:28<01:40,  1.32it/s]


== Status ==
Current time: 2022-12-11 18:43:29 (running for 00:02:37.56)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 56%|█████▋    | 169/300 [02:29<01:36,  1.36it/s]
 57%|█████▋    | 170/300 [02:29<01:33,  1.39it/s]
 57%|█████▋    | 171/300 [02:31<01:50,  1.17it/s]
 57%|█████▋    | 172/300 [02:31<01:43,  1.24it/s]
 58%|█████▊    | 173/300 [02:32<01:36,  1.31it/s]
 58%|█████▊    | 174/300 [02:33<01:32,  1.36it/s]
 58%|█████▊    | 175/300 [02:33<01:28,  1.41it/s]


== Status ==
Current time: 2022-12-11 18:43:35 (running for 00:02:42.76)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 59%|█████▊    | 176/300 [02:34<01:45,  1.18it/s]
 59%|█████▉    | 177/300 [02:35<01:37,  1.26it/s]
 59%|█████▉    | 178/300 [02:36<01:32,  1.32it/s]
 60%|█████▉    | 179/300 [02:36<01:29,  1.35it/s]
 60%|██████    | 180/300 [02:37<01:26,  1.38it/s]
 60%|██████    | 181/300 [02:38<01:43,  1.15it/s]


== Status ==
Current time: 2022-12-11 18:43:40 (running for 00:02:47.91)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 61%|██████    | 182/300 [02:39<01:36,  1.22it/s]
 61%|██████    | 183/300 [02:40<01:31,  1.28it/s]
 61%|██████▏   | 184/300 [02:40<01:26,  1.34it/s]
 62%|██████▏   | 185/300 [02:41<01:23,  1.38it/s]
 62%|██████▏   | 186/300 [02:42<01:38,  1.16it/s]
 62%|██████▏   | 187/300 [02:43<01:31,  1.24it/s]
 63%|██████▎   | 188/300 [02:44<01:25,  1.31it/s]


== Status ==
Current time: 2022-12-11 18:43:45 (running for 00:02:53.15)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 63%|██████▎   | 189/300 [02:44<01:22,  1.35it/s]
 63%|██████▎   | 190/300 [02:45<01:19,  1.39it/s]
 64%|██████▎   | 191/300 [02:46<01:32,  1.17it/s]
 64%|██████▍   | 192/300 [02:47<01:27,  1.24it/s]
 64%|██████▍   | 193/300 [02:47<01:22,  1.30it/s]
 65%|██████▍   | 194/300 [02:48<01:18,  1.36it/s]
 65%|██████▌   | 195/300 [02:49<01:14,  1.40it/s]


== Status ==
Current time: 2022-12-11 18:43:50 (running for 00:02:58.36)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 65%|██████▌   | 196/300 [02:50<01:28,  1.17it/s]
 66%|██████▌   | 197/300 [02:51<01:22,  1.25it/s]
 66%|██████▌   | 198/300 [02:51<01:17,  1.32it/s]
 66%|██████▋   | 199/300 [02:52<01:13,  1.37it/s]
 67%|██████▋   | 200/300 [02:53<01:11,  1.40it/s]
 67%|██████▋   | 201/300 [02:54<01:24,  1.17it/s]


== Status ==
Current time: 2022-12-11 18:43:55 (running for 00:03:03.46)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 67%|██████▋   | 202/300 [02:55<01:20,  1.22it/s]
 68%|██████▊   | 203/300 [02:55<01:15,  1.29it/s]
 68%|██████▊   | 204/300 [02:56<01:11,  1.34it/s]
 68%|██████▊   | 205/300 [02:57<01:08,  1.38it/s]
 69%|██████▊   | 206/300 [02:58<01:21,  1.15it/s]
 69%|██████▉   | 207/300 [02:58<01:15,  1.24it/s]
 69%|██████▉   | 208/300 [02:59<01:10,  1.30it/s]


== Status ==
Current time: 2022-12-11 18:44:00 (running for 00:03:08.71)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 70%|██████▉   | 209/300 [03:00<01:07,  1.35it/s]
 70%|███████   | 210/300 [03:00<01:04,  1.39it/s]
 70%|███████   | 211/300 [03:02<01:15,  1.17it/s]
 71%|███████   | 212/300 [03:02<01:11,  1.24it/s]
 71%|███████   | 213/300 [03:03<01:06,  1.30it/s]
 71%|███████▏  | 214/300 [03:04<01:03,  1.36it/s]
 72%|███████▏  | 215/300 [03:04<01:00,  1.40it/s]


== Status ==
Current time: 2022-12-11 18:44:06 (running for 00:03:13.93)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 72%|███████▏  | 216/300 [03:06<01:12,  1.16it/s]
 72%|███████▏  | 217/300 [03:06<01:06,  1.25it/s]
 73%|███████▎  | 218/300 [03:07<01:02,  1.31it/s]
 73%|███████▎  | 219/300 [03:08<01:00,  1.34it/s]
 73%|███████▎  | 220/300 [03:08<00:57,  1.39it/s]
 74%|███████▎  | 221/300 [03:09<01:07,  1.16it/s]


== Status ==
Current time: 2022-12-11 18:44:11 (running for 00:03:19.06)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 74%|███████▍  | 222/300 [03:10<01:03,  1.23it/s]
 74%|███████▍  | 223/300 [03:11<00:59,  1.29it/s]
 75%|███████▍  | 224/300 [03:12<00:56,  1.34it/s]
 75%|███████▌  | 225/300 [03:12<00:54,  1.38it/s]
 75%|███████▌  | 226/300 [03:13<01:03,  1.16it/s]
 76%|███████▌  | 227/300 [03:14<00:58,  1.25it/s]
 76%|███████▌  | 228/300 [03:15<00:55,  1.31it/s]


== Status ==
Current time: 2022-12-11 18:44:16 (running for 00:03:24.29)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 76%|███████▋  | 229/300 [03:15<00:52,  1.35it/s]
 77%|███████▋  | 230/300 [03:16<00:50,  1.38it/s]
 77%|███████▋  | 231/300 [03:17<00:59,  1.16it/s]
 77%|███████▋  | 232/300 [03:18<00:55,  1.22it/s]
 78%|███████▊  | 233/300 [03:19<00:51,  1.29it/s]
 78%|███████▊  | 234/300 [03:19<00:48,  1.35it/s]
 78%|███████▊  | 235/300 [03:20<00:46,  1.39it/s]


== Status ==
Current time: 2022-12-11 18:44:21 (running for 00:03:29.56)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 79%|███████▊  | 236/300 [03:21<00:55,  1.16it/s]
 79%|███████▉  | 237/300 [03:22<00:50,  1.25it/s]
 79%|███████▉  | 238/300 [03:23<00:47,  1.32it/s]
 80%|███████▉  | 239/300 [03:23<00:44,  1.36it/s]
 80%|████████  | 240/300 [03:24<00:42,  1.41it/s]
 80%|████████  | 241/300 [03:25<00:50,  1.17it/s]


== Status ==
Current time: 2022-12-11 18:44:26 (running for 00:03:34.61)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 81%|████████  | 242/300 [03:26<00:46,  1.24it/s]
 81%|████████  | 243/300 [03:26<00:43,  1.30it/s]
 81%|████████▏ | 244/300 [03:27<00:41,  1.36it/s]
 82%|████████▏ | 245/300 [03:28<00:39,  1.40it/s]
 82%|████████▏ | 246/300 [03:29<00:46,  1.17it/s]
 82%|████████▏ | 247/300 [03:30<00:42,  1.25it/s]
 83%|████████▎ | 248/300 [03:30<00:39,  1.32it/s]


== Status ==
Current time: 2022-12-11 18:44:32 (running for 00:03:39.80)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 83%|████████▎ | 249/300 [03:31<00:37,  1.37it/s]
 83%|████████▎ | 250/300 [03:32<00:35,  1.40it/s]
 84%|████████▎ | 251/300 [03:33<00:41,  1.17it/s]
 84%|████████▍ | 252/300 [03:34<00:40,  1.20it/s]
 84%|████████▍ | 253/300 [03:34<00:36,  1.28it/s]
 85%|████████▍ | 254/300 [03:35<00:34,  1.33it/s]
 85%|████████▌ | 255/300 [03:36<00:32,  1.37it/s]


== Status ==
Current time: 2022-12-11 18:44:37 (running for 00:03:45.12)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 85%|████████▌ | 256/300 [03:37<00:37,  1.16it/s]
 86%|████████▌ | 257/300 [03:37<00:34,  1.24it/s]
 86%|████████▌ | 258/300 [03:38<00:32,  1.30it/s]
 86%|████████▋ | 259/300 [03:39<00:30,  1.34it/s]
 87%|████████▋ | 260/300 [03:39<00:28,  1.39it/s]
 87%|████████▋ | 261/300 [03:41<00:33,  1.17it/s]


== Status ==
Current time: 2022-12-11 18:44:42 (running for 00:03:50.20)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 87%|████████▋ | 262/300 [03:41<00:30,  1.24it/s]
 88%|████████▊ | 263/300 [03:42<00:28,  1.30it/s]
 88%|████████▊ | 264/300 [03:43<00:26,  1.36it/s]
 88%|████████▊ | 265/300 [03:43<00:25,  1.40it/s]
 89%|████████▊ | 266/300 [03:45<00:29,  1.17it/s]
 89%|████████▉ | 267/300 [03:45<00:26,  1.24it/s]
 89%|████████▉ | 268/300 [03:46<00:24,  1.30it/s]


== Status ==
Current time: 2022-12-11 18:44:47 (running for 00:03:55.44)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 90%|████████▉ | 269/300 [03:47<00:22,  1.35it/s]
 90%|█████████ | 270/300 [03:47<00:21,  1.39it/s]
 90%|█████████ | 271/300 [03:48<00:24,  1.17it/s]
 91%|█████████ | 272/300 [03:49<00:22,  1.23it/s]
 91%|█████████ | 273/300 [03:50<00:20,  1.29it/s]
 91%|█████████▏| 274/300 [03:50<00:19,  1.35it/s]
 92%|█████████▏| 275/300 [03:51<00:18,  1.39it/s]


== Status ==
Current time: 2022-12-11 18:44:52 (running for 00:04:00.68)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 92%|█████████▏| 276/300 [03:52<00:20,  1.17it/s]
 92%|█████████▏| 277/300 [03:53<00:18,  1.25it/s]
 93%|█████████▎| 278/300 [03:54<00:16,  1.32it/s]
 93%|█████████▎| 279/300 [03:54<00:15,  1.37it/s]
 93%|█████████▎| 280/300 [03:55<00:14,  1.40it/s]
 94%|█████████▎| 281/300 [03:56<00:16,  1.17it/s]


== Status ==
Current time: 2022-12-11 18:44:57 (running for 00:04:05.74)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 94%|█████████▍| 282/300 [03:57<00:14,  1.24it/s]
 94%|█████████▍| 283/300 [03:58<00:13,  1.30it/s]
 95%|█████████▍| 284/300 [03:58<00:11,  1.36it/s]
 95%|█████████▌| 285/300 [03:59<00:10,  1.40it/s]
 95%|█████████▌| 286/300 [04:00<00:12,  1.15it/s]
 96%|█████████▌| 287/300 [04:01<00:10,  1.24it/s]
 96%|█████████▌| 288/300 [04:01<00:09,  1.31it/s]


== Status ==
Current time: 2022-12-11 18:45:03 (running for 00:04:10.99)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 96%|█████████▋| 289/300 [04:02<00:08,  1.34it/s]
 97%|█████████▋| 290/300 [04:03<00:07,  1.37it/s]
 97%|█████████▋| 291/300 [04:04<00:07,  1.16it/s]
 97%|█████████▋| 292/300 [04:05<00:06,  1.23it/s]
 98%|█████████▊| 293/300 [04:05<00:05,  1.30it/s]
 98%|█████████▊| 294/300 [04:06<00:04,  1.36it/s]
 98%|█████████▊| 295/300 [04:07<00:03,  1.40it/s]


== Status ==
Current time: 2022-12-11 18:45:08 (running for 00:04:16.22)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 99%|█████████▊| 296/300 [04:08<00:03,  1.16it/s]
 99%|█████████▉| 297/300 [04:09<00:02,  1.24it/s]
 99%|█████████▉| 298/300 [04:09<00:01,  1.32it/s]
100%|█████████▉| 299/300 [04:10<00:00,  1.36it/s]
2022-12-11 18:45:11,798	INFO tune.py:777 -- Total run time: 259.57 seconds (259.42 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:45:11 (running for 00:04:19.44)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.5509632212730746 | Iter 80.000: -0.6713954514513413 | Iter 40.000: -0.6913749567077805 | Iter 20.000: -0.6916017152058581 | Iter 10.000: -0.7034627677251895
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-40-52
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

  0%|          | 0/300 [00:00<?, ?it/s]m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_8fcbb_00000,2022-12-11_18-45-39,True,,4772f80134bf4addb93fd7b3b01fc1c3,VGI-DEKSPTOP-TIJMEn,20,0.692405,172.29.86.26,4777,True,24.9468,0.668856,24.9468,1670780739,0,,20,8fcbb_00000,0.107618
run_one_training_8fcbb_00001,2022-12-11_18-46-41,True,,f5d4989fe90f49d9917997aa94889042,VGI-DEKSPTOP-TIJMEn,40,0.691313,172.29.86.26,4868,True,80.7011,1.7252,80.7011,1670780801,0,,40,8fcbb_00001,0.239402
run_one_training_8fcbb_00002,2022-12-11_18-46-08,True,,60eefd3e925041bbab7b223c983492af,VGI-DEKSPTOP-TIJMEn,192,0.944763,172.29.86.26,4870,,47.7945,0.342552,47.7945,1670780768,0,,192,8fcbb_00002,0.275795
run_one_training_8fcbb_00003,2022-12-11_18-45-27,True,,3f72c73144224320a19db24aececbd4e,VGI-DEKSPTOP-TIJMEn,10,0.705788,172.29.86.26,4872,True,6.93756,0.419066,6.93756,1670780727,0,,10,8fcbb_00003,0.251698
run_one_training_8fcbb_00004,2022-12-11_18-45-43,True,,a0bdb09c70d547cca218be2b01159f73,VGI-DEKSPTOP-TIJMEn,20,0.693032,172.29.86.26,4874,True,22.1297,0.605264,22.1297,1670780743,0,,20,8fcbb_00004,0.284742
run_one_training_8fcbb_00005,2022-12-11_18-45-32,True,,c9570326c23147b9a2532abb57e325f1,VGI-DEKSPTOP-TIJMEn,10,0.703318,172.29.86.26,4876,True,11.5775,0.633123,11.5775,1670780732,0,,10,8fcbb_00005,0.2909
run_one_training_8fcbb_00006,2022-12-11_18-45-26,True,,cbb8eb2df83e40069784a6fd6f465786,VGI-DEKSPTOP-TIJMEn,10,0.703758,172.29.86.26,4878,True,6.03261,0.355774,6.03261,1670780726,0,,10,8fcbb_00006,0.296273
run_one_training_8fcbb_00007,2022-12-11_18-45-32,True,,c57a32a0704544eba18fe1d73a425d2c,VGI-DEKSPTOP-TIJMEn,20,0.69096,172.29.86.26,4880,True,11.132,0.318069,11.132,1670780732,0,,20,8fcbb_00007,0.25844
run_one_training_8fcbb_00008,2022-12-11_18-45-43,True,,4982ad158aa34488a64418a3ddcc343f,VGI-DEKSPTOP-TIJMEn,20,0.691313,172.29.86.26,4882,True,22.6797,0.664389,22.6797,1670780743,0,,20,8fcbb_00008,0.294485
run_one_training_8fcbb_00009,2022-12-11_18-45-29,True,,321986828d56420eb9f6dc4ecd4fc595,VGI-DEKSPTOP-TIJMEn,10,0.703652,172.29.86.26,4885,True,9.07191,0.505988,9.07191,1670780729,0,,10,8fcbb_00009,0.297911


  0%|          | 1/300 [00:02<13:24,  2.69s/it]


[2m[36m(run_one_training pid=4878)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.05, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.8, 'model_embedding_size': 16, 'model_attention_heads': 2, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '10d00c915fb74d0d8e3decc1ab031a85'}}}
[2m[36m(run_one_training pid=4872)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.05, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.995, 'model_embedding_size': 64, 'model_attention_heads': 1, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '10d00c915fb74d0d8e3decc1ab031a85'}}}
[2m[36m(run_one_training pid=4885)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.1, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.5

  1%|          | 2/300 [00:05<14:46,  2.98s/it]
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=4887)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=4889)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 1/300 [00:01<06:11,  1.24s/it]
  1%|          | 3/300 [00:07<11:00,  2.22s/it]
  0%|          | 1/300 [00:01<06:34,  1.32s/it]
  0%|          | 1/300 [00:01<07:18,  1.47s/it]
  0%|          | 1/300 [00:01<07:04,  1.42s/it]
  0%|          | 1/300 [00:01<07:46,  1.56s/it]
  1%|          | 2/300 [00:01<03:21,  1.48it/s]
  0%|          | 1/300 [00:01<07:34,  1.52s/it]
  1%|          | 2/300 [00:01<03:32,  1.40it/s]
  0%|          | 1/300 [00:01<08:56,  1.80s/it]
  1%|          | 2/300 [00:01<04:20,  1.15it/s]
  0%|          | 1/300 [00:01<08:38,  1.73s/it]
  1%|          | 3/300 [00:01<02:34,  1.93it/s]
  1%|          | 2/300 [00:01<04:07,  1.20it/s]
  1%|          | 3/300 [00:01<02:36,  1.90it/s]
  1%|          | 3/300 [00:02<03:05,  1.60it/s]
  1%|▏         | 4/300 [00:02<02:07,  2.31it/s]
  1%|          | 2/300 [00:02<04:55,  1.01it/s]
  1%|          | 2/300 [00:02<05:15,  1.06s/it]
  1%|▏         | 4/300 [00:02<02:12,  2.24it/s

== Status ==
Current time: 2022-12-11 18:45:24 (running for 00:00:12.36)
Memory usage on this node: 8.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                   |        

  0%|          | 1/300 [00:03<15:21,  3.08s/it]
  2%|▏         | 5/300 [00:03<02:30,  1.96it/s]
  0%|          | 1/300 [00:02<14:33,  2.92s/it]
  2%|▏         | 5/300 [00:03<02:36,  1.88it/s]
  1%|          | 2/300 [00:03<07:48,  1.57s/it]
  2%|▏         | 6/300 [00:03<02:57,  1.65it/s]
  1%|▏         | 4/300 [00:03<03:48,  1.30it/s]
  0%|          | 1/300 [00:03<16:52,  3.39s/it]
  1%|          | 3/300 [00:03<04:47,  1.03it/s]
  1%|▏         | 4/300 [00:03<03:42,  1.33it/s]
  1%|          | 2/300 [00:03<07:31,  1.52s/it]
  1%|▏         | 4/300 [00:03<03:35,  1.37it/s]
  2%|▏         | 6/300 [00:03<03:13,  1.52it/s]
  2%|▏         | 5/300 [00:09<07:54,  1.61s/it]
  2%|▏         | 7/300 [00:03<02:32,  1.92it/s]
  1%|▏         | 4/300 [00:04<03:55,  1.26it/s]
  2%|▏         | 5/300 [00:04<03:33,  1.38it/s]
  2%|▏         | 7/300 [00:04<02:43,  1.79it/s]
  3%|▎         | 8/300 [00:04<02:10,  2.23it/s]
  2%|▏         | 5/300 [00:04<03:30,  1.40it/s]
  1%|          | 3/300 [00:04<05:32,  1.

== Status ==
Current time: 2022-12-11 18:45:29 (running for 00:00:17.55)
Memory usage on this node: 7.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.6967702914650241
Resources requested: 12.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (12 RUNNING, 4 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |      

  3%|▎         | 9/300 [00:08<03:29,  1.39it/s]
  5%|▍         | 14/300 [00:08<02:10,  2.19it/s]
  6%|▌         | 17/300 [00:08<02:15,  2.09it/s]
  2%|▏         | 7/300 [00:08<05:22,  1.10s/it]
  2%|▏         | 5/300 [00:08<07:12,  1.46s/it]
  2%|▏         | 6/300 [00:08<07:15,  1.48s/it]
  1%|          | 2/300 [00:08<19:43,  3.97s/it]
  5%|▌         | 15/300 [00:08<01:59,  2.38it/s]
  3%|▎         | 8/300 [00:14<07:18,  1.50s/it]
  2%|▏         | 5/300 [00:08<07:05,  1.44s/it]
  6%|▌         | 18/300 [00:08<01:58,  2.37it/s]
  2%|▏         | 6/300 [00:08<06:49,  1.39s/it]
  6%|▋         | 19/300 [00:09<01:50,  2.55it/s]
  3%|▎         | 8/300 [00:08<04:35,  1.06it/s]
  2%|▏         | 7/300 [00:09<06:05,  1.25s/it]
  7%|▋         | 20/300 [00:09<01:36,  2.90it/s]
  4%|▎         | 11/300 [00:09<04:05,  1.18it/s]
  4%|▎         | 11/300 [00:09<04:04,  1.18it/s]
  2%|▏         | 7/300 [00:08<05:40,  1.16s/it]
  3%|▎         | 9/300 [00:09<03:52,  1.25it/s]
  5%|▌         | 16/300 [00:09<0

== Status ==
Current time: 2022-12-11 18:45:34 (running for 00:00:22.57)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6852264242867628 | Iter 10.000: -0.7033175801237425
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                        

  3%|▎         | 9/300 [00:13<05:12,  1.07s/it]
 11%|█         | 32/300 [00:13<01:20,  3.34it/s]
  3%|▎         | 9/300 [00:13<05:25,  1.12s/it]
 11%|█         | 33/300 [00:13<01:11,  3.76it/s]
  7%|▋         | 20/300 [00:13<02:02,  2.28it/s]
  4%|▍         | 13/300 [00:19<04:55,  1.03s/it]
 11%|█▏        | 34/300 [00:13<01:04,  4.10it/s]
 12%|█▏        | 35/300 [00:13<00:58,  4.55it/s]
  3%|▎         | 10/300 [00:13<04:40,  1.03it/s]
  3%|▎         | 10/300 [00:13<04:49,  1.00it/s]
  5%|▍         | 14/300 [00:20<04:19,  1.10it/s]
  7%|▋         | 21/300 [00:14<02:25,  1.92it/s]
 12%|█▏        | 36/300 [00:14<01:24,  3.13it/s]
  7%|▋         | 22/300 [00:14<02:08,  2.16it/s]
  2%|▏         | 5/300 [00:14<11:32,  2.35s/it]
 12%|█▏        | 37/300 [00:14<01:12,  3.65it/s]
 13%|█▎        | 38/300 [00:14<01:03,  4.11it/s]
  5%|▌         | 15/300 [00:20<03:52,  1.23it/s]
  8%|▊         | 23/300 [00:14<01:56,  2.38it/s]
 13%|█▎        | 39/300 [00:14<00:57,  4.52it/s]
 13%|█▎        | 40/300

== Status ==
Current time: 2022-12-11 18:45:39 (running for 00:00:27.62)
Memory usage on this node: 6.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6788867823779583 | Iter 20.000: -0.6901815791303912 | Iter 10.000: -0.702228075514237
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|        

 11%|█         | 32/300 [00:18<01:52,  2.38it/s]
 18%|█▊        | 54/300 [00:18<00:53,  4.63it/s]
 18%|█▊        | 55/300 [00:18<00:49,  4.99it/s]
 11%|█         | 33/300 [00:18<01:45,  2.52it/s]
  2%|▏         | 7/300 [00:18<10:56,  2.24s/it]
 11%|█▏        | 34/300 [00:19<01:38,  2.70it/s]
 19%|█▊        | 56/300 [00:19<01:15,  3.25it/s]
  5%|▌         | 16/300 [00:19<04:29,  1.05it/s]
 12%|█▏        | 35/300 [00:19<01:33,  2.85it/s]
 19%|█▉        | 57/300 [00:19<01:03,  3.84it/s]
 19%|█▉        | 58/300 [00:19<00:55,  4.34it/s]
  5%|▌         | 16/300 [00:19<04:40,  1.01it/s]
 20%|█▉        | 59/300 [00:19<00:50,  4.77it/s]
 20%|██        | 60/300 [00:19<00:47,  5.02it/s]
  6%|▌         | 17/300 [00:19<04:00,  1.17it/s]
 12%|█▏        | 36/300 [00:20<01:58,  2.23it/s]
  6%|▌         | 17/300 [00:20<04:14,  1.11it/s]
 12%|█▏        | 37/300 [00:20<01:45,  2.50it/s]
 20%|██        | 61/300 [00:20<01:11,  3.34it/s]
  6%|▌         | 18/300 [00:20<03:40,  1.28it/s]
 21%|██        | 62/3

== Status ==
Current time: 2022-12-11 18:45:44 (running for 00:00:32.76)
Memory usage on this node: 5.2/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.702228075514237
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|        

  3%|▎         | 10/300 [00:23<08:27,  1.75s/it]
 25%|██▌       | 76/300 [00:23<00:58,  3.82it/s]
 26%|██▌       | 77/300 [00:23<00:49,  4.46it/s]
 26%|██▌       | 78/300 [00:24<00:44,  5.03it/s]
 26%|██▋       | 79/300 [00:24<00:39,  5.57it/s]
 27%|██▋       | 80/300 [00:24<00:36,  5.96it/s]
 27%|██▋       | 81/300 [00:24<00:56,  3.88it/s]
 27%|██▋       | 82/300 [00:24<00:48,  4.49it/s]
 28%|██▊       | 83/300 [00:25<00:43,  5.03it/s]
 28%|██▊       | 84/300 [00:25<00:38,  5.57it/s]
 28%|██▊       | 85/300 [00:25<00:37,  5.81it/s]
 29%|██▊       | 86/300 [00:25<00:54,  3.95it/s]
 29%|██▉       | 87/300 [00:26<00:46,  4.59it/s]
  4%|▎         | 11/300 [00:26<09:25,  1.96s/it]
 29%|██▉       | 88/300 [00:26<00:40,  5.18it/s]
 30%|██▉       | 89/300 [00:26<00:36,  5.72it/s]
 30%|███       | 90/300 [00:26<00:34,  6.06it/s]
 30%|███       | 91/300 [00:26<00:53,  3.94it/s]
 31%|███       | 92/300 [00:27<00:46,  4.52it/s]
 31%|███       | 93/300 [00:27<00:40,  5.10it/s]
 31%|███▏      | 94/

== Status ==
Current time: 2022-12-11 18:45:49 (running for 00:00:37.82)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 34%|███▎      | 101/300 [00:28<00:51,  3.88it/s]
 34%|███▍      | 102/300 [00:29<00:44,  4.46it/s]
  4%|▍         | 13/300 [00:29<08:16,  1.73s/it]
 34%|███▍      | 103/300 [00:29<00:39,  5.05it/s]
 35%|███▍      | 104/300 [00:29<00:35,  5.56it/s]
 35%|███▌      | 105/300 [00:29<00:32,  5.99it/s]
 35%|███▌      | 106/300 [00:29<00:49,  3.92it/s]
 36%|███▌      | 107/300 [00:30<00:42,  4.56it/s]
 36%|███▌      | 108/300 [00:30<00:37,  5.11it/s]
 36%|███▋      | 109/300 [00:30<00:34,  5.61it/s]
 37%|███▋      | 110/300 [00:30<00:31,  6.06it/s]
  5%|▍         | 14/300 [00:30<07:58,  1.67s/it]
 37%|███▋      | 111/300 [00:30<00:47,  3.94it/s]
 37%|███▋      | 112/300 [00:31<00:41,  4.57it/s]
 38%|███▊      | 113/300 [00:31<00:36,  5.15it/s]
 38%|███▊      | 114/300 [00:31<00:32,  5.66it/s]
 38%|███▊      | 115/300 [00:31<00:30,  6.05it/s]
 39%|███▊      | 116/300 [00:31<00:46,  3.99it/s]
 39%|███▉      | 117/300 [00:32<00:40,  4.48it/s]
 39%|███▉      | 118/300 [00:32<00:35,  5.07it/s]
  

== Status ==
Current time: 2022-12-11 18:45:54 (running for 00:00:42.91)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 42%|████▏     | 126/300 [00:33<00:45,  3.86it/s]
 42%|████▏     | 127/300 [00:34<00:38,  4.49it/s]
 43%|████▎     | 128/300 [00:34<00:33,  5.10it/s]
 43%|████▎     | 129/300 [00:34<00:31,  5.44it/s]
 43%|████▎     | 130/300 [00:34<00:29,  5.86it/s]
  5%|▌         | 16/300 [00:34<09:23,  1.98s/it]
 44%|████▎     | 131/300 [00:35<00:47,  3.57it/s]
 44%|████▍     | 132/300 [00:35<00:40,  4.16it/s]
 44%|████▍     | 133/300 [00:35<00:35,  4.75it/s]
 45%|████▍     | 134/300 [00:35<00:31,  5.25it/s]
 45%|████▌     | 135/300 [00:35<00:28,  5.71it/s]
 45%|████▌     | 136/300 [00:36<00:42,  3.86it/s]
 46%|████▌     | 137/300 [00:36<00:36,  4.49it/s]
 46%|████▌     | 138/300 [00:36<00:31,  5.09it/s]
 46%|████▋     | 139/300 [00:36<00:28,  5.60it/s]
 47%|████▋     | 140/300 [00:36<00:26,  6.05it/s]
  6%|▌         | 17/300 [00:36<09:02,  1.92s/it]
 47%|████▋     | 141/300 [00:37<00:40,  3.96it/s]
 47%|████▋     | 142/300 [00:37<00:34,  4.57it/s]
 48%|████▊     | 143/300 [00:37<00:30,  5.16it/s]
 4

== Status ==
Current time: 2022-12-11 18:45:59 (running for 00:00:47.96)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 50%|█████     | 151/300 [00:39<00:38,  3.87it/s]
 51%|█████     | 152/300 [00:39<00:33,  4.42it/s]
 51%|█████     | 153/300 [00:39<00:29,  5.01it/s]
 51%|█████▏    | 154/300 [00:39<00:26,  5.50it/s]
 52%|█████▏    | 155/300 [00:39<00:24,  5.90it/s]
 52%|█████▏    | 156/300 [00:40<00:37,  3.86it/s]
 52%|█████▏    | 157/300 [00:40<00:31,  4.50it/s]
  6%|▋         | 19/300 [00:40<08:40,  1.85s/it]
 53%|█████▎    | 158/300 [00:40<00:28,  5.03it/s]
 53%|█████▎    | 159/300 [00:40<00:25,  5.47it/s]
 53%|█████▎    | 160/300 [00:40<00:24,  5.78it/s]
 54%|█████▎    | 161/300 [00:41<00:36,  3.84it/s]
 54%|█████▍    | 162/300 [00:41<00:31,  4.44it/s]
 54%|█████▍    | 163/300 [00:41<00:27,  4.94it/s]
 55%|█████▍    | 164/300 [00:41<00:24,  5.49it/s]
 55%|█████▌    | 165/300 [00:41<00:22,  5.94it/s]
  7%|▋         | 20/300 [00:42<08:34,  1.84s/it]
 55%|█████▌    | 166/300 [00:42<00:34,  3.94it/s]
 56%|█████▌    | 167/300 [00:42<00:29,  4.58it/s]
 56%|█████▌    | 168/300 [00:42<00:26,  5.07it/s]
 5

== Status ==
Current time: 2022-12-11 18:46:04 (running for 00:00:53.00)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 58%|█████▊    | 174/300 [00:43<00:23,  5.33it/s]
 58%|█████▊    | 175/300 [00:43<00:21,  5.73it/s]
 59%|█████▊    | 176/300 [00:44<00:31,  3.89it/s]
 59%|█████▉    | 177/300 [00:44<00:27,  4.52it/s]
 59%|█████▉    | 178/300 [00:44<00:23,  5.11it/s]
 60%|█████▉    | 179/300 [00:44<00:21,  5.60it/s]
 60%|██████    | 180/300 [00:44<00:19,  6.05it/s]
  7%|▋         | 21/300 [00:44<09:50,  2.12s/it]
 60%|██████    | 181/300 [00:45<00:30,  3.85it/s]
 61%|██████    | 182/300 [00:45<00:26,  4.47it/s]
 61%|██████    | 183/300 [00:45<00:22,  5.09it/s]
 61%|██████▏   | 184/300 [00:45<00:20,  5.63it/s]
 62%|██████▏   | 185/300 [00:45<00:19,  5.90it/s]
 62%|██████▏   | 186/300 [00:46<00:29,  3.82it/s]
 62%|██████▏   | 187/300 [00:46<00:25,  4.37it/s]
 63%|██████▎   | 188/300 [00:46<00:22,  4.92it/s]
  7%|▋         | 22/300 [00:46<09:20,  2.02s/it]
 63%|██████▎   | 189/300 [00:46<00:20,  5.38it/s]
 63%|██████▎   | 190/300 [00:47<00:19,  5.73it/s]


[2m[36m(run_one_training pid=4870)[0m Early stopping due to no improvement.


 64%|██████▎   | 191/300 [00:47<00:27,  4.02it/s]
  8%|▊         | 23/300 [00:48<08:58,  1.94s/it]


== Status ==
Current time: 2022-12-11 18:46:11 (running for 00:00:59.56)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

  8%|▊         | 24/300 [00:50<08:40,  1.89s/it]
  8%|▊         | 25/300 [00:51<08:29,  1.85s/it]
  9%|▊         | 26/300 [00:54<09:33,  2.09s/it]


== Status ==
Current time: 2022-12-11 18:46:17 (running for 00:01:05.75)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

  9%|▉         | 27/300 [00:56<09:04,  1.99s/it]
  9%|▉         | 28/300 [00:58<08:41,  1.92s/it]
 10%|▉         | 29/300 [00:59<08:21,  1.85s/it]


== Status ==
Current time: 2022-12-11 18:46:22 (running for 00:01:10.88)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 10%|█         | 30/300 [01:01<08:07,  1.80s/it]
 10%|█         | 31/300 [01:04<09:08,  2.04s/it]
 11%|█         | 32/300 [01:05<08:40,  1.94s/it]


== Status ==
Current time: 2022-12-11 18:46:28 (running for 00:01:16.88)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 11%|█         | 33/300 [01:07<08:18,  1.87s/it]
 11%|█▏        | 34/300 [01:09<08:03,  1.82s/it]
 12%|█▏        | 35/300 [01:10<07:53,  1.79s/it]


== Status ==
Current time: 2022-12-11 18:46:33 (running for 00:01:22.00)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 12%|█▏        | 36/300 [01:13<08:56,  2.03s/it]
 12%|█▏        | 37/300 [01:15<08:28,  1.93s/it]
 13%|█▎        | 38/300 [01:16<08:10,  1.87s/it]


== Status ==
Current time: 2022-12-11 18:46:39 (running for 00:01:28.03)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6841449332423508 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 13%|█▎        | 39/300 [01:18<07:55,  1.82s/it]
2022-12-11 18:46:41,759	INFO tune.py:777 -- Total run time: 89.89 seconds (89.76 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:46:41 (running for 00:01:29.77)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.6788798967997233 | Iter 80.000: -0.6788798967997233 | Iter 40.000: -0.6894030841067433 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.6978077987053741
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-45-11
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

  0%|          | 0/300 [00:00<?, ?it/s]m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_c56ac_00000,2022-12-11_18-46-58,True,,20eed3d834444df6ba582eb855585f44,VGI-DEKSPTOP-TIJMEn,10,0.703714,172.29.86.26,6823,True,13.9795,0.901052,13.9795,1670780818,0,,10,c56ac_00000,0.106333
run_one_training_c56ac_00001,2022-12-11_18-47-14,True,,fb03fae1054c4f8db5537ba43765b6ca,VGI-DEKSPTOP-TIJMEn,20,0.691313,172.29.86.26,6924,True,24.9081,0.668083,24.9081,1670780834,0,,20,c56ac_00001,0.277084
run_one_training_c56ac_00002,2022-12-11_18-47-29,True,,1b075359d7a64191a277d19eaf220d0a,VGI-DEKSPTOP-TIJMEn,162,0.145144,172.29.86.26,6926,,40.1815,0.302351,40.1815,1670780849,0,,162,c56ac_00002,0.292521
run_one_training_c56ac_00003,2022-12-11_18-47-04,True,,8503bfacf6644348b16f5602b044287d,VGI-DEKSPTOP-TIJMEn,10,0.710241,172.29.86.26,6928,True,14.8076,0.850543,14.8076,1670780824,0,,10,c56ac_00003,0.242852
run_one_training_c56ac_00004,2022-12-11_18-47-10,True,,e470f77774da41c4b60861c721af2a04,VGI-DEKSPTOP-TIJMEn,20,0.691744,172.29.86.26,6930,True,20.9198,0.540754,20.9198,1670780830,0,,20,c56ac_00004,0.340234
run_one_training_c56ac_00005,2022-12-11_18-47-21,True,,730181dfa6124e8e98b9c9ac3c6e81ee,VGI-DEKSPTOP-TIJMEn,20,0.693688,172.29.86.26,6932,True,32.1919,0.90465,32.1919,1670780841,0,,20,c56ac_00005,0.247017
run_one_training_c56ac_00006,2022-12-11_18-47-00,True,,442d11d149bf4ffda04d50dbf181e006,VGI-DEKSPTOP-TIJMEn,20,0.646658,172.29.86.26,6934,True,10.985,0.311344,10.985,1670780820,0,,20,c56ac_00006,0.241475
run_one_training_c56ac_00007,2022-12-11_18-47-04,True,,a5446a8aa48d4835ac7b21829be247b9,VGI-DEKSPTOP-TIJMEn,10,0.704,172.29.86.26,6937,True,14.6112,0.820253,14.6112,1670780824,0,,10,c56ac_00007,0.341208
run_one_training_c56ac_00008,2022-12-11_18-47-23,True,,a173a4c808504344a4ad7c7fac0c24ba,VGI-DEKSPTOP-TIJMEn,20,0.696398,172.29.86.26,6939,True,33.8731,0.880046,33.8731,1670780843,0,,20,c56ac_00008,0.246045
run_one_training_c56ac_00009,2022-12-11_18-47-14,True,,5df9c731347b45cd92ef172a177d7fe0,VGI-DEKSPTOP-TIJMEn,10,0.7052,172.29.86.26,6941,True,24.5566,1.43412,24.5566,1670780834,0,,10,c56ac_00009,0.227249


  0%|          | 1/300 [00:02<10:46,  2.16s/it]


[2m[36m(run_one_training pid=6924)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.05, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.8, 'model_embedding_size': 8, 'model_attention_heads': 1, 'model_layers': 5, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '1cb754b415df4156a1f5fa3ba15cac3c'}}}
[2m[36m(run_one_training pid=6932)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamma': 1, 'model_embedding_size': 32, 'model_attention_heads': 3, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '1cb754b415df4156a1f5fa3ba15cac3c'}}}
[2m[36m(run_one_training pid=6941)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.05, 'sgd_momentum': 0.9, 'scheduler_gamma': 1, 'mo

  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=6926)[0m Loading model...
[2m[36m(run_one_training pid=6939)[0m Loading model...
[2m[36m(run_one_training pid=6934)[0m Loading model...
[2m[36m(run_one_training pid=6943)[0m Loading model...
[2m[36m(run_one_training pid=6928)[0m Loading model...
[2m[36m(run_one_training pid=6949)[0m Loading model...
[2m[36m(run_one_training pid=6950)[0m Loading model...
[2m[36m(run_one_training pid=6930)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.001, 'sgd_momentum': 0.5, 'scheduler_gamma': 0.995, 'model_embedding_size': 8, 'model_attention_heads': 3, 'model_layers': 3, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '1cb754b415df4156a1f5fa3ba15cac3c'}}}
[2m[36m(run_one_training pid=6947)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.01, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.8, 'model_embeddi

  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=6930)[0m Loading model...
[2m[36m(run_one_training pid=6947)[0m Loading model...
[2m[36m(run_one_training pid=6937)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamma': 1, 'model_embedding_size': 16, 'model_attention_heads': 2, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '1cb754b415df4156a1f5fa3ba15cac3c'}}}
== Status ==
Current time: 2022-12-11 18:46:48 (running for 00:00:06.39)
Memory usage on this node: 7.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (16 RUNNING)
+--

  1%|          | 2/300 [00:05<13:11,  2.66s/it]
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=6952)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 1/300 [00:01<05:58,  1.20s/it]
  1%|          | 3/300 [00:06<09:11,  1.86s/it]
  1%|          | 2/300 [00:01<03:14,  1.54it/s]
  0%|          | 1/300 [00:01<07:06,  1.42s/it]
  1%|          | 3/300 [00:01<02:19,  2.13it/s]
  1%|          | 2/300 [00:01<03:52,  1.28it/s]
  1%|▏         | 4/300 [00:01<01:54,  2.58it/s]
  0%|          | 1/300 [00:01<09:21,  1.88s/it]
  1%|▏         | 4/300 [00:06<07:09,  1.45s/it]
  2%|▏         | 5/300 [00:02<01:36,  3.05it/s]
  1%|          | 3/300 [00:02<02:49,  1.76it/s]
  0%|          | 1/300 [00:02<10:18,  2.07s/it]
  1%|          | 2/300 [00:02<05:24,  1.09s/it]
  1%|▏         | 4/300 [00:02<02:28,  1.99it/s]
  0%|          | 1/300 [00:02<11:17,  2.27s/it]
  0%|          | 1/300 [00:02<14:21,  2.88s/it]
  2%|▏         | 5/300 [00:07<05:56,  1.21s/it]
  2%|▏         | 5/300 [00:02<02:15,  2.17it/s]
  1%|          | 3/300 [00:02<04:08,  1.19it/s]


== Status ==
Current time: 2022-12-11 18:46:53 (running for 00:00:11.40)
Memory usage on this node: 9.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                   |        

  2%|▏         | 6/300 [00:03<02:41,  1.82it/s]
  0%|          | 1/300 [00:03<15:54,  3.19s/it]
  1%|          | 2/300 [00:03<07:00,  1.41s/it]
  2%|▏         | 7/300 [00:03<02:08,  2.28it/s]
  0%|          | 1/300 [00:03<17:23,  3.49s/it]
  0%|          | 1/300 [00:03<17:37,  3.54s/it]
  3%|▎         | 8/300 [00:03<01:48,  2.70it/s]
  1%|▏         | 4/300 [00:03<03:36,  1.37it/s]
  1%|          | 2/300 [00:03<08:05,  1.63s/it]
  0%|          | 1/300 [00:03<15:52,  3.19s/it]
  0%|          | 1/300 [00:02<14:37,  2.94s/it]
  3%|▎         | 9/300 [00:03<01:34,  3.09it/s]
  1%|          | 2/300 [00:04<09:25,  1.90s/it]
  1%|          | 3/300 [00:03<05:46,  1.17s/it]
  3%|▎         | 10/300 [00:04<01:27,  3.30it/s]
  2%|▏         | 5/300 [00:04<03:21,  1.46it/s]
  2%|▏         | 6/300 [00:04<03:34,  1.37it/s]
  0%|          | 1/300 [00:04<21:25,  4.30s/it]
  1%|          | 2/300 [00:04<09:43,  1.96s/it]
  2%|▏         | 7/300 [00:04<02:55,  1.67it/s]
  0%|          | 1/300 [00:04<22:33,  4

== Status ==
Current time: 2022-12-11 18:46:58 (running for 00:00:16.42)
Memory usage on this node: 9.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.6705146109064419
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |             

  7%|▋         | 20/300 [00:08<01:31,  3.07it/s]
  5%|▌         | 15/300 [00:08<02:03,  2.31it/s]
  2%|▏         | 6/300 [00:08<07:11,  1.47s/it]
  1%|          | 3/300 [00:08<12:50,  2.59s/it]
  1%|          | 2/300 [00:08<20:12,  4.07s/it]
  2%|▏         | 6/300 [00:08<07:41,  1.57s/it]
  1%|          | 3/300 [00:09<13:50,  2.79s/it]
  2%|▏         | 7/300 [00:09<06:20,  1.30s/it]
  7%|▋         | 21/300 [00:09<02:29,  1.86it/s]
  7%|▋         | 22/300 [00:09<02:02,  2.28it/s]
  5%|▌         | 16/300 [00:09<03:08,  1.51it/s]
  2%|▏         | 6/300 [00:08<07:37,  1.56s/it]
  8%|▊         | 23/300 [00:09<01:43,  2.69it/s]
  6%|▌         | 17/300 [00:09<02:37,  1.80it/s]
  8%|▊         | 24/300 [00:09<01:30,  3.04it/s]
  6%|▌         | 18/300 [00:10<02:19,  2.02it/s]
  2%|▏         | 7/300 [00:09<06:42,  1.38s/it]
  3%|▎         | 8/300 [00:09<05:41,  1.17s/it]
  1%|▏         | 4/300 [00:10<11:24,  2.31s/it]
  8%|▊         | 25/300 [00:10<01:22,  3.33it/s]
  6%|▋         | 19/300 [00:10

== Status ==
Current time: 2022-12-11 18:47:03 (running for 00:00:21.43)
Memory usage on this node: 8.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6458770136038463 | Iter 10.000: -0.6918755240427951
Resources requested: 12.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (12 RUNNING, 4 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                      

  2%|▏         | 5/300 [00:13<11:31,  2.34s/it]
  3%|▎         | 8/300 [00:13<07:36,  1.56s/it]
 11%|█         | 32/300 [00:13<01:53,  2.36it/s]
  3%|▎         | 9/300 [00:13<06:10,  1.27s/it]
 11%|█         | 33/300 [00:13<01:36,  2.78it/s]
  3%|▎         | 9/300 [00:13<05:56,  1.22s/it]
 11%|█▏        | 34/300 [00:13<01:22,  3.21it/s]
  4%|▎         | 11/300 [00:13<06:19,  1.31s/it]
  3%|▎         | 9/300 [00:13<06:05,  1.26s/it]
  3%|▎         | 9/300 [00:13<05:57,  1.23s/it]
 12%|█▏        | 35/300 [00:14<01:17,  3.41it/s]
  3%|▎         | 9/300 [00:14<06:55,  1.43s/it]
  4%|▍         | 12/300 [00:14<05:33,  1.16s/it]
 12%|█▏        | 36/300 [00:14<01:57,  2.25it/s]
  4%|▎         | 11/300 [00:14<06:23,  1.33s/it]
  2%|▏         | 5/300 [00:15<12:14,  2.49s/it]
 12%|█▏        | 37/300 [00:14<01:32,  2.85it/s]
  1%|          | 3/300 [00:14<22:10,  4.48s/it]
 13%|█▎        | 38/300 [00:15<01:16,  3.44it/s]
  3%|▎         | 10/300 [00:15<06:00,  1.24s/it]
 13%|█▎        | 39/300 [00:1

== Status ==
Current time: 2022-12-11 18:47:08 (running for 00:00:26.70)
Memory usage on this node: 6.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6458770136038463 | Iter 10.000: -0.7035404841105144
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|         

 18%|█▊        | 53/300 [00:18<00:57,  4.33it/s]
  6%|▌         | 18/300 [00:18<03:25,  1.37it/s]
 18%|█▊        | 54/300 [00:18<00:50,  4.87it/s]
  3%|▎         | 9/300 [00:19<07:47,  1.61s/it]
  3%|▎         | 8/300 [00:18<09:15,  1.90s/it]
 18%|█▊        | 55/300 [00:18<00:46,  5.28it/s]
  6%|▌         | 17/300 [00:18<03:58,  1.19it/s]
  5%|▍         | 14/300 [00:19<04:40,  1.02it/s]
  6%|▋         | 19/300 [00:19<03:12,  1.46it/s]
  2%|▏         | 7/300 [00:19<11:25,  2.34s/it]
 19%|█▊        | 56/300 [00:19<01:19,  3.08it/s]
 19%|█▉        | 57/300 [00:19<01:05,  3.71it/s]
  6%|▌         | 18/300 [00:19<03:34,  1.31it/s]
 19%|█▉        | 58/300 [00:19<00:55,  4.34it/s]
 20%|█▉        | 59/300 [00:20<00:48,  4.96it/s]
  2%|▏         | 5/300 [00:19<16:13,  3.30s/it]
  5%|▌         | 15/300 [00:20<04:22,  1.08it/s]
  3%|▎         | 10/300 [00:20<07:10,  1.48s/it]
 20%|██        | 60/300 [00:20<00:43,  5.51it/s]
  3%|▎         | 9/300 [00:20<08:22,  1.73s/it]
  6%|▋         | 19/300 [

== Status ==
Current time: 2022-12-11 18:47:13 (running for 00:00:31.70)
Memory usage on this node: 6.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6689854077994823 | Iter 10.000: -0.7004427326998364
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

  6%|▋         | 19/300 [00:23<03:58,  1.18it/s]
  4%|▎         | 11/300 [00:23<08:42,  1.81s/it]
 25%|██▌       | 76/300 [00:24<01:04,  3.46it/s]
  2%|▏         | 6/300 [00:23<17:22,  3.55s/it]
 26%|██▌       | 77/300 [00:24<00:54,  4.05it/s]
 26%|██▌       | 78/300 [00:24<00:46,  4.75it/s]
  4%|▍         | 13/300 [00:24<06:31,  1.36s/it]
 26%|██▋       | 79/300 [00:24<00:40,  5.49it/s]
 27%|██▋       | 80/300 [00:24<00:35,  6.12it/s]
  4%|▍         | 12/300 [00:24<07:35,  1.58s/it]
 27%|██▋       | 81/300 [00:25<00:54,  4.02it/s]
 27%|██▋       | 82/300 [00:25<00:46,  4.73it/s]
  5%|▍         | 14/300 [00:25<05:46,  1.21s/it]
 28%|██▊       | 83/300 [00:25<00:39,  5.55it/s]
 28%|██▊       | 84/300 [00:25<00:34,  6.28it/s]
 28%|██▊       | 85/300 [00:25<00:30,  6.95it/s]
  4%|▍         | 13/300 [00:25<06:42,  1.40s/it]
 29%|██▊       | 86/300 [00:25<00:52,  4.08it/s]
 29%|██▉       | 87/300 [00:26<00:43,  4.86it/s]
 29%|██▉       | 88/300 [00:26<00:39,  5.34it/s]
  2%|▏         | 7/30

== Status ==
Current time: 2022-12-11 18:47:18 (running for 00:00:37.07)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 160.000: None | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.7033671879520019
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

  6%|▌         | 17/300 [00:29<05:41,  1.21s/it]
 34%|███▍      | 103/300 [00:29<00:34,  5.76it/s]
 35%|███▍      | 104/300 [00:29<00:31,  6.19it/s]
 35%|███▌      | 105/300 [00:29<00:31,  6.19it/s]
 35%|███▌      | 106/300 [00:29<00:49,  3.90it/s]
 36%|███▌      | 107/300 [00:29<00:41,  4.70it/s]
  5%|▌         | 16/300 [00:29<06:49,  1.44s/it]
  6%|▌         | 18/300 [00:30<05:24,  1.15s/it]
 36%|███▌      | 108/300 [00:30<00:34,  5.51it/s]
 36%|███▋      | 109/300 [00:30<00:30,  6.32it/s]
 37%|███▋      | 110/300 [00:30<00:26,  7.07it/s]
  3%|▎         | 9/300 [00:29<12:18,  2.54s/it]
 37%|███▋      | 111/300 [00:30<00:44,  4.25it/s]
 37%|███▋      | 112/300 [00:30<00:36,  5.10it/s]
  6%|▌         | 17/300 [00:30<06:03,  1.29s/it]
  6%|▋         | 19/300 [00:30<04:58,  1.06s/it]
 38%|███▊      | 113/300 [00:30<00:32,  5.83it/s]
 38%|███▊      | 114/300 [00:30<00:28,  6.57it/s]
 38%|███▊      | 115/300 [00:31<00:26,  7.09it/s]
 39%|███▊      | 116/300 [00:31<00:43,  4.22it/s]
 39%|██

== Status ==
Current time: 2022-12-11 18:47:24 (running for 00:00:42.25)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.7004427326998364
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 44%|████▍     | 133/300 [00:34<00:26,  6.33it/s]
 45%|████▌     | 135/300 [00:34<00:22,  7.38it/s]
 45%|████▌     | 136/300 [00:34<00:31,  5.29it/s]
 46%|████▌     | 138/300 [00:35<00:24,  6.55it/s]
 47%|████▋     | 140/300 [00:35<00:21,  7.52it/s]
  4%|▎         | 11/300 [00:34<12:08,  2.52s/it]
 47%|████▋     | 141/300 [00:35<00:29,  5.39it/s]
 48%|████▊     | 143/300 [00:35<00:23,  6.55it/s]
 48%|████▊     | 145/300 [00:35<00:20,  7.53it/s]
 49%|████▊     | 146/300 [00:37<01:03,  2.43it/s]
  4%|▍         | 12/300 [00:37<11:50,  2.47s/it]
 49%|████▉     | 147/300 [00:37<00:52,  2.91it/s]
 50%|████▉     | 149/300 [00:37<00:37,  4.01it/s]
 50%|█████     | 151/300 [00:38<00:36,  4.04it/s]
 51%|█████     | 153/300 [00:38<00:28,  5.10it/s]
 52%|█████▏    | 155/300 [00:38<00:23,  6.14it/s]


== Status ==
Current time: 2022-12-11 18:47:29 (running for 00:00:47.35)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: None | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.7004427326998364
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 52%|█████▏    | 156/300 [00:39<00:29,  4.85it/s]
 53%|█████▎    | 158/300 [00:39<00:23,  5.96it/s]
  4%|▍         | 13/300 [00:38<10:42,  2.24s/it]
 53%|█████▎    | 159/300 [00:39<00:22,  6.41it/s]
 54%|█████▎    | 161/300 [00:39<00:34,  4.04it/s]


[2m[36m(run_one_training pid=6926)[0m Early stopping due to no improvement.


  5%|▍         | 14/300 [00:40<09:52,  2.07s/it]


== Status ==
Current time: 2022-12-11 18:47:36 (running for 00:00:54.62)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.7036504646142324 | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.7004427326998364
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

  5%|▌         | 15/300 [00:46<14:47,  3.11s/it]
  5%|▌         | 16/300 [00:48<14:13,  3.00s/it]
  6%|▌         | 17/300 [00:50<12:17,  2.61s/it]


== Status ==
Current time: 2022-12-11 18:47:42 (running for 00:01:00.79)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.7036504646142324 | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913125067949295 | Iter 10.000: -0.7004427326998364
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

  6%|▌         | 18/300 [00:52<10:59,  2.34s/it]
  6%|▋         | 19/300 [00:53<10:04,  2.15s/it]
2022-12-11 18:47:46,191	INFO tune.py:777 -- Total run time: 64.36 seconds (64.23 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:47:46 (running for 00:01:04.24)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.7036504646142324 | Iter 80.000: -0.7036504646142324 | Iter 40.000: -0.6954371370375156 | Iter 20.000: -0.6913322613885005 | Iter 10.000: -0.7004427326998364
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-46-41
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

  0%|          | 0/300 [00:00<?, ?it/s]m 


Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_ebd16_00000,2022-12-11_18-48-05,True,,ddac4e6c36174acda7a15b8579994359,VGI-DEKSPTOP-TIJMEn,10,0.703275,172.29.86.26,8730,True,16.3813,0.92328,16.3813,1670780885,0,,10,ebd16_00000,0.106287
run_one_training_ebd16_00001,2022-12-11_18-48-20,True,,a401aff642154153927d0e9393478231,VGI-DEKSPTOP-TIJMEn,20,0.703275,172.29.86.26,8848,True,26.2266,0.720666,26.2266,1670780900,0,,20,ebd16_00001,0.392406
run_one_training_ebd16_00002,2022-12-11_18-48-06,True,,73bb386b72164fe19c42d324c5f18d81,VGI-DEKSPTOP-TIJMEn,10,0.704097,172.29.86.26,8850,True,11.7,0.587902,11.7,1670780886,0,,10,ebd16_00002,0.286545
run_one_training_ebd16_00003,2022-12-11_18-49-11,True,,80a1e39a9cfe49188834702c6bd198f0,VGI-DEKSPTOP-TIJMEn,300,0.0901589,172.29.86.26,8852,True,76.9199,0.144737,76.9199,1670780951,0,,300,ebd16_00003,0.281786
run_one_training_ebd16_00004,2022-12-11_18-48-01,True,,8aa798f4fd644df8844814aa55d172a3,VGI-DEKSPTOP-TIJMEn,10,0.707427,172.29.86.26,8854,True,7.61298,0.454904,7.61298,1670780881,0,,10,ebd16_00004,0.294076
run_one_training_ebd16_00005,2022-12-11_18-48-34,True,,0c257c1c3846488ab34f09cb280024e9,VGI-DEKSPTOP-TIJMEn,137,0.18866,172.29.86.26,8856,,40.9403,0.340459,40.9403,1670780914,0,,137,ebd16_00005,0.251345
run_one_training_ebd16_00006,2022-12-11_18-48-15,True,,0383406b15804fbba6506f65f49d5010,VGI-DEKSPTOP-TIJMEn,20,0.703387,172.29.86.26,8858,True,21.1524,0.545253,21.1524,1670780895,0,,20,ebd16_00006,0.316402
run_one_training_ebd16_00007,2022-12-11_18-48-03,True,,b99818eedb8d401a8a831116e5cc1e2d,VGI-DEKSPTOP-TIJMEn,10,0.703668,172.29.86.26,8861,True,9.60916,0.542042,9.60916,1670780883,0,,10,ebd16_00007,0.293161
run_one_training_ebd16_00008,2022-12-11_18-48-00,True,,3c3aa316ea604eff815d2dffd1e2e547,VGI-DEKSPTOP-TIJMEn,10,0.699129,172.29.86.26,8863,True,6.193,0.406137,6.193,1670780880,0,,10,ebd16_00008,0.275197
run_one_training_ebd16_00009,2022-12-11_18-48-10,True,,10e0adc3f0b04ed5bec3702df6d7a35d,VGI-DEKSPTOP-TIJMEn,10,0.703513,172.29.86.26,8865,True,16.5979,0.934655,16.5979,1670780890,0,,10,ebd16_00009,0.280766


  0%|          | 1/300 [00:02<13:22,  2.68s/it]


[2m[36m(run_one_training pid=8856)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.1, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.9, 'model_embedding_size': 8, 'model_attention_heads': 2, 'model_layers': 1, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'eb261a8e2ac64ada93a81b7604bc2d24'}}}
[2m[36m(run_one_training pid=8848)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.1, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.995, 'model_embedding_size': 64, 'model_attention_heads': 2, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'eb261a8e2ac64ada93a81b7604bc2d24'}}}
[2m[36m(run_one_training pid=8869)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.001, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.5,

  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=8848)[0m Loading model...
[2m[36m(run_one_training pid=8869)[0m Loading model...
[2m[36m(run_one_training pid=8865)[0m Loading model...
[2m[36m(run_one_training pid=8858)[0m Loading model...
[2m[36m(run_one_training pid=8873)[0m Loading model...
[2m[36m(run_one_training pid=8850)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.05, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.8, 'model_embedding_size': 64, 'model_attention_heads': 2, 'model_layers': 5, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': 'eb261a8e2ac64ada93a81b7604bc2d24'}}}
[2m[36m(run_one_training pid=8871)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.05, 'sgd_momentum': 0.5, 'scheduler_gamma': 1, 'model_embedding_size': 16, 'model_attention_heads': 2, 'model_layers': 7, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 

  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 
  1%|          | 2/300 [00:04<11:46,  2.37s/it]


[2m[36m(run_one_training pid=8850)[0m Loading model...
[2m[36m(run_one_training pid=8871)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 0/300 [00:00<?, ?it/s]m 


[2m[36m(run_one_training pid=8877)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]m 
  0%|          | 1/300 [00:01<06:18,  1.27s/it]
  1%|          | 3/300 [00:06<09:01,  1.82s/it]
  0%|          | 1/300 [00:01<06:58,  1.40s/it]
  1%|          | 2/300 [00:01<03:59,  1.25it/s]
  0%|          | 1/300 [00:01<06:59,  1.40s/it]
  0%|          | 1/300 [00:01<08:40,  1.74s/it]
  1%|          | 2/300 [00:01<04:01,  1.23it/s]
  1%|          | 2/300 [00:01<04:19,  1.15it/s]
  1%|          | 3/300 [00:02<03:09,  1.57it/s]
  0%|          | 1/300 [00:02<10:01,  2.01s/it]
  1%|          | 2/300 [00:02<05:19,  1.07s/it]
  1%|          | 3/300 [00:02<03:14,  1.53it/s]
  1%|          | 3/300 [00:02<03:04,  1.61it/s]
  1%|▏         | 4/300 [00:02<02:35,  1.90it/s]
  0%|          | 1/300 [00:02<11:27,  2.30s/it]
  1%|          | 2/300 [00:02<05:21,  1.08s/it]
  1%|▏         | 4/300 [00:07<08:02,  1.63s/it]
  2%|▏         | 5/300 [00:02<02:19,  2.11it/s]
  1%|▏         | 4/300 [00:02<02:43,  1.82it/s]
  1%|▏         | 4/300 [00:02<02:48,  1.76it/s

== Status ==
Current time: 2022-12-11 18:47:58 (running for 00:00:12.25)
Memory usage on this node: 9.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                   |        

  0%|          | 1/300 [00:03<19:53,  3.99s/it]
  1%|          | 2/300 [00:03<08:19,  1.68s/it]
  2%|▏         | 7/300 [00:04<02:50,  1.72it/s]
  2%|▏         | 6/300 [00:04<03:23,  1.44it/s]
  2%|▏         | 6/300 [00:04<03:34,  1.37it/s]
  1%|▏         | 4/300 [00:04<04:25,  1.12it/s]
  0%|          | 1/300 [00:04<21:41,  4.35s/it]
  1%|          | 2/300 [00:04<10:04,  2.03s/it]
  3%|▎         | 8/300 [00:04<02:34,  1.89it/s]
  1%|          | 2/300 [00:04<09:28,  1.91s/it]
  2%|▏         | 7/300 [00:04<03:01,  1.61it/s]
  1%|          | 3/300 [00:04<06:36,  1.34s/it]
  2%|▏         | 7/300 [00:04<03:12,  1.52it/s]
  3%|▎         | 9/300 [00:05<02:16,  2.12it/s]
  2%|▏         | 5/300 [00:04<03:58,  1.24it/s]
  3%|▎         | 8/300 [00:05<02:40,  1.82it/s]
  1%|          | 3/300 [00:04<06:48,  1.38s/it]
  1%|          | 2/300 [00:05<11:58,  2.41s/it]
  2%|▏         | 6/300 [00:05<04:27,  1.10it/s]
  3%|▎         | 8/300 [00:05<02:45,  1.76it/s]
  1%|          | 2/300 [00:04<10:46,  2.

== Status ==
Current time: 2022-12-11 18:48:03 (running for 00:00:17.53)
Memory usage on this node: 8.2/23.5 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.699160703147451
Resources requested: 13.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (13 RUNNING, 3 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |       

  3%|▎         | 8/300 [00:14<07:35,  1.56s/it]
  2%|▏         | 5/300 [00:09<07:45,  1.58s/it]
  1%|          | 2/300 [00:09<21:31,  4.33s/it]
  2%|▏         | 7/300 [00:09<05:53,  1.21s/it]
  6%|▌         | 17/300 [00:09<02:30,  1.88it/s]
  2%|▏         | 5/300 [00:08<07:39,  1.56s/it]
  5%|▌         | 16/300 [00:09<03:03,  1.55it/s]
  2%|▏         | 7/300 [00:09<06:02,  1.24s/it]
  6%|▌         | 18/300 [00:10<02:15,  2.09it/s]
  2%|▏         | 5/300 [00:09<08:06,  1.65s/it]
  6%|▋         | 19/300 [00:10<01:57,  2.39it/s]
  6%|▌         | 17/300 [00:10<02:34,  1.83it/s]
  1%|▏         | 4/300 [00:10<10:52,  2.20s/it]
  3%|▎         | 8/300 [00:09<05:04,  1.04s/it]
  2%|▏         | 6/300 [00:10<08:22,  1.71s/it]
  3%|▎         | 9/300 [00:15<06:40,  1.38s/it]
  7%|▋         | 20/300 [00:10<01:48,  2.59it/s]
  6%|▌         | 18/300 [00:10<02:14,  2.09it/s]
  3%|▎         | 8/300 [00:10<05:06,  1.05s/it]
  2%|▏         | 6/300 [00:10<08:23,  1.71s/it]
  6%|▋         | 19/300 [00:10<02

== Status ==
Current time: 2022-12-11 18:48:08 (running for 00:00:22.69)
Memory usage on this node: 6.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.6871176214578252 | Iter 10.000: -0.7036683292438587
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                        

  3%|▎         | 8/300 [00:14<06:53,  1.42s/it]
 10%|▉         | 29/300 [00:14<01:26,  3.15it/s]
  3%|▎         | 9/300 [00:14<05:51,  1.21s/it]
 10%|█         | 31/300 [00:14<01:49,  2.45it/s]
 10%|█         | 30/300 [00:14<01:20,  3.36it/s]
  4%|▎         | 11/300 [00:14<05:32,  1.15s/it]
 11%|█         | 32/300 [00:15<01:34,  2.83it/s]
  3%|▎         | 9/300 [00:14<06:03,  1.25s/it]
 11%|█         | 33/300 [00:15<01:24,  3.18it/s]
  4%|▍         | 12/300 [00:15<04:44,  1.01it/s]
 11%|█▏        | 34/300 [00:15<01:16,  3.49it/s]
  3%|▎         | 9/300 [00:15<06:05,  1.26s/it]
  3%|▎         | 10/300 [00:14<05:16,  1.09s/it]
 10%|█         | 31/300 [00:15<01:56,  2.32it/s]
 12%|█▏        | 35/300 [00:15<01:10,  3.78it/s]
  2%|▏         | 7/300 [00:15<09:20,  1.91s/it]
  3%|▎         | 10/300 [00:15<05:26,  1.13s/it]
 11%|█         | 32/300 [00:15<01:42,  2.63it/s]
  4%|▍         | 13/300 [00:15<04:09,  1.15it/s]
 11%|█         | 33/300 [00:16<01:31,  2.91it/s]
 12%|█▏        | 36/300 [

== Status ==
Current time: 2022-12-11 18:48:13 (running for 00:00:27.71)
Memory usage on this node: 6.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.6871176214578252 | Iter 10.000: -0.7033903785049915
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|         

 16%|█▌        | 48/300 [00:19<01:06,  3.77it/s]
 15%|█▌        | 45/300 [00:19<01:02,  4.09it/s]
  5%|▍         | 14/300 [00:18<04:33,  1.05it/s]
  2%|▏         | 6/300 [00:19<14:12,  2.90s/it]
 16%|█▋        | 49/300 [00:19<01:01,  4.05it/s]
  6%|▌         | 18/300 [00:19<03:32,  1.32it/s]
 17%|█▋        | 50/300 [00:20<01:00,  4.13it/s]
  5%|▍         | 14/300 [00:20<04:47,  1.01s/it]
 15%|█▌        | 46/300 [00:20<01:33,  2.71it/s]
  6%|▋         | 19/300 [00:20<03:16,  1.43it/s]
  5%|▌         | 15/300 [00:19<04:21,  1.09it/s]
 16%|█▌        | 47/300 [00:20<01:21,  3.09it/s]
 17%|█▋        | 51/300 [00:20<01:28,  2.80it/s]
 16%|█▌        | 48/300 [00:20<01:13,  3.43it/s]
 17%|█▋        | 52/300 [00:20<01:17,  3.20it/s]
 16%|█▋        | 49/300 [00:20<01:07,  3.72it/s]
 18%|█▊        | 53/300 [00:21<01:09,  3.56it/s]
  5%|▌         | 15/300 [00:21<04:27,  1.07it/s]
 17%|█▋        | 50/300 [00:20<01:02,  4.00it/s]
 18%|█▊        | 54/300 [00:21<01:01,  4.02it/s]
 18%|█▊        | 55/3

== Status ==
Current time: 2022-12-11 18:48:19 (running for 00:00:32.83)
Memory usage on this node: 6.2/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.694350461785992 | Iter 10.000: -0.7033903785049915
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|        

 22%|██▏       | 67/300 [00:24<01:06,  3.52it/s]
 21%|██        | 62/300 [00:24<01:13,  3.22it/s]
 23%|██▎       | 68/300 [00:24<00:59,  3.91it/s]
 21%|██        | 63/300 [00:24<01:06,  3.55it/s]
  3%|▎         | 9/300 [00:24<10:28,  2.16s/it]
 23%|██▎       | 69/300 [00:25<00:54,  4.20it/s]
  6%|▋         | 19/300 [00:25<04:17,  1.09it/s]
 21%|██▏       | 64/300 [00:25<01:02,  3.79it/s]
 23%|██▎       | 70/300 [00:25<00:49,  4.62it/s]
  5%|▍         | 14/300 [00:25<06:29,  1.36s/it]
 22%|██▏       | 65/300 [00:25<00:56,  4.18it/s]
 24%|██▎       | 71/300 [00:25<01:11,  3.19it/s]
 22%|██▏       | 66/300 [00:25<01:18,  3.00it/s]
 24%|██▍       | 72/300 [00:26<01:03,  3.61it/s]
 22%|██▏       | 67/300 [00:25<01:07,  3.47it/s]
 24%|██▍       | 73/300 [00:26<00:55,  4.09it/s]
 23%|██▎       | 68/300 [00:26<00:59,  3.88it/s]
 25%|██▍       | 74/300 [00:26<00:50,  4.46it/s]
  5%|▌         | 15/300 [00:26<06:07,  1.29s/it]
 25%|██▌       | 75/300 [00:26<00:46,  4.87it/s]
 23%|██▎       | 69/3

== Status ==
Current time: 2022-12-11 18:48:24 (running for 00:00:37.96)
Memory usage on this node: 5.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 160.000: None | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iterat

 30%|██▉       | 89/300 [00:29<00:46,  4.49it/s]
 28%|██▊       | 83/300 [00:29<00:51,  4.25it/s]
 30%|███       | 90/300 [00:30<00:44,  4.70it/s]
 28%|██▊       | 84/300 [00:29<00:48,  4.47it/s]
 28%|██▊       | 85/300 [00:30<00:48,  4.46it/s]
  6%|▌         | 18/300 [00:30<06:02,  1.29s/it]
 30%|███       | 91/300 [00:30<01:07,  3.12it/s]
 31%|███       | 92/300 [00:30<00:56,  3.66it/s]
 29%|██▊       | 86/300 [00:30<01:10,  3.02it/s]
 31%|███       | 93/300 [00:31<00:49,  4.18it/s]
 29%|██▉       | 87/300 [00:30<01:00,  3.52it/s]
  4%|▍         | 12/300 [00:30<09:48,  2.04s/it]
 31%|███▏      | 94/300 [00:31<00:44,  4.64it/s]
 32%|███▏      | 95/300 [00:31<00:40,  5.08it/s]
 29%|██▉       | 88/300 [00:31<00:53,  3.95it/s]
 30%|██▉       | 89/300 [00:31<00:48,  4.39it/s]
 30%|███       | 90/300 [00:31<00:46,  4.52it/s]
  6%|▋         | 19/300 [00:31<05:46,  1.23s/it]
 32%|███▏      | 96/300 [00:31<01:02,  3.25it/s]
 32%|███▏      | 97/300 [00:32<00:54,  3.73it/s]
 33%|███▎      | 98/

== Status ==
Current time: 2022-12-11 18:48:29 (running for 00:00:43.02)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.7011868275391558 | Iter 10.000: -0.7033794534703095
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 35%|███▍      | 104/300 [00:34<00:40,  4.82it/s]
 35%|███▌      | 105/300 [00:35<00:37,  5.16it/s]
 37%|███▋      | 111/300 [00:35<00:50,  3.74it/s]
 37%|███▋      | 112/300 [00:35<00:43,  4.36it/s]
 38%|███▊      | 113/300 [00:35<00:37,  4.95it/s]
 35%|███▌      | 106/300 [00:35<00:52,  3.69it/s]
  5%|▌         | 15/300 [00:35<07:58,  1.68s/it]
 38%|███▊      | 114/300 [00:35<00:34,  5.33it/s]
 38%|███▊      | 115/300 [00:35<00:32,  5.73it/s]
 36%|███▌      | 107/300 [00:35<00:46,  4.19it/s]
 36%|███▌      | 108/300 [00:35<00:41,  4.58it/s]
 36%|███▋      | 109/300 [00:35<00:38,  4.93it/s]
 39%|███▊      | 116/300 [00:36<00:49,  3.72it/s]
 37%|███▋      | 110/300 [00:36<00:36,  5.26it/s]
 39%|███▉      | 117/300 [00:36<00:43,  4.22it/s]
 39%|███▉      | 118/300 [00:36<00:37,  4.82it/s]
 40%|███▉      | 119/300 [00:36<00:34,  5.30it/s]
 37%|███▋      | 111/300 [00:36<00:51,  3.67it/s]
 40%|████      | 120/300 [00:36<00:31,  5.67it/s]
 37%|███▋      | 112/300 [00:36<00:44,  4.20it/s]
 

== Status ==
Current time: 2022-12-11 18:48:34 (running for 00:00:48.12)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.7011868275391558 | Iter 10.000: -0.7033794534703095
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 42%|████▏     | 126/300 [00:39<00:49,  3.52it/s]
 42%|████▏     | 127/300 [00:40<00:43,  4.01it/s]


[2m[36m(run_one_training pid=8856)[0m Early stopping due to no improvement.


 45%|████▌     | 136/300 [00:40<00:48,  3.35it/s]
 43%|████▎     | 128/300 [00:40<00:39,  4.41it/s]
 43%|████▎     | 129/300 [00:40<00:34,  4.91it/s]
 43%|████▎     | 130/300 [00:40<00:32,  5.29it/s]
  6%|▌         | 18/300 [00:40<07:51,  1.67s/it]
 44%|████▎     | 131/300 [00:41<00:44,  3.79it/s]
 44%|████▍     | 132/300 [00:41<00:38,  4.36it/s]
 44%|████▍     | 133/300 [00:41<00:34,  4.88it/s]
 45%|████▍     | 134/300 [00:41<00:31,  5.33it/s]
 45%|████▌     | 135/300 [00:41<00:29,  5.60it/s]
 45%|████▌     | 136/300 [00:42<00:42,  3.85it/s]
  6%|▋         | 19/300 [00:42<07:20,  1.57s/it]
 46%|████▌     | 137/300 [00:42<00:36,  4.42it/s]
 46%|████▌     | 138/300 [00:42<00:33,  4.90it/s]
 46%|████▋     | 139/300 [00:42<00:30,  5.28it/s]
 47%|████▋     | 140/300 [00:42<00:29,  5.34it/s]
 47%|████▋     | 141/300 [00:43<00:42,  3.77it/s]
 47%|████▋     | 142/300 [00:43<00:37,  4.21it/s]
 48%|████▊     | 143/300 [00:43<00:33,  4.75it/s]
  7%|▋         | 20/300 [00:43<07:02,  1.51s/it]
 48

== Status ==
Current time: 2022-12-11 18:48:39 (running for 00:00:53.24)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: None | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iterat

 50%|█████     | 151/300 [00:45<00:38,  3.88it/s]
 51%|█████     | 152/300 [00:45<00:33,  4.45it/s]
 51%|█████     | 153/300 [00:45<00:30,  4.77it/s]
 51%|█████▏    | 154/300 [00:45<00:28,  5.15it/s]
  7%|▋         | 21/300 [00:45<08:09,  1.75s/it]
 52%|█████▏    | 155/300 [00:45<00:26,  5.56it/s]
 52%|█████▏    | 156/300 [00:46<00:36,  3.91it/s]
 52%|█████▏    | 157/300 [00:46<00:32,  4.46it/s]
 53%|█████▎    | 158/300 [00:46<00:28,  4.96it/s]
 53%|█████▎    | 159/300 [00:46<00:26,  5.33it/s]
 53%|█████▎    | 160/300 [00:46<00:24,  5.68it/s]
  7%|▋         | 22/300 [00:47<07:32,  1.63s/it]
 54%|█████▎    | 161/300 [00:47<00:35,  3.89it/s]
 54%|█████▍    | 162/300 [00:47<00:30,  4.46it/s]
 54%|█████▍    | 163/300 [00:47<00:27,  4.97it/s]
 55%|█████▍    | 164/300 [00:47<00:25,  5.36it/s]
 55%|█████▌    | 165/300 [00:48<00:23,  5.72it/s]
 55%|█████▌    | 166/300 [00:48<00:34,  3.87it/s]
  8%|▊         | 23/300 [00:48<07:06,  1.54s/it]
 56%|█████▌    | 167/300 [00:48<00:30,  4.40it/s]
 56

== Status ==
Current time: 2022-12-11 18:48:44 (running for 00:00:58.39)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 58%|█████▊    | 175/300 [00:50<00:22,  5.48it/s]
 59%|█████▊    | 176/300 [00:50<00:33,  3.73it/s]
 59%|█████▉    | 177/300 [00:50<00:28,  4.25it/s]
 59%|█████▉    | 178/300 [00:50<00:25,  4.79it/s]
 60%|█████▉    | 179/300 [00:51<00:23,  5.17it/s]
 60%|██████    | 180/300 [00:51<00:21,  5.52it/s]
  8%|▊         | 25/300 [00:51<06:39,  1.45s/it]
 60%|██████    | 181/300 [00:51<00:31,  3.81it/s]
 61%|██████    | 182/300 [00:51<00:27,  4.36it/s]
 61%|██████    | 183/300 [00:52<00:24,  4.69it/s]
 61%|██████▏   | 184/300 [00:52<00:22,  5.07it/s]
 62%|██████▏   | 185/300 [00:52<00:21,  5.44it/s]
 62%|██████▏   | 186/300 [00:52<00:29,  3.82it/s]
 62%|██████▏   | 187/300 [00:52<00:25,  4.40it/s]
 63%|██████▎   | 188/300 [00:53<00:22,  4.92it/s]
 63%|██████▎   | 189/300 [00:53<00:21,  5.28it/s]
 63%|██████▎   | 190/300 [00:53<00:19,  5.68it/s]
  9%|▊         | 26/300 [00:53<07:49,  1.71s/it]
 64%|██████▎   | 191/300 [00:53<00:27,  3.93it/s]
 64%|██████▍   | 192/300 [00:53<00:23,  4.50it/s]
 6

== Status ==
Current time: 2022-12-11 18:48:49 (running for 00:01:03.50)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 66%|██████▌   | 198/300 [00:55<00:22,  4.58it/s]
 66%|██████▋   | 199/300 [00:55<00:20,  4.94it/s]
 67%|██████▋   | 200/300 [00:55<00:19,  5.23it/s]
 67%|██████▋   | 201/300 [00:56<00:26,  3.67it/s]
 67%|██████▋   | 202/300 [00:56<00:23,  4.23it/s]
 68%|██████▊   | 203/300 [00:56<00:20,  4.72it/s]
  9%|▉         | 28/300 [00:56<07:02,  1.55s/it]
 68%|██████▊   | 204/300 [00:56<00:19,  5.05it/s]
 68%|██████▊   | 205/300 [00:56<00:17,  5.47it/s]
 69%|██████▊   | 206/300 [00:57<00:24,  3.79it/s]
 69%|██████▉   | 207/300 [00:57<00:21,  4.31it/s]
 69%|██████▉   | 208/300 [00:57<00:19,  4.84it/s]
 70%|██████▉   | 209/300 [00:57<00:17,  5.17it/s]
 70%|███████   | 210/300 [00:57<00:16,  5.51it/s]
 10%|▉         | 29/300 [00:57<06:46,  1.50s/it]
 70%|███████   | 211/300 [00:58<00:23,  3.86it/s]
 71%|███████   | 212/300 [00:58<00:20,  4.36it/s]
 71%|███████   | 213/300 [00:58<00:17,  4.88it/s]
 71%|███████▏  | 214/300 [00:58<00:16,  5.29it/s]
 72%|███████▏  | 215/300 [00:58<00:15,  5.64it/s]
 1

== Status ==
Current time: 2022-12-11 18:48:54 (running for 00:01:08.72)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 74%|███████▍  | 222/300 [01:00<00:18,  4.29it/s]
 74%|███████▍  | 223/300 [01:00<00:16,  4.81it/s]
 75%|███████▍  | 224/300 [01:00<00:14,  5.11it/s]
 75%|███████▌  | 225/300 [01:00<00:14,  5.36it/s]
 75%|███████▌  | 226/300 [01:01<00:20,  3.63it/s]
 10%|█         | 31/300 [01:01<07:49,  1.75s/it]
 76%|███████▌  | 227/300 [01:01<00:17,  4.22it/s]
 76%|███████▌  | 228/300 [01:01<00:15,  4.74it/s]
 76%|███████▋  | 229/300 [01:01<00:13,  5.19it/s]
 77%|███████▋  | 230/300 [01:02<00:12,  5.58it/s]
 77%|███████▋  | 231/300 [01:02<00:17,  3.85it/s]
 77%|███████▋  | 232/300 [01:02<00:15,  4.39it/s]
 78%|███████▊  | 233/300 [01:02<00:13,  4.89it/s]
 11%|█         | 32/300 [01:02<07:16,  1.63s/it]
 78%|███████▊  | 234/300 [01:02<00:12,  5.35it/s]
 78%|███████▊  | 235/300 [01:03<00:11,  5.59it/s]
 79%|███████▊  | 236/300 [01:03<00:16,  3.90it/s]
 79%|███████▉  | 237/300 [01:03<00:14,  4.36it/s]
 79%|███████▉  | 238/300 [01:03<00:12,  4.91it/s]
 80%|███████▉  | 239/300 [01:04<00:11,  5.38it/s]
 8

== Status ==
Current time: 2022-12-11 18:49:00 (running for 00:01:13.81)
Memory usage on this node: 5.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 11%|█▏        | 34/300 [01:05<06:36,  1.49s/it]
 82%|████████▏ | 246/300 [01:05<00:14,  3.84it/s]
 82%|████████▏ | 247/300 [01:05<00:12,  4.36it/s]
 83%|████████▎ | 248/300 [01:05<00:10,  4.81it/s]
 83%|████████▎ | 249/300 [01:06<00:09,  5.25it/s]
 83%|████████▎ | 250/300 [01:06<00:08,  5.64it/s]
 84%|████████▎ | 251/300 [01:06<00:12,  3.92it/s]
 84%|████████▍ | 252/300 [01:06<00:10,  4.51it/s]
 12%|█▏        | 35/300 [01:06<06:24,  1.45s/it]
 84%|████████▍ | 253/300 [01:07<00:09,  4.95it/s]
 85%|████████▍ | 254/300 [01:07<00:08,  5.36it/s]
 85%|████████▌ | 255/300 [01:07<00:07,  5.63it/s]
 85%|████████▌ | 256/300 [01:07<00:11,  3.95it/s]
 86%|████████▌ | 257/300 [01:07<00:09,  4.43it/s]
 86%|████████▌ | 258/300 [01:08<00:08,  4.80it/s]
 86%|████████▋ | 259/300 [01:08<00:07,  5.22it/s]
 87%|████████▋ | 260/300 [01:08<00:07,  5.51it/s]
 87%|████████▋ | 261/300 [01:08<00:09,  3.90it/s]
 87%|████████▋ | 262/300 [01:08<00:08,  4.50it/s]
 88%|████████▊ | 263/300 [01:09<00:07,  5.05it/s]
 8

== Status ==
Current time: 2022-12-11 18:49:05 (running for 00:01:18.82)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6693917065858841 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 12%|█▏        | 37/300 [01:10<07:01,  1.60s/it]
 90%|█████████ | 271/300 [01:10<00:07,  3.89it/s]
 91%|█████████ | 272/300 [01:11<00:06,  4.40it/s]
 91%|█████████ | 273/300 [01:11<00:05,  4.86it/s]
 91%|█████████▏| 274/300 [01:11<00:04,  5.22it/s]
 92%|█████████▏| 275/300 [01:11<00:04,  5.63it/s]
 92%|█████████▏| 276/300 [01:11<00:06,  3.83it/s]
 13%|█▎        | 38/300 [01:11<06:42,  1.54s/it]
 92%|█████████▏| 277/300 [01:12<00:05,  4.42it/s]
 93%|█████████▎| 278/300 [01:12<00:04,  4.93it/s]
 93%|█████████▎| 279/300 [01:12<00:03,  5.33it/s]
 93%|█████████▎| 280/300 [01:12<00:03,  5.69it/s]
 94%|█████████▎| 281/300 [01:12<00:04,  3.94it/s]
 94%|█████████▍| 282/300 [01:13<00:04,  4.49it/s]
 94%|█████████▍| 283/300 [01:13<00:03,  4.97it/s]
 13%|█▎        | 39/300 [01:13<06:25,  1.48s/it]
 95%|█████████▍| 284/300 [01:13<00:02,  5.41it/s]
 95%|█████████▌| 285/300 [01:13<00:02,  5.70it/s]
 95%|█████████▌| 286/300 [01:14<00:03,  3.89it/s]
 96%|█████████▌| 287/300 [01:14<00:02,  4.36it/s]
 96

== Status ==
Current time: 2022-12-11 18:49:10 (running for 00:01:23.87)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6905262048045794 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      loss |

 99%|█████████▊| 296/300 [01:16<00:00,  4.12it/s]
 99%|█████████▉| 297/300 [01:16<00:00,  4.73it/s]
 99%|█████████▉| 298/300 [01:16<00:00,  5.22it/s]
100%|█████████▉| 299/300 [01:16<00:00,  5.69it/s]
2022-12-11 18:49:11,267	INFO tune.py:777 -- Total run time: 85.01 seconds (84.87 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:49:11 (running for 00:01:24.89)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.19031332541878024 | Iter 80.000: -0.5904004575374225 | Iter 40.000: -0.6905262048045794 | Iter 20.000: -0.699098575549821 | Iter 10.000: -0.7033794534703095
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-47-46
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+-------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc               |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      loss |   training_i

  0%|          | 0/300 [00:00<?, ?it/s]0m 


Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_1e87f_00000,2022-12-11_18-51-02,True,,8ebf3beb5870476b9304c1f7083c7f18,"0_batch_size=8,learning_rate=0.0010,model_attention_heads=2,model_embedding_size=64,model_layers=1,scheduler_gamma=0.9950,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,267,0.190155,172.29.86.26,11236,107.845,0.411972,107.845,1670781062,0,,267,1e87f_00000,0.107595
run_one_training_1e87f_00001,2022-12-11_18-50-04,True,,5681a596c1ff4355b325f6efeff92c09,,VGI-DEKSPTOP-TIJMEn,80,0.699906,172.29.86.26,11327,44.5486,0.330209,44.5486,1670781004,0,,80,1e87f_00001,0.371134
run_one_training_1e87f_00002,2022-12-11_18-50-00,True,,77ff6255fc3a432c9906a05a282356bc,,VGI-DEKSPTOP-TIJMEn,20,0.703277,172.29.86.26,11329,41.1743,1.50884,41.1743,1670781000,0,,20,1e87f_00002,0.269669
run_one_training_1e87f_00003,2022-12-11_18-49-23,True,,1cd7a841aa6848039f878544fc250305,,VGI-DEKSPTOP-TIJMEn,10,0.7136,172.29.86.26,11331,4.48325,0.223213,4.48325,1670780963,0,,10,1e87f_00003,0.368193
run_one_training_1e87f_00004,2022-12-11_18-51-32,True,,5319548a03174517bba5a8b938aa5008,,VGI-DEKSPTOP-TIJMEn,80,0.691313,172.29.86.26,11333,132.961,0.865933,132.961,1670781092,0,,80,1e87f_00004,0.318244
run_one_training_1e87f_00005,2022-12-11_18-49-31,True,,63262078407a46c8858fb94d7e239a1e,,VGI-DEKSPTOP-TIJMEn,10,0.705581,172.29.86.26,11335,12.5041,0.700099,12.5041,1670780971,0,,10,1e87f_00005,0.340292
run_one_training_1e87f_00006,2022-12-11_18-52-48,True,,c565fbc59cfb4cef8bb36bb7c3da5ae9,"6_batch_size=2,learning_rate=0.0010,model_attention_heads=2,model_embedding_size=128,model_layers=5,scheduler_gamma=0.8000,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,137,0.189483,172.29.86.26,11337,209.28,0.735138,209.28,1670781168,0,,137,1e87f_00006,0.463944
run_one_training_1e87f_00007,2022-12-11_18-49-45,True,,e59830a1359f45059116253b30fdb0a8,,VGI-DEKSPTOP-TIJMEn,20,0.703309,172.29.86.26,11339,25.4826,0.790052,25.4826,1670780985,0,,20,1e87f_00007,0.26782
run_one_training_1e87f_00008,2022-12-11_18-50-48,True,,8e9f114152714502ad9b69e51dee9b55,"8_batch_size=4,learning_rate=0.0010,model_attention_heads=3,model_embedding_size=64,model_layers=1,scheduler_gamma=0.9950,sgd_momentum=0.9000",VGI-DEKSPTOP-TIJMEn,137,0.189142,172.29.86.26,11341,88.4962,0.524231,88.4962,1670781048,0,,137,1e87f_00008,0.261312
run_one_training_1e87f_00009,2022-12-11_18-51-43,True,,463a3ca0459d4fcaabbcb5f835608cf0,"9_batch_size=2,learning_rate=0.0100,model_attention_heads=2,model_embedding_size=16,model_layers=5,scheduler_gamma=0.9000,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,142,0.189475,172.29.86.26,11343,144.473,0.553226,144.473,1670781103,0,,142,1e87f_00009,0.273392


  0%|          | 1/300 [00:01<06:11,  1.24s/it]
  1%|          | 2/300 [00:01<03:47,  1.31it/s]
  1%|          | 3/300 [00:01<02:33,  1.93it/s]
  1%|▏         | 4/300 [00:02<01:57,  2.51it/s]


[2m[36m(run_one_training pid=11337)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.8, 'model_embedding_size': 128, 'model_attention_heads': 2, 'model_layers': 5, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '90dd4769fc65450d9b80d93ea565dce6'}}}
[2m[36m(run_one_training pid=11331)[0m creating model model_config={'batch_size': 8, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamma': 0.995, 'model_embedding_size': 8, 'model_attention_heads': 3, 'model_layers': 1, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '90dd4769fc65450d9b80d93ea565dce6'}}}
[2m[36m(run_one_training pid=11349)[0m creating model model_config={'batch_size': 2, 'learning_rate': 0.001, 'sgd_momentum': 0.8, 'scheduler_gamm

  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=11333)[0m Loading model...
[2m[36m(run_one_training pid=11354)[0m Loading model...
[2m[36m(run_one_training pid=11356)[0m Loading model...
[2m[36m(run_one_training pid=11335)[0m Loading model...
== Status ==
Current time: 2022-12-11 18:49:17 (running for 00:00:05.99)
Memory usage on this node: 7.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   ba

  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=11329)[0m Loading model...
[2m[36m(run_one_training pid=11343)[0m Loading model...
[2m[36m(run_one_training pid=11341)[0m creating model model_config={'batch_size': 4, 'learning_rate': 0.001, 'sgd_momentum': 0.9, 'scheduler_gamma': 0.995, 'model_embedding_size': 64, 'model_attention_heads': 3, 'model_layers': 1, 'mlflow': {'experiment_name': 'metabolite_gnn_sweep_full', 'tracking_uri': 'http://localhost:5000', 'save_artifacts': True, 'tags': {'mlflow.parentRunId': '90dd4769fc65450d9b80d93ea565dce6'}}}


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  2%|▏         | 5/300 [00:04<05:55,  1.21s/it]
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=11346)[0m Loading model...
[2m[36m(run_one_training pid=11327)[0m Loading model...
[2m[36m(run_one_training pid=11352)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=11339)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 0/300 [00:00<?, ?it/s]0m 


[2m[36m(run_one_training pid=11341)[0m Loading model...


  0%|          | 0/300 [00:00<?, ?it/s]0m 
  0%|          | 1/300 [00:01<06:11,  1.24s/it]
  1%|          | 2/300 [00:01<03:19,  1.49it/s]
  2%|▏         | 6/300 [00:06<06:09,  1.26s/it]
  1%|          | 3/300 [00:01<02:25,  2.04it/s]
  0%|          | 1/300 [00:01<08:45,  1.76s/it]
  2%|▏         | 7/300 [00:06<04:47,  1.02it/s]
  1%|▏         | 4/300 [00:02<01:53,  2.61it/s]
  0%|          | 1/300 [00:01<08:40,  1.74s/it]
  2%|▏         | 5/300 [00:02<01:38,  3.00it/s]
  3%|▎         | 8/300 [00:06<03:48,  1.28it/s]
  1%|          | 2/300 [00:02<05:34,  1.12s/it]
  0%|          | 1/300 [00:01<09:05,  1.82s/it]
  3%|▎         | 9/300 [00:07<03:06,  1.56it/s]
  1%|          | 2/300 [00:02<04:56,  1.01it/s]
  0%|          | 1/300 [00:02<12:20,  2.48s/it]


== Status ==
Current time: 2022-12-11 18:49:22 (running for 00:00:11.00)
Memory usage on this node: 8.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |     

  3%|▎         | 10/300 [00:07<02:39,  1.82it/s]
  1%|          | 3/300 [00:02<04:17,  1.15it/s]
  1%|          | 2/300 [00:02<05:38,  1.14s/it]
  0%|          | 1/300 [00:02<14:55,  2.99s/it]
  1%|          | 3/300 [00:02<03:48,  1.30it/s]
  2%|▏         | 6/300 [00:03<02:50,  1.72it/s]
  0%|          | 1/300 [00:03<15:43,  3.16s/it]
  0%|          | 1/300 [00:02<14:41,  2.95s/it]
  0%|          | 1/300 [00:03<15:47,  3.17s/it]
  2%|▏         | 7/300 [00:03<02:15,  2.17it/s]
  1%|▏         | 4/300 [00:03<03:58,  1.24it/s]
  0%|          | 1/300 [00:03<17:38,  3.54s/it]
  1%|▏         | 4/300 [00:03<03:19,  1.48it/s]
  3%|▎         | 8/300 [00:03<01:52,  2.59it/s]
  1%|          | 3/300 [00:03<04:25,  1.12it/s]
  0%|          | 1/300 [00:03<15:47,  3.17s/it]
  1%|          | 2/300 [00:03<08:04,  1.63s/it]
  3%|▎         | 9/300 [00:03<01:37,  2.98it/s]
  4%|▎         | 11/300 [00:08<03:28,  1.38it/s]
  1%|          | 2/300 [00:03<08:14,  1.66s/it]
  2%|▏         | 5/300 [00:03<02:55,  

== Status ==
Current time: 2022-12-11 18:49:27 (running for 00:00:16.12)
Memory usage on this node: 8.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.7034777129689852
Resources requested: 15.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (15 RUNNING, 1 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |    

  3%|▎         | 9/300 [00:07<03:52,  1.25it/s]
  7%|▋         | 20/300 [00:12<01:55,  2.42it/s]
  3%|▎         | 9/300 [00:07<03:27,  1.40it/s]
  1%|▏         | 4/300 [00:08<08:58,  1.82s/it]
  3%|▎         | 10/300 [00:08<03:35,  1.34it/s]
  1%|          | 3/300 [00:08<12:43,  2.57s/it]
  2%|▏         | 6/300 [00:08<07:19,  1.50s/it]
  3%|▎         | 10/300 [00:08<03:14,  1.49it/s]
  4%|▎         | 11/300 [00:08<04:03,  1.18it/s]
  2%|▏         | 5/300 [00:08<07:51,  1.60s/it]
  1%|          | 3/300 [00:08<13:31,  2.73s/it]
  7%|▋         | 21/300 [00:13<02:53,  1.61it/s]
  2%|▏         | 6/300 [00:09<08:02,  1.64s/it]
  7%|▋         | 22/300 [00:14<02:27,  1.88it/s]
  2%|▏         | 7/300 [00:08<05:51,  1.20s/it]
  1%|          | 3/300 [00:09<14:43,  2.97s/it]
  4%|▍         | 12/300 [00:09<03:28,  1.38it/s]
  2%|▏         | 6/300 [00:09<07:56,  1.62s/it]
  8%|▊         | 23/300 [00:14<02:09,  2.13it/s]
  2%|▏         | 6/300 [00:09<07:58,  1.63s/it]
  1%|          | 3/300 [00:09<14

== Status ==
Current time: 2022-12-11 18:49:32 (running for 00:00:21.32)
Memory usage on this node: 8.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.7020646793146929 | Iter 10.000: -0.7034777129689852
Resources requested: 13.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (13 RUNNING, 3 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                    

  6%|▌         | 18/300 [00:12<02:52,  1.63it/s]
  2%|▏         | 6/300 [00:13<11:06,  2.27s/it]
  2%|▏         | 7/300 [00:13<08:43,  1.79s/it]
  2%|▏         | 5/300 [00:13<11:32,  2.35s/it]
  3%|▎         | 9/300 [00:12<05:49,  1.20s/it]
  6%|▋         | 19/300 [00:13<02:38,  1.77it/s]
  5%|▌         | 16/300 [00:13<04:04,  1.16it/s]
  3%|▎         | 10/300 [00:13<05:36,  1.16s/it]
 10%|█         | 31/300 [00:18<02:42,  1.66it/s]
  5%|▌         | 16/300 [00:13<04:14,  1.11it/s]
  2%|▏         | 5/300 [00:14<12:29,  2.54s/it]
  7%|▋         | 20/300 [00:13<02:25,  1.92it/s]
 11%|█         | 32/300 [00:18<02:18,  1.94it/s]
  6%|▌         | 17/300 [00:13<03:32,  1.33it/s]
  3%|▎         | 10/300 [00:13<05:18,  1.10s/it]
  2%|▏         | 5/300 [00:14<12:35,  2.56s/it]
  6%|▌         | 17/300 [00:14<03:44,  1.26it/s]
  2%|▏         | 7/300 [00:14<09:39,  1.98s/it]
  3%|▎         | 8/300 [00:14<07:51,  1.61s/it]
 11%|█         | 33/300 [00:19<01:59,  2.23it/s]
  6%|▌         | 18/300 [00:

== Status ==
Current time: 2022-12-11 18:49:37 (running for 00:00:26.51)
Memory usage on this node: 7.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.7007424843808016 | Iter 20.000: -0.698569699190557 | Iter 10.000: -0.703309333572785
Resources requested: 12.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (12 RUNNING, 4 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

  9%|▉         | 27/300 [00:17<02:42,  1.68it/s]
  8%|▊         | 23/300 [00:18<03:03,  1.51it/s]
 14%|█▍        | 42/300 [00:23<01:58,  2.19it/s]
  8%|▊         | 24/300 [00:17<02:38,  1.74it/s]
  4%|▍         | 13/300 [00:17<05:33,  1.16s/it]
  5%|▍         | 14/300 [00:18<05:04,  1.07s/it]
 14%|█▍        | 43/300 [00:23<01:44,  2.46it/s]
  2%|▏         | 7/300 [00:18<12:10,  2.49s/it]
  9%|▉         | 28/300 [00:18<02:28,  1.84it/s]
  8%|▊         | 25/300 [00:18<02:29,  1.84it/s]
  8%|▊         | 24/300 [00:18<02:53,  1.59it/s]
 15%|█▍        | 44/300 [00:23<01:32,  2.77it/s]
 10%|▉         | 29/300 [00:18<02:14,  2.02it/s]
  4%|▎         | 11/300 [00:18<08:04,  1.68s/it]
  3%|▎         | 8/300 [00:18<10:01,  2.06s/it]
 15%|█▌        | 45/300 [00:23<01:24,  3.01it/s]
  5%|▍         | 14/300 [00:18<05:03,  1.06s/it]
  5%|▌         | 15/300 [00:18<04:48,  1.01s/it]
  8%|▊         | 25/300 [00:19<02:42,  1.69it/s]
 10%|█         | 30/300 [00:19<02:05,  2.14it/s]
  2%|▏         | 7/300

== Status ==
Current time: 2022-12-11 18:49:42 (running for 00:00:31.62)
Memory usage on this node: 7.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.7007424843808016 | Iter 20.000: -0.698569699190557 | Iter 10.000: -0.703309333572785
Resources requested: 11.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (11 RUNNING, 5 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|       

 11%|█         | 32/300 [00:22<02:53,  1.55it/s]
  5%|▌         | 15/300 [00:23<05:52,  1.24s/it]
 12%|█▏        | 37/300 [00:23<02:34,  1.70it/s]
 10%|█         | 31/300 [00:23<03:23,  1.32it/s]
  3%|▎         | 9/300 [00:23<11:12,  2.31s/it]
  6%|▋         | 19/300 [00:23<04:46,  1.02s/it]
 11%|█         | 33/300 [00:23<02:41,  1.65it/s]
  3%|▎         | 10/300 [00:23<09:37,  1.99s/it]
  6%|▌         | 18/300 [00:23<05:13,  1.11s/it]
  3%|▎         | 9/300 [00:24<11:12,  2.31s/it]
 13%|█▎        | 38/300 [00:23<02:19,  1.88it/s]
 19%|█▊        | 56/300 [00:28<02:08,  1.90it/s]
 11%|█         | 32/300 [00:24<03:02,  1.47it/s]
 11%|█▏        | 34/300 [00:23<02:27,  1.81it/s]
 13%|█▎        | 39/300 [00:24<02:05,  2.08it/s]
 19%|█▉        | 57/300 [00:29<01:48,  2.23it/s]
  7%|▋         | 20/300 [00:24<04:30,  1.04it/s]
 19%|█▉        | 58/300 [00:29<01:34,  2.55it/s]
  6%|▋         | 19/300 [00:24<04:42,  1.01s/it]
 11%|█         | 33/300 [00:24<02:50,  1.57it/s]
 13%|█▎        | 40/30

== Status ==
Current time: 2022-12-11 18:49:48 (running for 00:00:36.74)
Memory usage on this node: 7.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: -0.6999455833186706 | Iter 20.000: -0.698569699190557 | Iter 10.000: -0.7021237503116329
Resources requested: 10.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (10 RUNNING, 6 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|      

  4%|▍         | 13/300 [00:28<08:54,  1.86s/it]
 23%|██▎       | 68/300 [00:33<01:24,  2.75it/s]
 13%|█▎        | 40/300 [00:28<02:18,  1.88it/s]
 23%|██▎       | 69/300 [00:33<01:15,  3.06it/s]
 16%|█▌        | 47/300 [00:28<02:15,  1.87it/s]
  8%|▊         | 24/300 [00:28<04:16,  1.08it/s]
 14%|█▎        | 41/300 [00:28<02:54,  1.49it/s]
  6%|▋         | 19/300 [00:28<05:42,  1.22s/it]
  4%|▍         | 12/300 [00:28<10:21,  2.16s/it]
 23%|██▎       | 70/300 [00:33<01:09,  3.29it/s]
 16%|█▌        | 48/300 [00:28<02:02,  2.06it/s]
  4%|▎         | 11/300 [00:29<12:21,  2.57s/it]
 14%|█▍        | 42/300 [00:28<02:36,  1.65it/s]
  4%|▎         | 11/300 [00:29<12:16,  2.55s/it]
 16%|█▋        | 49/300 [00:29<01:54,  2.19it/s]
  8%|▊         | 25/300 [00:29<04:09,  1.10it/s]
 14%|█▍        | 43/300 [00:29<02:25,  1.77it/s]
 14%|█▎        | 41/300 [00:29<03:04,  1.40it/s]
 17%|█▋        | 50/300 [00:29<01:47,  2.32it/s]
  7%|▋         | 20/300 [00:29<05:28,  1.17s/it]
 24%|██▎       | 71/

== Status ==
Current time: 2022-12-11 18:49:53 (running for 00:00:41.83)
Memory usage on this node: 7.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 160.000: None | Iter 80.000: -0.6990031066040198 | Iter 40.000: -0.6962946110094588 | Iter 20.000: -0.6968860340615114 | Iter 10.000: -0.7021237503116329
Resources requested: 10.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (10 RUNNING, 6 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 10%|▉         | 29/300 [00:33<04:05,  1.10it/s]
 27%|██▋       | 81/300 [00:38<01:47,  2.05it/s]
 16%|█▌        | 48/300 [00:33<02:25,  1.73it/s]
 19%|█▉        | 57/300 [00:33<02:10,  1.87it/s]
  5%|▌         | 15/300 [00:33<08:35,  1.81s/it]
 27%|██▋       | 82/300 [00:38<01:30,  2.40it/s]
  8%|▊         | 23/300 [00:34<05:40,  1.23s/it]
 28%|██▊       | 83/300 [00:38<01:19,  2.73it/s]
 19%|█▉        | 58/300 [00:33<01:57,  2.05it/s]
 16%|█▋        | 49/300 [00:34<02:17,  1.82it/s]
 10%|█         | 30/300 [00:34<03:55,  1.15it/s]
 17%|█▋        | 51/300 [00:33<02:46,  1.50it/s]
 28%|██▊       | 84/300 [00:39<01:12,  2.98it/s]
 20%|█▉        | 59/300 [00:34<01:51,  2.16it/s]
  5%|▌         | 16/300 [00:34<09:56,  2.10s/it]
 28%|██▊       | 85/300 [00:39<01:07,  3.21it/s]
  5%|▍         | 14/300 [00:34<10:01,  2.10s/it]
 17%|█▋        | 50/300 [00:34<02:16,  1.83it/s]
 17%|█▋        | 52/300 [00:34<02:31,  1.64it/s]
 20%|██        | 60/300 [00:34<01:44,  2.30it/s]
  5%|▍         | 14/

== Status ==
Current time: 2022-12-11 18:49:58 (running for 00:00:46.85)
Memory usage on this node: 7.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 160.000: None | Iter 80.000: -0.6990031066040198 | Iter 40.000: -0.6962946110094588 | Iter 20.000: -0.6968860340615114 | Iter 10.000: -0.7021237503116329
Resources requested: 10.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (10 RUNNING, 6 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_ite

 11%|█▏        | 34/300 [00:38<04:06,  1.08it/s]
 20%|█▉        | 59/300 [00:38<02:08,  1.88it/s]
  6%|▌         | 17/300 [00:38<09:50,  2.09s/it]
 22%|██▏       | 67/300 [00:38<02:06,  1.84it/s]
 19%|█▊        | 56/300 [00:39<02:51,  1.42it/s]
 20%|██        | 60/300 [00:38<02:00,  1.99it/s]
 23%|██▎       | 68/300 [00:38<01:53,  2.04it/s]
  6%|▋         | 19/300 [00:39<08:15,  1.76s/it]
 32%|███▏      | 96/300 [00:44<01:36,  2.11it/s]
 12%|█▏        | 35/300 [00:39<03:52,  1.14it/s]
 19%|█▉        | 57/300 [00:39<02:32,  1.59it/s]
  9%|▉         | 27/300 [00:39<06:06,  1.34s/it]
 23%|██▎       | 69/300 [00:39<01:43,  2.23it/s]
 32%|███▏      | 97/300 [00:44<01:22,  2.45it/s]
 19%|█▉        | 58/300 [00:39<02:20,  1.72it/s]
 33%|███▎      | 98/300 [00:44<01:11,  2.81it/s]
 23%|██▎       | 70/300 [00:39<01:36,  2.38it/s]
 33%|███▎      | 99/300 [00:44<01:02,  3.19it/s]
  5%|▌         | 16/300 [00:40<11:41,  2.47s/it]
 20%|██        | 61/300 [00:39<02:41,  1.48it/s]
  5%|▌         | 16/

== Status ==
Current time: 2022-12-11 18:50:03 (running for 00:00:51.98)
Memory usage on this node: 7.0/23.5 GiB 
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 160.000: None | Iter 80.000: -0.6990031066040198 | Iter 40.000: -0.6962946110094588 | Iter 20.000: -0.6968860340615114 | Iter 10.000: -0.7021237503116329
Resources requested: 9.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (9 RUNNING, 7 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

  6%|▌         | 18/300 [00:43<09:41,  2.06s/it]
 37%|███▋      | 110/300 [00:48<00:51,  3.67it/s]
 26%|██▌       | 78/300 [00:43<01:33,  2.37it/s]
 13%|█▎        | 40/300 [00:43<03:27,  1.25it/s]
 23%|██▎       | 69/300 [00:43<01:44,  2.20it/s]
 26%|██▋       | 79/300 [00:43<01:29,  2.47it/s]
 10%|█         | 31/300 [00:44<05:51,  1.31s/it]
 22%|██▏       | 66/300 [00:44<02:20,  1.67it/s]
 23%|██▎       | 70/300 [00:43<01:42,  2.25it/s]
 37%|███▋      | 111/300 [00:49<01:22,  2.29it/s]
 22%|██▏       | 67/300 [00:44<02:07,  1.83it/s]
 37%|███▋      | 112/300 [00:49<01:09,  2.72it/s]
 38%|███▊      | 113/300 [00:49<01:01,  3.06it/s]
  6%|▋         | 19/300 [00:45<09:03,  1.93s/it]
 23%|██▎       | 68/300 [00:45<01:57,  1.98it/s]
 11%|█         | 32/300 [00:45<05:16,  1.18s/it]
 38%|███▊      | 114/300 [00:49<00:53,  3.46it/s]
 24%|██▎       | 71/300 [00:44<02:15,  1.68it/s]
 14%|█▎        | 41/300 [00:44<04:18,  1.00it/s]
 23%|██▎       | 69/300 [00:45<01:47,  2.15it/s]
  6%|▋         

== Status ==
Current time: 2022-12-11 18:50:10 (running for 00:00:59.24)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: -0.6994547992944717 | Iter 40.000: -0.6926436387002468 | Iter 20.000: -0.6968860340615114 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 39%|███▊      | 116/300 [00:50<01:12,  2.54it/s]
 11%|█▏        | 34/300 [00:59<21:56,  4.95s/it]
 39%|███▉      | 117/300 [01:04<13:28,  4.42s/it]
  7%|▋         | 20/300 [01:00<27:08,  5.81s/it]
  7%|▋         | 20/300 [00:59<26:46,  5.74s/it]
  7%|▋         | 22/300 [00:59<25:47,  5.57s/it]
 14%|█▍        | 43/300 [00:59<20:40,  4.83s/it]
 25%|██▍       | 74/300 [00:59<17:01,  4.52s/it]
 39%|███▉      | 118/300 [01:04<09:36,  3.17s/it]
 40%|███▉      | 119/300 [01:05<06:54,  2.29s/it]
 25%|██▌       | 75/300 [00:59<12:21,  3.29s/it]
 24%|██▎       | 71/300 [01:00<17:56,  4.70s/it]
 40%|████      | 120/300 [01:05<05:00,  1.67s/it]
 15%|█▍        | 44/300 [01:00<15:23,  3.61s/it]
 24%|██▍       | 72/300 [01:00<12:56,  3.40s/it]
 12%|█▏        | 35/300 [01:00<16:34,  3.75s/it]
 24%|██▍       | 73/300 [01:01<09:27,  2.50s/it]
 15%|█▌        | 45/300 [01:01<11:31,  2.71s/it]
 25%|██▌       | 76/300 [01:00<09:37,  2.58s/it]
  8%|▊         | 23/300 [01:01<19:58,  4.33s/it]
 40%|████      

== Status ==
Current time: 2022-12-11 18:50:24 (running for 00:01:13.20)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: -0.6944415523903444 | Iter 40.000: -0.6926436387002468 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_itera

 44%|████▍     | 133/300 [01:09<00:50,  3.29it/s]
 13%|█▎        | 39/300 [01:04<06:56,  1.60s/it]
 28%|██▊       | 85/300 [01:04<01:42,  2.09it/s]
 45%|████▍     | 134/300 [01:09<00:46,  3.53it/s]
 17%|█▋        | 50/300 [01:04<04:21,  1.05s/it]
 27%|██▋       | 81/300 [01:05<02:34,  1.41it/s]
 45%|████▌     | 135/300 [01:10<00:44,  3.74it/s]
 27%|██▋       | 82/300 [01:05<02:15,  1.61it/s]
  8%|▊         | 23/300 [01:05<14:48,  3.21s/it]
 13%|█▎        | 40/300 [01:05<06:01,  1.39s/it]
  8%|▊         | 23/300 [01:05<14:39,  3.18s/it]
 29%|██▊       | 86/300 [01:05<02:13,  1.60it/s]
 45%|████▌     | 136/300 [01:10<01:06,  2.48it/s]
 28%|██▊       | 83/300 [01:06<01:57,  1.85it/s]
 29%|██▉       | 87/300 [01:05<01:53,  1.87it/s]
 46%|████▌     | 137/300 [01:10<00:55,  2.92it/s]
 28%|██▊       | 84/300 [01:06<01:45,  2.05it/s]
  9%|▊         | 26/300 [01:06<12:24,  2.72s/it]
 46%|████▌     | 138/300 [01:11<00:48,  3.32it/s]
 17%|█▋        | 51/300 [01:06<04:44,  1.14s/it]
 29%|██▉      

== Status ==
Current time: 2022-12-11 18:50:29 (running for 00:01:18.21)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: None | Iter 80.000: -0.6944415523903444 | Iter 40.000: -0.691990213546281 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iterat

 15%|█▍        | 44/300 [01:09<04:24,  1.03s/it]
 50%|█████     | 150/300 [01:14<00:35,  4.26it/s]
 30%|███       | 91/300 [01:10<01:53,  1.84it/s]
 10%|▉         | 29/300 [01:10<07:58,  1.77s/it]
 32%|███▏      | 96/300 [01:09<01:47,  1.89it/s]
 31%|███       | 92/300 [01:10<01:43,  2.01it/s]
 50%|█████     | 151/300 [01:15<00:56,  2.65it/s]
 19%|█▊        | 56/300 [01:10<03:47,  1.07it/s]
 32%|███▏      | 97/300 [01:10<01:34,  2.15it/s]
 15%|█▌        | 45/300 [01:10<04:05,  1.04it/s]
 51%|█████     | 152/300 [01:15<00:47,  3.10it/s]
 31%|███       | 93/300 [01:10<01:35,  2.17it/s]
 51%|█████     | 153/300 [01:15<00:41,  3.52it/s]
 33%|███▎      | 98/300 [01:10<01:26,  2.34it/s]
 51%|█████▏    | 154/300 [01:15<00:37,  3.89it/s]
 19%|█▉        | 57/300 [01:10<03:24,  1.19it/s]
 31%|███▏      | 94/300 [01:11<01:31,  2.26it/s]
 52%|█████▏    | 155/300 [01:16<00:34,  4.18it/s]
 33%|███▎      | 99/300 [01:10<01:21,  2.48it/s]
  9%|▊         | 26/300 [01:11<11:07,  2.43s/it]
 10%|█        

== Status ==
Current time: 2022-12-11 18:50:34 (running for 00:01:23.28)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6944415523903444 | Iter 40.000: -0.691990213546281 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   t

 55%|█████▌    | 166/300 [01:19<00:49,  2.68it/s]
 34%|███▍      | 102/300 [01:15<01:36,  2.06it/s]
 21%|██        | 62/300 [01:14<03:17,  1.20it/s]
 56%|█████▌    | 167/300 [01:19<00:43,  3.07it/s]
 36%|███▌      | 107/300 [01:14<01:34,  2.04it/s]
 11%|█         | 32/300 [01:15<07:38,  1.71s/it]
 34%|███▍      | 103/300 [01:15<01:31,  2.16it/s]
 56%|█████▌    | 168/300 [01:20<00:39,  3.34it/s]
 36%|███▌      | 108/300 [01:15<01:27,  2.19it/s]
 17%|█▋        | 50/300 [01:15<03:47,  1.10it/s]
 56%|█████▋    | 169/300 [01:20<00:35,  3.65it/s]
 21%|██        | 63/300 [01:15<03:08,  1.26it/s]
 35%|███▍      | 104/300 [01:15<01:27,  2.23it/s]
 57%|█████▋    | 170/300 [01:20<00:32,  3.98it/s]
 36%|███▋      | 109/300 [01:15<01:22,  2.33it/s]
 10%|▉         | 29/300 [01:16<08:22,  1.85s/it]
 35%|███▌      | 105/300 [01:16<01:24,  2.31it/s]
 10%|▉         | 29/300 [01:16<08:22,  1.85s/it]
 37%|███▋      | 110/300 [01:15<01:17,  2.44it/s]
 21%|██▏       | 64/300 [01:16<02:55,  1.35it/s]
 11%|█ 

== Status ==
Current time: 2022-12-11 18:50:39 (running for 00:01:28.30)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6944415523903444 | Iter 40.000: -0.691990213546281 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   t

 61%|██████▏   | 184/300 [01:24<00:29,  3.99it/s]
 23%|██▎       | 69/300 [01:19<02:41,  1.43it/s]
 39%|███▉      | 118/300 [01:19<01:17,  2.36it/s]
 10%|█         | 31/300 [01:20<08:55,  1.99s/it]
 38%|███▊      | 113/300 [01:20<01:24,  2.22it/s]
 62%|██████▏   | 185/300 [01:24<00:27,  4.20it/s]
 40%|███▉      | 119/300 [01:19<01:13,  2.47it/s]
 18%|█▊        | 55/300 [01:20<03:33,  1.15it/s]
 10%|█         | 31/300 [01:20<09:07,  2.04s/it]
 38%|███▊      | 114/300 [01:20<01:20,  2.31it/s]
 23%|██▎       | 70/300 [01:20<02:36,  1.47it/s]
 40%|████      | 120/300 [01:20<01:10,  2.56it/s]
 38%|███▊      | 115/300 [01:21<01:17,  2.39it/s]
 62%|██████▏   | 186/300 [01:25<00:42,  2.69it/s]
 62%|██████▏   | 187/300 [01:25<00:36,  3.07it/s]
 63%|██████▎   | 188/300 [01:26<00:31,  3.51it/s]
 12%|█▏        | 36/300 [01:21<07:31,  1.71s/it]
 63%|██████▎   | 189/300 [01:26<00:28,  3.88it/s]
 63%|██████▎   | 190/300 [01:26<00:25,  4.27it/s]
 11%|█         | 32/300 [01:21<08:14,  1.84s/it]
 40%|██

== Status ==
Current time: 2022-12-11 18:50:44 (running for 00:01:33.30)
Memory usage on this node: 6.7/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6944415523903444 | Iter 40.000: -0.691990213546281 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 8.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (8 RUNNING, 8 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   t

 41%|████      | 123/300 [01:25<01:22,  2.14it/s]
 11%|█▏        | 34/300 [01:25<07:33,  1.70s/it]
 43%|████▎     | 129/300 [01:24<01:13,  2.33it/s]
 41%|████▏     | 124/300 [01:25<01:20,  2.20it/s]
 20%|██        | 60/300 [01:25<03:32,  1.13it/s]
 13%|█▎        | 39/300 [01:25<06:15,  1.44s/it]
 67%|██████▋   | 201/300 [01:30<00:38,  2.57it/s]
 43%|████▎     | 130/300 [01:24<01:10,  2.40it/s]
 67%|██████▋   | 202/300 [01:30<00:32,  2.99it/s]
 42%|████▏     | 125/300 [01:25<01:15,  2.31it/s]
 68%|██████▊   | 203/300 [01:30<00:28,  3.40it/s]
 68%|██████▊   | 204/300 [01:30<00:25,  3.78it/s]
 25%|██▌       | 76/300 [01:25<03:22,  1.11it/s]
 68%|██████▊   | 205/300 [01:30<00:22,  4.14it/s]
 12%|█▏        | 35/300 [01:26<07:17,  1.65s/it]
 44%|████▎     | 131/300 [01:25<01:33,  1.80it/s]
 26%|██▌       | 77/300 [01:26<03:00,  1.24it/s]
 42%|████▏     | 126/300 [01:26<01:37,  1.78it/s]
 12%|█▏        | 35/300 [01:26<07:21,  1.67s/it]
 44%|████▍     | 132/300 [01:26<01:22,  2.03it/s]
 13%|█▎

[2m[36m(run_one_training pid=11341)[0m Early stopping due to no improvement.


 71%|███████   | 213/300 [01:33<00:23,  3.67it/s]
 44%|████▎     | 131/300 [01:28<01:27,  1.92it/s]
 71%|███████▏  | 214/300 [01:33<00:20,  4.13it/s]
 12%|█▏        | 36/300 [01:29<08:32,  1.94s/it]
 72%|███████▏  | 215/300 [01:33<00:19,  4.46it/s]
 12%|█▏        | 36/300 [01:29<08:34,  1.95s/it]
 14%|█▎        | 41/300 [01:29<07:20,  1.70s/it]
 44%|████▍     | 132/300 [01:29<01:19,  2.11it/s]
 21%|██▏       | 64/300 [01:29<03:30,  1.12it/s]
 44%|████▍     | 133/300 [01:29<01:11,  2.33it/s]
 27%|██▋       | 81/300 [01:29<03:02,  1.20it/s]
 72%|███████▏  | 216/300 [01:34<00:29,  2.85it/s]
 45%|████▍     | 134/300 [01:30<01:09,  2.39it/s]
 72%|███████▏  | 217/300 [01:34<00:25,  3.20it/s]


== Status ==
Current time: 2022-12-11 18:50:49 (running for 00:01:38.42)
Memory usage on this node: 6.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6913118826535841 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 7.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (7 RUNNING, 9 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 22%|██▏       | 65/300 [01:30<03:22,  1.16it/s]
 73%|███████▎  | 218/300 [01:34<00:23,  3.55it/s]
 27%|██▋       | 82/300 [01:29<02:49,  1.29it/s]
 73%|███████▎  | 219/300 [01:35<00:20,  3.89it/s]
 12%|█▏        | 37/300 [01:30<07:49,  1.79s/it]
 45%|████▌     | 135/300 [01:30<01:06,  2.47it/s]
 14%|█▍        | 42/300 [01:30<06:43,  1.57s/it]
 73%|███████▎  | 220/300 [01:35<00:19,  4.03it/s]
 12%|█▏        | 37/300 [01:30<07:55,  1.81s/it]
 28%|██▊       | 83/300 [01:30<02:40,  1.36it/s]
 45%|████▌     | 136/300 [01:31<01:26,  1.90it/s]
 74%|███████▎  | 221/300 [01:35<00:28,  2.79it/s]
 74%|███████▍  | 222/300 [01:36<00:23,  3.32it/s]
 28%|██▊       | 84/300 [01:31<02:28,  1.46it/s]
 46%|████▌     | 137/300 [01:31<01:17,  2.11it/s]
 74%|███████▍  | 223/300 [01:36<00:20,  3.73it/s]
 14%|█▍        | 43/300 [01:31<06:17,  1.47s/it]
 22%|██▏       | 66/300 [01:31<04:11,  1.07s/it]
 75%|███████▍  | 224/300 [01:36<00:18,  4.07it/s]
 13%|█▎        | 38/300 [01:31<07:18,  1.67s/it]
 46%|████▌

== Status ==
Current time: 2022-12-11 18:50:54 (running for 00:01:43.59)
Memory usage on this node: 6.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6913118826535841 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 49%|████▊     | 146/300 [01:35<01:13,  2.10it/s]
 79%|███████▉  | 237/300 [01:40<00:17,  3.56it/s]
 30%|███       | 90/300 [01:35<02:07,  1.65it/s]
 79%|███████▉  | 238/300 [01:40<00:15,  4.01it/s]
 49%|████▉     | 147/300 [01:35<01:06,  2.29it/s]
 80%|███████▉  | 239/300 [01:40<00:14,  4.32it/s]
 80%|████████  | 240/300 [01:40<00:12,  4.68it/s]
 49%|████▉     | 148/300 [01:36<01:02,  2.45it/s]
 24%|██▎       | 71/300 [01:35<03:47,  1.01it/s]
 15%|█▌        | 46/300 [01:36<06:38,  1.57s/it]
 50%|████▉     | 149/300 [01:36<00:56,  2.68it/s]
 80%|████████  | 241/300 [01:41<00:18,  3.18it/s]
 30%|███       | 91/300 [01:36<02:37,  1.33it/s]
 50%|█████     | 150/300 [01:36<00:52,  2.85it/s]
 81%|████████  | 242/300 [01:41<00:15,  3.71it/s]
 24%|██▍       | 72/300 [01:36<03:21,  1.13it/s]
 14%|█▎        | 41/300 [01:36<07:30,  1.74s/it]
 81%|████████  | 243/300 [01:41<00:13,  4.26it/s]
 81%|████████▏ | 244/300 [01:41<00:12,  4.59it/s]
 31%|███       | 92/300 [01:36<02:21,  1.47it/s]
 16%|█▌

== Status ==
Current time: 2022-12-11 18:51:00 (running for 00:01:48.69)
Memory usage on this node: 6.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.696372602134943 | Iter 80.000: -0.6913118826535841 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 6.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (6 RUNNING, 10 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |  

 53%|█████▎    | 160/300 [01:40<00:48,  2.89it/s]
 86%|████████▌ | 258/300 [01:45<00:09,  4.21it/s]
 15%|█▍        | 44/300 [01:40<05:59,  1.40s/it]
 26%|██▌       | 77/300 [01:40<03:03,  1.22it/s]
 86%|████████▋ | 259/300 [01:45<00:08,  4.64it/s]
 33%|███▎      | 98/300 [01:40<02:03,  1.64it/s]
 87%|████████▋ | 260/300 [01:45<00:08,  4.74it/s]
 54%|█████▎    | 161/300 [01:41<01:02,  2.22it/s]
 33%|███▎      | 99/300 [01:40<02:00,  1.67it/s]
 26%|██▌       | 78/300 [01:41<02:52,  1.28it/s]
 87%|████████▋ | 261/300 [01:46<00:12,  3.14it/s]
 54%|█████▍    | 162/300 [01:41<00:55,  2.49it/s]
 87%|████████▋ | 262/300 [01:46<00:10,  3.70it/s]
 33%|███▎      | 100/300 [01:41<01:53,  1.75it/s]
 54%|█████▍    | 163/300 [01:41<00:51,  2.67it/s]
 88%|████████▊ | 263/300 [01:46<00:08,  4.18it/s]
 15%|█▌        | 45/300 [01:41<05:50,  1.37s/it]
 88%|████████▊ | 264/300 [01:46<00:08,  4.47it/s]
 26%|██▋       | 79/300 [01:41<02:42,  1.36it/s]
 55%|█████▍    | 164/300 [01:42<00:49,  2.77it/s]
 88%|██

[2m[36m(run_one_training pid=11236)[0m Early stopping due to no improvement.


 55%|█████▌    | 166/300 [01:43<00:58,  2.29it/s]
 34%|███▍      | 102/300 [01:42<02:07,  1.56it/s]
 56%|█████▌    | 167/300 [01:43<00:51,  2.58it/s]
 17%|█▋        | 52/300 [01:43<05:19,  1.29s/it]
 56%|█████▌    | 168/300 [01:43<00:47,  2.76it/s]
 34%|███▍      | 103/300 [01:43<01:58,  1.67it/s]
 15%|█▌        | 46/300 [01:43<06:42,  1.58s/it]
 56%|█████▋    | 169/300 [01:43<00:44,  2.98it/s]
 27%|██▋       | 81/300 [01:43<03:12,  1.14it/s]
 57%|█████▋    | 170/300 [01:44<00:41,  3.11it/s]
 18%|█▊        | 53/300 [01:44<04:52,  1.18s/it]
 35%|███▍      | 104/300 [01:43<01:49,  1.79it/s]
 27%|██▋       | 82/300 [01:44<02:52,  1.26it/s]
 35%|███▌      | 105/300 [01:44<01:43,  1.88it/s]
 57%|█████▋    | 171/300 [01:44<00:52,  2.45it/s]
 16%|█▌        | 47/300 [01:45<06:07,  1.45s/it]
 57%|█████▋    | 172/300 [01:45<00:48,  2.64it/s]
 28%|██▊       | 83/300 [01:44<02:43,  1.33it/s]
 18%|█▊        | 54/300 [01:45<04:39,  1.13s/it]
 58%|█████▊    | 173/300 [01:45<00:45,  2.81it/s]
 35%|███

== Status ==
Current time: 2022-12-11 18:51:05 (running for 00:01:53.93)
Memory usage on this node: 5.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 5.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (5 RUNNING, 11 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 58%|█████▊    | 174/300 [01:45<00:41,  3.00it/s]
 28%|██▊       | 84/300 [01:45<02:34,  1.40it/s]
 58%|█████▊    | 175/300 [01:45<00:39,  3.17it/s]
 16%|█▌        | 48/300 [01:46<05:47,  1.38s/it]
 36%|███▌      | 107/300 [01:45<02:01,  1.59it/s]
 18%|█▊        | 55/300 [01:46<04:26,  1.09s/it]
 28%|██▊       | 85/300 [01:46<02:25,  1.47it/s]
 59%|█████▊    | 176/300 [01:46<00:50,  2.44it/s]
 36%|███▌      | 108/300 [01:46<01:50,  1.74it/s]
 59%|█████▉    | 177/300 [01:46<00:45,  2.71it/s]
 59%|█████▉    | 178/300 [01:47<00:41,  2.92it/s]
 36%|███▋      | 109/300 [01:46<01:44,  1.84it/s]
 16%|█▋        | 49/300 [01:47<05:30,  1.32s/it]
 60%|█████▉    | 179/300 [01:47<00:38,  3.10it/s]
 29%|██▊       | 86/300 [01:47<02:59,  1.19it/s]
 37%|███▋      | 110/300 [01:47<01:38,  1.92it/s]
 60%|██████    | 180/300 [01:47<00:37,  3.16it/s]
 19%|█▊        | 56/300 [01:47<05:18,  1.31s/it]
 29%|██▉       | 87/300 [01:48<02:43,  1.30it/s]
 60%|██████    | 181/300 [01:48<01:11,  1.67it/s]
 17%|█▋ 

[2m[36m(run_one_training pid=11349)[0m Early stopping due to no improvement.


 37%|███▋      | 111/300 [01:48<02:05,  1.51it/s]
 29%|██▉       | 88/300 [01:48<02:30,  1.41it/s]
 19%|█▉        | 57/300 [01:48<04:49,  1.19s/it]
 37%|███▋      | 112/300 [01:48<01:51,  1.68it/s]
 30%|██▉       | 89/300 [01:49<02:23,  1.47it/s]
 38%|███▊      | 113/300 [01:49<01:45,  1.78it/s]
 30%|███       | 90/300 [01:49<02:16,  1.54it/s]
 19%|█▉        | 58/300 [01:49<04:29,  1.11s/it]
 38%|███▊      | 114/300 [01:49<01:38,  1.89it/s]
 17%|█▋        | 51/300 [01:50<06:01,  1.45s/it]
 38%|███▊      | 115/300 [01:49<01:31,  2.02it/s]


== Status ==
Current time: 2022-12-11 18:51:10 (running for 00:01:59.02)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 20%|█▉        | 59/300 [01:50<04:13,  1.05s/it]
 30%|███       | 91/300 [01:50<02:50,  1.23it/s]
 39%|███▊      | 116/300 [01:50<01:56,  1.58it/s]
 17%|█▋        | 52/300 [01:51<05:34,  1.35s/it]
 31%|███       | 92/300 [01:51<02:36,  1.33it/s]
 20%|██        | 60/300 [01:51<04:03,  1.02s/it]
 39%|███▉      | 117/300 [01:51<01:46,  1.72it/s]
 31%|███       | 93/300 [01:52<02:22,  1.45it/s]
 39%|███▉      | 118/300 [01:51<01:38,  1.84it/s]
 18%|█▊        | 53/300 [01:52<05:11,  1.26s/it]
 40%|███▉      | 119/300 [01:52<01:33,  1.94it/s]
 31%|███▏      | 94/300 [01:52<02:14,  1.53it/s]
 40%|████      | 120/300 [01:52<01:28,  2.04it/s]
 32%|███▏      | 95/300 [01:53<02:08,  1.59it/s]
 20%|██        | 61/300 [01:53<04:55,  1.24s/it]
 18%|█▊        | 54/300 [01:53<04:56,  1.21s/it]
 40%|████      | 121/300 [01:53<01:50,  1.62it/s]
 21%|██        | 62/300 [01:54<04:27,  1.12s/it]
 41%|████      | 122/300 [01:54<01:39,  1.78it/s]
 32%|███▏      | 96/300 [01:54<02:41,  1.26it/s]
 18%|█▊      

== Status ==
Current time: 2022-12-11 18:51:15 (running for 00:02:04.02)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 21%|██▏       | 64/300 [01:55<03:58,  1.01s/it]
 33%|███▎      | 99/300 [01:56<02:08,  1.57it/s]
 19%|█▊        | 56/300 [01:56<05:36,  1.38s/it]
 42%|████▏     | 126/300 [01:56<01:45,  1.64it/s]
 33%|███▎      | 100/300 [01:56<02:03,  1.62it/s]
 22%|██▏       | 65/300 [01:56<03:47,  1.03it/s]
 42%|████▏     | 127/300 [01:56<01:36,  1.80it/s]
 43%|████▎     | 128/300 [01:57<01:29,  1.92it/s]
 19%|█▉        | 57/300 [01:57<05:13,  1.29s/it]
 34%|███▎      | 101/300 [01:57<02:34,  1.29it/s]
 43%|████▎     | 129/300 [01:57<01:26,  1.99it/s]
 43%|████▎     | 130/300 [01:58<01:22,  2.05it/s]
 34%|███▍      | 102/300 [01:58<02:20,  1.41it/s]
 22%|██▏       | 66/300 [01:58<04:43,  1.21s/it]
 19%|█▉        | 58/300 [01:58<04:59,  1.24s/it]
 34%|███▍      | 103/300 [01:58<02:11,  1.50it/s]
 44%|████▎     | 131/300 [01:59<01:44,  1.62it/s]
 35%|███▍      | 104/300 [01:59<02:04,  1.57it/s]
 22%|██▏       | 67/300 [01:59<04:18,  1.11s/it]
 20%|█▉        | 59/300 [01:59<04:47,  1.19s/it]
 44%|████

== Status ==
Current time: 2022-12-11 18:51:20 (running for 00:02:09.07)
Memory usage on this node: 5.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 4.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (4 RUNNING, 12 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 20%|██        | 60/300 [02:01<04:40,  1.17s/it]
 45%|████▌     | 135/300 [02:00<01:23,  1.97it/s]
 35%|███▌      | 106/300 [02:01<02:34,  1.26it/s]
 23%|██▎       | 69/300 [02:01<03:57,  1.03s/it]
 36%|███▌      | 107/300 [02:01<02:20,  1.37it/s]


[2m[36m(run_one_training pid=11346)[0m Early stopping due to no improvement.


 45%|████▌     | 136/300 [02:01<02:26,  1.12it/s]
 23%|██▎       | 70/300 [02:02<03:49,  1.00it/s]
 36%|███▌      | 108/300 [02:02<02:08,  1.49it/s]
 20%|██        | 61/300 [02:02<05:29,  1.38s/it]
 36%|███▋      | 109/300 [02:02<01:59,  1.60it/s]
 37%|███▋      | 110/300 [02:03<01:53,  1.68it/s]
 21%|██        | 62/300 [02:04<05:05,  1.28s/it]
 24%|██▎       | 71/300 [02:03<04:32,  1.19s/it]
 37%|███▋      | 111/300 [02:04<02:22,  1.33it/s]
 21%|██        | 63/300 [02:05<04:46,  1.21s/it]
 24%|██▍       | 72/300 [02:04<04:08,  1.09s/it]
 37%|███▋      | 112/300 [02:05<02:09,  1.45it/s]


== Status ==
Current time: 2022-12-11 18:51:25 (running for 00:02:14.11)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 38%|███▊      | 113/300 [02:05<02:00,  1.55it/s]
 24%|██▍       | 73/300 [02:05<03:53,  1.03s/it]
 21%|██▏       | 64/300 [02:06<04:33,  1.16s/it]
 38%|███▊      | 114/300 [02:06<01:55,  1.61it/s]
 25%|██▍       | 74/300 [02:06<03:43,  1.01it/s]
 38%|███▊      | 115/300 [02:06<01:49,  1.69it/s]
 22%|██▏       | 65/300 [02:07<04:28,  1.14s/it]
 25%|██▌       | 75/300 [02:07<03:34,  1.05it/s]
 39%|███▊      | 116/300 [02:07<02:18,  1.33it/s]
 39%|███▉      | 117/300 [02:08<02:05,  1.46it/s]
 22%|██▏       | 66/300 [02:08<05:11,  1.33s/it]
 39%|███▉      | 118/300 [02:08<01:55,  1.58it/s]
 25%|██▌       | 76/300 [02:09<04:18,  1.16s/it]
 40%|███▉      | 119/300 [02:09<01:49,  1.65it/s]
 22%|██▏       | 67/300 [02:09<04:48,  1.24s/it]
 40%|████      | 120/300 [02:10<01:45,  1.71it/s]
 26%|██▌       | 77/300 [02:09<03:58,  1.07s/it]


== Status ==
Current time: 2022-12-11 18:51:30 (running for 00:02:19.17)
Memory usage on this node: 5.3/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.69130543867747 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 3.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (3 RUNNING, 13 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   

 23%|██▎       | 68/300 [02:11<04:33,  1.18s/it]
 26%|██▌       | 78/300 [02:10<03:42,  1.00s/it]
 40%|████      | 121/300 [02:11<02:15,  1.32it/s]
 41%|████      | 122/300 [02:11<02:03,  1.45it/s]
 26%|██▋       | 79/300 [02:11<03:35,  1.03it/s]
 23%|██▎       | 69/300 [02:12<04:26,  1.15s/it]
 41%|████      | 123/300 [02:12<01:54,  1.54it/s]
 41%|████▏     | 124/300 [02:12<01:48,  1.62it/s]
 23%|██▎       | 70/300 [02:13<04:16,  1.11s/it]
 42%|████▏     | 125/300 [02:13<01:42,  1.71it/s]
 42%|████▏     | 126/300 [02:14<02:09,  1.35it/s]
 24%|██▎       | 71/300 [02:14<04:55,  1.29s/it]
 42%|████▏     | 127/300 [02:14<01:58,  1.46it/s]
 43%|████▎     | 128/300 [02:15<01:50,  1.56it/s]
 24%|██▍       | 72/300 [02:15<04:36,  1.21s/it]


== Status ==
Current time: 2022-12-11 18:51:35 (running for 00:02:24.42)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913118826535841 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 43%|████▎     | 129/300 [02:16<01:44,  1.64it/s]
 43%|████▎     | 130/300 [02:16<01:40,  1.69it/s]
 24%|██▍       | 73/300 [02:16<04:23,  1.16s/it]
 25%|██▍       | 74/300 [02:17<04:11,  1.11s/it]
 44%|████▎     | 131/300 [02:17<02:05,  1.34it/s]
 44%|████▍     | 132/300 [02:18<01:57,  1.43it/s]
 25%|██▌       | 75/300 [02:18<04:07,  1.10s/it]
 44%|████▍     | 133/300 [02:18<01:48,  1.53it/s]
 45%|████▍     | 134/300 [02:19<01:41,  1.64it/s]
 45%|████▌     | 135/300 [02:19<01:36,  1.72it/s]
 25%|██▌       | 76/300 [02:20<04:48,  1.29s/it]
 45%|████▌     | 136/300 [02:20<01:59,  1.37it/s]


== Status ==
Current time: 2022-12-11 18:51:41 (running for 00:02:29.85)
Memory usage on this node: 5.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913118826535841 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 2.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (2 RUNNING, 14 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 46%|████▌     | 137/300 [02:21<01:49,  1.48it/s]
 26%|██▌       | 77/300 [02:21<04:28,  1.20s/it]
 46%|████▌     | 138/300 [02:22<01:42,  1.58it/s]
 46%|████▋     | 139/300 [02:22<01:35,  1.68it/s]
 26%|██▌       | 78/300 [02:22<04:13,  1.14s/it]
 47%|████▋     | 140/300 [02:23<01:32,  1.73it/s]
 26%|██▋       | 79/300 [02:23<04:03,  1.10s/it]
 47%|████▋     | 141/300 [02:24<02:42,  1.02s/it]


[2m[36m(run_one_training pid=11343)[0m Early stopping due to no improvement.


 27%|██▋       | 80/300 [02:24<03:54,  1.07s/it]
 27%|██▋       | 81/300 [02:26<04:36,  1.26s/it]


== Status ==
Current time: 2022-12-11 18:51:46 (running for 00:02:35.56)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 27%|██▋       | 82/300 [02:27<04:16,  1.18s/it]
 28%|██▊       | 83/300 [02:28<04:03,  1.12s/it]
 28%|██▊       | 84/300 [02:29<03:53,  1.08s/it]
 28%|██▊       | 85/300 [02:30<03:46,  1.05s/it]
 29%|██▊       | 86/300 [02:32<04:28,  1.25s/it]


== Status ==
Current time: 2022-12-11 18:51:52 (running for 00:02:41.23)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 29%|██▉       | 87/300 [02:33<04:09,  1.17s/it]
 29%|██▉       | 88/300 [02:34<03:57,  1.12s/it]
 30%|██▉       | 89/300 [02:35<03:48,  1.08s/it]
 30%|███       | 90/300 [02:36<03:42,  1.06s/it]
 30%|███       | 91/300 [02:37<04:23,  1.26s/it]


== Status ==
Current time: 2022-12-11 18:51:58 (running for 00:02:46.92)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 31%|███       | 92/300 [02:38<04:03,  1.17s/it]
 31%|███       | 93/300 [02:39<03:51,  1.12s/it]
 31%|███▏      | 94/300 [02:40<03:42,  1.08s/it]
 32%|███▏      | 95/300 [02:41<03:34,  1.05s/it]
 32%|███▏      | 96/300 [02:43<04:45,  1.40s/it]


== Status ==
Current time: 2022-12-11 18:52:04 (running for 00:02:53.07)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 32%|███▏      | 97/300 [02:44<04:18,  1.27s/it]
 33%|███▎      | 98/300 [02:45<04:01,  1.20s/it]
 33%|███▎      | 99/300 [02:46<03:46,  1.13s/it]
 33%|███▎      | 100/300 [02:47<03:35,  1.08s/it]
 34%|███▎      | 101/300 [02:49<04:15,  1.28s/it]


== Status ==
Current time: 2022-12-11 18:52:10 (running for 00:02:58.76)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 34%|███▍      | 102/300 [02:50<03:56,  1.19s/it]
 34%|███▍      | 103/300 [02:51<03:43,  1.13s/it]
 35%|███▍      | 104/300 [02:52<03:32,  1.09s/it]
 35%|███▌      | 105/300 [02:53<03:24,  1.05s/it]
 35%|███▌      | 106/300 [02:55<04:00,  1.24s/it]


== Status ==
Current time: 2022-12-11 18:52:15 (running for 00:03:04.35)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 36%|███▌      | 107/300 [02:56<03:43,  1.16s/it]
 36%|███▌      | 108/300 [02:57<03:31,  1.10s/it]
 36%|███▋      | 109/300 [02:58<03:22,  1.06s/it]
 37%|███▋      | 110/300 [02:59<03:15,  1.03s/it]
 37%|███▋      | 111/300 [03:00<03:51,  1.23s/it]


== Status ==
Current time: 2022-12-11 18:52:21 (running for 00:03:09.89)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 37%|███▋      | 112/300 [03:01<03:35,  1.15s/it]
 38%|███▊      | 113/300 [03:02<03:24,  1.09s/it]
 38%|███▊      | 114/300 [03:03<03:15,  1.05s/it]
 38%|███▊      | 115/300 [03:04<03:09,  1.02s/it]
 39%|███▊      | 116/300 [03:06<03:47,  1.24s/it]


== Status ==
Current time: 2022-12-11 18:52:26 (running for 00:03:15.51)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 39%|███▉      | 117/300 [03:07<03:34,  1.17s/it]
 39%|███▉      | 118/300 [03:08<03:23,  1.12s/it]
 40%|███▉      | 119/300 [03:09<03:14,  1.08s/it]
 40%|████      | 120/300 [03:10<03:07,  1.04s/it]
 40%|████      | 121/300 [03:12<03:42,  1.24s/it]


== Status ==
Current time: 2022-12-11 18:52:32 (running for 00:03:21.14)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 41%|████      | 122/300 [03:12<03:26,  1.16s/it]
 41%|████      | 123/300 [03:13<03:16,  1.11s/it]
 41%|████▏     | 124/300 [03:14<03:07,  1.07s/it]
 42%|████▏     | 125/300 [03:15<03:02,  1.04s/it]
 42%|████▏     | 126/300 [03:17<03:37,  1.25s/it]


== Status ==
Current time: 2022-12-11 18:52:38 (running for 00:03:26.80)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 42%|████▏     | 127/300 [03:18<03:22,  1.17s/it]
 43%|████▎     | 128/300 [03:19<03:12,  1.12s/it]
 43%|████▎     | 129/300 [03:20<03:03,  1.07s/it]
 43%|████▎     | 130/300 [03:21<02:56,  1.04s/it]
 44%|████▎     | 131/300 [03:23<03:29,  1.24s/it]


== Status ==
Current time: 2022-12-11 18:52:43 (running for 00:03:32.39)
Memory usage on this node: 4.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 1.0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (1 RUNNING, 15 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |

 44%|████▍     | 132/300 [03:24<03:14,  1.16s/it]
 44%|████▍     | 133/300 [03:25<03:04,  1.10s/it]
 45%|████▍     | 134/300 [03:26<02:56,  1.07s/it]
 45%|████▌     | 135/300 [03:27<02:50,  1.03s/it]


[2m[36m(run_one_training pid=11337)[0m Early stopping due to no improvement.


 45%|████▌     | 136/300 [03:28<04:11,  1.54s/it]
2022-12-11 18:52:48,519	INFO tune.py:777 -- Total run time: 217.18 seconds (217.04 seconds for the tuning loop).


== Status ==
Current time: 2022-12-11 18:52:48 (running for 00:03:37.05)
Memory usage on this node: 4.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 160.000: -0.6931263001558061 | Iter 80.000: -0.6913105995239069 | Iter 40.000: -0.6913367883923153 | Iter 20.000: -0.6931748799979687 | Iter 10.000: -0.7021237503116329
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/11.6 GiB heap, 0.0/5.8 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-11_18-49-11
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_i