In [1]:
# Switch the kernel's working directory to the project checkout.
# NOTE(review): the `src.*` imports and the relative `./data/...` paths used in later cells
# presumably depend on this working directory — confirm before relocating the notebook.
cd /home/tvangraft/tudelft/thesis/metaengineering

/home/tvangraft/tudelft/thesis/metaengineering


In [2]:
from collections import defaultdict
from typing import DefaultDict, List, Hashable, Dict, Any

from src.utils.utils import get_generator, get_project_root
from src.utils.test_result_store import TestResultStore

from src.pipeline.config import DataLoaderConfig, TaskLoaderConfig
from src.pipeline.taskloader import TaskLoader, TaskFrame
from src.pipeline.dataloader import DataLoader

from src.orchestrator.trainer import Trainer

from src.settings.tier import Tier
from src.settings.strategy import Strategy
from src.settings.metabolites import ENZYMES, METABOLITES, PRECURSOR_METABOLITES, PRECURSOR_METABOLITES_NO_TRANSFORM

from src.gnn.data_augmentation import DataAugmentation
from src.gnn.embeddings import generate_embedding
from src.gnn.graph_builder import get_samples_hetero_graph, get_graph_fc, get_graph_fc_protein_only, edge_index_from_df_protein_only, get_samples_graph
from src.gnn.shared_training import count_parameters, log_metrics, tune_metabolite_hyper_parameters

import pandas as pd
import numpy as np

import cobra
from cobra.util import create_stoichiometric_matrix
from cobra.core import Reaction

import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns

from more_itertools import flatten

from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.metrics import mean_absolute_error

from scipy.stats import pearsonr

import os
from functools import partial

from tqdm import tqdm

import torch
from torch.nn import BatchNorm1d, ModuleList
import torch.nn.functional as F

from torch_geometric.data import Data, HeteroData
from torch_geometric.utils import from_networkx, to_networkx
from torch_geometric.loader import DataLoader as GeoDataLoader
from torch_geometric.nn import GAT, GCNConv, to_hetero, SAGEConv, GATConv, HeteroLinear, Linear, Node2Vec
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
import torch_geometric.transforms as T
from torch_geometric.nn.conv import HeteroConv

import mlflow.pytorch

from config import HYPERPARAMETERS, BEST_PARAMETERS

from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler, FIFOScheduler
from ray.air import session, RunConfig
from ray.tune.integration.mlflow import mlflow_mixin
from ray.tune.integration.mlflow import MLflowLoggerCallback

# Everything in this notebook is placed on the CPU.
# GPU alternative, currently disabled:
#   device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")

# MLflow calls in this notebook log to the tracking server at this URI.
mlflow.set_tracking_uri("http://localhost:5000")

# Seed torch's global RNG; kept as the last expression of the cell.
torch.manual_seed(42)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x7f916bc3c730>

In [3]:
# Directory containing the SBML file read below.
path = "/home/tvangraft/tudelft/thesis/metaengineering/data"

# Parse the iMM904 SBML file into a cobra model object.
model = cobra.io.read_sbml_model(path + '/iMM904.xml')

In [4]:
def prepare_data(path: str, valid_metabolites: List[str]):
    """Load an edge-list CSV and derive the graph inputs built from it.

    Args:
        path: Location of the edge-list CSV, read with ``pd.read_csv``.
        valid_metabolites: Metabolite identifiers forwarded to the graph-building helpers.

    Returns:
        A 4-tuple ``(edge_list_df, graph_fc, edge_index, embedding)``: the raw edge-list
        frame, the result of ``get_graph_fc_protein_only``, the result of
        ``edge_index_from_df_protein_only`` and the result of ``generate_embedding``.
    """
    edges = pd.read_csv(path)
    fc_frame = get_graph_fc_protein_only(edges, valid_metabolites)
    connectivity = edge_index_from_df_protein_only(fc_frame, edges, valid_metabolites)

    # The embedding helper receives the transposed edge index and the notebook-wide device.
    node_embedding = generate_embedding(connectivity.T, device, hetero=False)

    return edges, fc_frame, connectivity, node_embedding

# Model

In [19]:
class GATModel(torch.nn.Module):
    """Graph-level regressor built from a stack of ``GATConv`` layers.

    ``model_config`` must provide:
        * ``model_embedding_size`` – per-head output width of every ``GATConv`` and the
          width of the linear projections.
        * ``model_attention_heads`` – number of attention heads per ``GATConv``.
        * ``model_layers`` – number of stacked conv + projection layers (must be >= 1).
        * ``output_size`` – width of the final prediction.
    """

    def __init__(self, model_config) -> None:
        super(GATModel, self).__init__()
        embedding_size = model_config["model_embedding_size"]
        n_heads = model_config["model_attention_heads"]
        self.n_layers = model_config["model_layers"]

        # forward() sums one readout per stacked layer; with zero layers that sum would be
        # the Python int 0 and fail inside linear1 with an unhelpful error.
        if self.n_layers < 1:
            raise ValueError(f"model_layers must be >= 1, got {self.n_layers}")

        self.conv_layers = ModuleList([])
        self.transf_layers = ModuleList([])
        # These two lists are never populated; kept so the module's attributes are unchanged.
        self.pooling_layers = ModuleList([])
        self.bn_layers = ModuleList([])

        # Input convolution; in_channels=-1 lets the input width be inferred on first use.
        self.conv1 = GATConv(
            -1, out_channels=embedding_size, heads=n_heads, add_self_loops=False, bias=False
        )
        # NOTE(review): transf1 is registered (so it shows up in the parameters and state
        # dict) but forward() never applies it. Left untouched here because wiring it in
        # would change the model's outputs — confirm the intended architecture.
        self.transf1 = Linear(
            in_channels=embedding_size*n_heads,
            out_channels=embedding_size,
        )

        for _ in range(self.n_layers):
            self.conv_layers.append(
                GATConv(
                    -1,
                    out_channels=embedding_size,
                    heads=n_heads,
                    add_self_loops=False
                )
            )
            # Projects the concatenated heads back down to embedding_size.
            self.transf_layers.append(
                Linear(
                    embedding_size*n_heads,
                    embedding_size
                )
            )

        # The readout concatenates max- and mean-pooling, hence embedding_size * 2 inputs.
        self.linear1 = Linear(embedding_size * 2, embedding_size)
        self.linear2 = Linear(embedding_size, model_config['output_size'])

    def forward(self, x, edge_index, batch_index):
        """Compute one prediction row per graph referenced by ``batch_index``.

        Args:
            x: Node feature matrix.
            edge_index: Edge index tensor passed to every ``GATConv``.
            batch_index: Node-to-graph assignment used by the global pooling operators.

        Returns:
            Tensor whose last dimension is ``output_size``.
        """
        x = self.conv1(x, edge_index)

        global_representation = []

        for conv, transf in zip(self.conv_layers, self.transf_layers):
            x = conv(x, edge_index)
            x = torch.relu(transf(x))
            # Graph-level readout of this layer: [max-pool || mean-pool].
            global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))

        # Combine the per-layer readouts by element-wise summation.
        x = sum(global_representation)
        x = torch.relu(self.linear1(x))
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, p=0.8, training=self.training)
        x = self.linear2(x)
        return x

# Setup functions

In [61]:
def train_one_epoch(epoch, model, train_loader, optimizer, loss_fn, debug=False):
    """Run one optimisation pass over ``train_loader`` and return the mean batch loss.

    Args:
        epoch: Index of the current epoch (not used inside this function).
        model: Module called as ``model(x, edge_index, batch)``.
        train_loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch`` and ``y``
            (and ``train_mask`` when ``debug`` is true).
        optimizer: Optimiser stepped once per batch.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        debug: When true, print shape and value statistics for every batch.

    Returns:
        The average of the per-batch losses, which is also reported via ``tune.report``.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    running_loss = 0.0
    step = 0
    for batch in train_loader:
        batch = batch.to(device)
        # Gradients accumulate by default, so clear them before every batch.
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so registered hooks are honoured.
        pred = model(
            batch.x,
            batch.edge_index,
            batch.batch
        )

        if debug:
            # The mask is only needed for these diagnostics.
            train_mask = torch.reshape(batch.train_mask.bool(), pred.shape)
            print(f"{pred.shape=}")
            print(f"{train_mask.sum()=}")
            print(
                f"{pred.shape=} \n"
                f"{pred[train_mask].mean()=} \n"
                f"{pred[train_mask].max()=} \n"
                f"{pred[train_mask].min()=} \n"
                f"{pred[train_mask].shape=} \n"
            )
            print(
                f"{torch.squeeze(pred).shape} \n"
                f"{torch.squeeze(batch.y.float()).shape} \n"
            )

        # NOTE(review): the loss covers every prediction; train_mask is not applied here,
        # whereas test() restricts its loss to test_mask. Confirm that this is intended.
        loss = loss_fn(
            torch.squeeze(pred),
            torch.squeeze(torch.reshape(batch.y.float(), pred.shape))
        )

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        step += 1

    if step == 0:
        raise ValueError("train_loader yielded no batches; cannot compute an epoch loss")

    mean_loss = running_loss / step
    tune.report(loss=mean_loss)

    return mean_loss
    
@mlflow_mixin
def test(epoch, model, test_loader, loss_fn, debug=False):
    """Evaluate ``model`` on ``test_loader`` and return the mean masked batch loss.

    Only the entries selected by each batch's ``test_mask`` contribute to the loss and to
    the predictions/labels handed to ``log_metrics``. NaNs in predictions and labels are
    replaced via ``torch.nan_to_num`` before use.

    Args:
        epoch: Epoch index, forwarded to ``log_metrics``.
        model: Module called as ``model(x, edge_index, batch)``.
        test_loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``, ``y``
            and ``test_mask``.
        loss_fn: Callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        debug: When true, print statistics of the masked predictions for every batch.

    Returns:
        The average of the per-batch losses.
    """
    all_preds_raw = []
    all_labels = []
    # Knockout ids are not collected for this model; log_metrics receives an empty list.
    all_knockout_ids = []
    running_loss = 0.0
    step = 0

    # Evaluation needs no gradients; skip building the autograd graph.
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            pred = model(
                batch.x,
                batch.edge_index,
                batch.batch,
            )
            test_mask = torch.reshape(batch.test_mask.bool(), pred.shape)

            if debug:
                print(f"{test_mask.sum()=}")
                print(
                    f"{pred.shape=} \n"
                    f"{pred[test_mask].mean()=} \n"
                    f"{pred[test_mask].max()=} \n"
                    f"{pred[test_mask].min()=} \n"
                    f"{pred[test_mask].shape=} \n"
                )

            # batch.y is indexed with the squeezed mask, exactly as before.
            flat_mask = torch.squeeze(test_mask)
            masked_pred = torch.nan_to_num(pred[test_mask])

            loss = loss_fn(
                torch.squeeze(masked_pred),
                torch.squeeze(torch.nan_to_num(batch.y.float()[flat_mask]))
            )

            running_loss += loss.item()
            step += 1
            all_preds_raw.append(masked_pred.cpu().detach().numpy())
            all_labels.append(torch.nan_to_num(batch.y[flat_mask]).cpu().detach().numpy())

    all_preds_raw = np.concatenate(all_preds_raw).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    log_metrics(all_preds_raw, all_labels, all_knockout_ids, epoch, "test")
    return running_loss/step

def build_model(model_config, output_size):
    """Log the configuration to MLflow and construct a ``GATModel`` on ``device``.

    Args:
        model_config: Model/training settings. ``output_size`` is written into this dict
            in place, so the caller's dict is modified.
        output_size: Number of values the model should predict.

    Returns:
        The ``GATModel`` instance after ``.to(device)``.
    """
    print(f"creating model {model_config=}")

    # Record the expected number of outputs on the (shared) config dict.
    model_config['output_size'] = output_size

    # When the config carries MLflow tags, attach this run to the given parent run.
    if 'mlflow' in model_config:
        parent_run_id = model_config['mlflow']['tags']['mlflow.parentRunId']
        mlflow.set_tag("mlflow.parentRunId", parent_run_id)

    # Every config entry is logged as an MLflow parameter.
    for name, value in model_config.items():
        mlflow.log_param(name, value)

    print("Loading model...")
    return GATModel(model_config=model_config).to(device)

@mlflow_mixin
def run_one_training(model_config, train_samples, test_samples, checkpoint_dir):
    """Train a ``GATModel`` with early stopping and report the best test loss.

    Args:
        model_config: Settings dict. This function reads ``batch_size``, ``learning_rate``,
            ``sgd_momentum`` and ``scheduler_gamma``; the remaining keys are consumed by
            ``build_model``.
        train_samples: Sequence of training graphs; ``train_samples[0].y.shape[0]`` sets
            the model's output size.
        test_samples: Sequence of evaluation graphs (evaluated with batch size 1).
        checkpoint_dir: Unused; kept for signature compatibility with the caller.

    Returns:
        ``{"loss": best_loss}`` where ``best_loss`` is the lowest test loss observed.
    """
    model = build_model(model_config, train_samples[0].y.shape[0])

    train_loader = GeoDataLoader(train_samples, batch_size=model_config['batch_size'])
    test_loader = GeoDataLoader(test_samples, batch_size=1)

    loss_fn = torch.nn.MSELoss()
    # we need to keep the lr quite low since otherwise the weights explode
    optimizer = torch.optim.SGD(
        model.parameters(),
        lr=model_config['learning_rate'],
        momentum=model_config['sgd_momentum'],
    )
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=model_config['scheduler_gamma'])

    # Use this to debug the train and test function
    debug = False

    max_epochs = 300
    eval_every = 5
    # Counted in evaluations, not epochs: training stops once more than `patience`
    # consecutive evaluations (one every `eval_every` epochs) failed to improve.
    patience = 25

    # Start from +inf so the first evaluation always becomes the baseline, whatever its scale.
    best_loss = float("inf")
    early_stopping_counter = 0

    for epoch in tqdm(range(max_epochs)):
        if early_stopping_counter > patience:
            print("Early stopping due to no improvement.")
            break

        model.train()
        train_loss = train_one_epoch(epoch, model, train_loader, optimizer, loss_fn, debug=debug)
        mlflow.log_metric(key="Train loss", value=float(train_loss), step=epoch)

        model.eval()
        if epoch % eval_every == 0 or epoch == max_epochs - 1:
            test_loss = float(test(epoch, model, test_loader, loss_fn, debug=debug))
            mlflow.log_metric(key="Test loss", value=test_loss, step=epoch)

            if test_loss < best_loss:
                best_loss = test_loss
                early_stopping_counter = 0
            else:
                early_stopping_counter += 1

        scheduler.step()
        mlflow.log_metric(key="Learning rate", value=float(scheduler.get_last_lr()[0]), step=epoch)

    print(f"Finishing training with best test loss: {best_loss}")

    with torch.no_grad():
        # Run one forward pass with the same (x, edge_index, batch) call used during
        # training before counting parameters. The previous x_dict / edge_index_dict call
        # did not match GATModel.forward.
        batch = next(iter(train_loader)).to(device)
        model(batch.x, batch.edge_index, batch.batch)
        print(f"Number of parameters: {count_parameters(model)}")

    # Both the early-stopped and the full-length run share this tail, so the result is
    # always reported and the MLflow run is always closed.
    session.report({
        "loss": best_loss
    })

    mlflow.end_run()

    return {"loss": best_loss}

In [7]:
# Ray Tune search space for the sweep; each entry is sampled independently per trial.
# This rebinds the HYPERPARAMETERS name imported from `config` in the first cell.
HYPERPARAMETERS = dict(
    # Optimisation settings
    batch_size=tune.choice([2, 4, 8]),
    learning_rate=tune.choice([0.1, 0.05, 0.01, 0.001]),
    sgd_momentum=tune.choice([0.9, 0.8, 0.5]),
    scheduler_gamma=tune.choice([0.995, 1]),
    # Architecture settings read by GATModel
    model_embedding_size=tune.choice([8, 16, 32, 64, 128]),
    model_attention_heads=tune.choice([1, 2, 3, 4]),
    model_layers=tune.choice([1, 3, 5, 7]),
)

In [None]:
# Build the edge list, graph frame, edge index and node embedding for each of the three
# edge-list variants. The results are kept as module-level names for the cells below.
(
    edge_list_df_unfiltered,
    graph_fc_unfiltered,
    edge_index_unfiltered,
    embedding_unfiltered,
) = prepare_data(
    path='./data/training/edge_list_unfiltered_protein_only.csv',
    valid_metabolites=PRECURSOR_METABOLITES_NO_TRANSFORM,
)

(
    edge_list_df_strict,
    graph_fc_strict,
    edge_index_strict,
    embedding_strict,
) = prepare_data(
    path='./data/training/edge_list_strict_protein_only.csv',
    valid_metabolites=PRECURSOR_METABOLITES,
)

(
    edge_list_df_all,
    graph_fc_all,
    edge_index_all,
    embedding_all,
) = prepare_data(
    path='./data/training/edge_list_all_protein_only.csv',
    valid_metabolites=METABOLITES,
)

In [41]:
def get_train_test_split(mode, metabolite_id, strategy):
    """Build train and test samples for one metabolite from the chosen edge-list variant.

    Args:
        mode: One of ``'unfiltered'``, ``'strict'`` or ``'all'``; selects which set of
            precomputed module-level graph inputs is passed to ``get_samples_graph``.
        metabolite_id: Target metabolite identifier.
        strategy: Strategy value forwarded to ``get_samples_graph``.

    Returns:
        ``(train_samples, test_samples)`` as produced by ``get_samples_graph``.

    Raises:
        ValueError: If ``mode`` is not one of the supported values. Previously this fell
            through to an ``UnboundLocalError`` at the ``return`` statement.
    """
    # Only the globals belonging to the selected mode are touched.
    if mode == 'unfiltered':
        valid_metabolites = PRECURSOR_METABOLITES_NO_TRANSFORM
        graph_fc, edge_index, embedding = graph_fc_unfiltered, edge_index_unfiltered, embedding_unfiltered
    elif mode == 'strict':
        valid_metabolites = PRECURSOR_METABOLITES
        graph_fc, edge_index, embedding = graph_fc_strict, edge_index_strict, embedding_strict
    elif mode == 'all':
        valid_metabolites = METABOLITES
        graph_fc, edge_index, embedding = graph_fc_all, edge_index_all, embedding_all
    else:
        raise ValueError(
            f"Unknown mode {mode!r}; expected one of 'unfiltered', 'strict', 'all'"
        )

    train_samples, test_samples = get_samples_graph(
        target_metabolite_id=metabolite_id,
        strategy=strategy,
        valid_metabolites=valid_metabolites,
        graph_fc_df=graph_fc,
        edge_index=edge_index,
        node_embeddings=embedding,
    )
    return train_samples, test_samples

In [None]:
# Single training run with a fixed configuration, outside the hyper-parameter sweep.
# End any MLflow run that is still active before starting.
mlflow.end_run()

model_config = dict(
    batch_size=6,
    learning_rate=0.001,
    sgd_momentum=0.8,
    scheduler_gamma=1,
    model_embedding_size=16,
    model_attention_heads=1,
    model_layers=1,
)

train_samples, test_samples = get_train_test_split('unfiltered', 'pyr', Strategy.ALL)
print(test_samples[0])

# No checkpoint directory is supplied for this run.
run_one_training(model_config, train_samples, test_samples, None)
mlflow.end_run()

# Training

In [62]:
# Hyper-parameter sweep: for every strategy and edge-list variant, open one parent MLflow
# run and tune a model per metabolite in a nested run.
mlflow.end_run()

run_name = "model_gat_node_embeddings"
graph_fc_by_mode = [('unfiltered', graph_fc_unfiltered), ('strict', graph_fc_strict), ('all', graph_fc_all)]

for strategy in [Strategy.ALL, Strategy.ONE_VS_ALL, Strategy.METABOLITE_CENTRIC]:
    for mode, graph_fc in graph_fc_by_mode:
        experiment_name = f'protein_only_sweep_{mode}'
        mlflow.set_experiment(experiment_name)

        # Non-enzyme columns of the graph frame that are also precursor metabolites.
        # Sorted so the metabolites are processed in the same order on every run; plain
        # set iteration order over strings is not stable between kernel sessions.
        metabolite_ids = sorted(
            set(graph_fc.columns.difference(ENZYMES).to_list()) & set(PRECURSOR_METABOLITES)
        )

        with mlflow.start_run(run_name=run_name) as run:
            for metabolite_id in metabolite_ids:
                print(f"training {metabolite_id=}")
                train_samples, test_samples = get_train_test_split(mode, metabolite_id, strategy)
                print(len(train_samples))

                with mlflow.start_run(run_name=f"model_{metabolite_id}_{strategy}", nested=True):
                    result = tune_metabolite_hyper_parameters(experiment_name, HYPERPARAMETERS, train_samples, test_samples, run_fc=run_one_training, num_samples=16)
                    print(result)

== Status ==
Current time: 2022-12-19 12:24:24 (running for 00:00:22.32)
Memory usage on this node: 8.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: -0.5825647979974746 | Iter 10.000: -0.5837701894342899
Resources requested: 13.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-24-01
Number of trials: 16/16 (13 RUNNING, 3 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+------------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |       loss |   training_iteration |
|            

2022-12-19 12:26:58,707	INFO tune.py:762 -- Total run time: 176.79 seconds (176.66 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:26:58 (running for 00:02:56.67)
Memory usage on this node: 4.4/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5240097127854824 | Iter 80.000: -0.5704203397035599 | Iter 40.000: -0.5757669743150473 | Iter 20.000: -0.5778531371382996 | Iter 10.000: -0.5823181420564651
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-24-01
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   traini

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_0d7a4_00000,2022-12-19_12-27-37,True,,d8988e71b9a746d6b64fa36d99e66fcf,"0_batch_size=4,learning_rate=0.0500,model_attention_heads=1,model_embedding_size=64,model_layers=1,scheduler_gamma=1,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,193,0.817341,172.28.206.68,17390,34.9025,0.00335383,34.9025,1671449257,0,,193,0d7a4_00000,0.153913
run_one_training_0d7a4_00001,2022-12-19_12-27-16,True,,e2fdbd63d694463289c54b71bb7a653b,,VGI-DEKSPTOP-TIJMEn,10,0.627226,172.28.206.68,17690,9.71644,0.666147,9.71644,1671449236,0,,10,0d7a4_00001,0.304735
run_one_training_0d7a4_00002,2022-12-19_12-27-27,True,,639d947b97d34be1b37998ad6e3810b2,,VGI-DEKSPTOP-TIJMEn,20,0.613046,172.28.206.68,17692,20.0537,0.734643,20.0537,1671449247,0,,20,0d7a4_00002,0.239057
run_one_training_0d7a4_00003,2022-12-19_12-27-17,True,,6f37bce0ec4349519fdbb4c9b4c2a567,,VGI-DEKSPTOP-TIJMEn,10,0.701942,172.28.206.68,17700,10.5813,0.655334,10.5813,1671449237,0,,10,0d7a4_00003,0.242506
run_one_training_0d7a4_00004,2022-12-19_12-27-17,True,,10a6e400bf0e483b95a7c1234ca05bb8,,VGI-DEKSPTOP-TIJMEn,20,0.597815,172.28.206.68,17706,11.006,0.315164,11.006,1671449237,0,,20,0d7a4_00004,0.296244
run_one_training_0d7a4_00005,2022-12-19_12-27-11,True,,4f1ddd0ef42e4671a64c4acfb7b417e1,,VGI-DEKSPTOP-TIJMEn,10,0.632737,172.28.206.68,17711,4.50424,0.334767,4.50424,1671449231,0,,10,0d7a4_00005,0.261331
run_one_training_0d7a4_00006,2022-12-19_12-27-45,True,,c7b84a37621a4a2aa2c75cfc465de6e1,"6_batch_size=8,learning_rate=0.0500,model_attention_heads=2,model_embedding_size=16,model_layers=3,scheduler_gamma=0.9950,sgd_momentum=0.5000",VGI-DEKSPTOP-TIJMEn,218,0.820624,172.28.206.68,17717,38.6373,0.00316095,38.6373,1671449265,0,,218,0d7a4_00006,0.273391
run_one_training_0d7a4_00007,2022-12-19_12-27-19,True,,1726fb575f8a45998147799d6a74be2d,,VGI-DEKSPTOP-TIJMEn,20,0.612555,172.28.206.68,17723,12.9778,0.398597,12.9778,1671449239,0,,20,0d7a4_00007,0.292318
run_one_training_0d7a4_00008,2022-12-19_12-27-14,True,,c51ab879aa2b45c1b3923784cc5473d6,,VGI-DEKSPTOP-TIJMEn,10,0.663759,172.28.206.68,17729,7.14359,0.587588,7.14359,1671449234,0,,10,0d7a4_00008,0.288217
run_one_training_0d7a4_00009,2022-12-19_12-27-14,True,,81c9da2244e84f41a3c07bb0f471eb1f,,VGI-DEKSPTOP-TIJMEn,10,0.617465,172.28.206.68,17736,7.47614,0.530666,7.47614,1671449234,0,,10,0d7a4_00009,0.293528


== Status ==
Current time: 2022-12-19 12:27:05 (running for 00:00:06.11)
Memory usage on this node: 7.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: -0.6154014126342886
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-26-59
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |       

2022-12-19 12:27:45,674	INFO tune.py:762 -- Total run time: 46.04 seconds (45.90 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:27:45 (running for 00:00:45.91)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 160.000: -0.6011713016169523 | Iter 80.000: -0.5856408062908385 | Iter 40.000: -0.5859474076165093 | Iter 20.000: -0.6051852135779032 | Iter 10.000: -0.6219425806227852
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-26-59
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   traini

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_297b7_00000,2022-12-19_12-27-58,True,,31a18e3b01b8451a8242fc2a3630602a,,VGI-DEKSPTOP-TIJMEn,10,0.62658,172.28.206.68,22226,9.39746,0.683231,9.39746,1671449278,0,,10,297b7_00000,0.166224
run_one_training_297b7_00001,2022-12-19_12-28-29,True,,16388190c7874587a536080a759d3298,,VGI-DEKSPTOP-TIJMEn,20,0.609173,172.28.206.68,22532,35.2039,1.38669,35.2039,1671449309,0,,20,297b7_00001,0.243109
run_one_training_297b7_00002,2022-12-19_12-28-34,True,,136127b357484dc7931a378d35a4dc78,"2_batch_size=4,learning_rate=0.0500,model_attention_heads=1,model_embedding_size=64,model_layers=1,scheduler_gamma=0.9950,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,213,0.852465,172.28.206.68,22537,40.0317,0.00357914,40.0317,1671449314,0,,213,297b7_00002,0.288286
run_one_training_297b7_00003,2022-12-19_12-28-16,True,,e8eb88de17d24057a924f1cb44f1bc06,,VGI-DEKSPTOP-TIJMEn,80,0.560324,172.28.206.68,22542,22.6216,0.15565,22.6216,1671449296,0,,80,297b7_00003,0.254247
run_one_training_297b7_00004,2022-12-19_12-29-07,True,,e2a3fe39747448b58482ab968fbcc029,,VGI-DEKSPTOP-TIJMEn,40,0.588972,172.28.206.68,22547,73.7804,1.4198,73.7804,1671449347,0,,40,297b7_00004,0.277271
run_one_training_297b7_00005,2022-12-19_12-28-15,True,,5d07f835676f4d12a08004177c17ab12,,VGI-DEKSPTOP-TIJMEn,20,0.588073,172.28.206.68,22553,21.0463,0.866687,21.0463,1671449295,0,,20,297b7_00005,0.242761
run_one_training_297b7_00006,2022-12-19_12-28-31,True,,eaddfdf4d7594cd6b55ccd0c706a1acc,,VGI-DEKSPTOP-TIJMEn,80,0.585484,172.28.206.68,22559,37.6351,0.273005,37.6351,1671449311,0,,80,297b7_00006,0.297572
run_one_training_297b7_00007,2022-12-19_12-29-11,True,,6ff2ec7927354d8daa76684f84064978,"7_batch_size=8,learning_rate=0.0500,model_attention_heads=1,model_embedding_size=64,model_layers=7,scheduler_gamma=0.9950,sgd_momentum=0.5000",VGI-DEKSPTOP-TIJMEn,193,0.871682,172.28.206.68,22565,77.6057,0.00300527,77.6057,1671449351,0,,193,297b7_00007,0.238097
run_one_training_297b7_00008,2022-12-19_12-28-00,True,,91ec3e3e983843f39e5d6f6b4bd5ef46,,VGI-DEKSPTOP-TIJMEn,10,0.648105,172.28.206.68,22571,6.09859,0.502871,6.09859,1671449280,0,,10,297b7_00008,0.239015
run_one_training_297b7_00009,2022-12-19_12-28-03,True,,2d002db025a34572b4be843283b96ddb,,VGI-DEKSPTOP-TIJMEn,20,0.609173,172.28.206.68,22577,9.96721,0.377452,9.96721,1671449283,0,,20,297b7_00009,0.277795


== Status ==
Current time: 2022-12-19 12:27:52 (running for 00:00:06.04)
Memory usage on this node: 7.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-27-46
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                     |

2022-12-19 12:29:11,704	INFO tune.py:762 -- Total run time: 85.08 seconds (84.95 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:29:11 (running for 00:01:24.96)
Memory usage on this node: 4.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.5734469382201923 | Iter 80.000: -0.5597757626738813 | Iter 40.000: -0.587374754688319 | Iter 20.000: -0.5880239215843818 | Iter 10.000: -0.6093169387210817
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-27-46
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   trainin

Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_5cc29_00000,2022-12-19_12-29-25,True,,7f9b319fd5c54a079cabf28dd10f16f4,VGI-DEKSPTOP-TIJMEn,10,0.566553,172.28.206.68,27374,9.61556,0.843606,9.61556,1671449365,0,,10,5cc29_00000,0.154003
run_one_training_5cc29_00001,2022-12-19_12-29-35,True,,f141694daf7b4973b1bc516cfd69dc14,VGI-DEKSPTOP-TIJMEn,10,0.517179,172.28.206.68,27633,15.7858,1.35724,15.7858,1671449375,0,,10,5cc29_00001,0.287603
run_one_training_5cc29_00002,2022-12-19_12-29-56,True,,109408498a9f4cf2a096daa66a8a0817,VGI-DEKSPTOP-TIJMEn,133,0.0139628,172.28.206.68,27638,36.378,0.00456762,36.378,1671449396,0,,133,5cc29_00002,0.300736
run_one_training_5cc29_00003,2022-12-19_12-29-37,True,,3905f3dd5ffa4d499beed2c50ca49016,VGI-DEKSPTOP-TIJMEn,20,0.50634,172.28.206.68,27643,17.5806,0.756449,17.5806,1671449377,0,,20,5cc29_00003,0.235312
run_one_training_5cc29_00004,2022-12-19_12-29-48,True,,2462a537ffdc4ccaa2e7f9af23f8745c,VGI-DEKSPTOP-TIJMEn,20,0.50172,172.28.206.68,27648,28.2108,1.13216,28.2108,1671449388,0,,20,5cc29_00004,0.289476
run_one_training_5cc29_00005,2022-12-19_12-29-26,True,,983fa5df48e14e07b11eac55831839c5,VGI-DEKSPTOP-TIJMEn,10,0.530689,172.28.206.68,27654,6.89271,0.628673,6.89271,1671449366,0,,10,5cc29_00005,0.283655
run_one_training_5cc29_00006,2022-12-19_12-29-55,True,,dff1237a6ac14ca0960ef1c165aeb896,VGI-DEKSPTOP-TIJMEn,20,0.500722,172.28.206.68,27660,35.6217,1.16939,35.6217,1671449395,0,,20,5cc29_00006,0.292382
run_one_training_5cc29_00007,2022-12-19_12-29-33,True,,35625e74c3c94c1b935326c2af6fa0e4,VGI-DEKSPTOP-TIJMEn,20,0.500123,172.28.206.68,27666,13.2531,0.590366,13.2531,1671449373,0,,20,5cc29_00007,0.283807
run_one_training_5cc29_00008,2022-12-19_12-32-23,True,,81b9c30f6bb84fc7af75c83e4cc7f8b9,VGI-DEKSPTOP-TIJMEn,300,0.477061,172.28.206.68,27672,183.77,0.504362,183.77,1671449543,0,,300,5cc29_00008,0.325775
run_one_training_5cc29_00009,2022-12-19_12-29-47,True,,35a610a8a4744bedb2a1946826955174,VGI-DEKSPTOP-TIJMEn,10,0.511678,172.28.206.68,27678,28.0529,2.22503,28.0529,1671449387,0,,10,5cc29_00009,0.225358


== Status ==
Current time: 2022-12-19 12:29:18 (running for 00:00:06.32)
Memory usage on this node: 7.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-29-12
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                     |

2022-12-19 12:32:23,898	INFO tune.py:762 -- Total run time: 191.25 seconds (191.11 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:32:23 (running for 00:03:11.12)
Memory usage on this node: 4.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.47878084735323984 | Iter 80.000: -0.4798449774583181 | Iter 40.000: -0.4824652001261711 | Iter 20.000: -0.5001228889045508 | Iter 10.000: -0.5082875951503714
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-29-12
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+-----------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |      loss |   tra

Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_cf502_00000,2022-12-19_12-32-53,True,,10c6dc4164f14a40afb24aeecd251826,VGI-DEKSPTOP-TIJMEn,80,0.479647,172.28.206.68,32168,25.5444,0.164333,25.5444,1671449573,0,,80,cf502_00000,0.163041
run_one_training_cf502_00001,2022-12-19_12-32-39,True,,053fda87ee544001b12f4ab42399a8ec,VGI-DEKSPTOP-TIJMEn,10,0.481452,172.28.206.68,32488,6.50208,0.572615,6.50208,1671449559,0,,10,cf502_00001,0.30966
run_one_training_cf502_00002,2022-12-19_12-32-50,True,,0d83076b2d6541c6ba6d7f7bcbcd133c,VGI-DEKSPTOP-TIJMEn,40,0.465814,172.28.206.68,32493,18.0611,0.285205,18.0611,1671449570,0,,40,cf502_00002,0.342487
run_one_training_cf502_00003,2022-12-19_12-32-53,True,,96bbd01a43a546eb959bb94231bfa1b9,VGI-DEKSPTOP-TIJMEn,10,0.497251,172.28.206.68,32498,20.9921,1.29677,20.9921,1671449573,0,,10,cf502_00003,0.342194
run_one_training_cf502_00004,2022-12-19_12-33-08,True,,bf930a97502242b9bb7fd13737df1b58,VGI-DEKSPTOP-TIJMEn,213,1.14108,172.28.206.68,32503,35.364,0.00305104,35.364,1671449588,0,,213,cf502_00004,0.313345
run_one_training_cf502_00005,2022-12-19_12-32-50,True,,f038bb6126f04c52afddc4435f4635f7,VGI-DEKSPTOP-TIJMEn,10,0.482936,172.28.206.68,32509,17.4073,1.16264,17.4073,1671449570,0,,10,cf502_00005,0.321103
run_one_training_cf502_00006,2022-12-19_12-32-47,True,,5945658449104ccc9897f1c4eb1dd5a8,VGI-DEKSPTOP-TIJMEn,10,0.494395,172.28.206.68,32515,14.6445,1.12584,14.6445,1671449567,0,,10,cf502_00006,0.283246
run_one_training_cf502_00007,2022-12-19_12-32-48,True,,92d4c07af24841bebb98864ff4b7de2b,VGI-DEKSPTOP-TIJMEn,20,0.471288,172.28.206.68,32521,15.2877,0.526175,15.2877,1671449568,0,,20,cf502_00007,0.294704
run_one_training_cf502_00008,2022-12-19_12-32-44,True,,c444810064de46ccb536a6e7164532e7,VGI-DEKSPTOP-TIJMEn,10,0.483304,172.28.206.68,32527,11.8594,0.98577,11.8594,1671449564,0,,10,cf502_00008,0.288084
run_one_training_cf502_00009,2022-12-19_12-32-40,True,,57fa5d6341fa49de9074fff860256ac1,VGI-DEKSPTOP-TIJMEn,10,0.536231,172.28.206.68,32534,7.3238,0.610544,7.3238,1671449560,0,,10,cf502_00009,0.292371


== Status ==
Current time: 2022-12-19 12:32:30 (running for 00:00:06.14)
Memory usage on this node: 7.5/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-32-24
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                     |

2022-12-19 12:33:10,191	INFO tune.py:762 -- Total run time: 45.35 seconds (45.21 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:33:10 (running for 00:00:45.22)
Memory usage on this node: 4.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 160.000: -0.46304002652565635 | Iter 80.000: -0.46543177713950473 | Iter 40.000: -0.4654076776156823 | Iter 20.000: -0.46824745647609234 | Iter 10.000: -0.4831202945307545
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-32-24
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   tra

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_eaeeb_00000,2022-12-19_12-34-13,True,,4eb6afdb30b144f1b8e9789f36be3c73,"0_batch_size=8,learning_rate=0.0500,model_attention_heads=1,model_embedding_size=64,model_layers=5,scheduler_gamma=0.9950,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,158,0.858497,172.28.206.68,4575,59.438,0.00285316,59.438,1671449653,0,,158,eaeeb_00000,0.162523
run_one_training_eaeeb_00001,2022-12-19_12-33-32,True,,86f2cd7552c149f296bbe6682b319520,,VGI-DEKSPTOP-TIJMEn,10,0.553095,172.28.206.68,4852,13.3939,1.05209,13.3939,1671449612,0,,10,eaeeb_00001,0.308153
run_one_training_eaeeb_00002,2022-12-19_12-33-35,True,,d3edc7abcc3a4680858922e60e97fe80,,VGI-DEKSPTOP-TIJMEn,20,0.497806,172.28.206.68,4857,16.9154,0.620043,16.9154,1671449615,0,,20,eaeeb_00002,0.352442
run_one_training_eaeeb_00003,2022-12-19_12-33-42,True,,ffb22cb2631a4803b7837357398aef26,,VGI-DEKSPTOP-TIJMEn,20,0.507067,172.28.206.68,4862,23.3685,0.825581,23.3685,1671449622,0,,20,eaeeb_00003,0.330716
run_one_training_eaeeb_00004,2022-12-19_12-33-24,True,,1e86630793f84b5e82762bd3a898a970,,VGI-DEKSPTOP-TIJMEn,10,0.519278,172.28.206.68,4867,5.62471,0.489828,5.62471,1671449604,0,,10,eaeeb_00004,0.334509
run_one_training_eaeeb_00005,2022-12-19_12-33-42,True,,745bbf61caed4f94a378bb1a3eb802d3,,VGI-DEKSPTOP-TIJMEn,20,0.503636,172.28.206.68,4874,23.8683,0.849319,23.8683,1671449622,0,,20,eaeeb_00005,0.309614
run_one_training_eaeeb_00006,2022-12-19_12-33-29,True,,f5e738304a044b2884e837ecb7d2d32d,,VGI-DEKSPTOP-TIJMEn,10,0.518155,172.28.206.68,4879,10.4788,0.832112,10.4788,1671449609,0,,10,eaeeb_00006,0.369204
run_one_training_eaeeb_00007,2022-12-19_12-33-27,True,,40fb9e001d614f77a6cb0acfcc581c49,,VGI-DEKSPTOP-TIJMEn,10,0.513108,172.28.206.68,4885,8.86368,0.771777,8.86368,1671449607,0,,10,eaeeb_00007,0.327102
run_one_training_eaeeb_00008,2022-12-19_12-33-43,True,,70306670f1a24ebd82cbfe7c3eece38d,,VGI-DEKSPTOP-TIJMEn,10,0.536785,172.28.206.68,4891,24.2651,1.79293,24.2651,1671449623,0,,10,eaeeb_00008,0.302894
run_one_training_eaeeb_00009,2022-12-19_12-33-41,True,,1b317c51112846ce9a23ab1379dbd179,,VGI-DEKSPTOP-TIJMEn,10,0.515704,172.28.206.68,4898,22.9145,1.75769,22.9145,1671449621,0,,10,eaeeb_00009,0.364948


== Status ==
Current time: 2022-12-19 12:33:17 (running for 00:00:06.51)
Memory usage on this node: 8.1/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-33-11
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |   

2022-12-19 12:34:13,595	INFO tune.py:762 -- Total run time: 62.42 seconds (62.28 seconds for the tuning loop).
2022/12/19 12:34:13 INFO mlflow.tracking.fluent: Experiment with name 'protein_only_sweep_strict' does not exist. Creating a new experiment.


== Status ==
Current time: 2022-12-19 12:34:13 (running for 00:01:02.29)
Memory usage on this node: 4.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 160.000: -0.4809261653572321 | Iter 80.000: -0.480314907617867 | Iter 40.000: -0.48079287726432085 | Iter 20.000: -0.48942850281794864 | Iter 10.000: -0.5101708937940246
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-33-11
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   trainin

Trial name,date,done,episodes_total,experiment_id,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_10dd7_00000,2022-12-19_12-34-31,True,,c0badc2e3ada44e79e7dd4be4845dfc5,,VGI-DEKSPTOP-TIJMEn,10,0.622587,172.28.206.68,9416,14.1874,1.21137,14.1874,1671449671,0,,10,10dd7_00000,0.158516
run_one_training_10dd7_00001,2022-12-19_12-34-51,True,,cdcb8a58117c4ff9bd0bf1dde25fbca0,"1_batch_size=4,learning_rate=0.0500,model_attention_heads=4,model_embedding_size=8,model_layers=1,scheduler_gamma=1,sgd_momentum=0.5000",VGI-DEKSPTOP-TIJMEn,133,1.19424,172.28.206.68,9653,29.2732,0.00363827,29.2732,1671449691,0,,133,10dd7_00001,0.401754
run_one_training_10dd7_00002,2022-12-19_12-36-57,True,,6572cf503fab45e29e8b141c1f7cb19d,"2_batch_size=2,learning_rate=0.0010,model_attention_heads=4,model_embedding_size=128,model_layers=3,scheduler_gamma=0.9950,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,133,1.18966,172.28.206.68,9658,155.128,0.00317574,155.128,1671449817,0,,133,10dd7_00002,0.404767
run_one_training_10dd7_00003,2022-12-19_12-34-32,True,,5cc5db6782674fb4b041f830053c6eb1,,VGI-DEKSPTOP-TIJMEn,10,0.623502,172.28.206.68,9663,9.05734,0.723634,9.05734,1671449672,0,,10,10dd7_00003,0.320577
run_one_training_10dd7_00004,2022-12-19_12-35-58,True,,b25a70fec2fb4a48a9712450e703285f,,VGI-DEKSPTOP-TIJMEn,300,0.578876,172.28.206.68,9668,95.8749,0.217566,95.8749,1671449758,0,,300,10dd7_00004,0.302885
run_one_training_10dd7_00005,2022-12-19_12-34-39,True,,6279c1f069f441b395dd33434ff7f5b2,,VGI-DEKSPTOP-TIJMEn,40,0.592028,172.28.206.68,9674,16.6454,0.304926,16.6454,1671449679,0,,40,10dd7_00005,0.369119
run_one_training_10dd7_00006,2022-12-19_12-34-34,True,,5d22cde1740948479dfc17afbce4dc37,,VGI-DEKSPTOP-TIJMEn,10,0.610022,172.28.206.68,9680,11.6412,0.9732,11.6412,1671449674,0,,10,10dd7_00006,0.311868
run_one_training_10dd7_00007,2022-12-19_12-34-31,True,,cea1d879840e480daa9cea5f35b919b6,,VGI-DEKSPTOP-TIJMEn,20,0.596278,172.28.206.68,9686,8.96544,0.371995,8.96544,1671449671,0,,20,10dd7_00007,0.294067
run_one_training_10dd7_00008,2022-12-19_12-34-28,True,,70a2d011f68a4b75a286a469a77eb1db,,VGI-DEKSPTOP-TIJMEn,10,0.59894,172.28.206.68,9692,5.58676,0.527426,5.58676,1671449668,0,,10,10dd7_00008,0.326475
run_one_training_10dd7_00009,2022-12-19_12-35-56,True,,2af82271045c4432a030554880cf3d04,"9_batch_size=4,learning_rate=0.0100,model_attention_heads=2,model_embedding_size=128,model_layers=3,scheduler_gamma=0.9950,sgd_momentum=0.8000",VGI-DEKSPTOP-TIJMEn,133,1.19386,172.28.206.68,9698,93.8422,0.00411439,93.8422,1671449756,0,,133,10dd7_00009,0.312883


== Status ==
Current time: 2022-12-19 12:34:21 (running for 00:00:06.46)
Memory usage on this node: 7.8/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-34-14
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                    |   

2022-12-19 12:37:15,908	INFO tune.py:762 -- Total run time: 181.09 seconds (180.96 seconds for the tuning loop).


== Status ==
Current time: 2022-12-19 12:37:15 (running for 00:03:00.96)
Memory usage on this node: 4.6/23.5 GiB 
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 160.000: -0.5805948767246623 | Iter 80.000: -0.5778572028785041 | Iter 40.000: -0.5832723571973688 | Iter 20.000: -0.5874351333169376 | Iter 10.000: -0.5947970283382079
Resources requested: 0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-34-14
Number of trials: 16/16 (16 TERMINATED)
+------------------------------+------------+--------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status     | loc                |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_

Trial name,date,done,episodes_total,experiment_id,hostname,iterations_since_restore,loss,node_ip,pid,time_since_restore,time_this_iter_s,time_total_s,timestamp,timesteps_since_restore,timesteps_total,training_iteration,trial_id,warmup_time
run_one_training_7d84e_00000,2022-12-19_12-40-06,False,,b8f6676706bd4215bede80b55fa334a5,VGI-DEKSPTOP-TIJMEn,137,0.653752,172.28.206.68,14269,166.746,0.9703,166.746,1671450006,0,,137,7d84e_00000,0.16563
run_one_training_7d84e_00001,2022-12-19_12-37-34,True,,c23a4ceb1e8a4d0cabc99eb3c93c05af,VGI-DEKSPTOP-TIJMEn,10,0.686499,172.28.206.68,14544,10.1131,0.929,10.1131,1671449854,0,,10,7d84e_00001,0.275516
run_one_training_7d84e_00002,2022-12-19_12-39-21,True,,659d06a689eb4be8beaf85f280d2913f,VGI-DEKSPTOP-TIJMEn,160,0.673702,172.28.206.68,14549,116.473,0.557829,116.473,1671449961,0,,160,7d84e_00002,0.338809
run_one_training_7d84e_00003,2022-12-19_12-40-06,False,,031a20c0a5d24c92b7f146e47ef84ac8,VGI-DEKSPTOP-TIJMEn,69,0.667776,172.28.206.68,14554,161.652,1.65498,161.652,1671450006,0,,69,7d84e_00003,0.331214
run_one_training_7d84e_00004,2022-12-19_12-38-35,True,,bc2af0c262dc406b8a19b64d30a6ea75,VGI-DEKSPTOP-TIJMEn,20,0.681503,172.28.206.68,14559,70.7658,3.00928,70.7658,1671449915,0,,20,7d84e_00004,0.287825
run_one_training_7d84e_00005,2022-12-19_12-37-41,True,,288d8b8f719f4a1c8fe78a8be99a50d4,VGI-DEKSPTOP-TIJMEn,10,0.689623,172.28.206.68,14565,16.8292,1.45389,16.8292,1671449861,0,,10,7d84e_00005,0.351484
run_one_training_7d84e_00006,2022-12-19_12-38-21,True,,ac0ab0e9e75d4e22986eb35a75b27ef1,VGI-DEKSPTOP-TIJMEn,40,0.683078,172.28.206.68,14570,57.2124,1.29161,57.2124,1671449901,0,,40,7d84e_00006,0.31913
run_one_training_7d84e_00007,2022-12-19_12-38-58,True,,7344aa9e6dd244e39659aad4fcee4761,VGI-DEKSPTOP-TIJMEn,80,0.675004,172.28.206.68,14577,93.542,0.956363,93.542,1671449938,0,,80,7d84e_00007,0.31839
run_one_training_7d84e_00008,2022-12-19_12-40-04,False,,907d369b7d6646db923a3e4866add840,VGI-DEKSPTOP-TIJMEn,172,0.666608,172.28.206.68,14583,159.168,0.774611,159.168,1671450004,0,,172,7d84e_00008,0.322461
run_one_training_7d84e_00009,2022-12-19_12-37-35,True,,f6258b905cbf42498308f951a4d51034,VGI-DEKSPTOP-TIJMEn,10,0.720129,172.28.206.68,14589,10.2674,0.908359,10.2674,1671449855,0,,10,7d84e_00009,0.323949


== Status ==
Current time: 2022-12-19 12:37:23 (running for 00:00:06.61)
Memory usage on this node: 7.9/23.5 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 160.000: None | Iter 80.000: None | Iter 40.000: None | Iter 20.000: None | Iter 10.000: None
Resources requested: 16.0/16 CPUs, 0/0 GPUs, 0.0/12.27 GiB heap, 0.0/6.14 GiB objects
Result logdir: /home/tvangraft/ray_results/run_one_training_2022-12-19_12-37-17
Number of trials: 16/16 (16 RUNNING)
+------------------------------+----------+---------------------+--------------+-----------------+------------------------+------------------------+----------------+-------------------+----------------+----------+----------------------+
| Trial name                   | status   | loc                 |   batch_size |   learning_rate |   model_attention_head |   model_embedding_size |   model_layers |   scheduler_gamma |   sgd_momentum |     loss |   training_iteration |
|                              |          |                     |