In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch  
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune import ExperimentAnalysis


  from .autonotebook import tqdm as notebook_tqdm


# Neural Net Class and Training Functions
Define Class and functions

In [3]:
def data_loaders():
   # Import data
    dir_X = '/Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/ev_adoption_ml/Data/df_X_county.csv'
    dir_y = '/Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/ev_adoption_ml/Data/df_y_county.csv'
    
    X = pd.read_csv(dir_X)
    X['constant'] = 1
    y = pd.read_csv(dir_y)

    # check if any nan values
    nan_row_X = X[X.isna().any(axis=1)]
    #print(nan_row_X)
    nan_row_y = y[y.isna().any(axis=1)]
    #print(nan_row_y)

    X = X.to_numpy()
    y = y.to_numpy()

    # split train/test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # standardize X
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def train_model(nn_model, data_loaded, opt, batch_size=32):
    
    '''
    Trains neural network model on X_train, y_train data.
    
    Returns
    ----------
    nn_model: torch.nn.Module
        trained neural network model
    '''
    # convert to tensors (for Pytorch)
    X_train, X_eval, y_train, y_eval = data_loaded
    X_train_tensor = torch.tensor(X_train)
    y_train_tensor = torch.tensor(y_train)
    X_test_tensor = torch.tensor(X_eval)
    y_test_tensor = torch.tensor(y_eval)
    
    # train with (mini-batch) SGD; initialize optimizer
    n_samples, n_features = X_train_tensor.shape
    # initialize mse loss function
    mse_loss = torch.nn.MSELoss()
    nn_model.train()  # put model in train mode
    
    # loop through data in batches
    for batch_start in range(0, n_samples, batch_size):
    # reset gradients to zero
        opt.zero_grad()
        # form batch
        X_batch = X_train_tensor[batch_start:batch_start+batch_size]
        y_batch = y_train_tensor[batch_start:batch_start+batch_size]
        X_batch_test = X_test_tensor[batch_start:batch_start+batch_size]
        y_batch_test = y_test_tensor[batch_start:batch_start+batch_size]
        # pass batch through neural net to get prediction
        y_pred = nn_model(X_batch.float())
        y_pred = y_pred.unsqueeze(1)
        y_pred_test = nn_model(X_batch_test.float())
        y_pred_test = y_pred_test.unsqueeze(1)
        # compute MSE loss
        loss = mse_loss(y_pred, y_batch[:, None].float())
        loss_test = mse_loss(y_pred_test, y_batch_test[:, None].float())
        # back-propagate loss
        loss.backward()
        # update model parameters based on backpropogated gradients
        torch.nn.utils.clip_grad_value_(nn_model.parameters(), clip_value=1.5)
        opt.step()
        
        #print(f"Mean Train MSE: {epoch_loss}")
        
    return loss, loss_test

def evaluate_model(nn_model, X_eval, y_eval, batch_size=32):
    '''
    Evaluates trained neural network model on X_eval, y_eval data.

    Parameters
    ----------
    nn_model: torch.nn.Module
        trained neural network model
    X_eval: np.array
        matrix of training data features
    y_eval: np.array
        vector of training data labels
    batch_size: int
        batch size to looping over dataset to generate predictions

    Returns
    ----------
    mse: float
        MSE of trained model on X_eval, y_eval data
    '''
    # initialize mse loss function
    mse_loss = torch.nn.MSELoss()
    # convert to tensors (for Pytorch)
    X_eval_tensor = torch.tensor(X_eval)
    y_eval_tensor = torch.tensor(y_eval)
    n_samples = X_eval_tensor.shape[0]
    nn_model.eval() # put in eval mode
    # loop over data and generate predictions
    preds = []
    for batch_start in range(0, n_samples, batch_size):
        # form batch
        X_batch = X_eval_tensor[batch_start:batch_start+batch_size]
        y_batch = y_eval_tensor[batch_start:batch_start+batch_size]
        with torch.no_grad():  # no need to compute gradients during evaluation
            # pass batch through neural net to get prediction
            y_pred = nn_model(X_batch.float())
            y_pred = y_pred.unsqueeze(1)
            preds.append(y_pred)
    # compute MSE across all samples
    all_preds = torch.cat(preds)
    loss = mse_loss(all_preds, y_eval_tensor[:, None].float()).item()
    return loss

def train_and_validate(config):

    '''Parameters
    ----------
    X_train: np.array
        matrix of training data features
    y_train: np.array
        vector of training data labels
    max_iter: int
        maximum number of iterations to train for
    batch_size: int
        batch size to use when training w/ SGD
    '''
    # intialize neural network
    data_loaded = data_loaders()
    X_train, X_eval, y_train, y_eval = data_loaded
    print(y_eval)
    n_samples, n_features = X_train.shape
    nn_model = NN_configureable(n_features, config["n_hidden_dim"], config["n_layers"])

    #opt = torch.optim.SGD(nn_model.parameters(), lr=config["lr"],  momentum=0.9)
    opt = torch.optim.SGD(nn_model.parameters(), lr=config["lr"])
    batch_size = config["batch_size"]
    max_iter = config["train_iterations"]

    # Start the training.
    for it in range(max_iter):
        # save losses across all batches
        train_epoch_loss, test_epoch_loss = train_model(nn_model, data_loaded, opt, batch_size)
        valid_epoch_loss = evaluate_model(nn_model, X_eval, y_eval, batch_size)

    with tune.checkpoint_dir(it) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, 'checkpoint')
            torch.save((nn_model.state_dict(), opt.state_dict()), path)
    tune.report(
        loss=valid_epoch_loss)

class NN(nn.Module):
    '''
    Class for fully connected neural net.
    '''
    def __init__(self, input_dim, hidden_dim):
        '''
        Parameters
        ----------
        input_dim: int
            input dimension (i.e., # of features in each example passed to the network)
        hidden_dim: int
            number of nodes in hidden layer
        '''
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.layers = nn.Sequential(
            # Network has a single hidden layer
            # Apply ReLU activation in between the hidden layer and output node
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.layers(x)
        return x


class NN_configureable(nn.Module):
    '''
    Class for fully connected neural net.
    '''
    def __init__(self, input_dim, hidden_dim, hidden_layers):
        '''
        Parameters
        ----------
        input_dim: int
            input dimension (i.e., # of features in each example passed to the network)
        hidden_dim: int
            number of nodes in hidden layer
        '''
        super().__init__()
        self.input_dim = input_dim
        self.hidden_layers = hidden_layers
        #self.layers = nn.ModuleDict()
        self.layers = nn.ModuleDict()
        
        # Define input layer
        self.layers["input"] = nn.Linear(in_features = input_dim, out_features = hidden_dim)
        # Define hidden layers
        for i in range(self.hidden_layers):
            self.layers[f"hidden_{i}"] = nn.Linear(in_features = hidden_dim, out_features = hidden_dim)
        # Define output layer
        self.layers["output"] = nn.Linear(in_features = hidden_dim, out_features = 1)

    def forward(self, x):
        x = self.layers["input"](x)
        for i in range(self.hidden_layers):
            x = F.relu(self.layers[f"hidden_{i}"](x))

        return self.layers["output"](x)        
        

# Search Function for Ray Tune
Hyperparameter search



# Import Data
Import combined data

In [4]:
def main():   
    
    # Search Function for Ray Tune - Hyperparameter search
     
    #X = pd.read_csv('./Data/df_X_county.csv')
    #print(X.head)
     
    X_train, X_eval, y_train, y_eval = data_loaders()
    n_samples, n_features = X_train.shape

    # Define the parameter search configuration.
    config = {
        "n_layers": 
            #tune.sample_from(lambda _: 2 ** np.random.randint(1, 5)),
            tune.grid_search([1, 2, 3, 4]),
        "n_hidden_dim": 
            #tune.sample_from(lambda _: 2 ** np.random.randint(4, 8)),
            tune.grid_search([4, 8, n_features]),
        "lr": tune.loguniform(1e-4, 1e-4),
        "batch_size": tune.choice([32]),
        "train_iterations": tune.choice([50, 100])
    }

    max_num_iter = 50
    grace_period = 1
    # Number of Ray Tune random search experiments to run.
    num_samples = 20
    
    # Schduler to stop bad performing trails.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t = max_num_iter,
        grace_period = grace_period,
        reduction_factor = 2 
    )

    # Reporter to show on command line/output window
    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])


    # Start Ray Tune search
    result = tune.run(
        train_and_validate,
        resources_per_trial = {"cpu": 2, "gpu": 0},
        config = config,
        num_samples = num_samples,
        scheduler = scheduler,
        local_dir = '../outputs/raytune_result',
        keep_checkpoints_num = 1,
        checkpoint_score_attr = 'min-validation_loss',
        progress_reporter = reporter)

    # Extract the best trial run from the search.
    best_trial = result.get_best_trial('loss', 'min', 'last')
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
    
    #result.dataframe().csv("df_raytune_search.csv")

    '''
    plt.xlabel('Iteration Step')
    plt.ylabel('Test Error')
    plt.title("Model 1 - Hidden Layer - ReLU")
    plt.legend()
    plt.figure(figsize=(20,12))
    plt.show()
'''

In [5]:
if __name__ == '__main__':
    main()

2023-05-11 00:38:06,076	INFO worker.py:1625 -- Started a local Ray instance.
2023-05-11 00:38:06,730	INFO tune.py:218 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.


== Status ==
Current time: 2023-05-11 00:38:06 (running for 00:00:00.07)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 2.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 16/240 (15 PENDING, 1 RUNNING)
+--------------------------------+----------+-----------------+--------------+--------+----------------+------------+--------------------+
| Trial name                     | status   | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |
|--------------------------------+----------+-----------------+--------------+--------+----------------+------------+--------------------|
| train_and_validate_9fcb7_00000 | RUNNING  | 127.0.0.1:82137 |           32 | 0.0001 |              4 |          1 |    

Trial name,date,done,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_and_validate_9fcb7_00000,2023-05-11_00-38-11,True,"0_batch_size=32,lr=0.0001,n_hidden_dim=4,n_layers=1,train_iterations=50",Dons-MacBook-Pro.local,1,871348.0,127.0.0.1,82137,True,3.66407,3.66407,3.66407,1683779891,1,9fcb7_00000
train_and_validate_9fcb7_00001,2023-05-11_00-38-13,True,"1_batch_size=32,lr=0.0001,n_hidden_dim=8,n_layers=1,train_iterations=50",Dons-MacBook-Pro.local,1,697074.0,127.0.0.1,82138,True,3.57208,3.57208,3.57208,1683779893,1,9fcb7_00001
train_and_validate_9fcb7_00002,2023-05-11_00-38-17,True,"2_batch_size=32,lr=0.0001,n_hidden_dim=13,n_layers=1,train_iterations=100",Dons-MacBook-Pro.local,1,624930.0,127.0.0.1,82139,True,7.49119,7.49119,7.49119,1683779897,1,9fcb7_00002
train_and_validate_9fcb7_00003,2023-05-11_00-38-14,True,"3_batch_size=32,lr=0.0001,n_hidden_dim=4,n_layers=2,train_iterations=50",Dons-MacBook-Pro.local,1,707307.0,127.0.0.1,82140,True,4.37159,4.37159,4.37159,1683779894,1,9fcb7_00003
train_and_validate_9fcb7_00004,2023-05-11_00-38-16,True,"4_batch_size=32,lr=0.0001,n_hidden_dim=8,n_layers=2,train_iterations=50",Dons-MacBook-Pro.local,1,655070.0,127.0.0.1,82137,True,4.39051,4.39051,4.39051,1683779896,1,9fcb7_00004
train_and_validate_9fcb7_00005,2023-05-11_00-38-17,True,"5_batch_size=32,lr=0.0001,n_hidden_dim=13,n_layers=2,train_iterations=50",Dons-MacBook-Pro.local,1,630660.0,127.0.0.1,82138,True,4.51876,4.51876,4.51876,1683779897,1,9fcb7_00005
train_and_validate_9fcb7_00006,2023-05-11_00-38-24,True,,Dons-MacBook-Pro.local,1,1242970.0,127.0.0.1,82140,True,10.2007,10.2007,10.2007,1683779904,1,9fcb7_00006
train_and_validate_9fcb7_00007,2023-05-11_00-38-26,True,"7_batch_size=32,lr=0.0001,n_hidden_dim=8,n_layers=3,train_iterations=100",Dons-MacBook-Pro.local,1,565736.0,127.0.0.1,82137,True,10.0594,10.0594,10.0594,1683779906,1,9fcb7_00007
train_and_validate_9fcb7_00008,2023-05-11_00-38-22,True,"8_batch_size=32,lr=0.0001,n_hidden_dim=13,n_layers=3,train_iterations=50",Dons-MacBook-Pro.local,1,613281.0,127.0.0.1,82139,True,5.31633,5.31633,5.31633,1683779902,1,9fcb7_00008
train_and_validate_9fcb7_00009,2023-05-11_00-38-23,True,"9_batch_size=32,lr=0.0001,n_hidden_dim=4,n_layers=4,train_iterations=50",Dons-MacBook-Pro.local,1,645518.0,127.0.0.1,82138,True,5.72635,5.72635,5.72635,1683779903,1,9fcb7_00009


2023-05-11 00:38:11,881	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-11 00:38:11 (running for 00:00:05.13)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -871347.6875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 20/240 (16 PENDING, 4 RUNNING)
+--------------------------------+----------+-----------------+--------------+--------+----------------+------------+--------------------+--------+----------------------+
| Trial name                     | status   | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |   loss |   training_iteration |
|--------------------------------+----------+-----------------+--------------+--------+----------------+------------+--------------------+--------+----------------------|
| train_and_valid

2023-05-11 00:38:13,445	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  [314]][32m [repeated 20x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(train_and_validate pid=82138)[0m  [ 23][32m [repeated 10x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 5x across cluster][0m


2023-05-11 00:38:14,180	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:16,306	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:17 (running for 00:00:10.61)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -697074.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 24/240 (16 PENDING, 4 RUNNING, 4 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |   loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------+---------------------

2023-05-11 00:38:17,999	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:22,715	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:22 (running for 00:00:15.96)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -655070.1875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 26/240 (16 PENDING, 4 RUNNING, 6 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |   loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------+---------------------

2023-05-11 00:38:23,759	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:24,408	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:29 (running for 00:00:23.17)
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -645518.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 30/240 (16 PENDING, 4 RUNNING, 10 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------

2023-05-11 00:38:31,540	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:33,536	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:35 (running for 00:00:29.23)
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -645518.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 34/240 (16 PENDING, 4 RUNNING, 14 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------

2023-05-11 00:38:37,307	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:37,985	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:42 (running for 00:00:35.81)
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 38/240 (16 PENDING, 4 RUNNING, 18 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------

2023-05-11 00:38:43,484	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:46,513	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:48 (running for 00:00:41.93)
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -636074.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 41/240 (16 PENDING, 4 RUNNING, 21 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:38:49,861	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:38:50,210	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:38:53 (running for 00:00:46.93)
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 46/240 (15 PENDING, 4 RUNNING, 27 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------

2023-05-11 00:38:54,739	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82140)[0m  [314]][32m [repeated 24x across cluster][0m
[2m[36m(train_and_validate pid=82140)[0m  [ 23][32m [repeated 12x across cluster][0m
[2m[36m(train_and_validate pid=82140)[0m  ...[32m [repeated 6x across cluster][0m
== Status ==
Current time: 2023-05-11 00:38:59 (running for 00:00:53.04)
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -643504.0
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 48/240 (16 PENDING, 4 RUNNING, 28 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |  

2023-05-11 00:39:00,804	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82139)[0m  [314]][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=82139)[0m  [ 23][32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=82139)[0m  [314]]


2023-05-11 00:39:01,597	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  ...


2023-05-11 00:39:04,120	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:05,385	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:05 (running for 00:00:58.63)
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -636074.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 51/240 (16 PENDING, 4 RUNNING, 31 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:39:06,463	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82139)[0m  [314]][32m [repeated 16x across cluster][0m
[2m[36m(train_and_validate pid=82139)[0m  [ 23][32m [repeated 8x across cluster][0m


2023-05-11 00:39:07,703	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 4x across cluster][0m


2023-05-11 00:39:10,270	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:11,487	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:11 (running for 00:01:04.73)
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637308.90625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 55/240 (16 PENDING, 4 RUNNING, 35 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:39:13,966	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82139)[0m  ...[32m [repeated 3x across cluster][0m


2023-05-11 00:39:14,065	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:18,238	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:18 (running for 00:01:11.48)
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 58/240 (16 PENDING, 4 RUNNING, 38 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----------

2023-05-11 00:39:18,666	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:20,283	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 4x across cluster][0m


2023-05-11 00:39:22,962	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:23,510	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:23 (running for 00:01:16.76)
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 62/240 (16 PENDING, 4 RUNNING, 42 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----------

2023-05-11 00:39:23,925	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-11 00:39:29 (running for 00:01:22.30)
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640748.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 64/240 (16 PENDING, 4 RUNNING, 44 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:39:29,553	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82140)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=82140)[0m  [314]][32m [repeated 8x across cluster][0m
[2m[36m(train_and_validate pid=82140)[0m  [ 23][32m [repeated 4x across cluster][0m


2023-05-11 00:39:31,083	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:34,710	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:34 (running for 00:01:27.96)
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 66/240 (16 PENDING, 4 RUNNING, 46 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+----------

2023-05-11 00:39:35,075	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:36,397	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:40 (running for 00:01:34.03)
Using AsyncHyperBand: num_stopped=21
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -643098.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 71/240 (16 PENDING, 4 RUNNING, 51 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:39:43,886	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:45,863	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:45 (running for 00:01:39.11)
Using AsyncHyperBand: num_stopped=22
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -643098.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 73/240 (16 PENDING, 4 RUNNING, 53 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:39:45,978	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:48,657	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:51 (running for 00:01:45.12)
Using AsyncHyperBand: num_stopped=23
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640748.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 77/240 (16 PENDING, 4 RUNNING, 57 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:39:51,953	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82137)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [314]][32m [repeated 16x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [ 23][32m [repeated 8x across cluster][0m


2023-05-11 00:39:55,504	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:39:56,611	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:39:59 (running for 00:01:52.70)
Using AsyncHyperBand: num_stopped=25
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640748.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 81/240 (16 PENDING, 4 RUNNING, 61 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:40:00,090	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:01,207	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:05 (running for 00:01:58.48)
Using AsyncHyperBand: num_stopped=28
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -643098.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 85/240 (16 PENDING, 4 RUNNING, 65 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:40:06,623	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:08,466	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:11 (running for 00:02:04.42)
Using AsyncHyperBand: num_stopped=29
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 88/240 (16 PENDING, 4 RUNNING, 68 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----------

2023-05-11 00:40:11,441	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:14,766	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:18 (running for 00:02:11.95)
Using AsyncHyperBand: num_stopped=31
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -641489.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 92/240 (16 PENDING, 4 RUNNING, 72 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----------

2023-05-11 00:40:19,457	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:20,926	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:24 (running for 00:02:17.26)
Using AsyncHyperBand: num_stopped=33
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640748.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 97/240 (16 PENDING, 4 RUNNING, 77 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:40:26,092	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:28,713	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:32 (running for 00:02:25.26)
Using AsyncHyperBand: num_stopped=34
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 100/240 (16 PENDING, 4 RUNNING, 80 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:40:32,740	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:35,851	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:39 (running for 00:02:32.47)
Using AsyncHyperBand: num_stopped=35
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640000.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 103/240 (16 PENDING, 4 RUNNING, 83 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:40:39,971	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:41,287	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:44 (running for 00:02:37.61)
Using AsyncHyperBand: num_stopped=37
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640000.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 107/240 (16 PENDING, 4 RUNNING, 87 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:40:47,912	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:48,352	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:50 (running for 00:02:43.68)
Using AsyncHyperBand: num_stopped=39
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 110/240 (16 PENDING, 4 RUNNING, 90 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+---------

2023-05-11 00:40:54,189	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:40:54,676	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:40:56 (running for 00:02:49.70)
Using AsyncHyperBand: num_stopped=40
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640000.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 113/240 (16 PENDING, 4 RUNNING, 93 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:40:58,556	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:01,851	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:01 (running for 00:02:55.09)
Using AsyncHyperBand: num_stopped=41
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640000.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 115/240 (16 PENDING, 4 RUNNING, 95 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:41:02,302	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:03,774	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:11 (running for 00:03:05.07)
Using AsyncHyperBand: num_stopped=44
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 121/240 (16 PENDING, 4 RUNNING, 101 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:41:12,108	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82137)[0m  ...[32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [314]][32m [repeated 24x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [ 23][32m [repeated 12x across cluster][0m


2023-05-11 00:41:13,090	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:16,927	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:16 (running for 00:03:10.17)
Using AsyncHyperBand: num_stopped=45
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640000.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 123/240 (16 PENDING, 4 RUNNING, 103 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:41:17,075	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [314]][32m [repeated 12x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-11 00:41:18,285	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:19,391	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:23 (running for 00:03:16.54)
Using AsyncHyperBand: num_stopped=46
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639883.15625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 127/240 (16 PENDING, 4 RUNNING, 107 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:41:24,296	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:25,486	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:28 (running for 00:03:21.93)
Using AsyncHyperBand: num_stopped=50
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 132/240 (16 PENDING, 4 RUNNING, 112 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:41:32,122	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:34,414	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:34 (running for 00:03:27.66)
Using AsyncHyperBand: num_stopped=51
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 134/240 (16 PENDING, 4 RUNNING, 114 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:41:38,043	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:39,275	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:40 (running for 00:03:33.60)
Using AsyncHyperBand: num_stopped=51
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639883.15625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 137/240 (16 PENDING, 4 RUNNING, 117 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:41:42,900	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:43,976	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:46 (running for 00:03:39.84)
Using AsyncHyperBand: num_stopped=55
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640748.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 141/240 (16 PENDING, 4 RUNNING, 121 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:41:50,498	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:51,144	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:41:52 (running for 00:03:45.95)
Using AsyncHyperBand: num_stopped=56
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 144/240 (16 PENDING, 4 RUNNING, 124 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:41:52,932	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:41:55,824	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:00 (running for 00:03:54.15)
Using AsyncHyperBand: num_stopped=57
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -640007.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 147/240 (16 PENDING, 4 RUNNING, 127 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:42:01,637	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [314]][32m [repeated 12x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-11 00:42:03,658	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:05,210	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:07 (running for 00:04:00.61)
Using AsyncHyperBand: num_stopped=58
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639992.6875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 150/240 (16 PENDING, 4 RUNNING, 130 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:42:08,051	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:08,163	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:12 (running for 00:04:05.81)
Using AsyncHyperBand: num_stopped=60
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639992.6875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 154/240 (16 PENDING, 4 RUNNING, 134 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:42:13,650	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:14,808	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:18 (running for 00:04:12.23)
Using AsyncHyperBand: num_stopped=60
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639619.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 158/240 (16 PENDING, 4 RUNNING, 138 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:42:20,251	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:22,857	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:25 (running for 00:04:18.27)
Using AsyncHyperBand: num_stopped=61
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637917.15625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 161/240 (16 PENDING, 4 RUNNING, 141 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:42:26,463	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:28,761	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:32 (running for 00:04:25.54)
Using AsyncHyperBand: num_stopped=64
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639773.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 164/240 (16 PENDING, 4 RUNNING, 144 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:42:32,531	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:33,774	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:37 (running for 00:04:30.74)
Using AsyncHyperBand: num_stopped=65
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 167/240 (16 PENDING, 4 RUNNING, 147 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:42:38,971	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:41,259	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:42 (running for 00:04:36.01)
Using AsyncHyperBand: num_stopped=66
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637917.15625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 171/240 (16 PENDING, 4 RUNNING, 151 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:42:47,400	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:47,568	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:49 (running for 00:04:43.03)
Using AsyncHyperBand: num_stopped=67
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -636215.25
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 174/240 (16 PENDING, 4 RUNNING, 154 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:42:51,151	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:53,534	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:42:54 (running for 00:04:48.06)
Using AsyncHyperBand: num_stopped=70
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 177/240 (16 PENDING, 4 RUNNING, 157 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:42:55,406	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:42:59,621	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:00 (running for 00:04:53.73)
Using AsyncHyperBand: num_stopped=71
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639619.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 180/240 (16 PENDING, 4 RUNNING, 160 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:02,846	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:05,385	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:06 (running for 00:04:59.68)
Using AsyncHyperBand: num_stopped=71
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637112.90625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 183/240 (16 PENDING, 4 RUNNING, 163 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:43:06,668	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:10,350	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:12 (running for 00:05:06.11)
Using AsyncHyperBand: num_stopped=72
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -636215.25
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 186/240 (16 PENDING, 4 RUNNING, 166 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+--------

2023-05-11 00:43:15,069	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:16,702	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:19 (running for 00:05:12.76)
Using AsyncHyperBand: num_stopped=76
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638814.8125
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 191/240 (16 PENDING, 4 RUNNING, 171 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:21,251	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:22,062	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:24 (running for 00:05:17.98)
Using AsyncHyperBand: num_stopped=78
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638995.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 194/240 (16 PENDING, 4 RUNNING, 174 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:26,774	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:28,085	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:33 (running for 00:05:26.46)
Using AsyncHyperBand: num_stopped=78
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638010.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 197/240 (16 PENDING, 4 RUNNING, 177 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:34,021	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82137)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [314]][32m [repeated 12x across cluster][0m
[2m[36m(train_and_validate pid=82137)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-11 00:43:35,447	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:36,836	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:39 (running for 00:05:32.47)
Using AsyncHyperBand: num_stopped=81
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638503.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 201/240 (16 PENDING, 4 RUNNING, 181 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:39,633	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:42,241	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:44 (running for 00:05:38.14)
Using AsyncHyperBand: num_stopped=83
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638503.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 205/240 (16 PENDING, 4 RUNNING, 185 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:45,643	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:47,487	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:51 (running for 00:05:44.29)
Using AsyncHyperBand: num_stopped=86
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639307.3125
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 209/240 (16 PENDING, 4 RUNNING, 189 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------

2023-05-11 00:43:53,055	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:56,749	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:43:56 (running for 00:05:49.99)
Using AsyncHyperBand: num_stopped=88
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 211/240 (16 PENDING, 4 RUNNING, 191 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:43:57,507	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:43:58,357	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:02 (running for 00:05:55.29)
Using AsyncHyperBand: num_stopped=91
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639773.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 216/240 (16 PENDING, 4 RUNNING, 196 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:44:03,044	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:05,198	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:09 (running for 00:06:02.41)
Using AsyncHyperBand: num_stopped=93
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639883.15625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 219/240 (16 PENDING, 4 RUNNING, 199 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:44:11,131	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:11,734	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:16 (running for 00:06:09.84)
Using AsyncHyperBand: num_stopped=94
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 223/240 (16 PENDING, 4 RUNNING, 203 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:44:18,442	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:19,083	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:22 (running for 00:06:15.54)
Using AsyncHyperBand: num_stopped=96
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639773.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 226/240 (16 PENDING, 4 RUNNING, 206 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:44:24,056	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:25,885	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:27 (running for 00:06:21.05)
Using AsyncHyperBand: num_stopped=97
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 229/240 (16 PENDING, 4 RUNNING, 209 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:44:31,558	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:33,236	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:33 (running for 00:06:26.48)
Using AsyncHyperBand: num_stopped=98
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 231/240 (16 PENDING, 4 RUNNING, 211 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:44:34,096	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:34,865	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:38 (running for 00:06:32.00)
Using AsyncHyperBand: num_stopped=100
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 235/240 (16 PENDING, 4 RUNNING, 215 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+----

2023-05-11 00:44:41,532	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:45,341	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:45 (running for 00:06:38.58)
Using AsyncHyperBand: num_stopped=101
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -639696.34375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 237/240 (16 PENDING, 4 RUNNING, 217 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+----

2023-05-11 00:44:46,131	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:46,337	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:51 (running for 00:06:45.06)
Using AsyncHyperBand: num_stopped=102
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638995.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (14 PENDING, 4 RUNNING, 222 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:44:53,413	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:44:56,653	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:44:58 (running for 00:06:51.26)
Using AsyncHyperBand: num_stopped=102
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638220.71875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (11 PENDING, 4 RUNNING, 225 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+----

2023-05-11 00:45:02,026	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:45:03,907	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:45:03 (running for 00:06:57.15)
Using AsyncHyperBand: num_stopped=104
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638713.21875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (9 PENDING, 4 RUNNING, 227 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-----

2023-05-11 00:45:05,404	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:45:06,137	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:45:09 (running for 00:07:02.69)
Using AsyncHyperBand: num_stopped=105
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638430.875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (6 PENDING, 4 RUNNING, 230 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:45:12,766	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:45:14,347	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:45:14 (running for 00:07:07.70)
Using AsyncHyperBand: num_stopped=106
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638430.875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (3 PENDING, 4 RUNNING, 233 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+-------

2023-05-11 00:45:14,734	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=82138)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [314]][32m [repeated 12x across cluster][0m
[2m[36m(train_and_validate pid=82138)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-11 00:45:15,397	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:45:18,051	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:45:20 (running for 00:07:14.10)
Using AsyncHyperBand: num_stopped=108
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -638430.875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (4 RUNNING, 236 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------

2023-05-11 00:45:21,551	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-11 00:45:25,265	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-11 00:45:29 (running for 00:07:23.00)
Using AsyncHyperBand: num_stopped=108
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637112.90625
Logical resource usage: 2.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (1 RUNNING, 239 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+----------------

2023-05-11 00:45:29,835	INFO tune.py:945 -- Total run time: 443.10 seconds (443.05 seconds for the tuning loop).


== Status ==
Current time: 2023-05-11 00:45:29 (running for 00:07:23.07)
Using AsyncHyperBand: num_stopped=108
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -637112.90625
Logical resource usage: 0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-11_00-38-06
Number of trials: 240/240 (240 TERMINATED)
+--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |     lr |   n_hidden_dim |   n_layers |   train_iterations |             loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+--------+----------------+------------+--------------------+------------------+----------