In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch  
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler


# Neural Net Class and Training Functions
Define Class and functions

In [None]:
def data_loaders():
   # Import data
    dir_X = '/Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/ev_adoption_ml/Data/df_X_county.csv'
    dir_y = '/Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/ev_adoption_ml/Data/df_y_county.csv'
    
    X = pd.read_csv(dir_X)
    X['constant'] = 1
    y = pd.read_csv(dir_y)

    # check if any nan values
    nan_row_X = X[X.isna().any(axis=1)]
    #print(nan_row_X)
    nan_row_y = y[y.isna().any(axis=1)]
    #print(nan_row_y)

    X = X.to_numpy()
    y = y.to_numpy()

    # split train/test
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

    # standardize X
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    return X_train, X_test, y_train, y_test

def train_model(nn_model, data_loaded, opt, batch_size=32):
    
    '''
    Trains neural network model on X_train, y_train data.
    
    Returns
    ----------
    nn_model: torch.nn.Module
        trained neural network model
    '''
    # convert to tensors (for Pytorch)
    X_train, X_eval, y_train, y_eval = data_loaded
    X_train_tensor = torch.tensor(X_train)
    y_train_tensor = torch.tensor(y_train)
    X_test_tensor = torch.tensor(X_eval)
    y_test_tensor = torch.tensor(y_eval)
    
    nn_model.train()  # put model in train mode
    # initialize mse loss function
    mse_loss = torch.nn.MSELoss()
    # train with (mini-batch) SGD; initialize optimizer
    #opt = torch.optim.SGD(nn_model.parameters(), lr=1e-4)
    n_samples, n_features = X_train_tensor.shape
    # loop through data in batches

    for batch_start in range(0, n_samples, batch_size):
    # reset gradients to zero
        opt.zero_grad()
        # form batch
        X_batch = X_train_tensor[batch_start:batch_start+batch_size]
        y_batch = y_train_tensor[batch_start:batch_start+batch_size]
        X_batch_test = X_test_tensor[batch_start:batch_start+batch_size]
        y_batch_test = y_test_tensor[batch_start:batch_start+batch_size]
        # pass batch through neural net to get prediction
        y_pred = nn_model(X_batch.float())
        y_pred = y_pred.unsqueeze(1)
        y_pred_test = nn_model(X_batch_test.float())
        y_pred_test = y_pred_test.unsqueeze(1)
        # compute MSE loss
        loss = mse_loss(y_pred, y_batch[:, None].float())
        loss_test = mse_loss(y_pred_test, y_batch_test[:, None].float())
        # back-propagate loss
        loss.backward()
        # update model parameters based on backpropogated gradients
        opt.step()
        
        #print(f"Mean Train MSE: {epoch_loss}")
        
    return epoch_loss, epoch_loss_test

def evaluate_model(nn_model, X_eval, y_eval, batch_size=32):
    '''
    Evaluates trained neural network model on X_eval, y_eval data.

    Parameters
    ----------
    nn_model: torch.nn.Module
        trained neural network model
    X_eval: np.array
        matrix of training data features
    y_eval: np.array
        vector of training data labels
    batch_size: int
        batch size to looping over dataset to generate predictions

    Returns
    ----------
    mse: float
        MSE of trained model on X_eval, y_eval data
    '''
    # initialize mse loss function
    mse_loss = torch.nn.MSELoss()
    # convert to tensors (for Pytorch)
    X_eval_tensor = torch.tensor(X_eval)
    y_eval_tensor = torch.tensor(y_eval)
    n_samples = X_eval_tensor.shape[0]
    nn_model.eval() # put in eval mode
    # loop over data and generate predictions
    preds = []
    for batch_start in range(0, n_samples, batch_size):
        # form batch
        X_batch = X_eval_tensor[batch_start:batch_start+batch_size]
        y_batch = y_eval_tensor[batch_start:batch_start+batch_size]
        with torch.no_grad():  # no need to compute gradients during evaluation
            # pass batch through neural net to get prediction
            y_pred = nn_model(X_batch.float())
            y_pred = y_pred.unsqueeze(1)
            preds.append(y_pred)
    # compute MSE across all samples
    all_preds = torch.cat(preds)
    loss = mse_loss(all_preds, y_eval_tensor[:, None].float()).item()
    return loss

def train_and_validate(config):

    '''Parameters
    ----------
    X_train: np.array
        matrix of training data features
    y_train: np.array
        vector of training data labels
    max_iter: int
        maximum number of iterations to train for
    batch_size: int
        batch size to use when training w/ SGD
    '''
    # intialize neural network
    data_loaded = data_loaders()
    X_train, X_eval, y_train, y_eval = data_loaded
    print(y_eval)
    n_samples, n_features = X_train.shape
    nn_model = NN_configureable(n_features, config["n_hidden_dim"], config["n_layers"])

    opt = torch.optim.SGD(nn_model.parameters(), lr=config["lr"],  momentum=0.9)
    batch_size = config["batch_size"]
    max_iter = config["train_iterations"]

    # Start the training.
    for it in range(max_iter):
        # save losses across all batches
        train_epoch_loss, test_epoch_loss = train_model(nn_model, data_loaded, opt, batch_size)
        valid_epoch_loss = evaluate_model(nn_model, X_eval, y_eval, batch_size)

    with tune.checkpoint_dir(it) as checkpoint_dir:
            path = os.path.join(checkpoint_dir, 'checkpoint')
            torch.save((nn_model.state_dict(), opt.state_dict()), path)
    tune.report(
        loss=valid_epoch_loss)

class NN(nn.Module):
    '''
    Class for fully connected neural net.
    '''
    def __init__(self, input_dim, hidden_dim):
        '''
        Parameters
        ----------
        input_dim: int
            input dimension (i.e., # of features in each example passed to the network)
        hidden_dim: int
            number of nodes in hidden layer
        '''
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.layers = nn.Sequential(
            # Network has a single hidden layer
            # Apply ReLU activation in between the hidden layer and output node
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),
            nn.ReLU()
        )

    def forward(self, x):
        x = self.layers(x)
        return x


class NN_configureable(nn.Module):
    '''
    Class for fully connected neural net.
    '''
    def __init__(self, input_dim, hidden_dim, hidden_layers):
        '''
        Parameters
        ----------
        input_dim: int
            input dimension (i.e., # of features in each example passed to the network)
        hidden_dim: int
            number of nodes in hidden layer
        '''
        super().__init__()
        self.input_dim = input_dim
        print(input_dim)
        self.hidden_layers = hidden_layers
        #self.layers = nn.ModuleDict()
        self.layers = nn.ModuleDict()
        
        # Define input layer
        self.layers["input"] = nn.Linear(in_features = input_dim, out_features = hidden_dim)
        # Define hidden layers
        for i in range(self.hidden_layers):
            self.layers[f"hidden_{i}"] = nn.Linear(in_features = hidden_dim, out_features = hidden_dim)
        # Define output layer
        self.layers["output"] = nn.Linear(in_features = hidden_dim, out_features = 1)

    def forward(self, x):
        x = self.layers["input"](x)
        for i in range(self.hidden_layers):
            x = F.relu(self.layers[f"hidden_{i}"](x))

        return self.layers["output"](x)        
        

# Search Function for Ray Tune
Hyperparameter search



# Import Data
Import combined data

In [18]:
def main():   
    
    # Search Function for Ray Tune - Hyperparameter search
     
    #X = pd.read_csv('./Data/df_X_county.csv')
    #print(X.head)
     
    # Define the parameter search configuration.
    config = {
        "n_layers": 
            #tune.sample_from(lambda _: 2 ** np.random.randint(1, 5)),
            tune.grid_search([1, 2, 3]),
        "n_hidden_dim": 
            #tune.sample_from(lambda _: 2 ** np.random.randint(4, 8)),
            tune.grid_search([1, 2]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([32]),
        "train_iterations": tune.choice([50, 100])
    }

    max_num_iter = 50
    grace_period = 1
    # Number of Ray Tune random search experiments to run.
    num_samples = 20
    
    # Schduler to stop bad performing trails.
    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t = max_num_iter,
        grace_period = grace_period,
        reduction_factor = 2 
    )

    # Reporter to show on command line/output window
    reporter = CLIReporter(
        metric_columns=["loss", "accuracy", "training_iteration"])

    #nn_model_place = NN_configureable(input_dim, config["n_hidden_dim"], config["n_layers"])

    # Start Ray Tune search
    result = tune.run(
        train_and_validate,
        resources_per_trial = {"cpu": 2, "gpu": 0},
        config = config,
        num_samples = num_samples,
        scheduler = scheduler,
        local_dir = '../outputs/raytune_result',
        keep_checkpoints_num = 1,
        checkpoint_score_attr = 'min-validation_loss',
        progress_reporter = reporter
    )

    # Extract the best trial run from the search.
    best_trial = result.get_best_trial(
        'loss', 'min', 'last'
    )
    print(f"Best trial config: {best_trial.config}")
    print(f"Best trial final validation loss: {best_trial.last_result['loss']}")
    
    #n_layers = np.arange(1, 5) # iterate through hidden layer count
    #n_hidden_dim = np.arange(8, 65, 8)  # iterate through hidden layer node count
    #mse_dict = {}
    
    #for i in range(len(n_layers)):
    #    for j in range(len(n_hidden_dim)):
    #        tuple_place = (i, j)
    #        nn_model_place = NN_configureable(input_dim, hidden_dim = j, hidden_layers = i)
    #        nn_model_result = train_model(nn_model_place, X_train, y_train, X_test, y_test, 32)
    #        train_mse =  evaluate_model(nn_model_result[0], X_train, y_train)
    #        test_mse = evaluate_model(nn_model_result[0], X_test, y_test)
    #        #print(tuple_place)
    #        train_test_list = [train_mse, test_mse]
    #        mse_dict[tuple_place] = train_test_list
            
        
        
    #nn_model_place = NN_configureable(input_dim, 8, 2)
    #nn_model_place = NN(input_dim, 8)
    #nn_model_result = train_model(nn_model_place, X_train, y_train, X_test, y_test, 32)
    #train_mse = evaluate_model(nn_model_result[0], X_train, y_train)
    #test_mse = evaluate_model(nn_model_result[0], X_test, y_test)
        
    #for key in mse_dict:
    #    print(f"Train MSE for model: hidden_layers = {key[0]}, hidden_dim = {key[1]} is: {mse_dict[key][0]}")
    #    print(f"Test MSE for model: hidden_layers = {key[0]}, hidden_dim = {key[1]} is: {mse_dict[key][1]}")

    # plot the model's test errors
    #plt.plot(range(len(nn_model_result[1])), nn_model_result[1])
    # axis labels

    '''
    plt.xlabel('Iteration Step')
    plt.ylabel('Test Error')
    plt.title("Model 1 - Hidden Layer - ReLU")
    plt.legend()
    plt.figure(figsize=(20,12))
    plt.show()
'''

In [19]:
if __name__ == '__main__':
    main()



== Status ==
Current time: 2023-05-10 16:47:53 (running for 00:00:00.06)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Logical resource usage: 2.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 16/120 (15 PENDING, 1 RUNNING)
+--------------------------------+----------+-----------------+--------------+-------------+----------------+------------+--------------------+
| Trial name                     | status   | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |
|--------------------------------+----------+-----------------+--------------+-------------+----------------+------------+--------------------|
| train_and_validate_ef910_00000 | RUNNING  | 127.0.0.1:76483 |           32 | 0.000376769 |              

Trial name,date,done,experiment_tag,hostname,iterations_since_restore,loss,node_ip,pid,should_checkpoint,time_since_restore,time_this_iter_s,time_total_s,timestamp,training_iteration,trial_id
train_and_validate_ef910_00000,2023-05-10_16-47-59,True,"0_batch_size=32,lr=0.0004,n_hidden_dim=1,n_layers=1,train_iterations=50",dhcp-10-29-108-17.dyn.MIT.EDU,1,1116100.0,127.0.0.1,76483,True,4.16121,4.16121,4.16121,1683751679,1,ef910_00000
train_and_validate_ef910_00001,2023-05-10_16-48-01,True,"1_batch_size=32,lr=0.0006,n_hidden_dim=2,n_layers=1,train_iterations=50",dhcp-10-29-108-17.dyn.MIT.EDU,1,,127.0.0.1,76484,True,4.21764,4.21764,4.21764,1683751681,1,ef910_00001
train_and_validate_ef910_00002,2023-05-10_16-48-02,True,,dhcp-10-29-108-17.dyn.MIT.EDU,1,1117390.0,127.0.0.1,76485,True,5.1529,5.1529,5.1529,1683751682,1,ef910_00002
train_and_validate_ef910_00003,2023-05-10_16-48-02,True,"3_batch_size=32,lr=0.0004,n_hidden_dim=2,n_layers=2,train_iterations=50",dhcp-10-29-108-17.dyn.MIT.EDU,1,1116070.0,127.0.0.1,76488,True,5.22721,5.22721,5.22721,1683751682,1,ef910_00003
train_and_validate_ef910_00004,2023-05-10_16-48-05,True,"4_batch_size=32,lr=0.0004,n_hidden_dim=1,n_layers=3,train_iterations=50",dhcp-10-29-108-17.dyn.MIT.EDU,1,1116090.0,127.0.0.1,76483,True,5.77656,5.77656,5.77656,1683751685,1,ef910_00004
train_and_validate_ef910_00005,2023-05-10_16-48-13,True,,dhcp-10-29-108-17.dyn.MIT.EDU,1,1117840.0,127.0.0.1,76484,True,11.6102,11.6102,11.6102,1683751693,1,ef910_00005
train_and_validate_ef910_00006,2023-05-10_16-48-06,True,"6_batch_size=32,lr=0.0160,n_hidden_dim=1,n_layers=1,train_iterations=50",dhcp-10-29-108-17.dyn.MIT.EDU,1,,127.0.0.1,76485,True,4.20862,4.20862,4.20862,1683751686,1,ef910_00006
train_and_validate_ef910_00007,2023-05-10_16-48-10,True,"7_batch_size=32,lr=0.0010,n_hidden_dim=2,n_layers=1,train_iterations=100",dhcp-10-29-108-17.dyn.MIT.EDU,1,,127.0.0.1,76488,True,8.38895,8.38895,8.38895,1683751690,1,ef910_00007
train_and_validate_ef910_00008,2023-05-10_16-48-10,True,,dhcp-10-29-108-17.dyn.MIT.EDU,1,1117130.0,127.0.0.1,76483,True,4.82228,4.82228,4.82228,1683751690,1,ef910_00008
train_and_validate_ef910_00009,2023-05-10_16-48-16,True,"9_batch_size=32,lr=0.0002,n_hidden_dim=2,n_layers=2,train_iterations=100",dhcp-10-29-108-17.dyn.MIT.EDU,1,1116130.0,127.0.0.1,76485,True,10.3066,10.3066,10.3066,1683751696,1,ef910_00009


2023-05-10 16:47:59,750	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:47:59 (running for 00:00:05.95)
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116098.125
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 20/120 (16 PENDING, 4 RUNNING)
+--------------------------------+----------+-----------------+--------------+-------------+----------------+------------+--------------------+------------+----------------------+
| Trial name                     | status   | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |       loss |   training_iteration |
|--------------------------------+----------+-----------------+--------------+-------------+----------------+------------+--------------------+------------+--------------

2023-05-10 16:48:01,372	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:02,300	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf


2023-05-10 16:48:02,411	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:05,579	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:48:05 (running for 00:00:11.78)
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116096.0
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 24/120 (16 PENDING, 4 RUNNING, 4 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---

2023-05-10 16:48:06,560	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf[32m [repeated 2x across cluster][0m


2023-05-10 16:48:10,452	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 30061x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m 13[32m [repeated 44x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 1248835.125[32m [repeated 57178x across cluster][0m


2023-05-10 16:48:10,854	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:48:10 (running for 00:00:17.06)
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116098.125
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 27/120 (16 PENDING, 4 RUNNING, 7 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+-

2023-05-10 16:48:13,027	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 894x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 35x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 321807.46875[32m [repeated 78912x across cluster][0m


2023-05-10 16:48:16,148	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:48:16 (running for 00:00:22.35)
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116117.0
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 29/120 (16 PENDING, 4 RUNNING, 9 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---

2023-05-10 16:48:16,852	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:16,914	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 8x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 36568x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m [32m [repeated 61x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 153183.21875[32m [repeated 53545x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 1449913.25


2023-05-10 16:48:21,332	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:48:21 (running for 00:00:27.54)
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116628.1875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 33/120 (16 PENDING, 4 RUNNING, 13 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:48:22,120	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 23676x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 33x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 275199.5[32m [repeated 59836x across cluster][0m


2023-05-10 16:48:26,959	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:48:26 (running for 00:00:33.16)
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116125.375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 35/120 (16 PENDING, 4 RUNNING, 15 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+

2023-05-10 16:48:27,289	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 43337x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 42x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 1027862.375[32m [repeated 43979x across cluster][0m


2023-05-10 16:48:30,697	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:31,759	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:48:32 (running for 00:00:38.43)
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1117112.9375
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 39/120 (16 PENDING, 4 RUNNING, 19 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:48:35,473	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76488)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m  [ 23][32m [repeated 8x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 24535x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m 13[32m [repeated 44x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 351553.09375[32m [repeated 57372x across cluster][0m


2023-05-10 16:48:36,007	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:38,197	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:48:38 (running for 00:00:44.40)
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116837.6875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 42/120 (16 PENDING, 4 RUNNING, 22 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:48:40,374	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 551026.5625
[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 46892x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m 13[32m [repeated 36x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 195121.875[32m [repeated 40675x across cluster][0m


2023-05-10 16:48:43,125	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:43,849	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:48:43 (running for 00:00:50.05)
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1117094.875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 45/120 (16 PENDING, 4 RUNNING, 25 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+

2023-05-10 16:48:45,705	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf


2023-05-10 16:48:48,141	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan[32m [repeated 37543x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 40x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 281469.90625[32m [repeated 47578x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: inf[32m [repeated 2x across cluster][0m


2023-05-10 16:48:52,353	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:48:52 (running for 00:00:58.56)
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116580.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 48/120 (16 PENDING, 4 RUNNING, 28 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+--

2023-05-10 16:48:54,208	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:54,626	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 26347x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 37x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 119191.984375[32m [repeated 55946x across cluster][0m


2023-05-10 16:48:55,846	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:48:57,444	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:48:57 (running for 00:01:03.65)
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116489.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 52/120 (16 PENDING, 4 RUNNING, 32 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:48:59,599	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:49:00,477	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 8x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan[32m [repeated 21336x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m 13[32m [repeated 45x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 81265.125[32m [repeated 58389x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 362960.1875


2023-05-10 16:49:04,045	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:04 (running for 00:01:10.25)
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116489.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 55/120 (16 PENDING, 4 RUNNING, 35 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:05,842	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...


2023-05-10 16:49:08,038	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:49:10,377	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:49:10 (running for 00:01:16.58)
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116566.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 58/120 (16 PENDING, 4 RUNNING, 38 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:11,671	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:49:14,021	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 46700x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 41x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: 1375475.125[32m [repeated 43032x across cluster][0m


2023-05-10 16:49:15,875	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:15 (running for 00:01:22.08)
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116876.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 62/120 (16 PENDING, 4 RUNNING, 42 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:49:18,594	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 16115x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 48x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 236616.703125[32m [repeated 66291x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:49:21,724	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:21 (running for 00:01:27.93)
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116728.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 64/120 (16 PENDING, 4 RUNNING, 44 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:24,040	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf


2023-05-10 16:49:24,481	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:49:25,086	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 8x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 7747x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 58x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 149387.015625[32m [repeated 77902x across cluster][0m


2023-05-10 16:49:25,882	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 4x across cluster][0m


2023-05-10 16:49:28,379	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:28 (running for 00:01:34.58)
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116952.1875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 69/120 (16 PENDING, 4 RUNNING, 49 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:30,311	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 15105x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m [32m [repeated 44x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 166348.9375[32m [repeated 67556x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 3x across cluster][0m


2023-05-10 16:49:34,262	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:34 (running for 00:01:40.46)
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116952.1875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 71/120 (16 PENDING, 4 RUNNING, 51 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:37,629	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...


2023-05-10 16:49:39,034	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:49:40,563	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:49:40 (running for 00:01:46.76)
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116728.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 74/120 (16 PENDING, 4 RUNNING, 54 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:42,486	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 9441x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m [32m [repeated 27x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 1269580.5[32m [repeated 71704x across cluster][0m


2023-05-10 16:49:46,495	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:46 (running for 00:01:52.70)
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116580.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 76/120 (16 PENDING, 4 RUNNING, 56 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+-

2023-05-10 16:49:47,656	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:49:49,504	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 36033x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m [32m [repeated 49x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 2374913.0[32m [repeated 49950x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 292025.4375
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 390716.1875


2023-05-10 16:49:54,196	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:54 (running for 00:02:00.40)
Using AsyncHyperBand: num_stopped=19
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116566.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 79/120 (16 PENDING, 4 RUNNING, 59 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:49:54,996	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m  [ 23][32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan[32m [repeated 47102x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m [32m [repeated 33x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 928848.0[32m [repeated 42402x across cluster][0m


2023-05-10 16:49:56,084	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:49:59,958	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:49:59 (running for 00:02:06.16)
Using AsyncHyperBand: num_stopped=19
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116552.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 82/120 (16 PENDING, 4 RUNNING, 62 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:50:01,238	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:50:04,451	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf[32m [repeated 2x across cluster][0m


2023-05-10 16:50:05,564	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:05 (running for 00:02:11.77)
Using AsyncHyperBand: num_stopped=21
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116580.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 85/120 (16 PENDING, 4 RUNNING, 65 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+-

2023-05-10 16:50:08,186	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:50:08,640	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 4x across cluster][0m


2023-05-10 16:50:09,605	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: inf[32m [repeated 2x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 19188x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m [32m [repeated 51x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 79236.890625[32m [repeated 59217x across cluster][0m


2023-05-10 16:50:11,146	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:11 (running for 00:02:17.34)
Using AsyncHyperBand: num_stopped=22
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116566.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 89/120 (16 PENDING, 4 RUNNING, 69 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:50:15,032	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: 541917.0625


2023-05-10 16:50:15,459	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 47275x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m 13[32m [repeated 43x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: 448216.375[32m [repeated 37788x across cluster][0m


2023-05-10 16:50:18,028	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:18 (running for 00:02:24.23)
Using AsyncHyperBand: num_stopped=22
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116552.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 92/120 (16 PENDING, 4 RUNNING, 72 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:50:20,334	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:50:20,412	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 12968x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m 13[32m [repeated 41x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 122837.625[32m [repeated 65405x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 2x across cluster][0m
== Status ==
Current time: 2023-05-10 16:50:25 (running for 00:02:31.72)
Using AsyncHyperBand: num_stopped=23
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116552.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_an

2023-05-10 16:50:25,979	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m  [  1]
[2m[36m(train_and_validate pid=76488)[0m  [ 23]
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf


2023-05-10 16:50:26,333	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:50:28,845	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 3x across cluster][0m


2023-05-10 16:50:29,646	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 39033x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 55x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 469555.03125[32m [repeated 46664x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-10 16:50:34,268	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:34 (running for 00:02:40.47)
Using AsyncHyperBand: num_stopped=25
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116552.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 99/120 (16 PENDING, 4 RUNNING, 79 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:50:34,742	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 23889x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m [32m [repeated 36x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 122176.3359375[32m [repeated 57602x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m  [ 23][32m [repeated 4x across cluster][0m


2023-05-10 16:50:36,764	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:50:39,587	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:39 (running for 00:02:45.79)
Using AsyncHyperBand: num_stopped=25
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116508.0625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 102/120 (16 PENDING, 4 RUNNING, 82 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+------------------

2023-05-10 16:50:41,932	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:50:44,743	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:44 (running for 00:02:50.94)
Using AsyncHyperBand: num_stopped=27
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116566.5625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 104/120 (16 PENDING, 4 RUNNING, 84 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+------------------

2023-05-10 16:50:45,510	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan[32m [repeated 24646x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m [32m [repeated 51x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: 615694.125[32m [repeated 59149x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-10 16:50:46,914	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: inf


2023-05-10 16:50:46,991	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 19131x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m [32m [repeated 42x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 178977.046875[32m [repeated 64990x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 4x across cluster][0m


2023-05-10 16:50:51,199	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:51 (running for 00:02:57.40)
Using AsyncHyperBand: num_stopped=28
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116552.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 108/120 (16 PENDING, 4 RUNNING, 88 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:50:51,467	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:50:55,265	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: nan[32m [repeated 23459x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 45x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 804528.5625[32m [repeated 61835x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-10 16:50:56,518	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:50:56 (running for 00:03:02.72)
Using AsyncHyperBand: num_stopped=28
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116463.5
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 111/120 (16 PENDING, 4 RUNNING, 91 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+

2023-05-10 16:51:00,887	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)

2023-05-10 16:51:01,418	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan
[2m[36m(train_and_validate pid=76483)

2023-05-10 16:51:06,551	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:51:06 (running for 00:03:12.75)
Using AsyncHyperBand: num_stopped=28
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116462.625
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 114/120 (16 PENDING, 4 RUNNING, 94 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:51:08,149	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76485)[0m  ...[32m [repeated 2x across cluster][0m


2023-05-10 16:51:09,576	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:10,171	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 61x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 2832912.5[32m [repeated 51762x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 22984x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m  [ 23][32m [repeated 8x across cluster][0m


2023-05-10 16:51:11,692	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:51:11 (running for 00:03:17.89)
Using AsyncHyperBand: num_stopped=28
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116403.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 118/120 (16 PENDING, 4 RUNNING, 98 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:51:16,206	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: inf


2023-05-10 16:51:16,455	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:19,344	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76483)[0m  ...[32m [repeated 2x across cluster][0m
== Status ==
Current time: 2023-05-10 16:51:19 (running for 00:03:25.55)
Using AsyncHyperBand: num_stopped=29
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116403.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (15 PENDING, 4 RUNNING, 101 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+---------

2023-05-10 16:51:20,895	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: nan[32m [repeated 22210x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 8x across cluster][0m


2023-05-10 16:51:21,764	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:22,292	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76488)[0m  ...[32m [repeated 4x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 35x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 494793.09375[32m [repeated 59755x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: nan[32m [repeated 17609x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m  [ 23][32m [repeated 4x across cluster][0m
== Status ==
Current time: 2023-05-10 16:51:27 (running for 00:03:33.61)
Using AsyncHyperBand: num_stopped=29
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116386.875
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (11 PEN

2023-05-10 16:51:28,569	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: inf
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 251705.03125
[2m[36m(train_and_validate pid=76483)[0m [32m [repeated 29x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m Mean Train MSE: 709206.5[32m [repeated 40177x across cluster][0m


2023-05-10 16:51:30,940	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 32321x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m  ...
[2m[36m(train_and_validate pid=76485)[0m  [ 23][32m [repeated 4x across cluster][0m


2023-05-10 16:51:31,939	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:32,953	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:51:32 (running for 00:03:39.16)
Using AsyncHyperBand: num_stopped=29
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116335.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (8 PENDING, 4 RUNNING, 108 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:51:33,763	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


[2m[36m(train_and_validate pid=76484)[0m [32m [repeated 45x across cluster][0m
[2m[36m(train_and_validate pid=76485)[0m Mean Train MSE: 544224.125[32m [repeated 62092x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: nan[32m [repeated 13957x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  ...[32m [repeated 3x across cluster][0m
[2m[36m(train_and_validate pid=76484)[0m  [ 23][32m [repeated 6x across cluster][0m


2023-05-10 16:51:37,407	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:39,822	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

== Status ==
Current time: 2023-05-10 16:51:39 (running for 00:03:46.02)
Using AsyncHyperBand: num_stopped=31
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116376.125
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (5 PENDING, 4 RUNNING, 111 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+-------------------

2023-05-10 16:51:41,524	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:44,067	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76484)[0m [32m [repeated 43x across cluster][0m
[2m[36m(train_and_validate pid=76483)[0m Mean Train MSE: 905191.25[32m [repeated 71753x across cluster][0m


2023-05-10 16:51:45,919	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:51:45 (running for 00:03:52.12)
Using AsyncHyperBand: num_stopped=32
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116335.75
Logical resource usage: 8.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (1 PENDING, 4 RUNNING, 115 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------

2023-05-10 16:51:46,630	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']
2023-05-10 16:51:48,915	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/t

[2m[36m(train_and_validate pid=76488)[0m [32m [repeated 38x across cluster][0m
[2m[36m(train_and_validate pid=76488)[0m Mean Train MSE: 74457.53125[32m [repeated 55459x across cluster][0m


2023-05-10 16:51:51,337	ERROR checkpoint_manager.py:361 -- Result dict has no key: validation_loss. checkpoint_score_attr must be set to a key in the result dict. Valid keys are: ['loss', 'time_this_iter_s', 'should_checkpoint', 'done', 'training_iteration', 'trial_id', 'date', 'timestamp', 'time_total_s', 'pid', 'hostname', 'node_ip', 'time_since_restore', 'iterations_since_restore', 'experiment_tag', 'config/n_layers', 'config/n_hidden_dim', 'config/lr', 'config/batch_size', 'config/train_iterations']


== Status ==
Current time: 2023-05-10 16:51:51 (running for 00:03:57.54)
Using AsyncHyperBand: num_stopped=33
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116271.125
Logical resource usage: 2.0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (1 RUNNING, 119 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------

2023-05-10 16:51:51,378	INFO tune.py:945 -- Total run time: 237.60 seconds (237.57 seconds for the tuning loop).


== Status ==
Current time: 2023-05-10 16:51:51 (running for 00:03:57.58)
Using AsyncHyperBand: num_stopped=33
Bracket: Iter 32.000: None | Iter 16.000: None | Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: -1116271.125
Logical resource usage: 0/8 CPUs, 0/0 GPUs
Result logdir: /Users/donokoye/Documents/Spring_23/ML_1.C51/Final_Project/outputs/raytune_result/train_and_validate_2023-05-10_16-47-53
Number of trials: 120/120 (120 TERMINATED)
+--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+----------------------+
| Trial name                     | status     | loc             |   batch_size |          lr |   n_hidden_dim |   n_layers |   train_iterations |          loss |   training_iteration |
|--------------------------------+------------+-----------------+--------------+-------------+----------------+------------+--------------------+---------------+------

KeyError: 'accuracy'