# Base CNN model from NVIDIA paper


In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

PROJECT_ROOT = '/content/drive/MyDrive/cs-7643 project v2'
CODE_DIRECTORY = 'NVIDIA Dave Starter Code'

# Checking that files are accessible for the current user.
if not CODE_DIRECTORY in os.listdir(PROJECT_ROOT):
    # If this message displays, please create a shortcut to the group shared folder.
    print("Unable to access project files.")
else:
    print("Project files are accessible.")

In [None]:
# Uncomment the command below if there is some unforeseen need to unpack the compressed dataset again.
# !unzip "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code/data/dataset.zip" -d "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code/data"

In [None]:
# Switching to the code directory to allow imports and access to dataset.
%cd "/content/drive/MyDrive/cs-7643 project v2/NVIDIA Dave Starter Code"
!pwd

In [None]:
# ! pip install ipywidgets
# ! jupyter nbextension enable --py widgetsnbextension

In [23]:
import copy
import time
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim
import torchvision.transforms as transforms

from utils import dataset
from models import nvidia

# Tqdm progress bar
from tqdm import tqdm_notebook, tqdm
import sigopt

LEARNING_RATE = 1e-4
MOMENTUM = 0.7
WEIGHT_DECAY_REGULARIZATION_TERM = 1e-6  # aiming for overfitting
BATCH_SIZE = 256
NUM_EPOCHS = 3


# Citation:
# - AverageMeter taken verbatim from the Assignment 2 training code.
# - Remainder of code in this file based on Assignment 2 training code.

class AverageMeter(object):
    """Computes and stores the average and current value"""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

def train(epoch, data_loader, model, optimizer, criterion, scaler=None):
    iter_time = AverageMeter()
    losses = AverageMeter()
    

    # Get the progress bar for later modification
    progress_bar = tqdm_notebook(data_loader, ascii=True)

    for idx, (data, target) in enumerate(progress_bar):
        start = time.time()

        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()

        # Forward pass and computation of loss.
        with torch.autocast("cuda"): #Automatic Mixed precision
            out = model(data).reshape(target.shape)
            # RMSE loss
            loss = torch.sqrt(criterion(out, target))

        
        # Backwards pass to determine gradients and update model parameters.
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

        for param in model.parameters():
            param.grad = None
        scaler.scale(loss).backward()
        scaler.step(optimizer)

        scaler.update()
        losses.update(loss, out.shape[0])

        iter_time.update(time.time() - start)
        # if idx % 5 == 0:
        #     print(('Epoch: [{0}][{1}/{2}]\t'
        #            'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t'
        #            'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
        #           .format(epoch, idx, len(data_loader), iter_time=iter_time, loss=losses)))

        progress_bar.set_description_str(f"Epoch {epoch}, Batch: {idx+1}, Loss: {loss.item():.4f}")
            # "Batch: %d, Loss: %.4f" % ((idx + 1), loss.item()))
        
    return losses.avg

def validate(epoch, validation_loader, model, criterion):
    iter_time = AverageMeter()
    losses = AverageMeter()

    # Get the progress bar for later modification
    progress_bar = tqdm_notebook(validation_loader, ascii=True)

    for idx, (data, target) in enumerate(progress_bar):
        start = time.time()

        if torch.cuda.is_available():
            data = data.cuda()
            target = target.cuda()

        out = None
        loss = None

        with torch.no_grad():
            out = model(data)
            # RMSE loss
            loss = torch.sqrt(criterion(out, target))

        # loss.squeeze()
        losses.update(loss, out.shape[0])

        iter_time.update(time.time() - start)

#         if idx % 10 == 0:
#             print(('Epoch: [{0}][{1}/{2}]\t'
#                    'Time {iter_time.val:.3f} ({iter_time.avg:.3f})\t')
#                   .format(epoch, idx, len(validation_loader), iter_time=iter_time, loss=losses))

        progress_bar.set_description_str(f"Batch: {idx+1}, Loss: {loss.item():.4f}")
           
        
        # progress_bar.set_description_str(
        #     "Batch: %d, Loss: %.4f" % ((idx + 1), loss.item()))

    print("* Average Loss @1: {loss.avg:.4f}".format(loss=losses))
    return losses.avg

# RSME Loss function. eps prevents [nan] in the backward pass
def RSMELoss(yhat, y, eps=1e-6):
    return torch.sqrt(torch.mean((yhat-y)**2) + eps)

def plots(losses, lr=LEARNING_RATE, reg=WEIGHT_DECAY_REGULARIZATION_TERM, batch=BATCH_SIZE, momentum=MOMENTUM):
    fig1, ax1 = plt.subplots(figsize=(8, 10))
    ax1.plot(losses[0], label='Training Losses')
    ax1.plot(losses[1], label='Validation Losses')
    ax1.set_xlabel('Nr Epochs')
    ax1.set_ylabel('Loss')
    ax1.set_title(f'L-Curves -> Train & Valid LR={lr} Momentum={momentum}, Reg_Term={reg}, Batch={batch}')
    ax1.legend(loc="best")
    plt.show()



In [None]:
# !! Skip this cell for sigopt experimentation !!
def main():
    # Normalizing images per the paper and resizing each image to 66 x 200.
    transform = transforms.Compose([
        # Citation:
        # https://pytorch.org/vision/stable/transforms.html#scriptable-transforms
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        transforms.Resize((66, 200)),
    ])

    # Loading in images with normalization and resizing applied.
    training_set, validation_set, test_set = dataset.load_nvidia_dataset(transform=transform, batch_size=BATCH_SIZE)
    torch.cuda.empty_cache()

    # Loading in the NVIDIA DAVE-2 model.
    model = nvidia.NvidiaDaveCNN()

    if torch.cuda.is_available():
        model = model.to(torch.device("cuda"))


    # Specify Mean Squared Error (MSE) or RSME as the criterion since this is a regression task.
    criterion = nn.MSELoss()
    # criterion = RMSELoss

    # Using Stochastic Gradient Descent (SGD) as the optimizer.
    optimizer = torch.optim.SGD(model.parameters(), 
                                LEARNING_RATE,
                                momentum=MOMENTUM,
                                weight_decay=WEIGHT_DECAY_REGULARIZATION_TERM)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True) 
    # torch.backends.cudnn.benchmark = True

    best = float('inf')
    best_model = None
    best_epoch = None
    train_losses = []
    valid_losses = []
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):

        # Training.
        train_loss = train(epoch, training_set, model, optimizer, criterion, scaler=scaler)
        temp = train_loss
        temp.cpu()
        temp = float(temp)
        train_losses.append(temp)  # average losses
        print(f"Average training loss this epoch: {temp:.4f}")

        # Validation.
        valid_loss = validate(epoch, test_set, model, criterion)
        temp = valid_loss
        temp.cpu()
        temp = float(temp)
        valid_losses.append(temp)

        scheduler.step(train_loss)

        if valid_loss < best:
            best = valid_loss
            best_model = copy.deepcopy(model)
            best_epoch = epoch
        print(f"Best performing model so far average validation loss: {best:.4f} on epoch {best_epoch}\n")

    print('Best Training Loss @1: {:.4f}'.format(best))

    torch.save(best_model.state_dict(), './checkpoints/nvidia_dave2.pth')

    losses_to_plot = train_losses, valid_losses
    plots(losses_to_plot, lr=LEARNING_RATE, reg=WEIGHT_DECAY_REGULARIZATION_TERM,
          batch=BATCH_SIZE, momentum=MOMENTUM)

    

if __name__ == '__main__':
    main()

In [24]:
def evaluate(args):       
        
    # log source of hyperparameter suggestion
    sigopt.log_metadata('optimizer', "SGD with momentum")
    sigopt.log_model("CNN - NVIDIA inspired")
    sigopt.log_dataset("Udacity self-driving dataset ")
    
    sigopt.params.setdefault("learning_rate", args['learning_rate'])
    sigopt.params.setdefault("momentum", args['momentum'])
    sigopt.params.setdefault("reg", args['reg'])
    sigopt.params.setdefault("batch_size", int(args['batch_size']))
    
    # Normalizing images per the paper and resizing each image to 66 x 200.
    transform = transforms.Compose([
        # Citation:
        # https://pytorch.org/vision/stable/transforms.html#scriptable-transforms
        transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
        transforms.Resize((66, 200)),
    ])

    print("Params : ", sigopt.params)
    # Loading in images with normalization and resizing applied.
    training_set, validation_set, test_set = dataset.load_nvidia_dataset(transform=transform, batch_size=int(sigopt.params.batch_size))
    torch.cuda.empty_cache()

    # Loading in the NVIDIA DAVE-2 model.
    model = nvidia.NvidiaDaveCNN()

    if torch.cuda.is_available():
        model = model.to(torch.device("cuda"))


    # Specify Mean Squared Error (MSE) as criterion since this is a regression task. (We ultimately take sqrt and convert it to RMSE)
    criterion = nn.MSELoss()

    # Using Stochastic Gradient Descent (SGD) as the optimizer.
    optimizer = torch.optim.SGD(model.parameters(), sigopt.params.learning_rate,
                                momentum=sigopt.params.momentum,
                                weight_decay=sigopt.params.reg)
    
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True) 
    # torch.backends.cudnn.benchmark = True

    best = float('inf')
    best_model = None
    best_epoch = None
    train_losses = []
    valid_losses = []
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(NUM_EPOCHS):

        # Training.
        train_loss = train(epoch, training_set, model, optimizer, criterion, scaler=scaler)
        temp = train_loss
        temp.cpu()
        temp = float(temp)
        train_losses.append(temp)  # average losses
        print(f"Average training loss this epoch: {temp:.4f}")

        # Validation.
        valid_loss = validate(epoch, test_set, model, criterion)
        temp = valid_loss
        temp.cpu()
        temp = float(temp)
        valid_losses.append(temp)

        scheduler.step(train_loss)

        if valid_loss < best:
            best = valid_loss
            best_model = copy.deepcopy(model)
            best_epoch = epoch
        print(f"Best performing model so far average validation loss: {best:.4f} on epoch {best_epoch}\n")

    print('Best Training Loss @1: {:.4f}'.format(best))

    torch.save(best_model.state_dict(), './checkpoints/nvidia_dave2.pth')
    sigopt.log_metric(name='RMSE', value=best)
    return best.cpu().numpy()

In [25]:
import os
os.environ["SIGOPT_API_TOKEN"] = "XWBIVDWCVQXALUZQFDHNGOELLLKDJBMOJALEPCNQXQGBNIGC"
os.environ['SIGOPT_PROJECT'] = 'nvidia_cnn'
%reload_ext sigopt
args = {
    'batch_size': 256,
    'learning_rate': 0.005,
    'momentum': 0.9,
    'reg': 0.0005
}

In [27]:
%%experiment
{
    'name': 'GCP NVIDIA CNN model Optimization',
    'metrics': [
        {
            'name': 'RMSE',
            'strategy': 'optimize',
            'objective': 'minimize',
        }
    ],
    'parameters': [
        {
            'name': 'reg',
            'type': 'double',
            'bounds': {'min': 0.00001, 'max': 1.0},
            'transformation': 'log'
        },
        {
            'name': 'learning_rate',
            'type': 'double',
            'bounds': {'min': 0.00001, 'max': 1.0},
            'transformation': 'log'
        },
        {
            'name': 'momentum',
            'type': 'double',
            'bounds': {'min': 0.01, 'max': 1.0}
        },
        {
            'name': 'batch_size',
            'type': 'categorical',
            'categorical_values': ['32', '64', '128', '256']
        }
    ],
    'budget': 10
}

Experiment created, view it on the SigOpt dashboard at https://app.sigopt.com/experiment/530051


In [28]:
%%optimize gcp_nvidia_cnn_optimization_run
evaluate(args)

Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432043
Params :  {'learning_rate': 0.005, 'reg': 0.0005, 'batch_size': '256', 'momentum': 0.9}


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2713


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`


  0%|          | 0/40 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


* Average Loss @1: 0.2674
Best performing model so far average validation loss: 0.2674 on epoch 0



  return F.mse_loss(input, target, reduction=self.reduction)


  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2704


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2678
Best performing model so far average validation loss: 0.2674 on epoch 0



  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2705


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2670
Best performing model so far average validation loss: 0.2670 on epoch 2

Best Training Loss @1: 0.2670
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432043
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432045
Params :  {'batch_size': '32', 'learning_rate': 0.07715483261889189, 'momentum': 0.9598464130195161, 'reg': 0.00035313176253227996}


  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2771


  0%|          | 0/317 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


* Average Loss @1: 0.2701
Best performing model so far average validation loss: 0.2701 on epoch 0



  return F.mse_loss(input, target, reduction=self.reduction)


  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2793


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2746
Best performing model so far average validation loss: 0.2701 on epoch 0



  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2754


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2993
Best performing model so far average validation loss: 0.2701 on epoch 0

Best Training Loss @1: 0.2701
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432045
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432047
Params :  {'momentum': 0.05291773149433152, 'learning_rate': 0.18036619837476486, 'batch_size': '64', 'reg': 0.021919579323320726}


  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.2658


  0%|          | 0/159 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


* Average Loss @1: 0.2638
Best performing model so far average validation loss: 0.2638 on epoch 0



  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.2657


  0%|          | 0/159 [00:00<?, ?it/s]

* Average Loss @1: 0.2631
Best performing model so far average validation loss: 0.2631 on epoch 1



  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.2655


  0%|          | 0/159 [00:00<?, ?it/s]

* Average Loss @1: 0.2649
Best performing model so far average validation loss: 0.2631 on epoch 1

Best Training Loss @1: 0.2631
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432047
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432050
Params :  {'learning_rate': 0.00047128765639560316, 'momentum': 0.5504545005925385, 'reg': 2.2622122579387346e-05, 'batch_size': '128'}


  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2973


  0%|          | 0/80 [00:00<?, ?it/s]

  return F.mse_loss(input, target, reduction=self.reduction)


* Average Loss @1: 0.2670
Best performing model so far average validation loss: 0.2670 on epoch 0



  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2690


  0%|          | 0/80 [00:00<?, ?it/s]

* Average Loss @1: 0.2660
Best performing model so far average validation loss: 0.2660 on epoch 1



  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2685


  0%|          | 0/80 [00:00<?, ?it/s]

* Average Loss @1: 0.2659
Best performing model so far average validation loss: 0.2659 on epoch 2

Best Training Loss @1: 0.2659
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432050
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432051
Params :  {'reg': 0.0023168492153577665, 'momentum': 0.6178110948615166, 'batch_size': '128', 'learning_rate': 0.5025434496599754}


  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2750


  0%|          | 0/80 [00:00<?, ?it/s]

* Average Loss @1: 0.2736
Best performing model so far average validation loss: 0.2736 on epoch 0



  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2730


  0%|          | 0/80 [00:00<?, ?it/s]

* Average Loss @1: 0.2724
Best performing model so far average validation loss: 0.2724 on epoch 1



  0%|          | 0/571 [00:00<?, ?it/s]

Average training loss this epoch: 0.2736


  0%|          | 0/80 [00:00<?, ?it/s]

* Average Loss @1: 0.2755
Best performing model so far average validation loss: 0.2724 on epoch 1

Best Training Loss @1: 0.2724
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432051
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432053
Params :  {'reg': 0.01505922300120135, 'learning_rate': 0.012886039487109603, 'batch_size': '256', 'momentum': 0.35501507799513227}


  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2706


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2701
Best performing model so far average validation loss: 0.2701 on epoch 0



  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2702


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2694
Best performing model so far average validation loss: 0.2694 on epoch 1



  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2701


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2697
Best performing model so far average validation loss: 0.2694 on epoch 1

Best Training Loss @1: 0.2694
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432053
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432054
Params :  {'momentum': 0.502290549393431, 'learning_rate': 0.0025821991685939906, 'reg': 0.007274328235686918, 'batch_size': '32'}


  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2609


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2623
Best performing model so far average validation loss: 0.2623 on epoch 0



  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2604


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2617
Best performing model so far average validation loss: 0.2617 on epoch 1



  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2601


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2618
Best performing model so far average validation loss: 0.2617 on epoch 1

Best Training Loss @1: 0.2617
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432054
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432056
Params :  {'batch_size': '256', 'reg': 0.7088234546054498, 'learning_rate': 0.0002309952608187497, 'momentum': 0.10470277718166823}


  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.3669


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.3390
Best performing model so far average validation loss: 0.3390 on epoch 0



  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.3242


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.3048
Best performing model so far average validation loss: 0.3048 on epoch 1



  0%|          | 0/286 [00:00<?, ?it/s]

Average training loss this epoch: 0.2985


  0%|          | 0/40 [00:00<?, ?it/s]

* Average Loss @1: 0.2851
Best performing model so far average validation loss: 0.2851 on epoch 2

Best Training Loss @1: 0.2851
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432056
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432057
Params :  {'learning_rate': 0.00010265019517411648, 'reg': 0.23238349221147978, 'batch_size': '64', 'momentum': 0.7015874766695447}


  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.3282


  0%|          | 0/159 [00:00<?, ?it/s]

* Average Loss @1: 0.2741
Best performing model so far average validation loss: 0.2741 on epoch 0



  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.2669


  0%|          | 0/159 [00:00<?, ?it/s]

* Average Loss @1: 0.2663
Best performing model so far average validation loss: 0.2663 on epoch 1



  0%|          | 0/1141 [00:00<?, ?it/s]

Average training loss this epoch: 0.2649


  0%|          | 0/159 [00:00<?, ?it/s]

* Average Loss @1: 0.2670
Best performing model so far average validation loss: 0.2663 on epoch 1

Best Training Loss @1: 0.2663
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432057
Run started, view it on the SigOpt dashboard at https://app.sigopt.com/run/432058
Params :  {'reg': 7.280041606239745e-05, 'momentum': 0.21941523178752614, 'learning_rate': 0.04160217141476355, 'batch_size': '32'}


  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2522


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.2796
Best performing model so far average validation loss: 0.2796 on epoch 0



  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.2156


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.3184
Best performing model so far average validation loss: 0.2796 on epoch 0



  0%|          | 0/2282 [00:00<?, ?it/s]

Average training loss this epoch: 0.1826


  0%|          | 0/317 [00:00<?, ?it/s]

* Average Loss @1: 0.3050
Best performing model so far average validation loss: 0.2796 on epoch 0

Best Training Loss @1: 0.2796
Run finished, view it on the SigOpt dashboard at https://app.sigopt.com/run/432058
