# This notebook is a tutorial on NetDebugger
Author: Rishi Gurnani, Georgia Institute of Technology<br />
Creation Date: July 21, 2021 4:54 PM EST

# Import
Some python packages are needed to run this notebook. We import all of those below.

In [2]:
import pandas as pd
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from torch import tensor, cuda, manual_seed, zeros, nn, optim, reshape
from torch import float as torch_float
from torch_geometric.data import DataLoader
from torch import device as torch_device
import torch.nn.functional as F
import random
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data

In [3]:
from nndebugger import constants, loss, dl_debug
from nndebugger import torch_utils as utils

# TODO For Rishi before publishing notebook:

1. Consider using `trainer` function for all tests in `dl_debug`
1. Remove all 'importlib' statements
1. Run all cells and verify that the outputs are what you expected
1. Try using all the polymers for "chart dependencies" instead of a small sample
1. Include "visualize large training batch"?
1. Delete this cell
1. Delete the cell below

# TODO for Rishi on presentation:
1. Describe GNN?

# Fix random seeds to ensure reproducible results

In [4]:
random.seed(constants.RANDOM_SEED)
manual_seed(constants.RANDOM_SEED)
np.random.seed(constants.RANDOM_SEED)

# Load data set 

In [5]:
data_df = pd.read_csv('data/export.csv',index_col=0)
data_df.head()

Unnamed: 0,smiles,property,value
822,[*]C[*],Egc,6.8972
823,[*]CC([*])C,Egc,6.5196
824,[*]CC([*])CC,Egc,6.517
825,[*]CC([*])CCC,Egc,6.7336
826,[*]CC([*])CC(C)C,Egc,6.7394


# Featurize data set

In [6]:
N_FEATURES = 512
N_DATA = len(data_df)

def featurize_smiles(smile):
    smile = smile.replace('*', 'H')
    mol = Chem.MolFromSmiles(smile)
    fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius=2, nBits=N_FEATURES, useChirality=True)
    return np.array(fp)

feature_array = np.zeros((N_DATA, N_FEATURES))

ind = 0
for smiles in data_df.smiles.values:
    feature_array[ind,:] = featurize_smiles(smiles)
    ind += 1

# Prepare inputs for DebugSession

In [7]:
# bug free processing pipeline!
model_type = 'mlp'
# data_set
n_test = int(np.floor(N_DATA*constants.TRAIN_FRAC))
n_train = N_DATA - n_test
(X_train, X_test, label_train, 
label_test) = train_test_split(
                                    feature_array,
                                    data_df.value.values.tolist(),
                                    test_size=n_test,
                                    shuffle=True,
                                    random_state=constants.RANDOM_SEED
                                )

train_X = [Data(x=tensor(X_train[ind,:], dtype=torch_float).view(1,N_FEATURES),
                y=tensor(label_train[ind], dtype=torch_float)
            ) 
            for ind in range(n_train)]
zero_data_set = [Data(x=zeros((1,N_FEATURES)), y=x.y) for x in train_X]
data_set = {}
data_set['train'] = train_X
loss_fn = loss.st_loss()
target_mean = np.mean(label_train)
epsilon = constants.DL_DBG_OVERFIT_EPS_RATIO*(target_mean)
device = torch_device('cuda' if cuda.is_available() else 'cpu')

# Write a logical architecture that will pass all test cases

In [8]:
class FFNet(nn.Module):
    
    def __init__(self, input_dim, output_dim, capacity):

        super(FFNet,self).__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_hidden = capacity
        unit_sequence = utils.unit_sequence(self.input_dim, 
                                            self.output_dim, 
                                            self.n_hidden)
        self.relu = nn.ReLU()
        # set up hidden layers
        for ind,n_units in enumerate(unit_sequence[:-2]):
            size_out_ = unit_sequence[ind+1]
            layer = nn.Linear(n_units, size_out_)
            self.layers.append(layer)

        # set up output layer
        size_in_ = unit_sequence[-2]
        size_out_ = unit_sequence[-1]
        layer = nn.Linear(size_in_, size_out_)
        self.layers.append(layer)
    
    def forward(self, data):
        x = data.x
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            if i < (self.n_hidden - 1):
                x = self.relu(x)
   
        return x.view(data.num_graphs,)

# a list of models that are bug free!
capacity_ls = [1,2,3]
correct_model_class_ls = [lambda : FFNet(N_FEATURES, 1, capacity) for capacity in
                          capacity_ls]

# Test output shape

The shape of the model output should match the shape of the labels.

In [9]:
# this cell should pass since it uses a bug-free model

ds = dl_debug.DebugSession(model_type, correct_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_output_shape=True)
ds.main()

Training data contains 676 points


Verified that shape of model predictions is equal to shape of labels


Debug session complete.


In [10]:
# buggy model. Can you spot the bug?

class BuggyNet(nn.Module):
    
    def __init__(self, input_dim, output_dim, capacity):

        super(BuggyNet,self).__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_hidden = capacity
        unit_sequence = utils.unit_sequence(self.input_dim, 
                                            self.output_dim, 
                                            self.n_hidden)
        self.relu = nn.ReLU()
        # set up hidden layers
        for ind,n_units in enumerate(unit_sequence[:-2]):
            size_out_ = unit_sequence[ind+1]
            layer = nn.Linear(n_units, size_out_)
            self.layers.append(layer)

        # set up output layer
        size_in_ = unit_sequence[-2]
        size_out_ = unit_sequence[-1]
        layer = nn.Linear(size_in_, size_out_)
        self.layers.append(layer)
    
    def forward(self, data):
        x = data.x
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            if i < (self.n_hidden - 1):
                x = self.relu(x)
   
        return x # Spoiler! The bug is here.

# a list of models that are buggy
capacity_ls = [1,2,3]
buggy_model_class_ls = [lambda : BuggyNet(N_FEATURES, 1, capacity) for capacity in
                          capacity_ls]

In [11]:
# this cell should NOT pass since it uses a buggy model 

ds = dl_debug.DebugSession(model_type, buggy_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_output_shape=True)
ds.main()

Training data contains 676 points



  return F.mse_loss(input, target, reduction=self.reduction)


AssertionError: The model output shape torch.Size([6, 1]) and label shape torch.Size([6]) are not the same

# Test input independent baseline
The loss of the model should be lower when real features are passed in than when zeroed features are passed in.

In [12]:
# trainer without bugs!

def trainer(model, data_set, batch_size, learning_rate, n_epochs, device, loss_obj):
    
    data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate) # Adam optimization
    model.train() # set model to train mode
    loss_history = []
    for epoch in range(n_epochs):
        per_epoch_loss = 0
        for ind, data in enumerate(data_loader): # loop through training batches
            data = data.to(device) # send data to GPU, if available
            optimizer.zero_grad() # zero the gradients
            output = model(data) # perform forward pass
            loss = loss_obj(output, data) # compute loss
            per_epoch_loss += loss.detach().cpu().numpy()
            loss.backward() # perform backward pass
            optimizer.step() # update weights
        loss_history.append(per_epoch_loss)
    
    return loss_history

In [12]:
# this test should pass since we are using a trainer without bugs

ds = dl_debug.DebugSession(model_type, correct_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_input_independent_baseline=True, trainer=trainer)
ds.main()

Training data contains 676 points


Checking input-independent baseline
..last epoch real_data_loss 0.22421006858348846
..last epoch zero_data_loss 14.373095989227295
Input-independent baseline is verified


Debug session complete.


In [13]:
# trainer with bugs! Can you spot the bug?

def buggy_trainer(model, data_set, batch_size, learning_rate, n_epochs, device, loss_obj):
    
    data_loader = DataLoader(data_set, batch_size=batch_size, shuffle=True)
    optimizer = optim.Adam(model.parameters(), lr=learning_rate) # Adam optimization
    model.train() # set model to train mode
    loss_history = []
    for epoch in range(n_epochs):
        per_epoch_loss = 0
        for ind, data in enumerate(data_loader): # loop through training batches
            data = data.to(device) # send data to GPU, if available
            optimizer.zero_grad() # zero the gradients
            output = model(data) # perform forward pass
            loss = loss_obj(output, data) # compute loss
            per_epoch_loss += loss.detach().cpu().numpy()
            optimizer.step() # update weights
        loss_history.append(per_epoch_loss)
    
    return loss_history

# Spoiler! The bug is that there is no backward pass being performed!

In [14]:
import importlib
importlib.reload(dl_debug)

<module 'nndebugger.dl_debug' from '/data/rgur/nndebugger/nndebugger/dl_debug.py'>

In [15]:
# this test should NOT pass since we are using a buggy trainer

ds = dl_debug.DebugSession(model_type, correct_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_input_independent_baseline=True, trainer=buggy_trainer)
ds.main()

Training data contains 676 points


Checking input-independent baseline
..last epoch real_data_loss 137.30295944213867
..last epoch zero_data_loss 135.52406883239746


ValueError: The loss of zeroed inputs is nearly the same as the loss of
                    real inputs. This may indicate that your model is not learning anything
                    during training. Check your trainer function and your model architecture.

# Overfit small batch
If you hope to learn a good map on your whole data set using model archicture ***A***, then ***A*** should have enough capacity to completely overfit a small batch of the data set.

In [16]:
import importlib
importlib.reload(constants)
importlib.reload(dl_debug)

<module 'nndebugger.dl_debug' from '/data/rgur/nndebugger/nndebugger/dl_debug.py'>

In [17]:
# this test should pass since we are using a good model

ds = dl_debug.DebugSession(model_type, correct_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_overfit_small_batch=True, trainer=trainer)
ds.main()

Training data contains 676 points


Checking if a small batch can be overfit
epsilon is 0.22499709319526628
..Epoch 0
....Loss: 5.080452065966185
....Outputs -0.0215 -0.0118 -0.0215 -0.0048 -0.0261
....Labels  5.6991 2.9694 5.7012 5.3739 5.0497
..Epoch 1
....Loss: 5.013669021778016
....Outputs 0.0494 0.0489 0.0772 0.0489 0.0267
....Labels  2.9694 5.7012 5.3739 5.6991 5.0497
..Epoch 2
....Loss: 4.939931536366104
....Outputs 0.1281 0.1659 0.0844 0.1281 0.1160
....Labels  5.7012 5.3739 5.0497 5.6991 2.9694
..Epoch 3
....Loss: 4.853417574776702
....Outputs 0.2705 0.1530 0.1956 0.2201 0.2201
....Labels  5.3739 5.0497 2.9694 5.6991 5.7012
..Epoch 4
....Loss: 4.747480226046666
....Outputs 0.2944 0.3309 0.4002 0.3309 0.2392
....Labels  2.9694 5.7012 5.3739 5.6991 5.0497
..Epoch 5
....Loss: 4.614684483414844
....Outputs 0.5635 0.4170 0.3469 0.4701 0.4701
....Labels  5.3739 2.9694 5.0497 5.7012 5.6991
..Epoch 6
....Loss: 4.448245296205006
....Outputs 0.7672 0.6455 0.4823 0.6455 0.5710
....Labels

In [18]:
# buggy model. Can you spot the "bug"?

class BuggyNet(nn.Module):
    
    def __init__(self, input_dim, output_dim, capacity):

        super(BuggyNet,self).__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_hidden = capacity
        unit_sequence = utils.unit_sequence(self.input_dim, 
                                            self.output_dim, 
                                            self.n_hidden)
        self.sigmoid = nn.Sigmoid() # Spoiler! The "bug" is here.
        # set up hidden layers
        for ind,n_units in enumerate(unit_sequence[:-2]):
            size_out_ = unit_sequence[ind+1]
            layer = nn.Linear(n_units, size_out_)
            self.layers.append(layer)

        # set up output layer
        size_in_ = unit_sequence[-2]
        size_out_ = unit_sequence[-1]
        layer = nn.Linear(size_in_, size_out_)
        self.layers.append(layer)
    
    def forward(self, data):
        x = data.x
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            if i < (self.n_hidden - 1):
                x = self.sigmoid(x)
   
        return x.view(data.num_graphs,) 

# a list of models that are buggy
capacity_ls = [1,2,3]
buggy_model_class_ls = [lambda : BuggyNet(N_FEATURES, 1, capacity) for capacity in
                          capacity_ls]

In [19]:
# this test should not pass since we are using a buggy model

ds = dl_debug.DebugSession(model_type, buggy_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_test_overfit_small_batch=True, trainer=trainer)
ds.main()

Training data contains 676 points


Checking if a small batch can be overfit
epsilon is 0.22499709319526628
..Epoch 0
....Loss: 4.8753192503638925
....Outputs 0.1923 0.1942 0.1926 0.1910 0.1910
....Labels  5.0497 2.9694 5.3739 5.6991 5.7012
..Epoch 1
....Loss: 4.553087323097695
....Outputs 0.5218 0.5235 0.5229 0.5208 0.5218
....Labels  5.7012 5.3739 2.9694 5.0497 5.6991
..Epoch 2
....Loss: 4.230841369280654
....Outputs 0.8527 0.8506 0.8557 0.8539 0.8539
....Labels  2.9694 5.0497 5.3739 5.6991 5.7012
..Epoch 3
....Loss: 3.9045361230395352
....Outputs 1.1918 1.1918 1.1881 1.1859 1.1938
....Labels  5.7012 5.6991 2.9694 5.0497 5.3739
..Epoch 4
....Loss: 3.5711887878576944
....Outputs 1.5302 1.5414 1.5391 1.5391 1.5327
....Labels  5.0497 5.3739 5.7012 5.6991 2.9694
..Epoch 5
....Loss: 3.22896163453589
....Outputs 1.8865 1.8989 1.9013 1.8989 1.8894
....Labels  5.0497 5.6991 5.3739 5.7012 2.9694
..Epoch 6
....Loss: 2.877508313177779
....Outputs 2.2564 2.2728 2.2754 2.2728 2.2598
....Labels  5

ValueError: Error: Your model was not able to overfit a small batch 
                               of data. The minimum RMSE over 100 epochs was not less than 0.22499709319526628

# Visualize predictions of a large batch as a function of epoch
There should not be a large jump in predicted value between epochs (except, perhaps, in the first few epochs). However, predictions should not stay constant between epochs either.

# Chart Dependencies
The `forward` method should not pass information along the batch dimension.

![image info](./images/graph.png)

![image info](./images/graphnet.png)

![image info](./images/graph_batch.png)

In [20]:
# data to illustrate the point

import importlib
importlib.reload(dl_debug)
np.random.seed(constants.RANDOM_SEED)
polymer_indices = data_df.sample(n=4).index
polymer_smiles = data_df.loc[polymer_indices, 'smiles'].values.tolist()
polymer_smiles

['[*]C(C#N)=C([*])c1ccccc1',
 '[*]CCCCOC(=O)C(=O)O[*]',
 '[*]CC(CCl)(CCl)C(=O)O[*]',
 '[*]c1[nH]c([*])c(C(=O)O)c1C']

In [21]:
feature_dict = {'C': np.array([1,0,0,0]),
    'O': np.array([0,1,0,0]),
    'N': np.array([0,0,1,0]),
    'Cl': np.array([0,0,0,1])
}
N_FEATURES_ = len(feature_dict)
N_DATA_ = len(polymer_smiles)
MAX_N_ATOMS = max([Chem.MolFromSmiles(smile).GetNumAtoms() for smile in polymer_smiles])
PROJECTOR_DIM = 100

def featurize_smiles_by_atom(smile):
    smile = smile.replace('*', 'H')
    mol = Chem.MolFromSmiles(smile)
    features = np.zeros((MAX_N_ATOMS, N_FEATURES_))
    for ind,atom in enumerate(mol.GetAtoms()):
        atom_feature = feature_dict[atom.GetSymbol()]
        features[ind, :] = atom_feature

    return features

# feature_array = np.zeros((N_DATA_, MAX_N_ATOMS, N_FEATURES_))
labels = data_df.loc[polymer_indices, 'value'].values
# for ind, smiles in enumerate(polymer_smiles):
#     feature_array[ind, ].append(featurize_smiles_by_atom(smiles))

train_X_ = [Data(x=tensor(featurize_smiles_by_atom(polymer_smiles[ind]), dtype=torch_float),
                    y=tensor(labels[ind], dtype=torch_float)
            ) 
            for ind in range(N_DATA_)
]
# for smiles,data in zip(polymer_smiles,train_X_):
#     data.num_atoms = Chem.MolFromSmiles(smiles).GetNumAtoms()
data_set_ = {'train': train_X_}

In [22]:
[(smile, featurize_smiles_by_atom(smile)) for smile in polymer_smiles]

[('[*]C(C#N)=C([*])c1ccccc1',
  array([[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 0., 1., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]])),
 ('[*]CCCCOC(=O)C(=O)O[*]',
  array([[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]])),
 ('[*]CC(CCl)(CCl)C(=O)O[*]',
  array([[1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.],
         [0., 0., 0., 1.],
         [1., 0., 0., 0.],
         [0., 1., 0., 0.],
         [0., 1., 0., 0.],
         [0., 0

In [23]:
class GraphNet(nn.Module):
    
    def __init__(self, input_dim, output_dim, capacity):

        super(GraphNet,self).__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_hidden = capacity
        unit_sequence = utils.unit_sequence(self.input_dim, 
                                            self.output_dim, 
                                            self.n_hidden)
        self.node_projector = nn.Linear(N_FEATURES_, PROJECTOR_DIM)
        self.relu = nn.ReLU()
        # set up hidden layers
        for ind,n_units in enumerate(unit_sequence[:-2]):
            size_out_ = unit_sequence[ind+1]
            layer = nn.Linear(n_units, size_out_)
            self.layers.append(layer)

        # set up output layer
        size_in_ = unit_sequence[-2]
        size_out_ = unit_sequence[-1]
        layer = nn.Linear(size_in_, size_out_)
        self.layers.append(layer)
    
    def forward(self, data):
        x = data.x
        x = x.view(data.num_graphs, MAX_N_ATOMS, N_FEATURES_)
        x = self.node_projector(x)
        x_mean = x.mean(dim=1)
        x = x - x_mean[:, None, :] # make use of broadcasting
        x = x.sum(dim=1)
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            if i < (self.n_hidden - 1):
                x = self.relu(x)
   
        return x.view(data.num_graphs,)

# a list of models that are bug free!
capacity_ls = [1,2,3]
correct_graphnet_class_ls = [lambda : GraphNet(PROJECTOR_DIM, 1, capacity) for capacity in
                          capacity_ls]

In [24]:
# this test should pass since we are using a bug-free model

ds = dl_debug.DebugSession('gnn', correct_graphnet_class_ls, capacity_ls, data_set_, zero_data_set, loss_fn, epsilon,
                 device, do_chart_dependencies=True)
ds.main()

Training data contains 4 points


Beginning to chart dependencies
..Epoch 0
....Outputs 0.0636 0.0636 0.0636 0.0636
....Labels  4.3452 5.0922 6.5510 3.2017
....Loss: 0.06361331045627594
Finished charting dependencies. Data is not getting passed along the batch dimension.


Debug session complete.


In [25]:
# this is a buggy model. Can you spot the bugs?

class BuggyGraphNet(nn.Module):
    
    def __init__(self, input_dim, output_dim, capacity):

        super(BuggyGraphNet,self).__init__()
        self.layers = nn.ModuleList()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.n_hidden = capacity
        unit_sequence = utils.unit_sequence(self.input_dim, 
                                            self.output_dim, 
                                            self.n_hidden)
        self.node_projector = nn.Linear(N_FEATURES_, PROJECTOR_DIM)
        self.relu = nn.ReLU()
        # set up hidden layers
        for ind,n_units in enumerate(unit_sequence[:-2]):
            size_out_ = unit_sequence[ind+1]
            layer = nn.Linear(n_units, size_out_)
            self.layers.append(layer)

        # set up output layer
        size_in_ = unit_sequence[-2]
        size_out_ = unit_sequence[-1]
        layer = nn.Linear(size_in_, size_out_)
        self.layers.append(layer)
    
    def forward(self, data):
        x = data.x
        x = x.view(data.num_graphs, MAX_N_ATOMS, N_FEATURES_)
        x = self.node_projector(x)
        x_mean = x.mean(dim=0)
        x = x - x_mean[None, :, :] # make use of broadcasting
        x = x.sum(dim=1)
        for i in range(len(self.layers)):
            x = self.layers[i](x)
            if i < (self.n_hidden - 1):
                x = self.relu(x)
   
        return x.view(data.num_graphs,)

# a list of models that are bug free!
capacity_ls = [1,2,3]
buggy_graphnet_class_ls = [lambda : BuggyGraphNet(PROJECTOR_DIM, 1, capacity) for capacity in
                          capacity_ls]

In [26]:
# this test should not pass since we are using a buggy model

ds = dl_debug.DebugSession('gnn', buggy_graphnet_class_ls, capacity_ls, data_set_, zero_data_set, loss_fn, epsilon,
                 device, do_chart_dependencies=True)
best_model_capacity = ds.main()

Training data contains 4 points


Beginning to chart dependencies
..Epoch 0
....Outputs 0.2596 0.2020 0.2162 0.2014
....Labels  4.3452 5.0922 6.5510 3.2017
....Loss: 0.25957411527633667


ValueError: Data is getting passed along the batch dimension.

# Overfit training data & gradient check
The capacity of your architecture should be just large enough to overfit the training data. Also, the gradients should not equal zero before overfitting all training data.

In [15]:
import importlib
importlib.reload(utils)
importlib.reload(constants)

<module 'nndebugger.constants' from '/data/rgur/nndebugger/nndebugger/constants.py'>

In [16]:
ds = dl_debug.DebugSession(model_type, correct_model_class_ls, capacity_ls, data_set, zero_data_set, loss_fn, epsilon,
                 device, do_choose_model_size_by_overfit=True, trainer=trainer)
ds.main()

Training data contains 676 points


Beginning model size search

..Training model 0 


....Epoch 0
......[rmse] 4.728853643258016 [r2] -8.419611402272475
......Outputs -0.1223 -0.1223 -0.1161 -0.1223 -0.1182 -0.1232 -0.1385 -0.1240 -0.1106 -0.1229
......Labels  5.0497 5.6991 2.9694 5.7012 5.3739 2.8673 6.7030 5.1590 2.1188 3.1085
......[best rmse] 4.728853643258016 [best r2] -8.419611402272475

....Epoch 1
......[rmse] 3.7929112024626206 [r2] -5.059915824095616
......Outputs 0.3605 0.5941 0.4553 0.5941 0.5427 0.6862 0.4110 0.6120 0.4511 0.6152
......Labels  5.0497 5.6991 2.9694 5.7012 5.3739 2.8673 6.7030 5.1590 2.1188 3.1085
......[best rmse] 3.7929112024626206 [best r2] -5.059915824095616

....Epoch 2
......[rmse] 2.7049325653964638 [r2] -2.082010584696905
......Outputs 2.3631 3.8501 2.9555 3.8501 3.7417 4.7116 2.8184 3.8961 2.7980 4.0575
......Labels  5.0497 5.6991 2.9694 5.7012 5.3739 2.8673 6.7030 5.1590 2.1188 3.1085
......[best rmse] 2.7049325653964638 [best r2] -2.0820105846969

1

# Run all tests