## Experiment Setup

### Random seed / PyTorch / CUDA related

In [1]:
import torch
import numpy as np

# Google Colab-only setup, guarded by `use_colab`. Leave the flag False in
# other environments; the rest of this cell (autoreload, seeds, device
# selection) still has to run everywhere.
use_colab = False

if use_colab:
    # Mount my Google Drive root folder
    # (google.colab is imported lazily so the notebook still runs where that
    # package is not installed).
    from google.colab import drive
    drive.mount('/content/drive')

    # cd to bayesian-dl-experiments directory
    # The path is relative, so it resolves against the kernel's working
    # directory at this point -- presumably /content on Colab; verify.
    %cd 'drive/My Drive/Colab Notebooks/bayesian-dl-experiments'
    # List the directory contents as a quick check that the cd worked.
    !ls

# IPython reloading magic
# Mode 2 reloads imported modules before each cell execution, so edits to
# local packages are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Reproducibility: seed NumPy and PyTorch with the same fixed value.
# Reference: https://pytorch.org/docs/stable/notes/randomness.html
np.random.seed(682)
torch.manual_seed(682)

# Device selection: use the GPU only when it is both requested and available.
use_cuda = True
_cuda_ready = use_cuda and torch.cuda.is_available()

torch_device = torch.device('cuda' if _cuda_ready else 'cpu')

# Page-locked host memory gives faster host-to-GPU copies; it is only
# switched on when the GPU is actually in use.
use_pin_memory = _cuda_ready

if _cuda_ready:
    # Ask cuDNN for deterministic behaviour and turn its benchmark mode off.
    # Background: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

### Variable settings

In [2]:
# User-tunable experiment settings. The trailing `#@param` annotations are
# Google Colab form directives; keep them on the same line as the assignment.

# Dataset to use
# (passed as the dataset name to datasets.UCIDatasets in the training cell)
dataset_name = 'naval-propulsion-plant' #@param ["yacht", "naval-propulsion-plant"]

# Training set size
# Fraction of each subset used for training; the remainder is the test set.
dataset_train_size = 0.8 #@param {type:"slider", min:0.1, max:0.9, step:0.05}

# L2 regularization strength
# Used as Adam's weight_decay and also forwarded to mc_predict().
# NOTE(review): 0.01 is not a multiple of the slider step (0.05) -- confirm
# how the Colab form treats this value.
reg_strength = 0.01 #@param {type:"slider", min:0, max:1.0, step:0.05}

# Epochs
# Number of full passes over the training loader per trained network.
n_epochs = 4000 #@param {type:"integer"}

# Number of different data splits to try
# NOTE(review): the training loop in this notebook does not iterate over
# splits -- confirm whether this setting is still meaningful.
n_splits = 20 #@param {type:"integer"}

# Data batch sizes
# Mini-batch size of the training DataLoader.
n_training_batch = 128 #@param {type:"integer"}

# Number of test predictions (for each data point)
# Forwarded to mc_predict() as the number of predictions to draw.
n_predictions = 10000 #@param {type:"integer"}


### Training setup

In [3]:
import time
from itertools import product

from torch import nn, optim
from torch.utils.data import random_split, DataLoader, RandomSampler
from ronald_bdl import models, datasets

# Fractions of the full dataset to experiment with (0.1, 0.2, ..., 1.0).
# The training cell hands each value to the dataset as `limit_size`.
subset_proportions = np.arange(0.1, 1.1, 0.1)

# Candidate architectures: depth 1..5 and hidden width 10..100 (step 10).
network_hidden_layer_candidates = np.arange(1, 6)
network_hidden_dim_candidates = np.arange(10, 110, 10)

# Training objective: mean squared error.
objective = nn.MSELoss()

# Per-run metric accumulators; the training cell appends to these.
rmse_non_mc = []
rmse_mc = []
test_lls_mc = []

## Train the network

In [None]:
# Train and evaluate one MC-dropout network per configuration, appending the
# reported metrics to rmse_mc / rmse_non_mc / test_lls_mc.
#
# NOTE(review): zip() pairs the three candidate arrays element-wise and stops
# at the shortest one (network_hidden_layer_candidates, 5 entries), so only
# 5 configurations run: (0.1, 10, 1), (0.2, 20, 2), ... `itertools.product`
# is imported in the setup cell but unused -- confirm whether a full grid
# over all combinations was intended.
#
# NOTE(review): re-running this cell without re-running the setup cell keeps
# appending to the same metric lists.
for subset_prop, hidden_dim, n_hidden in zip(
    subset_proportions, 
    network_hidden_dim_candidates, network_hidden_layer_candidates):

    # Prepare new subset of the original dataset
    # (limit_size receives the current proportion; presumably it restricts the
    # dataset to that fraction of its rows -- verify in ronald_bdl).
    subset = datasets.UCIDatasets(
        dataset_name, root_dir='./datasets_files', 
        limit_size=subset_prop, transform=None, download=True, pin_memory=use_pin_memory)

    # Determine sizes of training and testing set
    # int() truncates, so any rounding remainder goes to the test set.
    train_size = int(dataset_train_size * len(subset))
    test_size = len(subset) - train_size
    
    # Print the size of the subset
    print("subset size = " + str((len(subset), subset.n_features)))
    print("training set size = %d" % train_size)
    print("test set size = %d" % test_size)
    
    # No generator is passed, so the split draws from torch's global RNG
    # (seeded in the first cell).
    train, test = random_split(subset, lengths=[train_size, test_size])
    # shuffle is left at its default (False): batches are visited in the same
    # order in every epoch.
    train_loader = DataLoader(train, batch_size=n_training_batch, pin_memory=use_pin_memory)

    # Prepare network
    network = models.FCNetMCDropout(
        input_dim=subset.n_features, 
        output_dim=subset.n_targets,
        hidden_dim=hidden_dim,
        n_hidden=n_hidden,
        dropout_rate=0.01,
        dropout_type='bernoulli'
    )
    
    # Send the whole model to the selected torch.device
    network.to(torch_device)

    # Print the network structure
    print(network)
    
    # Model to train mode
    network.train()
    
    # Adam optimizer
    # https://pytorch.org/docs/stable/optim.html?highlight=adam#torch.optim.Adam
    # NOTE: Need to set L2 regularization from here
    optimizer = optim.Adam(
        network.parameters(),
        lr=0.01,
        weight_decay=reg_strength, # L2 regularization
    )
    
    """
    Training
    """

    print("Starting subset %f, n_hidden %d, hidden_dim %d" % (subset_prop, n_hidden, hidden_dim))

    # Record training start time (for this configuration)
    tic = time.time()

    for epoch in range(n_epochs): # loop over the dataset multiple times

        for i, data in enumerate(train_loader):
            # get the inputs; data is a list of [inputs, labels]
            inputs, targets = data

            # Store the batch to torch_device's memory
            inputs = inputs.to(torch_device)
            targets = targets.to(torch_device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = network(inputs)

            loss = objective(outputs, targets)
            loss.backward()

            optimizer.step()
            
    # Record training end time
    # NOTE(review): wall-clock time without an explicit CUDA synchronisation;
    # on GPU the measured value may not include still-queued work -- confirm
    # if precise timings matter.
    toc = time.time()

    # Report the final loss
    # `loss` is the loss of the last mini-batch of the last epoch, not an
    # average over the epoch.
    print("Finished subset %f, n_hidden %d, hidden_dim %d: final loss = %f" % (subset_prop, n_hidden, hidden_dim, loss.item()))

    """
    Testing
    """

    # Model to eval mode
    # NOTE(review): eval mode normally disables dropout modules; presumably
    # mc_predict() keeps dropout active for its stochastic passes -- verify
    # in ronald_bdl.models.
    network.eval()

    # Get the test data
    # Indexes the underlying dataset with the full list of test indices to get
    # the whole test set as a single (inputs, targets) pair; assumes
    # UCIDatasets supports list indexing -- TODO confirm.
    inputs, targets = test.dataset[test.indices]

    # Store the batch to torch_device's memory
    inputs = inputs.to(torch_device)
    targets = targets.to(torch_device)

    # Record testing start time (for this configuration)
    tic_testing = time.time()    
    
    # `metrics` is iterated with .items() below, i.e. a mapping from metric
    # name to a value exposing .item().
    predictions, mean, var, metrics = network.mc_predict(inputs, n_predictions,
                                                         y_test=targets, reg_strength=reg_strength)

    # Record testing end time
    toc_testing = time.time()    
    
    """
    Print results
    """
    print()
    print("Running split subset %f, n_hidden %d, hidden_dim %d test: "% (subset_prop, n_hidden, hidden_dim))
    print("Mean = " + str(mean))
    print("Variance = " + str(var))

    # Print and store additional metrics
    # Only the three keys below are stored; any other metric is printed only.
    if len(metrics) > 0:
        for key, value in metrics.items():
            print(str(key) + " = " + str(value))

            if key == 'rmse_mc': rmse_mc.append(value.item())
            elif key == 'rmse_non_mc': rmse_non_mc.append(value.item())
            elif key == 'test_ll_mc': test_lls_mc.append(value.item())

    # Report the total training time
    print("training time = " + str(toc - tic) + " seconds")
    
    # Report the total testing time
    print("testing time = " + str(toc_testing - tic_testing) + " seconds")
    print()

Using downloaded and verified file: ./datasets_files/naval-propulsion-plant/data.txt
subset size = (1193, 16)
training set size = 954
test set size = 239
FCNetMCDropout(
  (input): Linear(in_features=16, out_features=10, bias=True)
  (hidden_layers): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
  )
  (output): Linear(in_features=10, out_features=1, bias=True)
)
Starting subset 0.100000, n_hidden 1, hidden_dim 10


### Print statistics

In [None]:
# Adapted from the DropoutUncertaintyExps repo.
def _print_summary(label, values):
    """Print summary statistics for one list of per-run metric values.

    Reports the mean, the population standard deviation (np.std default),
    the standard error of the mean, and the median / 25th / 75th percentiles.

    The standard error divides by the square root of the number of values
    actually recorded, not by a configured run count, so it stays correct
    when fewer (or more) runs were executed than planned.

    Args:
        label: Text printed in front of the numbers.
        values: Sequence of floats, one per completed run.
    """
    values = np.asarray(values, dtype=float)

    # np.percentile raises on empty input; report the situation instead.
    if values.size == 0:
        print('%s: no results recorded \n' % label)
        return

    stddev = np.std(values)
    p25, median, p75 = np.percentile(values, [25, 50, 75])

    print('%s %f +- %f (stddev) +- %f (std error), median %f 25p %f 75p %f \n' % (
        label, np.mean(values), stddev, stddev / np.sqrt(values.size),
        median, p25, p75))


_print_summary('non-MC RMSE', rmse_non_mc)
_print_summary('MC RMSE', rmse_mc)
_print_summary('MC Test Log-likelihood', test_lls_mc)