## Experiment Setup

### Random seed / PyTorch / CUDA related

In [1]:
import time
import datetime
import os
import sys
import itertools

# Use Google Colab
use_colab = True

# Is this notebook running on Colab?
# If so, then google.colab package (github.com/googlecolab/colabtools)
# should be available in this environment

# Previous version used importlib, but we could do the same thing with
# just attempting to import google.colab
try:
    from google.colab import drive
    colab_available = True
except:
    colab_available = False

if use_colab and colab_available:
    drive.mount('/content/drive')
    
    # If there's a package I need to install separately, do it here
    !pip install pyro-ppl

    # cd to the appropriate working directory under my Google Drive
    %cd 'drive/My Drive/Colab Notebooks/bayesian-dl-experiments'
    
    # List the directory contents
    !ls

# IPython reloading magic
%load_ext autoreload
%autoreload 2

# Random seeds
# Based on https://pytorch.org/docs/stable/notes/randomness.html
random_seed = 682

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
Collecting pyro-ppl
[?25l  Downloading https://files.pythonhosted.org/packages/07/9b/a8aa400ebb22a74ac00717ee897b9f179461d4f9052b348644aee36d1b87/pyro_ppl-1.0.0-py3-none-any.whl (404kB)
[K     |████████████████████████████████| 409kB 9.6MB/s 
[?25hCollecting tqdm>=4.36
[?25l  Downloading https://files.pythonhosted.org/packages/bb/62/6f823501b3bf2bac242bd3c320b592ad1516b3081d82c77c1d813f076856/tqdm-4.39.0-py2.py3-none-any.whl (53kB)
[K     |███████████████

/content/drive/My Drive/Colab Notebooks/bayesian-dl-experiments
datasets_files			 experiment_nn_capacity_1.ipynb  ronald_bdl
experiment_comparison_toy.ipynb  LICENSE			 test_results
experiment_convergence_1.ipynb	 README.md


### Third party libraries (NumPy, PyTorch, Pyro)

In [2]:
# Third party libraries import
import numpy as np
import torch
import pyro
import matplotlib.pyplot as plt

# Print version information
print("Python Version: " + sys.version)
print("NumPy Version: " + np.__version__)
print("PyTorch Version: " + torch.__version__)
print("Pyro Version: " + pyro.__version__)

Python Version: 3.6.8 (default, Oct  7 2019, 12:59:55) 
[GCC 8.3.0]
NumPy Version: 1.17.4
PyTorch Version: 1.3.1
Pyro Version: 1.0.0


In [3]:
# More imports...
from torch import nn, optim
from torch.utils.data import random_split, DataLoader, RandomSampler

# Import model and dataset classes from ronald_bdl
from ronald_bdl import models, datasets

# pyplot setting
%matplotlib inline

# torch.device / CUDA Setup
use_cuda = True

if use_cuda and torch.cuda.is_available():
    torch_device = torch.device('cuda')

    torch.backends.cudnn.deterministic = True
    
    # Disable 'benchmark' mode
    # Note: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
    torch.backends.cudnn.benchmark = False
    use_pin_memory = True # Faster Host to GPU copies with page-locked memory

    # CUDA libraries version information
    print("CUDA Version: " + str(torch.version.cuda))
    print("cuDNN Version: " + str(torch.backends.cudnn.version()))
    print("CUDA Device Name: " + str(torch.cuda.get_device_name()))
    print("CUDA Capabilities: "+ str(torch.cuda.get_device_capability()))
else:
    torch_device = torch.device('cpu')
    use_pin_memory = False

CUDA Version: 10.1.243
cuDNN Version: 7603
CUDA Device Name: Tesla P100-PCIE-16GB
CUDA Capabilities: (6, 0)


### Variable settings

In [0]:
# Dataset to use
dataset_name = 'yacht'

# Training set size
dataset_train_size = 0.8

# Number of dataset splits
n_splits = 10

# Epochs
n_epochs = [40, 400, 4000, 40000]

# Data batch sizes
n_training_batch = 128

# Set the proportion of the dataset to be available as a whole
subset_proportions = [1.0]

# NN hyperparameters
network_hidden_dims = [25, 50, 100]
network_hidden_layers = [1, 3, 5]

# Dropout
network_dropout_rates = [0.01]

# Regularization strengths
regularization_strengths = [0.05]

# Number of test predictions (for each data point)
prediction_runs = [300]


### Training setup

In [0]:
# Mean Squared Error for loss function to minimize
objective = nn.MSELoss()

# Test start time
test_start_time = datetime.datetime.today().strftime('%Y%m%d%H%M')

## Train the network

In [0]:
for subset_prop, hidden_dim, n_hidden, dropout_rate, reg_strength, n_predictions, n_epoch in itertools.product(
    subset_proportions,
    network_hidden_dims, network_hidden_layers,
    network_dropout_rates, regularization_strengths,
    prediction_runs, n_epochs,
):
    print(
    "subset %f, n_hidden %d, hidden_dim %d, dropout_rate %f, reg_strength %f"
    % (subset_prop, n_hidden, hidden_dim, dropout_rate, reg_strength))
    
    print("n_epoch %d" % n_epoch)
    print()

    # Create directory to store results for the current test configuration
    test_results_path = os.path.join(
        './test_results',
        'convergence_1',
        dataset_name,
        test_start_time,
        (
            str(subset_prop) 
            + '_' + str(hidden_dim) 
            + '_' + str(n_hidden) 
            + '_' + str(dropout_rate) 
            + '_' + str(reg_strength)
            + '_' + str(n_epoch)),
    )
    
    os.makedirs(test_results_path, exist_ok=True)
    
    test_results_rmse_mc_path = os.path.join(
        test_results_path,
        "rmse_mc.txt"
    )
    
    test_results_lls_mc_path = os.path.join(
        test_results_path,
        "lls_mc.txt"
    )

    # Prepare new subset of the original dataset
    subset = datasets.UCIDatasets(
        dataset_name, root_dir='./datasets_files', 
        limit_size=subset_prop, transform=None, download=True)

    # Determine sizes of training and testing set
    train_size = int(dataset_train_size * len(subset))
    test_size = len(subset) - train_size
    
    # Print the size of the subset
    print("subset size = " + str((len(subset), subset.n_features)))
    print("training set size = %d" % train_size)
    print("test set size = %d" % test_size)
    print()
    
    # Prepare multiple sets of random train-test splits 
    # to test the parameter combination
    subset_splits = []

    for _ in range(n_splits):
        train, test = random_split(subset, lengths=[train_size, test_size])
        subset_splits.append((train, test))

    # Try learning with different splits
    for s, (train, test) in enumerate(subset_splits):
        print('Training with split %d' % s)

        train_loader = DataLoader(train, batch_size=n_training_batch, pin_memory=use_pin_memory)

        # Prepare network
        network = models.FCNetMCDropout(
          input_dim=subset.n_features, 
          output_dim=subset.n_targets,
          hidden_dim=hidden_dim,
          n_hidden=n_hidden,
          dropout_rate=dropout_rate,
          dropout_type='bernoulli',
        )

        # Send the whole model to the selected torch.device
        network.to(torch_device)

        # Model to train mode
        network.train()

        # Adam optimizer
        # https://pytorch.org/docs/stable/optim.html?highlight=adam#torch.optim.Adam
        # NOTE: Need to set L2 regularization from here
        optimizer = optim.Adam(
            network.parameters(),
            lr=0.01,
            weight_decay=reg_strength, # L2 regularization
        )

        """
        Training
        """

        # Record training start time (for this split)
        tic = time.time()

        for epoch in range(n_epoch): # loop over the dataset multiple times
            # Mini-batches
            for data in train_loader:
                # get the inputs; data is a list of [inputs, labels]
                inputs, targets = data

                # Store the batch to torch_device's memory
                inputs = inputs.to(torch_device)
                targets = targets.to(torch_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = network(inputs)

                loss = objective(outputs, targets)
                loss.backward()

                optimizer.step()

        # Record training end time
        toc = time.time()

        # Report the final loss
        print("final loss = %f" % (loss.item()))

        """
        Testing
        """

        # Model to eval mode
        network.eval()

        # Get the test data
        inputs, targets = test.dataset[test.indices]

        # Store the batch to torch_device's memory
        inputs = inputs.to(torch_device)
        targets = targets.to(torch_device)

        # Record testing start time
        tic_testing = time.time()

        _, mean, var, metrics = network.predict_dist(
            inputs, n_predictions, y_test=targets, reg_strength=reg_strength)
        
        # Record testing end time
        toc_testing = time.time()

        # store additional metrics
        if len(metrics) > 0:
            for key, value in metrics.items():
                print(str(key) + " = " + str(value))

                if key == 'rmse_mc':
                    with open(test_results_rmse_mc_path, 'a+') as rmse_mc_file:
                        rmse_mc_file.write('%d %f \n' % (s, value))

                elif key == 'test_ll_mc':
                    with open(test_results_lls_mc_path, 'a+') as lls_mc_file:
                        lls_mc_file.write('%d %f \n' % (s, value))
                        
        # Report the total training time
        print("training time = " + str(toc - tic) + " seconds")

        # Report the total testing time
        print("testing time = " + str(toc_testing - tic_testing) + " seconds")

        print()

subset 1.000000, n_hidden 1, hidden_dim 25, dropout_rate 0.010000, reg_strength 0.050000
n_epoch 40



0it [00:00, ?it/s]

Downloading http://archive.ics.uci.edu/ml/machine-learning-databases/00243/yacht_hydrodynamics.data to ./datasets_files/yacht/yacht_hydrodynamics.data


16384it [00:00, 35135.24it/s]            


subset size = (308, 6)
training set size = 246
test set size = 62

Training with split 0
final loss = 252.278519
rmse_mc = tensor(14.8029, device='cuda:0', grad_fn=<SqrtBackward>)
rmse_non_mc = tensor(14.8018, device='cuda:0', grad_fn=<SqrtBackward>)
test_ll_mc = tensor(-7.8493, device='cuda:0', grad_fn=<MeanBackward0>)
training time = 0.6883201599121094 seconds
testing time = 0.12498617172241211 seconds

Training with split 1
final loss = 245.172470
rmse_mc = tensor(13.6397, device='cuda:0', grad_fn=<SqrtBackward>)
rmse_non_mc = tensor(13.6373, device='cuda:0', grad_fn=<SqrtBackward>)
test_ll_mc = tensor(-7.0218, device='cuda:0', grad_fn=<MeanBackward0>)
training time = 0.5279178619384766 seconds
testing time = 0.0707998275756836 seconds

Training with split 2
final loss = 203.990540
rmse_mc = tensor(14.4400, device='cuda:0', grad_fn=<SqrtBackward>)
rmse_non_mc = tensor(14.4368, device='cuda:0', grad_fn=<SqrtBackward>)
test_ll_mc = tensor(-7.5749, device='cuda:0', grad_fn=<MeanBackwar