## Experiment Setup

### Colab environment, autoreload, and random seed

In [1]:
import time
import datetime
import os
import sys
import itertools

# Set to False to skip the Colab-specific setup even when running on Colab
use_colab = True

# Detect whether this notebook is running on Colab by attempting to import
# the google.colab package (github.com/googlecolab/colabtools), which is
# only expected to be present in that environment.
#
# Only ImportError is treated as "not on Colab". A bare `except:` would
# also swallow KeyboardInterrupt / SystemExit and mask unrelated failures.
try:
    from google.colab import drive
except ImportError:
    colab_available = False
else:
    colab_available = True

# Only touch Google Drive / the Colab VM when Colab use was requested AND
# the google.colab import above succeeded.
if use_colab and colab_available:
    # Mount Google Drive at /content/drive
    drive.mount('/content/drive')
    
    # If there's a package I need to install separately, do it here
    # NOTE(review): the install is unpinned; the run recorded in this
    # notebook reports Pyro 1.0.0 -- consider pinning for reproducibility.
    !pip install pyro-ppl

    # cd to the appropriate working directory under my Google Drive
    # (relative path: assumes the current directory is /content -- TODO confirm)
    %cd 'drive/My Drive/Colab Notebooks/bayesian-dl-experiments'
    
    # List the directory contents
    !ls

# IPython reloading magic
# (presumably so that edits to the local ronald_bdl package are picked up
# without restarting the kernel)
%load_ext autoreload
%autoreload 2

# Random seeds
# Based on https://pytorch.org/docs/stable/notes/randomness.html
# Only the seed value is defined here; the training loop passes it to
# torch.manual_seed, np.random.seed and pyro.set_rng_seed.
random_seed = 682

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/My Drive/Colab Notebooks/bayesian-dl-experiments
datasets_files			 experiment_convergence_2.ipynb  README.md
experiment_comparison_toy.ipynb  experiment_nn_capacity_1.ipynb  ronald_bdl
experiment_convergence_1.ipynb	 LICENSE			 test_results


### Third party libraries (NumPy, PyTorch, Pyro)

In [2]:
# Third party libraries import
import numpy as np
import torch
import pyro
import matplotlib.pyplot as plt

# Report the interpreter and library versions this run was produced with
print("\n".join([
    "Python Version: " + sys.version,
    "NumPy Version: " + np.__version__,
    "PyTorch Version: " + torch.__version__,
    "Pyro Version: " + pyro.__version__,
]))

Python Version: 3.6.8 (default, Oct  7 2019, 12:59:55) 
[GCC 8.3.0]
NumPy Version: 1.17.4
PyTorch Version: 1.3.1
Pyro Version: 1.0.0


In [3]:
# More imports...
from torch import nn, optim
from torch.utils.data import random_split, DataLoader, RandomSampler
import torchvision
import torchvision.transforms as transforms
from pyro.infer import SVI, Trace_ELBO, HMC, MCMC

# Import model and dataset classes from ronald_bdl
from ronald_bdl import models, datasets

# pyplot setting: render matplotlib figures inline in the notebook output
%matplotlib inline

# torch.device / CUDA setup
# Set use_cuda to False to force CPU execution even when a GPU is present.
use_cuda = True

# Use the GPU only when it is both requested and actually available
torch_device = torch.device(
    'cuda' if use_cuda and torch.cuda.is_available() else 'cpu')

# Page-locked host memory gives faster host-to-GPU copies, so it is
# enabled exactly when the selected device is CUDA.
use_pin_memory = torch_device.type == 'cuda'

if use_pin_memory:
    # Ask cuDNN for deterministic algorithms and turn off its 'benchmark'
    # autotuner.
    # Note: https://discuss.pytorch.org/t/what-does-torch-backends-cudnn-benchmark-do/5936
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # CUDA libraries version information
    print("\n".join([
        "CUDA Version: " + str(torch.version.cuda),
        "cuDNN Version: " + str(torch.backends.cudnn.version()),
        "CUDA Device Name: " + str(torch.cuda.get_device_name()),
        "CUDA Capabilities: " + str(torch.cuda.get_device_capability()),
    ]))

CUDA Version: 10.1.243
cuDNN Version: 7603
CUDA Device Name: Tesla K80
CUDA Capabilities: (3, 7)


### Variable settings

#### Data prep

In [0]:
# CIFAR10 preprocessing pipeline: convert each image to a tensor, then
# normalize every one of the three channels with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,) * 3, std=(0.5,) * 3),
])

# How many independent random train/test splits to evaluate per configuration
n_splits = 10

# Fraction of each subset that goes to training (the remainder is the test set)
dataset_train_size = 0.8

# Fractions of the original dataset to make available as a whole;
# one experiment is run per entry
subset_proportions = [0.1, 0.5, 1]

#### NN settings

In [0]:
# Candidate L2 regularization strengths (passed to the optimizer as weight decay)
reg_strengths = [5e-4]

# Candidate dropout rates for the network
dropout_rates = [1e-2]


### Training setup

In [0]:
# Timestamp (YYYYMMDDhhmm) taken once when this cell runs; it names the
# results directory shared by every configuration of this run
test_start_time = datetime.datetime.now().strftime('%Y%m%d%H%M')

# Loss to minimize: cross entropy
objective = nn.CrossEntropyLoss()

# Optimizer learning rate (0.001 is also the PyTorch default)
learning_rate = 0.001

# Epoch counts to compare
n_epochs = [10, 100, 1000]

# Mini-batch size for the training data
n_training_batch = 128

# Number of test predictions drawn for each data point
n_prediction = 10

## Train the network

In [0]:
# Grid experiment: for every combination of dataset subset proportion,
# dropout rate, regularization strength and epoch count, train a fresh
# network on each of n_splits random train/test splits and append the
# resulting test accuracies to per-configuration result files.
for subset_prop, dropout_rate, reg_strength, n_epoch in itertools.product(
    subset_proportions,
    dropout_rates, reg_strengths, 
    n_epochs,
):  
    # Reset the random number generators for each configuration
    # (to produce identical results)
    torch.manual_seed(random_seed)
    np.random.seed(random_seed)
    pyro.set_rng_seed(random_seed)

    # Print parameter combinations being tested
    print(
        "subset %f, dropout_rate %f, reg_strength %f"
        % (subset_prop, dropout_rate, reg_strength))

    print("n_epoch %d" % n_epoch)
    print()

    """
    Results file storage
    """

    # Create directory to store results for the current test configuration:
    # ./test_results/error_convergence_2/CIFAR-10/<test_start_time>/
    #     <subset_prop>_<dropout_rate>_<reg_strength>_<n_epoch>
    test_results_path = os.path.join(
        './test_results',
        'error_convergence_2',
        'CIFAR-10',
        test_start_time,
        (
            str(subset_prop)
            + '_' + str(dropout_rate) 
            + '_' + str(reg_strength)
            + '_' + str(n_epoch)),
    )

    os.makedirs(test_results_path, exist_ok=True)

    # One text file per metric; each split appends one line to it
    test_results_accuracy_mc_path = os.path.join(
        test_results_path,
        "accuracy_mc.txt"
    )

    test_results_accuracy_non_mc_path = os.path.join(
        test_results_path,
        "accuracy_non_mc.txt"
    )    

    """
    Dataset multiple splits prep
    """
    # Prepare new subset of the original dataset
    # (datasets.CIFAR10 comes from ronald_bdl; presumably limit_size is the
    # fraction of the dataset to keep -- the recorded run shows 5000 images
    # for 0.1. Verify against the class.)
    subset = datasets.CIFAR10(
        root='./datasets_files', limit_size=subset_prop, transform=transform, download=True)

    # Determine sizes of training and testing set
    # (training size is rounded down; the test set takes the remainder)
    train_size = int(dataset_train_size * len(subset))
    test_size = len(subset) - train_size

    # Print the size of the subset
    print("subset size = " + str(subset.data.shape))
    print("training set size = %d" % train_size)
    print("test set size = %d" % test_size)
    print()

    # Prepare multiple sets of random train-test splits
    # to test the parameter combination.
    # Each split is drawn independently from the same subset, so the test
    # sets of different splits may overlap.
    subset_splits = []

    for _ in range(n_splits):
        train, test = random_split(subset, lengths=[train_size, test_size])
        subset_splits.append((train, test))

    """
    Training & testing
    """

    # Try learning with different splits
    for s, (train, test) in enumerate(subset_splits):

        """
        Training
        """

        print('Training with split %d' % s)

        # NOTE(review): no shuffle= argument is passed, so the batches are
        # presumably visited in the same order every epoch -- confirm this
        # is intended.
        train_loader = DataLoader(train, batch_size=n_training_batch, pin_memory=use_pin_memory)

        # Prepare a freshly initialized network for this split
        network = models.SimpleCIFAR10MCDropout(
            dropout_rate=dropout_rate,
            dropout_type='bernoulli',
        )

        # Send the whole model to the selected torch.device
        network.to(torch_device)

        # Model to train mode
        network.train()

        # Adam optimizer
        # https://pytorch.org/docs/stable/optim.html?highlight=adam#torch.optim.Adam
        # NOTE: Need to set L2 regularization from here
        optimizer = optim.Adam(
            network.parameters(),
            lr=learning_rate,
            weight_decay=reg_strength, # L2 regularization
        )

        # Record training start time (for this split)
        tic = time.time()

        for epoch in range(n_epoch): # loop over the dataset multiple times
            # Mini-batches
            for data in train_loader:
                # get the inputs; data is a list of [inputs, labels]
                inputs, targets = data

                # Store the batch to torch_device's memory
                inputs = inputs.to(torch_device)
                targets = targets.to(torch_device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward + backward + optimize
                outputs = network(inputs)

                loss = objective(outputs, targets)
                loss.backward()

                optimizer.step()

            # Reports the loss of the LAST mini-batch of this epoch only,
            # not an average over the epoch
            print("epoch %d loss = %f" % (epoch, loss.item()))

        # Record training end time
        toc = time.time()

        # Report the final loss (again the last mini-batch of the last epoch)
        print("final loss = %f" % (loss.item()))

        """
        Testing
        """

        # Model to eval mode
        # NOTE(review): predict_dist is defined in ronald_bdl; whether it
        # keeps dropout active for the MC passes after eval() cannot be seen
        # from this notebook -- verify against the model class.
        network.eval()

        # Batched loader over the held-out split (reuses the training batch size)
        test_loader = DataLoader(test, batch_size=n_training_batch, pin_memory=use_pin_memory)

        # Record testing start time
        tic_testing = time.time()

        # Request n_prediction predictions per data point.
        # Only `metrics` is used below; `mean` is not read in this cell.
        _, mean, metrics = network.predict_dist(test_loader, n_prediction, torch_device)

        # Record testing end time
        toc_testing = time.time()

        # Record all the scores to the score files.
        # Every metric is printed; only 'accuracy_mc' and 'accuracy_non_mc'
        # are persisted, appended as "<split index> <value>" lines.
        if len(metrics) > 0:
            for key, value in metrics.items():
                # In the recorded run the values print as 0-dim CUDA tensors
                print(str(key) + " = " + str(value))

                if key == 'accuracy_mc':
                    with open(test_results_accuracy_mc_path, 'a+') as accuracy_mc_file:
                        accuracy_mc_file.write('%d %f \n' % (s, value))

                elif key == 'accuracy_non_mc':
                    with open(test_results_accuracy_non_mc_path, 'a+') as accuracy_non_mc_file:
                        accuracy_non_mc_file.write('%d %f \n' % (s, value))

        # Report the total training time (wall clock, this split only)
        print("training time = " + str(toc - tic) + " seconds")

        # Report the total testing time (wall clock, this split only)
        print("testing time = " + str(toc_testing - tic_testing) + " seconds")

        print()

subset 0.100000, dropout_rate 0.010000, reg_strength 0.000500
n_epoch 10

Files already downloaded and verified
subset size = (5000, 32, 32, 3)
training set size = 4000
test set size = 1000

Training with split 0
epoch 0 loss = 2.007522
epoch 1 loss = 1.848649
epoch 2 loss = 1.701818
epoch 3 loss = 1.565948
epoch 4 loss = 1.474506
epoch 5 loss = 1.376433
epoch 6 loss = 1.310120
epoch 7 loss = 1.230645
epoch 8 loss = 1.173191
epoch 9 loss = 1.096413
final loss = 1.096413
accuracy_mc = tensor(0.4556, device='cuda:0')
accuracy_non_mc = tensor(0.4534, device='cuda:0')
training time = 11.017351865768433 seconds
testing time = 0.3115875720977783 seconds

Training with split 1
epoch 0 loss = 2.166982
epoch 1 loss = 2.111107
epoch 2 loss = 2.000000
epoch 3 loss = 1.836208
epoch 4 loss = 1.714867
epoch 5 loss = 1.645934
epoch 6 loss = 1.574343
epoch 7 loss = 1.475113
epoch 8 loss = 1.388012
epoch 9 loss = 1.312057
final loss = 1.312057
accuracy_mc = tensor(0.4367, device='cuda:0')
accuracy_non_