# Train a deep CNN on XPS data on Google Colab

In this notebook, we will train a deep convolutional network on iron XPS spectra made up of linear combinations of single iron reference spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
# Mount the user's Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
import os

drive.mount("/content/drive")

# Make the project folder on the mounted drive the working directory.
# This has to run after the mount above, because the path lives inside it.
os.chdir("/content/drive/My Drive/deepxps")

### Install packages and import modules

In [None]:
%%capture
# The %%capture cell magic above suppresses all output of this cell
# (it has to stay on the first line of the cell).
# Install packages
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib
import random

# Magic commands
# Render matplotlib figures inline, below the cell that creates them.
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell,
# not only the value of the last one.
InteractiveShell.ast_node_interactivity = "all"

### Install and import TensorFlow and TensorFlow Probability

In [None]:
# Disable tf warnings
# The variable is set before TensorFlow is imported below.
# `os` comes from the drive-mounting cell at the top of the notebook.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import tensorflow as tf
import tensorflow_probability as tfp
# Short alias for the TFP distributions namespace.
tfd = tfp.distributions
# Request deterministic op implementations so that seeded runs can be repeated.
tf.config.experimental.enable_op_determinism()
# Reset the global Keras state before any model is built.
tf.keras.backend.clear_session()

### Set seeds and restart session to ensure reproducibility

In [None]:
def reset_seeds_and_session(seed=1):
    """
    Seed all random number generators and install a single-threaded session.

    Seeds Python's ``random`` module, NumPy and TensorFlow with the same
    value, then creates a TensorFlow (v1-compat) session that uses one
    intra-op and one inter-op thread and registers it as the Keras session.

    Parameters
    ----------
    seed : int, optional
        Seed used for all generators. The default is 1.

    Returns
    -------
    None.

    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up,
    # so setting it here presumably only affects child processes -- confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # `random` is imported by this notebook, so it has to be seeded as well
    # for the reset to cover every generator in use.
    random.seed(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)

    # Restrict TensorFlow to one thread per pool.
    session_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(),
        config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)


reset_seeds_and_session(seed=1)

### Check TensorFlow and TensorFlow Probability versions

In [None]:
# Both strings are shown as cell output because the notebook sets
# InteractiveShell.ast_node_interactivity = "all" in the import cell.
f"TF version: {tf.__version__}."
f"TFP version: {tfp.__version__}."

### Check hardware

In [None]:
from tensorflow.python.profiler import profiler_client

if tf.test.gpu_device_name():
    print(f"Found GPU: {tf.test.gpu_device_name()}.")
    !nvidia-smi
else:
    print("Found no GPU.")
try:
    tpu_profile_service_address = os.environ['COLAB_TPU_ADDR'].replace('8470', '8466')
    print(f"Found TPU: {profiler_client.monitor(tpu_profile_service_address, 100, 2)}.")
except:
    print("Found no TPU.")

## Initial training

### Load custom modules

In [None]:
# Reload the project modules if they were imported before (picks up edits to
# the source files); import them on the first run of this cell.
try:
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print("Modules were reloaded.")
except NameError:
    # Only "name not defined yet" means a first run. Any other error raised
    # while reloading (e.g. a broken module) is a real problem and must not
    # be hidden behind a "loaded" message.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print("Modules were loaded.")

### Set up the parameters & folder structure



In [None]:
# Time stamp of this run in Berlin local time, e.g. "20210528_14h05m".
time = (
    datetime.datetime.now()
    .astimezone(pytz.timezone('Europe/Berlin'))
    .strftime("%Y%m%d_%Hh%Mm")
)

# Name of the experiment; alternatives are kept below for quick switching.
exp_name = "Ni_2_classes_linear_comb_regression_spatial_prior"
#exp_name = "MNIST_classification_bayesian"
#exp_name = "housing_prices_regression_bayesian"
#exp_name = "shrunken_babys_regression_bayesian"

# Classifier object for this run.
clf = classifier.Classifier(
    time=time,
    exp_name=exp_name,
    task="regression",
    intensity_only=True,
)

### If labels not saved with data ###
# =============================================================================
# labels = ['Fe metal', 'FeO', 'Fe3O4', 'Fe2O3']
# clf = classifier.Classifier(time=time,
#                            exp_name=exp_name,
#                            task='regression',
#                            intensity_only=True,
#                            labels=labels)
# =============================================================================

### Load and inspect the data

In [None]:
# Location of the HDF5 data set on the mounted drive.
input_filepath = r'/content/drive/My Drive/deepxps/datasets/20210528_Ni_linear_combination_small_gas_phase.h5'

# Split fractions and number of examples passed to the loader.
train_test_split = 0.2
train_val_split = 0.2
no_of_examples = 100000

# Load the data; the loader returns the train/val/test arrays for the
# inputs, the targets and the simulation values.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
    sim_values_train,
    sim_values_val,
    sim_values_test,
) = clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split,
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='train')

In [None]:
# =============================================================================
# clf.datahandler.y_train = clf.datahandler.y_train[6:8]
# clf.datahandler.y_val = clf.datahandler.y_val[:1]
# clf.datahandler.y_test = clf.datahandler.y_test[1:2]
# clf.datahandler.X_train = clf.datahandler.X_train[6:8]
# clf.datahandler.X_val = clf.datahandler.X_val[:1]
# clf.datahandler.X_test = clf.datahandler.X_test[1:2]
# 
# clf.datahandler.y_train
# clf.datahandler.y_val
# clf.datahandler.y_test
# =============================================================================

#### Other data

##### Only use classification data

In [None]:
### Only use classification data
# A regression task leaves the data untouched; a classification task
# replaces all splits by the output of the data handler's filter.
if clf.task == "regression":
    print("Dataset was not changed.")

elif clf.task == "classification":
    (
        X_train,
        X_val,
        X_test,
        y_train,
        y_val,
        y_test,
        sim_values_train,
        sim_values_val,
        sim_values_test,
    ) = clf.datahandler._only_keep_classification_data()

    clf.plot_random(no_of_spectra=10, dataset='train')

##### MNIST

In [None]:
### Loads MNIST dataset.###
import matplotlib.pyplot as plt

# Settings used by the data handler for this data set.
clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 4000

print('Loading MNIST dataset')
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Merge the predefined train/test parts, add a trailing channel axis and keep
# the first `clf.datahandler.no_of_examples` examples. The count is read from
# the data handler (set above) and not from the global `no_of_examples` of
# the XPS loading cell, which holds a different value.
X = np.expand_dims(
    np.concatenate((X_train, X_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
X = X.astype('float32') / 255
y = np.expand_dims(
    np.concatenate((y_train, y_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
clf.datahandler.X, clf.datahandler.y = X, y

# Re-split the merged data with the data handler's own splitting routine.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
 ) = clf.datahandler._split_test_val_train(X, y)

# Store the splits and the data set description in the data handler.
clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 10
clf.datahandler.labels = list(range(clf.datahandler.num_classes))

print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

# Show the first training image as a sanity check.
plt.imshow(X_train[0, :, :, 0], cmap='gist_gray')
plt.show()

##### Shrunken babies

In [None]:
from PIL import Image
import glob
import pandas as pd
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

# Loads shrunken baby dataset
clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 836
num_classes = 10

# glob returns files in arbitrary, filesystem-dependent order; sorting makes
# the order of the examples (and therefore the later split) repeatable.
image_paths = sorted(
    glob.glob(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/*.png"))
if not image_paths:
    raise FileNotFoundError(
        "No .png images found in the shrunken_baby_ds dataset folder.")

# Read each image, closing the file right after its pixels are copied, and
# add a trailing channel axis: (126,126) --> (126,126,1).
X = []
for image_path in image_paths[:clf.datahandler.no_of_examples]:
    with Image.open(image_path) as image_file:
        X.append(np.expand_dims(np.array(image_file), -1))
X = np.array(X) # convert list of images to single array [(126,126,1)] --> (836, 126, 126, 1)

# Read labels from file.
# NOTE(review): `labels` is not used below (the targets are derived from the
# images); if it is used elsewhere, confirm that the row order of the CSV
# matches the order of `image_paths`.
#labels = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels.csv").to_numpy()
labels = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels.csv").to_numpy()[:clf.datahandler.no_of_examples]
#y = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels_new.csv").to_numpy()[:clf.datahandler.no_of_examples]

# Get labels from data: vertical extent (max row - min row) of all pixels
# above the threshold. The loops run over the images actually loaded, so
# fewer files than `no_of_examples` no longer raise an IndexError.
# NOTE(review): this threshold is applied before the division by 255 below,
# whereas the plotting loop thresholds the normalized images -- confirm that
# the two are meant to differ.
child_positions = [np.where(image > 0.2) for image in X]
y = np.array([np.max(position[0]) - np.min(position[0])
              for position in child_positions])
y = np.expand_dims(y, -1)

# Shrunk data by half.
#X = X[:,5:-21,13:-13,:]
X_new = np.array([
    block_reduce(image, block_size=(2, 2, 1), func=np.mean)
    for image in X])
child_positions_shrunk = [np.where(image > 0.2) for image in X_new]
y_shrunk = np.array([np.max(position[0]) - np.min(position[0])
                     for position in child_positions_shrunk])
y_shrunk = np.expand_dims(y_shrunk, -1)

# Normalize data
X = X.astype('float32') / 255
X_new = X_new.astype('float32') / 255

# Plot original and shrunken data
for i in range(5):
    r = np.random.randint(0,X.shape[0])
    fig, ax = plt.subplots(nrows=1, ncols=2)

    # Top-most and bottom-most pixel above threshold in the original image.
    child_position = np.where(X[r] > 0.2)
    min_height, max_height = np.min(child_position[0]), np.max(child_position[0])
    min_height_hor = child_position[1][np.argmin(child_position[0])]
    max_height_hor = child_position[1][np.argmax(child_position[0])]
    # Same for the shrunken image.
    child_position_new = np.where(X_new[r] > 50/255.0)
    min_height_new, max_height_new = np.min(child_position_new[0]), np.max(child_position_new[0])
    min_height_hor_new = child_position_new[1][np.argmin(child_position_new[0])]
    max_height_hor_new = child_position_new[1][np.argmax(child_position_new[0])]
    # y has shape (n, 1); index the scalar explicitly instead of converting
    # a one-element array with int().
    real_size = int(y[r, 0])
    shrunk_size = max_height_new - min_height_new
    print(f"Child no. {r}, real size: {real_size}, new size: {shrunk_size}, factor: {np.round(real_size/shrunk_size,2)}")

    _ = ax[0].imshow(np.squeeze(X[r]))
    _ = ax[0].scatter(min_height_hor, min_height,  s=50, c='red', marker='.')
    _ = ax[0].scatter(max_height_hor, max_height,  s=50, c='blue', marker='.')
    _ = ax[1].imshow(np.squeeze(X_new[r]))
    _ = ax[1].scatter(min_height_hor_new, min_height_new,  s=50, c='red', marker='.')
    _ = ax[1].scatter(max_height_hor_new, max_height_new,  s=50, c='blue', marker='.')
    plt.show()

# Store data in clf.datahandler object
clf.datahandler.X, clf.datahandler.y = X_new, y_shrunk

(
    clf.datahandler.X_train,
    clf.datahandler.X_val,
    clf.datahandler.X_test,
    clf.datahandler.y_train,
    clf.datahandler.y_val,
    clf.datahandler.y_test,
 ) = clf.datahandler._split_test_val_train(
     clf.datahandler.X,
     clf.datahandler.y)

clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 1
clf.datahandler.labels = ["sizes"]

print("X.shape =", clf.datahandler.X.shape)
print("y.shape =", clf.datahandler.y.shape)
print("X_train.shape =", clf.datahandler.X_train.shape)
print("y_train.shape =", clf.datahandler.y_train.shape)
print("X_val.shape =", clf.datahandler.X_val.shape)
print("y_val.shape =", clf.datahandler.y_val.shape)
print("X_test.shape =", clf.datahandler.X_test.shape)
print("y_test.shape =", clf.datahandler.y_test.shape)
print("\n")

##### Housing prices

In [None]:
# Settings used by the data handler for this data set.
clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 100

from keras.datasets import boston_housing
(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

# Merge the predefined train/test parts, add a trailing axis and keep the
# first `clf.datahandler.no_of_examples` examples. The count is read from the
# data handler (set above) and not from the global `no_of_examples` of the
# XPS loading cell, which holds a different value.
X = np.expand_dims(
    np.concatenate((X_train, X_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
y = np.expand_dims(
    np.concatenate((y_train, y_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
clf.datahandler.X, clf.datahandler.y = X, y

# Re-split the merged data with the data handler's own splitting routine.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
 ) = clf.datahandler._split_test_val_train(X, y)


# Store the splits and the data set description in the data handler.
clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 1
# NOTE(review): "prizes" is presumably a typo for "prices"; left unchanged
# because the label text may already be used in stored results -- confirm.
clf.datahandler.labels = ["prizes"]

print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

### Design the model

In [None]:
# Reload the models module if it was imported before (picks up edits to the
# source file); import it on the first run of this cell.
try:
    importlib.reload(models)
    print("Models module was reloaded.")
except NameError:
    # Only "name not defined yet" means a first run; any other error raised
    # while reloading is a real problem and is allowed to surface.
    import xpsdeeplearning.network.models as models
    print("Models module was loaded.")

In [None]:
    # NOTE(review): the code from here up to `prior_fn` looks like an
    # unfinished paste of PyTorch code. This method is indented but no
    # enclosing `class` statement precedes it in this cell, and `torch`, `td`
    # and `d` are not defined in the visible part of the notebook, so the
    # cell presumably cannot be executed as written -- confirm and complete
    # or remove.
    def __init__(self, shape, loc, scale, *, lengthscale=1.0):
        """Samples `.p` from a correlated Gaussian of dimension
        `shape[-2]*shape[-1]`"""
        loc = torch.tensor(loc).unsqueeze(0)


        # NOTE(review): `d` (the distance matrix) is never assigned in this fragment.
        super().__init__(shape, loc=loc, scale=scale, distance_matrix=d,
                         lengthscale=lengthscale)

    # we have to move the kernel evaluation into the _dist, so that the hierarchical versions of this prior work
    # for fixed parameters, we could consider precomputing it to save a bit of time here
    _dist = SquaredExponentialNormal

class SquaredExponentialNormal(td.MultivariateNormal):
    """Multivariate Normal with a squared exponential kernel as covariance"""
    def __init__(self, loc, scale, distance_matrix, lengthscale):
        # NOTE(review): the method body is missing.


def prior_fn(loc, shape=None, scale=1.0, lengthscale=1.0):
    """
    Build a spatially correlated multivariate normal over a 2D grid.

    The covariance between two grid points is
    ``scale**2 * exp(-distance / lengthscale)``, where ``distance`` is the
    Euclidean distance between the points on a grid of size
    ``shape[-2] x shape[-1]``.

    Parameters
    ----------
    loc : array-like or tf.Tensor
        Mean of the distribution. Its last dimension has to hold
        ``shape[-2] * shape[-1]`` values.
    shape : sequence of int
        Shape whose last two entries define the grid. Required: the
        original code left this value unassigned, so no default is known.
    scale : float, optional
        Standard deviation of every grid point. The default is 1.0.
    lengthscale : float, optional
        Length scale of the correlation. The default is 1.0.

    Raises
    ------
    ValueError
        If no `shape` is given.

    Returns
    -------
    tfp.distributions.MultivariateNormalFullCovariance
        Distribution with mean `loc` and the covariance described above.

    """
    if shape is None:
        raise ValueError(
            "prior_fn needs the `shape` of the grid (its last two entries "
            "are used) to build the covariance matrix.")

    loc = tf.convert_to_tensor(loc, dtype_hint=tf.float32)

    # generates all the points in a grid from (0,0) to (shape[-2], shape[-1])
    p = np.mgrid[:shape[-2], :shape[-1]].reshape(2, -1).T
    # computes the matrix of Euclidean distances between all the points in p
    distance_matrix = np.sum((p[:, None, :] - p[None, :, :]) ** 2.0, 2) ** 0.5
    # NumPy computes in float64; cast so that mean and covariance share a dtype.
    cov = tf.cast(
        tf.math.exp(-distance_matrix / lengthscale) * scale ** 2.0,
        loc.dtype)

    # NOTE(review): TFP marks MultivariateNormalFullCovariance as deprecated
    # in favour of MultivariateNormalTriL with a Cholesky factor -- consider
    # switching once the prior is wired into the model.
    return tfp.distributions.MultivariateNormalFullCovariance(
         loc=loc,
         covariance_matrix=cov)


class SpatialBayesianCNN(models.EmptyModel):
    """
    Bayesian CNN with a custom prior on the convolutional kernels.

    Three parallel Flipout convolutions (kernel sizes 5, 10 and 15) read
    the input. Their outputs are concatenated and passed through two
    further Flipout convolutions, average pooling and two DenseFlipout
    layers. `kernel_prior_fn` is handed to every convolutional layer;
    the dense layers are built without it.

    """
    def __init__(
        self,
        inputshape,
        num_classes,
        kernel_prior_fn,
        kl_divergence_fn,
        task,
        ):
        """
        Build the layer graph and initialize the parent model.

        Parameters
        ----------
        inputshape : tuple
            Shape of one example. Length 2 selects 1D convolution and
            pooling layers, length 3 selects the 2D variants.
        num_classes : int
            Number of units of the output layer.
        kernel_prior_fn : callable
            Passed as `kernel_prior_fn` to every Flipout convolution.
        kl_divergence_fn : callable
            Passed as kernel and bias divergence function to every
            Flipout layer.
        task : str
            "regression", "multi_class_detection" or "classification".
            Determines the activation of the output layer.

        Raises
        ------
        ValueError
            If `inputshape` has an unsupported length or `task` is
            unknown.

        """
        # Imported here so that the class does not depend on a later
        # notebook cell having been run.
        from tensorflow.keras import layers

        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            strides = (1,1)
            average_pool_layer = layers.AveragePooling2D
        else:
            raise ValueError(
                f"inputshape must have length 2 or 3, got {inputshape}.")

        if task in ("regression", "multi_class_detection"):
            output_act = None if num_classes == 1 else "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(f"Unknown task: {task!r}.")

        ## Change activation in Bayesian layers?
        prob_act = "relu"

        def flipout_conv(name, filters, kernel_size, padding):
            """Create one Flipout convolution with the shared settings."""
            return conv_layer(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding=padding,
                kernel_prior_fn=kernel_prior_fn,
                kernel_divergence_fn=kl_divergence_fn,
                bias_divergence_fn=kl_divergence_fn,
                activation=prob_act,
                name=name)

        self.input_1 = layers.Input(
            shape=inputshape,
            name="input_1")

        # Three parallel convolutions with different kernel sizes.
        self.conv_1_short = flipout_conv(
            name='conv_1_short', filters=12, kernel_size=5,
            padding='same')(self.input_1)
        self.conv_1_medium = flipout_conv(
            name='conv_1_medium', filters=12, kernel_size=10,
            padding='same')(self.input_1)
        self.conv_1_long = flipout_conv(
            name='conv_1_long', filters=12, kernel_size=15,
            padding='same')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = flipout_conv(
            name='conv_2', filters=10, kernel_size=5,
            padding='valid')(merged_sublayers)
        self.conv_3 = flipout_conv(
            name="conv_3", filters=10, kernel_size=5,
            padding='valid')(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)

        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        # NOTE(review): drop_1 is created but dense_1 reads flatten_1, so the
        # dropout layer is not part of the model graph -- confirm whether
        # dense_1 was meant to consume drop_1.
        self.drop_1 = layers.Dropout(
            rate=0.2,
            name='drop_1')(self.flatten_1)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='dense_1')(self.flatten_1)

        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=output_act,
            name='dense_2')(self.dense_1)

        self.outputs = self.dense_2
        # self.outputs = tfp.layers.IndependentNormal(event_shape=num_classes)(self.dense_2)

        no_of_inputs = len(sublayers)

        super(SpatialBayesianCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='SpatialBayesianCNN')

In [None]:
class BayesianCNN(models.EmptyModel):
    """
    CNN in which every convolutional and dense layer is a Flipout layer.

    Three parallel Flipout convolutions (kernel sizes 5, 10 and 15) read
    the input. Their outputs are concatenated and passed through two
    further Flipout convolutions, average pooling and two DenseFlipout
    layers.

    """
    def __init__(
        self,
        inputshape,
        num_classes,
        kl_divergence_fn,
        task,
        ):
        """
        Build the layer graph and initialize the parent model.

        Parameters
        ----------
        inputshape : tuple
            Shape of one example. Length 2 selects 1D convolution and
            pooling layers, length 3 selects the 2D variants.
        num_classes : int
            Number of units of the output layer.
        kl_divergence_fn : callable
            Passed as kernel and bias divergence function to every
            Flipout layer.
        task : str
            "regression", "multi_class_detection" or "classification".
            Determines the activation of the output layer.

        Raises
        ------
        ValueError
            If `inputshape` has an unsupported length or `task` is
            unknown.

        """
        # Imported here so that the class does not depend on a later
        # notebook cell having been run.
        from tensorflow.keras import layers

        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            strides = (1,1)
            average_pool_layer = layers.AveragePooling2D
        else:
            raise ValueError(
                f"inputshape must have length 2 or 3, got {inputshape}.")

        if task in ("regression", "multi_class_detection"):
            output_act = None if num_classes == 1 else "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(f"Unknown task: {task!r}.")

        ## Change activation in Bayesian layers?
        prob_act = "relu"

        def flipout_conv(name, filters, kernel_size, padding):
            """Create one Flipout convolution with the shared settings."""
            return conv_layer(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding=padding,
                kernel_divergence_fn=kl_divergence_fn,
                bias_divergence_fn=kl_divergence_fn,
                activation=prob_act,
                name=name)

        self.input_1 = layers.Input(
            shape=inputshape,
            name="input_1")

        # Three parallel convolutions with different kernel sizes.
        self.conv_1_short = flipout_conv(
            name='conv_1_short', filters=12, kernel_size=5,
            padding='same')(self.input_1)
        self.conv_1_medium = flipout_conv(
            name='conv_1_medium', filters=12, kernel_size=10,
            padding='same')(self.input_1)
        self.conv_1_long = flipout_conv(
            name='conv_1_long', filters=12, kernel_size=15,
            padding='same')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = flipout_conv(
            name='conv_2', filters=10, kernel_size=5,
            padding='valid')(merged_sublayers)
        self.conv_3 = flipout_conv(
            name="conv_3", filters=10, kernel_size=5,
            padding='valid')(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)

        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        # NOTE(review): drop_1 is created but dense_1 reads flatten_1, so the
        # dropout layer is not part of the model graph -- confirm whether
        # dense_1 was meant to consume drop_1.
        self.drop_1 = layers.Dropout(
            rate=0.2,
            name='drop_1')(self.flatten_1)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='dense_1')(self.flatten_1)

        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=output_act,
            name='dense_2')(self.dense_1)

        self.outputs = self.dense_2
        # self.outputs = tfp.layers.IndependentNormal(event_shape=num_classes)(self.dense_2)

# =============================================================================
#         s0, s1 = tf.split(self.dense_2, num_classes)       
#         self.outputs = tfp.layers.DistributionLambda(lambda t: tfd.Normal(t[0], t[1]))(s0, s1)
#         self.dense_2 = tfp.layers.DenseFlipout(
#             units=tfp.layers.IndependentNormal.params_size(num_classes), 
#             activation=output_act,
#             kernel_divergence_fn=kl_divergence_fn,
#             bias_divergence_fn=kl_divergence_fn)(self.dense_1)
# =============================================================================
# =============================================================================
#         self.dense_2 = layers.Dense(
#             units=num_classes+num_classes,
#             activation=output_act,
#             name='dense_2')(self.dense_1)
#         self.output = tfp.layers.DistributionLambda(
#             lambda t: tfd.LogNormal(loc=t[..., :1], scale=tf.math.softplus(0.05 * t[..., 1:]))),])
# =============================================================================

        no_of_inputs = len(sublayers)

        super(BayesianCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianCNN')
        
class LastLayerBayesianCNN(models.EmptyModel):
    """
    Deterministic CNN whose output layer is a DenseFlipout layer.

    Three parallel convolutions (kernel sizes 5, 10 and 15) read the
    input. Their outputs are concatenated and passed through two further
    convolutions, average pooling and a dense layer. Only the last layer
    is probabilistic.

    """
    def __init__(
        self,
        inputshape,
        num_classes,
        kl_divergence_fn,
        task,
        ):
        """
        Build the layer graph and initialize the parent model.

        Parameters
        ----------
        inputshape : tuple
            Shape of one example. Length 2 selects 1D convolution and
            pooling layers, length 3 selects the 2D variants.
        num_classes : int
            Number of units of the output layer.
        kl_divergence_fn : callable
            Passed as kernel and bias divergence function to the
            DenseFlipout output layer.
        task : str
            "regression", "multi_class_detection" or "classification".
            Determines the activation of the output layer.

        Raises
        ------
        ValueError
            If `inputshape` has an unsupported length or `task` is
            unknown.

        """
        # Imported here so that the class does not depend on a later
        # notebook cell having been run.
        from tensorflow.keras import layers

        # The convolution type follows the input rank, like strides and
        # pooling do. A hard-coded Conv1D would not fit the 2D strides and
        # 2D pooling selected for inputs of length 3.
        if len(inputshape) == 2:
            conv_layer = layers.Conv1D
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = layers.Conv2D
            strides = (1,1)
            average_pool_layer = layers.AveragePooling2D
        else:
            raise ValueError(
                f"inputshape must have length 2 or 3, got {inputshape}.")

        if task in ("regression", "multi_class_detection"):
            output_act = None if num_classes == 1 else "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(f"Unknown task: {task!r}.")

        def relu_conv(name, filters, kernel_size, padding):
            """Create one deterministic convolution with the shared settings."""
            return conv_layer(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding=padding,
                activation='relu',
                name=name)

        self.input_1 = layers.Input(
            shape=inputshape,
            name="input_1")

        # Three parallel convolutions with different kernel sizes.
        self.conv_1_short = relu_conv(
            name='conv_1_short', filters=12, kernel_size=5,
            padding='same')(self.input_1)
        self.conv_1_medium = relu_conv(
            name='conv_1_medium', filters=12, kernel_size=10,
            padding='same')(self.input_1)
        self.conv_1_long = relu_conv(
            name='conv_1_long', filters=12, kernel_size=15,
            padding='same')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = relu_conv(
            name='conv_2', filters=10, kernel_size=5,
            padding='valid')(merged_sublayers)
        self.conv_3 = relu_conv(
            name="conv_3", filters=10, kernel_size=5,
            padding='valid')(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)
        self.flatten_1 = layers.Flatten(
            name='flatten1')(self.average_pool_1)
        # NOTE(review): drop_1 is created but dense_1 reads flatten_1, so the
        # dropout layer is not part of the model graph -- confirm whether
        # dense_1 was meant to consume drop_1.
        self.drop_1 = layers.Dropout(
            rate=0.2,
            name='drop_1')(self.flatten_1)
        self.dense_1 = layers.Dense(
            units=4000,
            activation='relu',
            name='dense_1')(self.flatten_1)

        # The only probabilistic layer of this model.
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=output_act,
            name='dense_2')(self.dense_1)

        self.outputs = self.dense_2

        no_of_inputs = len(sublayers)

        super(LastLayerBayesianCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='LastLayerBayesianCNN')

#### Not used

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class CNN(models.EmptyModel):
    """
    Deterministic CNN with three parallel input convolutions.

    Three parallel convolutions (kernel sizes 5, 10 and 15) read the
    input. Their outputs are concatenated and passed through two further
    convolutions, average pooling and two dense layers. For regression
    with more than one output, the outputs are normalized to sum to one.

    """
    def __init__(
        self,
        inputshape,
        num_classes,
        task,
        ):
        """
        Build the layer graph and initialize the parent model.

        Parameters
        ----------
        inputshape : tuple
            Shape of one example. Length 2 selects 1D convolution and
            pooling layers, length 3 selects the 2D variants.
        num_classes : int
            Number of units of the output layer.
        task : str
            "regression", "multi_class_detection" or "classification".
            Determines the activation of the output layer.

        Raises
        ------
        ValueError
            If `inputshape` has an unsupported length or `task` is
            unknown.

        """
        if len(inputshape) == 2:
            conv_layer = layers.Conv1D
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = layers.Conv2D
            strides = (1,1)
            average_pool_layer = layers.AveragePooling2D
        else:
            raise ValueError(
                f"inputshape must have length 2 or 3, got {inputshape}.")

        if task in ("regression", "multi_class_detection"):
            output_act = None if num_classes == 1 else "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(f"Unknown task: {task!r}.")

        def relu_conv(name, filters, kernel_size, padding):
            """Create one convolution with the shared settings."""
            return conv_layer(
                filters=filters,
                kernel_size=kernel_size,
                strides=strides,
                padding=padding,
                activation='relu',
                name=name)

        self.input_1 = layers.Input(
            shape=inputshape,
            name="input_1")

        # Three parallel convolutions with different kernel sizes.
        self.conv_1_short = relu_conv(
            name='conv_1_short', filters=12, kernel_size=5,
            padding='same')(self.input_1)
        self.conv_1_medium = relu_conv(
            name='conv_1_medium', filters=12, kernel_size=10,
            padding='same')(self.input_1)
        self.conv_1_long = relu_conv(
            name='conv_1_long', filters=12, kernel_size=15,
            padding='same')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = relu_conv(
            name='conv_2', filters=10, kernel_size=5,
            padding='valid')(merged_sublayers)
        self.conv_3 = relu_conv(
            name="conv_3", filters=10, kernel_size=5,
            padding='valid')(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)
        self.flatten_1 = layers.Flatten(
            name='flatten1')(self.average_pool_1)
        # NOTE(review): drop_1 is created but dense_1 reads flatten_1, so the
        # dropout layer is not part of the model graph -- confirm whether
        # dense_1 was meant to consume drop_1.
        self.drop_1 = layers.Dropout(
            rate=0.2,
            name='drop_1')(self.flatten_1)
        self.dense_1 = layers.Dense(
            units=4000,
            activation='relu',
            name='dense_1')(self.flatten_1)
        self.dense_2 = layers.Dense(
            units=num_classes,
            activation=output_act,
            name='dense_2')(self.dense_1)

        # Normalize the regression outputs so that they sum to one. With a
        # single output this would turn every prediction into x / x = 1, so
        # the normalization is only applied when there are several outputs.
        if task == "regression" and num_classes > 1:
            self.outputs = layers.Lambda(
                lambda x: x / tf.reduce_sum(x, axis=-1, keepdims=True),
                name='output_normalization')(self.dense_2)
        else:
            self.outputs = self.dense_2

        no_of_inputs = len(sublayers)

        super(CNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='CNN')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class DropoutCNN(models.EmptyModel):
    """
    Multi-scale CNN with always-on (Monte Carlo) dropout.

    The input is passed through three parallel convolutions with kernel
    sizes 5, 10 and 15 whose outputs are concatenated. Dropout is applied
    to the concatenated features and after each of the two following
    convolutions. Every dropout layer is called with ``training=True``,
    so dropout stays active at prediction time and repeated predictions
    on the same input differ.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D variants.
    num_classes : int
        Number of units of the output layer.
    task : str
        One of "regression", "multi_class_detection" or
        "classification". Determines the output activation; for
        "regression" the outputs are additionally normalized so that
        they sum to one per example.

    Raises
    ------
    ValueError
        If ``inputshape`` has an unsupported length or ``task`` is not
        one of the supported values.
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        task,
        ):
        drop_rate = 0.3 # change dropout rate here
        
        if len(inputshape) == 2:
            conv_layer = layers.Conv1D
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = layers.Conv2D
            strides = (1,1)
            average_pool_layer =  layers.AveragePooling2D
        else:
            # Fail early instead of hitting an unbound local further down.
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        if (task == "regression" or task == "multi_class_detection"):
            if num_classes == 1:
                output_act = None
            else:
                output_act = "sigmoid"
    
        elif task == "classification":
            output_act = "softmax"

        else:
            raise ValueError(
                "task must be 'regression', 'multi_class_detection' or "
                f"'classification', got {task!r}.")
        
        self.input_1 = layers.Input(
            shape = inputshape,
            name="input_1")

        # Three parallel convolutions looking at the same input with
        # different receptive fields.
        self.conv_1_short = conv_layer(
            filters=12,
            kernel_size=5,
            strides=strides,
            padding='same',
            activation='relu',
            name='conv_1_short')(self.input_1)
        self.conv_1_medium = conv_layer(
            filters=12,
            kernel_size=10,
            strides=strides,
            padding='same',
            activation='relu',
            name='conv_1_medium')(self.input_1)
        self.conv_1_long = conv_layer(
            filters=12,
            kernel_size=15,
            strides=strides,
            padding='same',
            activation='relu',
            name='conv_1_long')(self.input_1)
        
        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers, name='conv_concat')

        # training=True keeps dropout active outside of fit() as well.
        self.conv_1_drop = layers.Dropout(
            rate=drop_rate,
            name='conv_1_drop')(merged_sublayers, training=True)
        self.conv_2 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            activation='relu',
            name='conv_2')(self.conv_1_drop)
        self.conv_2_drop = layers.Dropout(
            rate=drop_rate,
            name='conv_2_drop')(self.conv_2, training=True)
        self.conv_3 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            activation='relu',
            name="conv_3")(self.conv_2_drop)
        self.conv_3_drop = layers.Dropout(
            rate=drop_rate,
            name='conv_3_drop')(self.conv_3, training=True)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3_drop)
        self.flatten_1 = layers.Flatten(
            name='flatten_1')(self.average_pool_1)
        self.dense_1 = layers.Dense(
            units=4000,
            activation='relu',
            name='dense_1')(self.flatten_1)
        self.dense_2 = layers.Dense(
            units=num_classes,
            activation=output_act,
            name='dense_2')(self.dense_1)
              
        if task == "regression":
            # Divide every output row by its sum so the predicted
            # values of one example add up to one.
            self.outputs = layers.Lambda(
                lambda x: x/tf.reshape(K.sum(x, axis=-1),(-1,1)),
                name = 'output_normalization')(self.dense_2)
        
        else:
            self.outputs = self.dense_2

        # Number of parallel first-stage convolutions, forwarded to the
        # base class as `no_of_inputs`.
        no_of_inputs = len(sublayers)

        super(DropoutCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='DropoutCNN')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class CNN2(models.EmptyModel):
    """
    Small CNN: two (convolution -> max-pooling) stages followed by a
    single ``tfp.layers.DenseFlipout`` output layer.

    Reference given by the original author:
    https://github.com/DoctorLoop/BayesianDeepLearning/blob/master/Chapter3_TensorFlowProbability_BayesianConvNets.ipynb

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D variants.
    num_classes : int
        Number of units of the output layer.
    task : str
        One of "regression", "multi_class_detection" or
        "classification". Determines the output activation.

    Raises
    ------
    ValueError
        If ``inputshape`` has an unsupported length or ``task`` is not
        one of the supported values.
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        task,
        ):

        if len(inputshape) == 2:
            conv_layer = layers.Conv1D
            max_pool_layer = layers.MaxPool1D
            pool_size=4

        elif len(inputshape) == 3:
            conv_layer = layers.Conv2D
            max_pool_layer =  layers.MaxPool2D      
            pool_size=(4,4)

        else:
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        # "multi_class_detection" is handled like in the other model
        # classes of this notebook; previously it left output_act unbound.
        if (task == "regression" or task == "multi_class_detection"):
            if num_classes == 1:
                output_act = None
            else:
                output_act = "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(
                "task must be 'regression', 'multi_class_detection' or "
                f"'classification', got {task!r}.")
      
        self.input_1 = layers.Input(
            shape = inputshape,
            name="input_1")
        self.conv_1 = conv_layer(
            filters=16,
            kernel_size=5,
            activation = "relu",
            name="conv_1")(self.input_1)
        self.max_pool_1 = max_pool_layer(
            pool_size=pool_size
        )(self.conv_1)
        self.conv_2 = conv_layer(
            filters=32,
            kernel_size=3,
            activation = "relu",
            name='conv_2')(self.max_pool_1)
        self.max_pool_2 = max_pool_layer(
            pool_size=pool_size
        )(self.conv_2)
        # No trailing comma here: it would turn the tensor into a
        # 1-tuple and hand a tuple to the dense layer below.
        self.flatten_1 = layers.Flatten(
            name="flatten_1"
        )(self.max_pool_2)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=num_classes,
            activation = output_act,
            name='dense_1')(self.flatten_1)              

        no_of_inputs = 1

        super(CNN2, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_1,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='CNN2')

class BayesianCNN2(models.EmptyModel):
    """
    Bayesian counterpart of ``CNN2``: two (Flipout convolution ->
    max-pooling) stages followed by a ``tfp.layers.DenseFlipout``
    output layer.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D variants.
    num_classes : int
        Number of units of the output layer.
    kl_divergence_fn : callable
        Divergence function passed to every Flipout layer as both
        ``kernel_divergence_fn`` and ``bias_divergence_fn``.
    task : str
        One of "regression", "multi_class_detection" or
        "classification". Determines the output activation.

    Raises
    ------
    ValueError
        If ``inputshape`` has an unsupported length or ``task`` is not
        one of the supported values.
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        kl_divergence_fn,
        task,
        ):

        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            max_pool_layer = layers.MaxPool1D
            pool_size=4

        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            max_pool_layer =  layers.MaxPool2D      
            pool_size=(4,4)

        else:
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        if (task == "regression" or task == "multi_class_detection"):
            if num_classes == 1:
                output_act = None
            else:
                output_act = "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(
                "task must be 'regression', 'multi_class_detection' or "
                f"'classification', got {task!r}.")

        ## Change activation in Bayesian layers?
        prob_act = "relu"
      
        self.input_1 = layers.Input(
            shape = inputshape,
            name="input_1")
        # All divergence arguments use the `kl_divergence_fn` parameter;
        # the previous names `kernel_divergence_fn` and
        # `kl_divergence_function` were not defined in this scope.
        self.conv_1 = conv_layer(
            filters=16,
            kernel_size=5,
            activation = prob_act,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            name="conv_1")(self.input_1)
        self.max_pool_1 = max_pool_layer(
            pool_size=pool_size
        )(self.conv_1)
        self.conv_2 = conv_layer(
            filters=32,
            kernel_size=3,
            activation = prob_act,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            name='conv_2')(self.max_pool_1)
        self.max_pool_2 = max_pool_layer(
            pool_size=pool_size
        )(self.conv_2)
        # No trailing comma here: it would turn the tensor into a
        # 1-tuple and hand a tuple to the dense layer below.
        self.flatten_1 = layers.Flatten(
            name="flatten_1"
        )(self.max_pool_2)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=num_classes,
            activation = output_act,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            name='dense_1')(self.flatten_1)              

        no_of_inputs = 1

        super(BayesianCNN2, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_1,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianCNN2')

#### Design Kullback-Leibler divergence function

In [None]:
def kl_divergence_fn(q, p, _):
    """KL divergence between ``q`` and ``p`` divided by the number of training examples.

    The third positional argument is accepted and ignored. The training
    set size is read from ``clf.datahandler.X_train`` at call time.
    """
    kl = tfp.distributions.kl_divergence(q, p)
    return kl / tf.cast(clf.datahandler.X_train.shape[0], dtype=tf.float32)

#### Build the model

In [None]:
#clf.model = CNN(
#    inputshape=clf.datahandler.input_shape,
#    num_classes=clf.datahandler.num_classes,
#    task=clf.task)

# Active architecture: DropoutCNN. The commented-out blocks around this
# statement build the alternative models instead; enable exactly one.
clf.model = DropoutCNN(
    inputshape=clf.datahandler.input_shape,
    num_classes=clf.datahandler.num_classes,
    task=clf.task)

#clf.model = BayesianCNN(
#    inputshape=clf.datahandler.input_shape,
#    num_classes=clf.datahandler.num_classes,
#    kl_divergence_fn=kl_divergence_fn,
#    task=clf.task)

#clf.model = LastLayerBayesianCNN(
#    inputshape=clf.datahandler.input_shape,
#    num_classes=clf.datahandler.num_classes,
#    kl_divergence_fn=kl_divergence_fn,
#    task=clf.task)

#clf.model = CNN2(
#    inputshape=clf.datahandler.input_shape,
#    num_classes=clf.datahandler.num_classes,
#    task=clf.task)

# clf.model = BayesianCNN2(
#     inputshape=clf.datahandler.input_shape,
#     num_classes=clf.datahandler.num_classes,
#     kl_divergence_fn=kl_divergence_fn,
#     task=clf.task)

# =============================================================================

# Alternative: Build model from available models in models.py
# =============================================================================
# clf.model = models.ProbabilisticClassificationCNN2D(
#     clf.datahandler.input_shape,
#     clf.datahandler.num_classes,
#     kl_divergence_fn,
#     bias_divergence_fn)
# =============================================================================

### Test models and fit calls (delete later)

In [None]:
def normal_sp(params):
    """Turn a parameter tensor into a ``tfd.Normal`` distribution.

    The last axis of ``params`` is split in half. The first half is used
    directly as the mean; the second half is scaled by 0.05, passed
    through a softplus and offset by 1e-3 so the standard deviation is
    strictly positive.
    """
    half = params.shape[-1] // 2
    means, raw_scales = params[:, :half], params[:, half:]
    stddevs = 1e-3 + tf.math.softplus(0.05 * raw_scales)
    return tfd.Normal(loc=means, scale=stddevs)

class BayesianVICNN(models.EmptyModel):
    """
    Multi-scale Bayesian CNN built from Flipout layers whose output is a
    Normal distribution.

    The input is passed through three parallel Flipout convolutions with
    kernel sizes 5, 10 and 15 whose outputs are concatenated, followed by
    two more Flipout convolutions, average pooling and two Flipout dense
    layers. The last dense layer has ``2 * num_classes`` units which
    ``normal_sp`` splits into means and standard deviations.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D variants.
    num_classes : int
        Number of predicted quantities; the distribution has one mean
        and one standard deviation per class.
    kl_divergence_fn : callable
        Divergence function passed to every Flipout layer as both
        ``kernel_divergence_fn`` and ``bias_divergence_fn``.
    task : str
        One of "regression", "multi_class_detection" or
        "classification". Determines the activation of the last dense
        layer.

    Raises
    ------
    ValueError
        If ``inputshape`` has an unsupported length or ``task`` is not
        one of the supported values.
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        kl_divergence_fn,
        task,
        ):   
        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            strides = 1
            average_pool_layer = layers.AveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            strides = (1,1)
            average_pool_layer =  layers.AveragePooling2D
        else:
            # Fail early instead of hitting an unbound local further down.
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        if (task == "regression" or task == "multi_class_detection"):
            if num_classes == 1:
                output_act = None
            else:
                output_act = "sigmoid"
    
        elif task == "classification":
            output_act = "softmax"

        else:
            raise ValueError(
                "task must be 'regression', 'multi_class_detection' or "
                f"'classification', got {task!r}.")

        ## Change activation in Bayesian layers?
        prob_act = "relu"

        self.input_1 = layers.Input(
            shape = inputshape,
            name="input_1")   
        self.conv_1_short = conv_layer(
            filters=12,
            kernel_size=5,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='conv_1_short')(self.input_1)
        self.conv_1_medium = conv_layer(
            filters=12,
            kernel_size=10,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,            
            activation=prob_act,
            name='conv_1_medium')(self.input_1)
        self.conv_1_long = conv_layer(
            filters=12,
            kernel_size=15,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='conv_1_long')(self.input_1)
        
        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='conv_2')(merged_sublayers)
        self.conv_3 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name="conv_3")(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)
        
        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        # NOTE(review): drop_1 is built but dense_1 below consumes
        # flatten_1, so this dropout is not part of the path to the
        # model output. Confirm whether that is intended.
        self.drop_1 = layers.Dropout(
            rate=0.2,
            name='drop_1')(self.flatten_1)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn,
            activation=prob_act,
            name='dense_1')(self.flatten_1)
                           
        # Twice as many units as classes: one half becomes the means,
        # the other half the standard deviations (see normal_sp).
        # NOTE(review): output_act is applied to both halves, i.e. also
        # to the values that normal_sp turns into standard deviations.
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes+num_classes, 
            activation=output_act,
            kernel_divergence_fn=kl_divergence_fn,
            bias_divergence_fn=kl_divergence_fn)(self.dense_1)
        self.outputs = tfp.layers.DistributionLambda(normal_sp)(self.dense_2)       

        # Number of parallel first-stage convolutions, forwarded to the
        # base class as `no_of_inputs`.
        no_of_inputs = len(sublayers)

        super(BayesianVICNN, self).__init__(
            inputs=self.input_1,
            outputs=self.outputs,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianVICNN')

# Smoke test of the variational model on a handful of examples.
# NOTE(review): `Adam` and `NLL` are not defined in the part of the
# notebook visible before this cell (Adam is imported in the later
# compile cell, NLL only appears as commented-out code). Confirm both
# exist when the notebook is run top to bottom on a fresh kernel.
model_vi = BayesianVICNN(
    task=clf.task,
    kl_divergence_fn=kl_divergence_fn,
    num_classes=clf.datahandler.num_classes,
    inputshape=clf.datahandler.input_shape,
)
model_vi.compile(Adam(learning_rate=0.01), loss=NLL, metrics=["accuracy"])

# First 20 training and first 5 validation examples only.
features_train, labels_train = (
    clf.datahandler.X_train[:20],
    clf.datahandler.y_train[:20],
)
features_val, labels_val = (
    clf.datahandler.X_val[:5],
    clf.datahandler.y_val[:5],
)

hist = model_vi.fit(
    features_train,
    labels_train,
    batch_size=512,
    epochs=5000,
    validation_data=(features_val, labels_val),
    verbose=1,
)

In [None]:
features_test, labels_test = (
    clf.datahandler.X_test[:10],
    clf.datahandler.y_test[:10],
)

# Call predict 50 times on the same ten test examples and stack the
# results along a new leading axis; mean and spread are taken over it.
predictions = np.array([model_vi.predict(features_test) for _ in range(50)])
np.mean(predictions, axis=0)
np.std(predictions, axis=0)
#labels_test

#### Design loss 

In [None]:
# =============================================================================
# def _neg_log_likelihood_bayesian(y_true, y_pred):
#     labels_distribution = tfp.distributions.OneHotCategorical(logits=y_pred)
#     return -tf.reduce_mean(labels_distribution.log_prob(y_true))
# =============================================================================

# =============================================================================
# def loss_fn(y_pred, y_true):
#     return tf.nn.sigmoid_cross_entropy_with_logits(labels=y_true,logits=y_pred)  
# =============================================================================

# =============================================================================
# def NLL(y_true, y_pred):
#     # Since y_pred is distribution object, we can call log_prob for sample data
#     return -y_pred.log_prob(y_true)
# =============================================================================

# =============================================================================
# def neg_log_likelihood(y_obs, y_pred, sigma=noise):
#     dist = tfp.distributions.Normal(loc=y_pred, scale=sigma)
#     return K.sum(-dist.log_prob(y_obs))
# =============================================================================
## How to model sigma?

### Compile and summarize the model

In [None]:
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.losses as tf_losses
import tensorflow.keras.metrics as tf_metrics

learning_rate = 1e-05
optimizer = Adam(learning_rate = learning_rate) 

# Select loss and metric according to the task of the classifier.
if clf.task == "regression":
    #loss = tf_losses.MeanAbsoluteError()
    #metrics = [tf_metrics.MeanSquaredError(name="mse")]
    loss = tf_losses.MeanSquaredError()
    metrics = [tf_metrics.MeanAbsoluteError(name="mae")]
    
elif clf.task == "classification":
    loss = tf_losses.CategoricalCrossentropy()
    # The metric named "accuracy" has to be an accuracy; it used to be
    # a second cross-entropy, so the reported "accuracy" was a loss value.
    metrics = [tf_metrics.CategoricalAccuracy(name="accuracy")]
    
elif clf.task == "multi_class_detection":
    loss =  tf_losses.BinaryCrossentropy()
    metrics = [tf_metrics.BinaryAccuracy(name="accuracy", threshold=0.7)]

else:
    # Without this branch `loss` and `metrics` would be undefined below.
    raise ValueError(f"Unsupported task: {clf.task!r}.")
    
clf.model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metrics)

# Plot summary and save model plot.
clf.summary()
clf.save_and_print_model_image()

### Show initial weight distributions

In [None]:
# Plot the prior and the posterior weight distributions of the model as
# it is before training. `to_file=True` presumably also writes the
# figures to disk — confirm in clf.plot_weight_distribution.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show initial predictions

In [None]:
# Number of predictions requested per example.
no_of_predictions = 10

print("Train:")
prob_pred_train_initial = clf.predict_probabilistic(
    dataset="train", no_of_predictions=no_of_predictions)

# Show at most the first ten examples and never index past the labels.
for i, pred in enumerate(prob_pred_train_initial[:10]):
    if i >= clf.datahandler.y_train.shape[0]:
        continue
    print(
        f"Ground truth: {np.round(clf.datahandler.y_train[i],3)},",
        f"Mean prediction: {np.mean(pred, axis = 0)} +/- {np.std(pred, axis = 0)}",
    )

print("Test:")
prob_pred_test_initial = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

for i, pred in enumerate(prob_pred_test_initial[:10]):
    if i >= clf.datahandler.y_test.shape[0]:
        continue
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i],3)},",
        f"Mean prediction: {np.mean(pred, axis = 0)} +/- {np.std(pred, axis = 0)}",
    )

### Train

In [None]:
# Training hyperparameters of this run.
epochs, batch_size, validation_freq = 250, 32, 1

hist = clf.train(
    epochs=epochs,
    batch_size=batch_size,
    validation_freq=validation_freq,
    checkpoint=True,
    early_stopping=False,
    #cb_parameters={"es_patience":15,},
    tb_log=True,
    csv_log=True,
    hyperparam_log=True,
    verbose=2,
)

# Optional audio notification in Colab once training has finished.
sound = False
if sound:
    from google.colab import output
    output.eval_js('new Audio("http://soundbible.com/grab.php?id=1795&type=mp3").play()')

### Plot loss

In [None]:
# Build the plotting helper from the logged training history and the
# figure directory of this run.
graph = clfutils.TrainingGraphs(clf.logging.history, clf.logging.fig_dir)
graph.plot_loss(to_file = True)
# An accuracy curve is only plotted for the non-regression tasks.
if clf.task != "regression":
    graph.plot_accuracy(to_file = False)

### Evaluate on test data

In [None]:
# For the non-regression tasks the evaluation result is indexed as
# (loss, accuracy); for regression it is printed as returned.
if clf.task != 'regression':
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))
    print('Test accuracy: ' + str(np.round(test_accuracy, decimals=3)))

else:
    test_loss = clf.evaluate()
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))

###  Predict on train and test data

In [None]:
# Number of predictions requested per example.
no_of_predictions = 10

print("Train:")
prob_pred_train = clf.predict_probabilistic(
    dataset="train", no_of_predictions=no_of_predictions)

# Report the first (at most) ten examples of the training split.
for i in range(min(10, len(prob_pred_train))):
    pred = prob_pred_train[i]
    print(
        f"Ground truth: {np.round(clf.datahandler.y_train[i],3)},",
        f"Mean prediction: {np.mean(pred, axis = 0)} +/- {np.std(pred, axis = 0)}",
    )

print("Test:")
prob_pred_test = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

# Same report for the test split.
for i in range(min(10, len(prob_pred_test))):
    pred = prob_pred_test[i]
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i],3)},",
        f"Mean prediction: {np.mean(pred, axis = 0)} +/- {np.std(pred, axis = 0)}",
    )

### Show some predictions

#### 10 random training samples

In [None]:
# Plot ten randomly chosen training spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)  

#### 10 random test samples

In [None]:
# Plot ten randomly chosen test spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)    

### Show wrong/worst predictions

In [None]:
# Classification runs list the misclassified examples; every other
# task shows the 20 worst predictions instead.
if clf.task != 'classification':
    clf.show_worst_predictions(no_of_spectra = 20)
else:
    clf.show_wrong_classification()

### Show posterior weight distribution after training update

In [None]:
# Same two plots as before training, now for the trained model, so the
# prior and posterior weight distributions can be compared.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show distribution of probabilistic predictions

In [None]:
# Plot the distribution of the probabilistic predictions for ten test
# spectra. `to_file=True` presumably also saves the figure — confirm.
clf.plot_prob_predictions(dataset="test",
                          no_of_spectra=10,
                          to_file=True)

### Save model and results

In [None]:
# Saving the full model is currently disabled; only the results are pickled.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report is identified by "<time>_<experiment name>" of this run.
dir_name = clf.time + '_' + clf.exp_name
rep = clfutils.Report(dir_name)  
rep.write()

## Continue training

### Load custom modules

In [None]:
# Reload the project modules if they are already imported in this
# kernel, otherwise import them for the first time.
try:
    import importlib
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print("\n Modules were reloaded.")
except NameError:
    # Only the "not imported yet" case is handled here. A genuine error
    # raised while reloading (e.g. a syntax error in the module) now
    # propagates instead of being hidden behind a fresh import.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print("Modules were loaded.")

### Reload classifier from previous run

In [None]:
# Directory of the previous run on the mounted Drive; change this path
# to the run that should be continued.
runpath = r"/content/drive/My Drive/deepxps/runs/20220118_14h46m_Ni_2_classes_long_linear_comb_small_gas_phase_multi_class_detection_CNN_bayesian_relu"
# Rebuild the classifier object from the logs stored in that directory.
clf = classifier.restore_clf_from_logs(runpath)

### Load and inspect the data

In [None]:
# Reload the data with the same settings that were logged for the
# restored run.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    sim_values_train, sim_values_val, sim_values_test,
) = clf.load_data_preprocess(
    **{
        key: clf.logging.hyperparams[key]
        for key in (
            'input_filepath',
            'no_of_examples',
            'train_test_split',
            'train_val_split',
        )
    }
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra = 10, dataset = 'train')

#### Other data

##### Only use classification data

In [None]:
# Restrict the data set to the classification examples (classification
# runs only); regression runs keep the data as loaded.
if clf.task == "regression":
    print("Dataset was not changed.")

elif clf.task == "classification":
    (
        X_train, X_val, X_test,
        y_train, y_val, y_test,
        sim_values_train, sim_values_val, sim_values_test,
    ) = clf.datahandler._only_keep_classification_data()

    clf.plot_random(no_of_spectra = 10, dataset = 'train')

##### MNIST

In [None]:
### Loads MNIST dataset.###
import matplotlib.pyplot as plt
clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 4000

print('Loading MNIST dataset')
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Merge the predefined train/test parts, add a trailing channel axis and
# keep the first `clf.datahandler.no_of_examples` examples. The limit is
# read from the datahandler (set above) rather than from a bare global
# `no_of_examples`, which this cell never defines.
X = np.expand_dims(
    np.concatenate((X_train, X_test), axis=0), 
    -1)[:clf.datahandler.no_of_examples]
# Scale pixel values from [0, 255] to [0, 1].
X = X.astype('float32') / 255
y = np.expand_dims(
    np.concatenate((y_train, y_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
clf.datahandler.X, clf.datahandler.y = X, y

# Re-split with the datahandler so the split ratios set above are used.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
 ) = clf.datahandler._split_test_val_train(X, y)

clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 10
clf.datahandler.labels = list(range(clf.datahandler.num_classes))

print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

# Show the first training image as a sanity check.
plt.imshow(X_train[0, :, :, 0], cmap='gist_gray')
plt.show()

##### Shrunken babies

In [None]:
# Loads the shrunken baby image dataset, derives a size label for every
# image and stores images and labels in the datahandler.
from PIL import Image
import glob
import pandas as pd
import matplotlib.pyplot as plt

clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 40
# NOTE(review): this local `num_classes` is not used again in this cell;
# clf.datahandler.num_classes is set to 1 further down.
num_classes = 10

image_paths = glob.glob(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/*.png")

X = [np.array(Image.open(im)) for im in image_paths[:clf.datahandler.no_of_examples]]
X = [np.expand_dims(image, -1) for image in X] # add extra dimension to each image (126,126) --> (126,126,1)
X = np.array(X) # convert list of images to single array [(126,126,1)] --> (836, 126, 126, 1)

from skimage.measure import block_reduce
#X = X[:,5:-21,13:-13,:]
# Downsample every image by averaging 2x2 pixel blocks (the channel
# axis is left untouched).
X_new = []
for image in X:
    reduced_image = block_reduce(image,
                                 block_size=(2, 2, 1),
                                 func=np.mean)
    X_new.append(reduced_image)
X_new = np.array(X_new)

# Normalize data
X = X.astype('float32') / 255
X_new = X_new.astype('float32') / 255


#y_old = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels.csv").to_numpy()[:clf.datahandler.no_of_examples]

#y = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels_new.csv").to_numpy()[:clf.datahandler.no_of_examples]
# Label = extent along axis 0 (largest minus smallest index) of all
# pixels brighter than 0.2 in the full-resolution, normalized image.
# NOTE(review): this indexes X with range(no_of_examples), so it assumes
# at least that many images were found — confirm.
child_positions = [np.where(X[i] > 0.2) for i in range(clf.datahandler.no_of_examples)]
y = np.array([np.max(child_positions[i][0]) - np.min(child_positions[i][0]) for i in range(clf.datahandler.no_of_examples)])
y = np.expand_dims(y, -1)

# The model is trained on the downsampled images, while the labels come
# from the full-resolution ones.
clf.datahandler.X, clf.datahandler.y = X_new, y

(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
 ) = clf.datahandler._split_test_val_train(X_new, y)

# Train-test split
#num_train_val = int((1-test_split)*X.shape[0])
#(X_train_val, X_test) = X[:num_train_val], X[num_train_val:]
#(y_train_val, y_test) = y[:num_train_val], y[num_train_val:]

# Train-val split
#num_train = int((1-val_split)*X_train_val.shape[0])
#(X_train, X_val) = X_train_val[:num_train], X_train_val[num_train:]
#(y_train, y_val) = y_train_val[:num_train], y_train_val[num_train:]

clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 1
clf.datahandler.labels = ["sizes"]

print("X.shape =", clf.datahandler.X.shape)
print("y.shape =", clf.datahandler.y.shape)
print("X_train.shape =", clf.datahandler.X_train.shape)
print("y_train.shape =", clf.datahandler.y_train.shape)
print("X_val.shape =", clf.datahandler.X_val.shape)
print("y_val.shape =", clf.datahandler.y_val.shape)
print("X_test.shape =", clf.datahandler.X_test.shape)
print("y_test.shape =", clf.datahandler.y_test.shape)
print("\n")

# shrunk = []
# for i in range(clf.datahandler.no_of_examples):
#     child_position = np.where(X_new[i] > 0.2)
#     min_height, max_height = np.min(child_position[0]), np.max(child_position[0])
#     shrunk.append(max_height - min_height)
# shrunk = np.expand_dims(np.array(shrunk),-1)

# Visual check for the first five images: original next to downsampled
# image, with the two extreme points along axis 0 marked in red and blue.
for i in range(5):
    r = i#np.random.randint(0,X.shape[0]) 
    fig, ax = plt.subplots(nrows=1, ncols=2)
    
    child_position = np.where(X[r] > 0.2)
    min_height, max_height = np.min(child_position[0]), np.max(child_position[0])
    min_height_hor = child_position[1][np.argmin(child_position[0])]
    max_height_hor = child_position[1][np.argmax(child_position[0])]
    # NOTE(review): the downsampled image uses a threshold of 50/255
    # while the original uses 0.2 — confirm the difference is intended.
    child_position_new = np.where(X_new[r] > 50/255.0)
    min_height_new, max_height_new = np.min(child_position_new[0]), np.max(child_position_new[0])
    min_height_hor_new = child_position_new[1][np.argmin(child_position_new[0])]
    max_height_hor_new = child_position_new[1][np.argmax(child_position_new[0])]
    real_size = int(y[r])
    shrunk_size = max_height_new - min_height_new
    print(f"Child no. {r}, real size: {real_size}, new size: {shrunk_size}, factor: {np.round(real_size/shrunk_size,2)}")

    # `_ =` keeps the return values from being displayed by the notebook.
    _ = ax[0].imshow(np.squeeze(X[r]))
    _ = ax[0].scatter(min_height_hor, min_height,  s=50, c='red', marker='.')
    _ = ax[0].scatter(max_height_hor, max_height,  s=50, c='blue', marker='.')
    _ = ax[1].imshow(np.squeeze(X_new[r]))
    _ = ax[1].scatter(min_height_hor_new, min_height_new,  s=50, c='red', marker='.')
    _ = ax[1].scatter(max_height_hor_new, max_height_new,  s=50, c='blue', marker='.')
    plt.show()

##### Housing prices

In [None]:
# Loads the Boston housing data set and stores it in the datahandler.
clf.datahandler.train_test_split = 0.1
clf.datahandler.train_val_split = 0.1
clf.datahandler.no_of_examples = 100

from keras.datasets import boston_housing
(X_train, y_train), (X_test, y_test) = boston_housing.load_data()

# Merge the predefined train/test parts, add a trailing axis and keep
# the first `clf.datahandler.no_of_examples` examples. The limit is read
# from the datahandler (set above) rather than from a bare global
# `no_of_examples`, which this cell never defines.
X = np.expand_dims(
    np.concatenate((X_train, X_test), axis=0), 
    -1)[:clf.datahandler.no_of_examples]
y = np.expand_dims(
    np.concatenate((y_train, y_test), axis=0),
    -1)[:clf.datahandler.no_of_examples]
clf.datahandler.X, clf.datahandler.y = X, y

# Re-split with the datahandler so the split ratios set above are used.
(
    X_train,
    X_val,
    X_test,
    y_train,
    y_val,
    y_test,
 ) = clf.datahandler._split_test_val_train(X, y)


clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape = (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 1
clf.datahandler.labels = ["prizes"]

print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

### Load the model

In [None]:
### Currently not working: loading the saved model does not restore the
### prior/posterior distributions. Use the following cell instead, which
### rebuilds the model from its class and loads only the weights.
from tensorflow.python.keras import backend as K
clf.load_model(compile_model = True)
### TODO: come back later to check on this ###

In [None]:
### Current alternative ### 
# Use the same model defined above and load the weights independently
# Need to run the cell with the definition of the model class above once
model_class = BayesianCNN # CHANGE HERE

# Same divergence scaling as for the original run: KL divergence
# divided by the number of training examples.
kl_divergence_fn = (
    lambda q, p, _: (tfp.distributions.kl_divergence(q, p)) /
    tf.cast(clf.datahandler.X_train.shape[0], dtype=tf.float32)
    )

clf.model = model_class(inputshape=clf.datahandler.input_shape,
                        num_classes=clf.datahandler.num_classes,
                        kl_divergence_fn=kl_divergence_fn,
                        task=clf.task)

# LOAD WEIGHTS
weights_file = os.path.join(clf.logging.model_dir,
                            "weights.h5")
clf.model.load_weights(weights_file)

# Compile and summarize the model
# Losses and metrics are imported as modules: several classes exist
# under the same name in both (MeanSquaredError, CategoricalCrossentropy),
# so importing the names directly made the metric classes shadow the
# loss classes.
from tensorflow.keras.optimizers import Adam
import tensorflow.keras.losses as tf_losses
import tensorflow.keras.metrics as tf_metrics
    
learning_rate = clf.logging.hyperparams["learning_rate"]
optimizer = Adam(learning_rate = learning_rate) 

if clf.task == "regression":
    # NOTE(review): the first compile cell of this notebook uses MSE as
    # loss and MAE as metric for regression; here it is the other way
    # round. Confirm which one the restored run was trained with.
    loss = tf_losses.MeanAbsoluteError()
    #loss = tf_losses.MeanSquaredError()
    metrics = [tf_metrics.MeanSquaredError(name="mse")]
    
elif clf.task == "classification":
    loss = tf_losses.CategoricalCrossentropy()
    # The metric named "accuracy" has to be an accuracy, not a second
    # cross-entropy.
    metrics = [tf_metrics.CategoricalAccuracy(name="accuracy")]
    
elif clf.task == "multi_class_detection":
    loss = tf_losses.BinaryCrossentropy()
    metrics = [tf_metrics.BinaryAccuracy(name="accuracy", threshold=0.7)]

else:
    # Without this branch `loss` and `metrics` would be undefined below.
    raise ValueError(f"Unsupported task: {clf.task!r}.")
    
clf.model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=metrics)

### Summarize the model

In [None]:
# Show the model summary, then save and print an image of the model.
# Both calls are kept as top-level statements so the notebook displays
# whatever they return.
clf.summary()
clf.save_and_print_model_image()

### Show current weight distributions

In [None]:
# Plot the prior and the posterior weight distributions of the loaded model
# (before any further training); `to_file=True` is passed for both.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show predictions with current model

In [None]:
# Number of stochastic forward passes requested per example.
no_of_predictions = 10

print("Train:")
prob_pred_train_intermediate = clf.predict_probabilistic(
    dataset="train", no_of_predictions=no_of_predictions
)

# Compare the first ten training examples with the mean and spread of their
# repeated predictions.
for i, pred in enumerate(prob_pred_train_intermediate[:10]):
    ground_truth = np.round(clf.datahandler.y_train[i], 3)
    pred_mean = np.mean(pred, axis=0)
    pred_std = np.std(pred, axis=0)
    print(f"Ground truth: {ground_truth},",
          f"Mean prediction: {pred_mean} +/- {pred_std}")

print("Test:")
prob_pred_test_intermediate = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions
)

# Same comparison for the first ten test examples.
for i, pred in enumerate(prob_pred_test_intermediate[:10]):
    ground_truth = np.round(clf.datahandler.y_test[i], 3)
    pred_mean = np.mean(pred, axis=0)
    pred_std = np.std(pred, axis=0)
    print(f"Ground truth: {ground_truth},",
          f"Mean prediction: {pred_mean} +/- {pred_std}")

### Train

In [None]:
epochs = 1000

# Options for this training run. To continue with a different learning rate,
# add e.g. `new_learning_rate=1e-05` to the options.
train_options = dict(
    checkpoint=True,
    early_stopping=False,
    tb_log=True,
    csv_log=True,
    hyperparam_log=True,
    epochs=epochs,
    batch_size=clf.logging.hyperparams['batch_size'],
    verbose=2,
)

hist = clf.train(**train_options)

### Plot loss

In [None]:
# Build the plotting helper from the logged training history and the figure
# directory, then plot the loss curve.
graph = clfutils.TrainingGraphs(clf.logging.history, clf.logging.fig_dir)
graph.plot_loss(to_file = True)
# Regression runs additionally plot the MSE; all other tasks plot accuracy.
if clf.task == "regression":
    graph.plot_mse(to_file = True)
else:
    # NOTE(review): this is the only plot called with to_file=False -
    # confirm that not writing the accuracy plot to file is intended.
    graph.plot_accuracy(to_file = False)

### Evaluate on test data

In [None]:
# Evaluate once; how the result is unpacked depends on the task.
score = clf.evaluate()

if clf.task != "regression":
    # Non-regression tasks: first entry is the loss, second the accuracy.
    test_loss, test_accuracy = score[0], score[1]
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))
    print('Test accuracy: ' + str(np.round(test_accuracy, decimals=3)))

else:
    # Regression: the evaluation result is reported as the loss.
    test_loss = score
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))

### Predict on train and test data

In [None]:
# Number of stochastic forward passes requested per example.
no_of_predictions = 100

print("Train:")
prob_pred_train_intermediate = clf.predict_probabilistic(
    dataset="train", no_of_predictions=no_of_predictions
)

# Compare the first ten training examples with the mean and spread of their
# repeated predictions.
for i, pred in enumerate(prob_pred_train_intermediate[:10]):
    ground_truth = np.round(clf.datahandler.y_train[i], 3)
    pred_mean = np.mean(pred, axis=0)
    pred_std = np.std(pred, axis=0)
    print(f"Ground truth: {ground_truth},",
          f"Mean prediction: {pred_mean} +/- {pred_std}")

print("Test:")
prob_pred_test_intermediate = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions
)

# Same comparison for the first ten test examples.
for i, pred in enumerate(prob_pred_test_intermediate[:10]):
    ground_truth = np.round(clf.datahandler.y_test[i], 3)
    pred_mean = np.mean(pred, axis=0)
    pred_std = np.std(pred, axis=0)
    print(f"Ground truth: {ground_truth},",
          f"Mean prediction: {pred_mean} +/- {pred_std}")

### Show some predictions

#### 10 random training samples

In [None]:
# Plot 10 randomly chosen training spectra with predictions enabled.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)

#### 10 random test samples

In [None]:
# Plot 10 randomly chosen test spectra with predictions enabled.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)

### Show wrong/worst predictions

In [None]:
# Regression runs list the 20 worst predictions; classification runs list
# the wrongly classified examples. Other tasks show nothing here.
if clf.task == 'regression':
    clf.show_worst_predictions(no_of_spectra=20)
elif clf.task == 'classification':
    clf.show_wrong_classification()

### Show posterior weight distribution after training update

In [None]:
# Plot the posterior weight distribution again, now after the training
# update, with to_file=True.
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show distribution of probabilistic predictions

In [None]:
# Plot the distribution of the probabilistic predictions for 20 test
# spectra, with to_file=True.
clf.plot_prob_predictions(dataset="test",
                          no_of_spectra=20,
                          to_file=True)

### Plot updates to weight and bias uncertainties

In [None]:
def _layers_of_type(model, class_name_fragment):
    """Return the layers of `model` whose class string contains the fragment."""
    return [
        layer
        for layer in model.layers
        if class_name_fragment in str(layer.__class__)
    ]


# Arguments shared by both reference models.
shared_model_args = dict(
    inputshape=clf.datahandler.input_shape,
    num_classes=clf.datahandler.num_classes,
    task=clf.task,
)

# Freshly initialised Bayesian model: reference for the untrained posteriors.
init_model = BayesianCNN(kl_divergence_fn=kl_divergence_fn, **shared_model_args)

# Non-Bayesian CNN whose weights are read from "weights.h5" in the current
# working directory.
trained_model = CNN(**shared_model_args)
weights_file = os.path.join(os.getcwd(), "weights.h5")
trained_model.load_weights(weights_file)

# Convolutional layers of the three models.
init_bayesian_conv_layers = _layers_of_type(init_model, "Conv1DFlipout")
trained_conv_layers = _layers_of_type(trained_model, "Conv1D")
trained_bayesian_conv_layers = _layers_of_type(clf.model, "Conv1DFlipout")

# Dense layers of the three models.
init_bayesian_dense_layers = _layers_of_type(init_model, "Dense")
trained_dense_layers = _layers_of_type(trained_model, "Dense")
trained_bayesian_dense_layers = _layers_of_type(clf.model, "Dense")

In [None]:
## Plot weight uncertainties and updates
#
# fig1: per layer and filter, histograms of samples drawn from a normal
#       distribution built from the filter-averaged posterior mean/stddev,
#       for the freshly initialised (blue) and the trained (red) Bayesian
#       model.
# fig2: per layer and filter, the shift of the posterior kernel means from
#       the initial (y=0) to the trained Bayesian model (y=1), plus the
#       averaged weights of the non-Bayesian trained model (y=2).

import itertools
import seaborn as sns
import matplotlib.colors as mcolors
import matplotlib.pyplot as plt

# Collect the colours of several qualitative colormaps into one list.
# `plt.get_cmap` is used because `matplotlib.cm.get_cmap` is no longer
# available in recent Matplotlib releases.
names = "Set1", "Set2", "Accent", "Dark2"
colors = []
for name in names:
    cmap = plt.get_cmap(name)  # type: matplotlib.colors.ListedColormap
    colors.extend(cmap.colors)  # type: list

nrows = len(init_bayesian_conv_layers)
ncols=12

# squeeze=False keeps the axes arrays two-dimensional even for a single
# layer, so the [l, f] indexing below always works.
fig1, axs1 = plt.subplots(nrows, ncols, figsize=(40,20), squeeze=False)
fig2, axs2 = plt.subplots(nrows, ncols, figsize=(40,10), squeeze=False)

for l, (layer_init, layer_trained, bayesian_layer_trained) in enumerate(zip(init_bayesian_conv_layers, trained_conv_layers, trained_bayesian_conv_layers)):
    print(f"\n {layer_init.name}")
    # Reverse the axis order so that iterating over the first axis yields one
    # slice per filter.
    # assumes kernels are stored as (kernel_size, in_channels, filters) - TODO confirm
    posterior_mean_init = layer_init.kernel_posterior.mean().numpy().transpose(2,1,0)
    posterior_stddev_init = layer_init.kernel_posterior.stddev().numpy().transpose(2,1,0)

    posterior_mean_trained = bayesian_layer_trained.kernel_posterior.mean().numpy().transpose(2,1,0)
    posterior_stddev_trained = bayesian_layer_trained.kernel_posterior.stddev().numpy().transpose(2,1,0)

    kernel_trained = layer_trained.get_weights()[0].transpose(2,1,0)

    for f, (posterior_filter_mean_init,
            posterior_filter_stddev_init,
            posterior_filter_mean_trained,
            posterior_filter_stddev_trained,
            ) in enumerate(zip(posterior_mean_init, posterior_stddev_init, posterior_mean_trained, posterior_stddev_trained)):
        print(f"Filter {f}")
        # Average mean and stddev over the whole filter to get one
        # representative normal distribution per filter.
        mean_posterior_kernel_mean_init = float(np.mean(posterior_filter_mean_init))
        mean_posterior_kernel_stddev_init = float(np.mean(posterior_filter_stddev_init))

        mean_posterior_kernel_mean_trained = float(np.mean(posterior_filter_mean_trained))
        mean_posterior_kernel_stddev_trained = float(np.mean(posterior_filter_stddev_trained))

        weight_dist_init = tfp.distributions.Normal(loc=mean_posterior_kernel_mean_init, scale=mean_posterior_kernel_stddev_init)
        weight_dist_trained = tfp.distributions.Normal(loc=mean_posterior_kernel_mean_trained, scale=mean_posterior_kernel_stddev_trained)

        # Restart the colour cycle for every filter.
        color_iter = itertools.cycle(colors)

        _ = sns.histplot(weight_dist_init.sample(10000), ax=axs1[l,f], bins=50, color="b", kde=True)
        _ = sns.histplot(weight_dist_trained.sample(10000), ax=axs1[l,f], bins=50, color="r", kde=True)

        _ = axs1[l,f].set_xlim([-3,3])
        _ = axs1[l,f].set_title(f"{layer_init.name}, f{f}", fontdict={'fontsize': 10})
        _ = axs1[l,f].legend(["init", "trained"])

        for k, (kernel_mean_init, kernel_mean_trained) in enumerate(zip(posterior_filter_mean_init, posterior_filter_mean_trained)):
            c = next(color_iter)
            trained_weight = np.mean(kernel_trained[f], axis=1)[k]
            _ = axs2[l, f].plot(np.array([kernel_mean_init, kernel_mean_trained]), np.array([0,1]), color=c)
            _ = axs2[l, f].scatter(trained_weight, np.array(2), color=c)

        # Axis decoration does not depend on k, so it is set once per filter.
        _ = axs2[l, f].set_title(f"{layer_init.name}, f{f}", fontdict={'fontsize': 10})
        _ = axs2[l, f].set_yticks([0, 1, 2])
        _ = axs2[l, f].set_yticklabels(["init", "trained_bayesian","trained"])

        print(f"Init posterior:  {mean_posterior_kernel_mean_init} ± {mean_posterior_kernel_stddev_init}, Trained posterior:  {mean_posterior_kernel_mean_trained} ± {mean_posterior_kernel_stddev_trained}")

print("Fig. 1: Samples from posterior kernel distributions (averaged across filter)")
print("Fig. 2: Shift of posterior kernel means after training.")

fig1.tight_layout()
fig2.tight_layout()
plt.show()

In [None]:
## Plot bias uncertainties and updates
#
# fig3: per layer and filter, histograms of samples drawn from the bias
#       posterior of the freshly initialised (blue) and the trained (red)
#       Bayesian model.
# fig4: per layer, the shift of every posterior bias mean from the initial
#       (y=0) to the trained Bayesian model (y=1), plus the bias of the
#       non-Bayesian trained model (y=2).

import matplotlib.colors as mcolors
import matplotlib.pyplot as plt
# Imported here as well so this cell does not depend on the weight-plot cell.
import seaborn as sns

# `plt.get_cmap` is used because `matplotlib.cm.get_cmap` is no longer
# available in recent Matplotlib releases.
names = "Set1", "Set2"
colors = []
for name in names:
    cmap = plt.get_cmap(name)  # type: matplotlib.colors.ListedColormap
    colors.extend(cmap.colors)  # type: list

no_of_layers = len(init_bayesian_conv_layers)

# squeeze=False keeps the axes arrays two-dimensional even for a single
# layer; fig4 has one row, so its first row is used as a 1D array.
fig3, axs3 = plt.subplots(nrows=no_of_layers, ncols=12, figsize=(20,10), squeeze=False)
fig4, axs4 = plt.subplots(nrows=1, ncols=no_of_layers, figsize=(20,5), squeeze=False)
axs4 = axs4[0]

for l, (layer_init, layer_trained, bayesian_layer_trained) in enumerate(zip(init_bayesian_conv_layers, trained_conv_layers, trained_bayesian_conv_layers)):
    print(f"\n {layer_init.name}")

    # Second entry of get_weights() is used as the bias vector of the
    # non-Bayesian layer.
    biases_trained = layer_trained.get_weights()[1]

    posterior_mean_init = layer_init.bias_posterior.mean().numpy()
    posterior_stddev_init = layer_init.bias_posterior.stddev().numpy()

    posterior_mean_trained = bayesian_layer_trained.bias_posterior.mean().numpy()
    posterior_stddev_trained = bayesian_layer_trained.bias_posterior.stddev().numpy()

    legend = [f"Filter {i}" for i in range(posterior_mean_init.shape[0])]
    # One colour per filter, restarting for every layer.
    color_iter = iter(colors)

    for b, (bias_posterior_mean_init, bias_posterior_stddev_init, bias_posterior_mean_trained,  bias_posterior_stddev_trained) in enumerate(zip(posterior_mean_init, posterior_stddev_init, posterior_mean_trained, posterior_stddev_trained)):
        print(f" Filter {b}")
        bias_dist_init = tfp.distributions.Normal(loc=bias_posterior_mean_init, scale=bias_posterior_stddev_init)
        bias_dist_trained = tfp.distributions.Normal(loc=bias_posterior_mean_trained, scale=bias_posterior_stddev_trained)

        _ = sns.histplot(bias_dist_init.sample(10000), ax=axs3[l,b], bins=50, color="b") #kde=True)
        _ = sns.histplot(bias_dist_trained.sample(10000), ax=axs3[l,b], bins=50, color="r") #kde=True)
        _ = axs3[l, b].legend(["init", "trained"])
        _ = axs3[l, b].set_title(f"{layer_init.name}, f{b}", fontdict={'fontsize': 10})

        c = next(color_iter)
        _ = axs4[l].plot(np.array([bias_posterior_mean_init, bias_posterior_mean_trained]), np.array([0,1]), color=c)
        _ = axs4[l].scatter(np.array(biases_trained[b]), np.array(2), color=c)

        print(f"Init posterior:  {bias_posterior_mean_init} ± {bias_posterior_stddev_init}, Trained posterior:  {bias_posterior_mean_trained} ± {bias_posterior_stddev_trained}")

    # Decorate this layer's axis in fig4 once all filters are drawn. The
    # ticks belong to axs4[l], not to an axis of the weight figure.
    _ = axs4[l].set_yticks([0, 1, 2])
    _ = axs4[l].set_yticklabels(["init", "trained_bayesian","trained"])
    _ = axs4[l].set_title(f"Layer {l}")
    _ = axs4[l].legend(legend)

print("Fig. 1: Samples from posterior bias distribution")
print("Fig. 2: Shift of posterior bias means after training.")
fig3.tight_layout()
fig4.tight_layout()
plt.show()

### Save model and data

In [None]:
# Saving the complete model is currently disabled; only `pickle_results`
# is called.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report directory name is "<time>_<experiment name>".
dir_name = "_".join((clf.time, clf.exp_name))
rep = clfutils.Report(dir_name)
rep.write()

## Prepare website upload

In [None]:
from xpsdeeplearning.network.prepare_upload import Uploader

# The metadata path is the logged input file path with everything after its
# last "." replaced by "_metadata.json".
upload_input_filepath = clf.logging.hyperparams["input_filepath"]
dataset_path = upload_input_filepath.rsplit(".", 1)[0] + "_metadata.json"

uploader = Uploader(clf.logging.root_dir, dataset_path)
uploader.prepare_upload_params()
uploader.save_upload_params()

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """Display a Javascript snippet that calls IPython.notebook.save_notebook()."""
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """Export a notebook file to HTML.

    Parameters
    ----------
    read_file : str
        Path of the notebook ('.ipynb') to read.
    output_file : str
        Path of the HTML ('.html') file to write, encoded as UTF-8.
    """
    import codecs
    import nbformat
    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    output, resources = exporter.from_notebook_node(output_notebook)
    # The context manager closes (and thereby flushes) the file even if
    # writing fails; previously the handle was never closed explicitly.
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)

import time
import os

# Notebook to export and the HTML file written to the run's log directory.
current_file = '/content/drive/My Drive/deepxps/xpsdeeplearning/notebooks/train_prob.ipynb'
output_file = os.path.join(clf.logging.log_dir, 'train_prob_out.html')

# Wait, trigger the save, then wait again before exporting.
# NOTE(review): the pauses presumably give the front end time to finish
# rendering and saving - confirm the durations are still needed.
time.sleep(20)
save_notebook()
print('Notebook saved!')
time.sleep(30)

output_HTML(current_file, output_file)
print('HTML file saved!')