# Train a deep CNN on XPS data on Google Colab

In this notebook, we will train a deep convolutional network on iron XPS spectra made up of linear combinations of single iron reference spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
# `drive` provides the Colab Google Drive mount, `os` the directory switch.
import os
from google.colab import drive

# Attach Google Drive to the Colab file system.
drive.mount("/content/drive")

# Continue from the project folder on the mounted drive.
os.chdir("/content/drive/My Drive/deepxps")

### Install packages and import modules

In [None]:
%%capture
# Install packages
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib

# Set random seed for reproducible loading
np.random.seed(502)

# Magic commands
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'

import tensorflow as tf
import tensorflow_probability as tfp
tfd = tfp.distributions
tf.keras.backend.clear_session()

### Check TensorFlow and TensorFlow Probability versions

In [None]:
# Two bare expressions: the notebook is configured to display each of them.
"TF version: {}.".format(tf.__version__)
"TFP version: {}.".format(tfp.__version__)

### Check hardware

In [None]:
from tensorflow.python.profiler import profiler_client

# Query the GPU device name once and reuse it for the check and the message.
gpu_name = tf.test.gpu_device_name()
if gpu_name:
    print("Found GPU: {}".format(gpu_name))
else:
    print("Found no GPU.")

try:
    # A missing COLAB_TPU_ADDR variable raises KeyError and is reported as
    # "no TPU"; the profiler service port is derived from the TPU address.
    tpu_profile_service_address = os.environ['COLAB_TPU_ADDR'].replace('8470', '8466')
    print("Found TPU: {}".format(profiler_client.monitor(tpu_profile_service_address, 100, 2)))
except Exception:
    # Best-effort probe: any failure means no usable TPU, but unlike a bare
    # `except:` this does not swallow KeyboardInterrupt or SystemExit.
    print("Found no TPU.")

## Initial training

### Load custom modules

In [None]:
try:
    # Works only when both modules are already bound in this kernel.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print("Modules were reloaded.")
except NameError:
    # First execution in a fresh kernel: the names do not exist yet.
    # Only NameError is caught so that genuine errors raised while
    # reloading (e.g. a syntax error in the module) are not hidden.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print("Modules were loaded.")

### Set up the parameters & folder structure



In [None]:
# Current time in the Europe/Berlin zone, formatted like "20210528_14h30m";
# it is handed to the Classifier together with the experiment name.
berlin_tz = pytz.timezone('Europe/Berlin')
time = datetime.datetime.now().astimezone(berlin_tz).strftime("%Y%m%d_%Hh%Mm")

exp_name = "Ni_2_classes_long_linear_comb_small_gas_phase_regression_CNN_bayesian"
#exp_name = "MNIST_bayesian_classification"
#exp_name = "shrunken_babys_bayesian_regression"

# NOTE(review): exp_name mentions "regression" while task is
# "classification" -- confirm which one is intended.
clf = classifier.Classifier(
    time=time,
    exp_name=exp_name,
    task="classification",
    intensity_only=True,
)

### If labels not saved with data ###
# =============================================================================
# labels = ['Fe metal', 'FeO', 'Fe3O4', 'Fe2O3']
# clf = classifier.Classifier(time=time,
#                            exp_name=exp_name,
#                            task='regression',
#                            intensity_only=True,
#                            labels=labels)
# =============================================================================

### Load and inspect the data

In [None]:
# HDF5 dataset on the mounted drive.
input_filepath = r'/content/drive/My Drive/deepxps/datasets/20210528_Ni_linear_combination_small_gas_phase.h5'

# Loader settings: split fractions and number of examples to load.
train_test_split = 0.2
train_val_split = 0.2
no_of_examples = 1000#20#00000

# The loader returns the three data splits, their labels and the
# augmentation values of each split, in this order.
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 aug_values_train, aug_values_val, aug_values_test) = clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='train')

In [None]:
### Only use classification data
if clf.task == "classification":
    # Row indices of all examples whose label array contains a 0.0 entry.
    # NOTE(review): presumably this selects the single-species spectra;
    # a row with several zeros is returned several times -- confirm intent.
    indices_train = np.where(clf.datahandler.y_train == 0.0)[0]
    indices_val = np.where(clf.datahandler.y_val == 0.0)[0]
    indices_test = np.where(clf.datahandler.y_test == 0.0)[0]

    X_train, y_train = clf.datahandler.X_train[indices_train], clf.datahandler.y_train[indices_train]
    X_val, y_val = clf.datahandler.X_val[indices_val], clf.datahandler.y_val[indices_val]
    X_test, y_test = clf.datahandler.X_test[indices_test], clf.datahandler.y_test[indices_test]

    clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
    clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
    clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test

    #clf.datahandler.y_train[0:5]
    #clf.datahandler.y_val[0:5]
    #clf.datahandler.y_test[0:5]

    # Upper limits for the number of examples kept per split.
    num_train = 250
    num_val = 50
    num_test = 50

    # Truncate each split from its OWN filtered data. Taking the validation
    # and test examples from the training arrays would make them duplicates
    # of training examples and invalidate any evaluation on them.
    clf.datahandler.X_train, clf.datahandler.y_train = clf.datahandler.X_train[0:num_train], clf.datahandler.y_train[0:num_train]
    clf.datahandler.X_val, clf.datahandler.y_val = clf.datahandler.X_val[0:num_val], clf.datahandler.y_val[0:num_val]
    clf.datahandler.X_test, clf.datahandler.y_test = clf.datahandler.X_test[0:num_test], clf.datahandler.y_test[0:num_test]
    clf.plot_random(no_of_spectra = 10, dataset = 'train')

    print(f"Remaining no. of training examples: {clf.datahandler.y_train.shape[0]}")
    print(f"Remaining no. of val examples: {clf.datahandler.y_val.shape[0]}")
    print(f"Remaining no. of test examples: {clf.datahandler.y_test.shape[0]}")

elif clf.task == "regression":
    print("Dataset was not changed.")

In [None]:
### Loads MNIST dataset.###
import matplotlib.pyplot as plt

num_classes = 10
batch_size = 128
val_split = 0.2

print('Loading MNIST dataset')
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()

# Keep the first 3000 training / 500 test images, append a channel axis,
# convert to float32 and divide by 255.
X_train = np.expand_dims(X_train[:3000], -1).astype('float32') / 255
X_test = np.expand_dims(X_test[:500], -1).astype('float32') / 255

# One-hot encode the labels of the same subsets.
y_train = tf.keras.utils.to_categorical(y_train[:3000], num_classes)
y_test = tf.keras.utils.to_categorical(y_test[:500], num_classes)

# Train-val split: the leading (1 - val_split) fraction is used for training.
num_train = int((1-val_split)*X_train.shape[0])
X_train, X_val = X_train[:num_train], X_train[num_train:]
y_train, y_val = y_train[:num_train], y_train[num_train:]

# Replace the data held by the classifier's data handler with MNIST.
handler = clf.datahandler
handler.X_train, handler.y_train = X_train, y_train
handler.X_val, handler.y_val = X_val, y_val
handler.X_test, handler.y_test = X_test, y_test
handler.input_shape = (handler.X_train.shape[1:])
handler.num_classes = num_classes
handler.labels = list(range(num_classes))

print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

# Show the first training image (single channel) in grayscale.
plt.imshow(X_train[0, :, :, 0], cmap='gist_gray')
plt.show()

In [None]:
## Loads shrunken baby dataset
from PIL import Image
import glob
import pandas as pd
import matplotlib.pyplot as plt
from skimage.measure import block_reduce

val_split = 0.1
test_split = 0.1
no_of_examples = 100#0

image_paths = glob.glob(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/*.png")
if not image_paths:
    raise FileNotFoundError("No .png files found in the shrunken baby dataset folder.")
# NOTE(review): glob returns the paths in arbitrary order while the labels
# below are matched by row index -- confirm that images and labels line up.

# Open only the images that are actually used and close each file again.
X = []
for path in image_paths[:no_of_examples]:
    with Image.open(path) as im:
        X.append(np.array(im))
X = [np.expand_dims(image, -1) for image in X] # add extra dimension to each image (126,126) --> (126,126,1)
X = np.array(X) # convert list of images to single array [(126,126,1)] --> (N, 126, 126, 1)

# Crop the borders, then downsample each image by averaging 3x3 blocks.
X = X[:,5:-21,13:-13,:]
new_X = []
for image in X:
    reduced_image = block_reduce(image,
                                 block_size=(3, 3, 1),
                                 func=np.mean)
    new_X.append(reduced_image)
X = np.array(new_X)

# Scale every image by its own maximum. keepdims keeps the result
# broadcastable against X; reducing over axis=1 only yields an array of
# shape (N, W, 1), which does not broadcast against (N, H, W, 1).
max_per_image = np.max(X, axis=(1, 2, 3), keepdims=True)
max_per_image[max_per_image == 0] = 1  # leave all-zero images unchanged
X = X / max_per_image

y = pd.read_csv(r"/content/drive/My Drive/deepxps/datasets/shrunken_baby_ds/shrunken_baby_labels.csv").to_numpy()[:no_of_examples]

# Normalize data
#X = X.astype('float32') / 255
#y = y.astype('float32')/np.max(y)

# Train-test split
num_train_val = int((1-test_split)*X.shape[0])
(X_train_val, X_test) = X[:num_train_val], X[num_train_val:]
(y_train_val, y_test) = y[:num_train_val], y[num_train_val:]

# Train-val split
num_train = int((1-val_split)*X_train_val.shape[0])
(X_train, X_val) = X_train_val[:num_train], X_train_val[num_train:]
(y_train, y_val) = y_train_val[:num_train], y_train_val[num_train:]

# Replace the data held by the classifier's data handler.
clf.datahandler.X, clf.datahandler.y = X, y
clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test
clf.datahandler.input_shape =  (clf.datahandler.X_train.shape[1:])
clf.datahandler.num_classes = 1
clf.datahandler.labels = ["sizes"]

print("No. of examples: ", X.shape)
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)
print("X_val.shape =", X_val.shape)
print("y_val.shape =", y_val.shape)
print("X_test.shape =", X_test.shape)
print("y_test.shape =", y_test.shape)

# Show ten randomly chosen images as a visual check.
for i in range(10):
    r = np.random.randint(0,X.shape[0])
    plt.imshow(np.squeeze(X[r]))
    plt.show()

### Design the model

In [None]:
try:
    # Works only when `models` is already bound in this kernel.
    importlib.reload(models)
    print("Models module was reloaded.")
except NameError:
    # First execution in a fresh kernel. Only NameError is caught so that
    # real errors raised while reloading the module are not hidden.
    import xpsdeeplearning.network.models as models
    print("Models module was loaded.")

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K
from tensorflow_probability.python.layers import util as tfp_layers_util

class BayesianCNN(models.EmptyModel):
    """
    Bayesian CNN built from TFP Flipout layers.

    The input is convolved in parallel with three kernel sizes
    (5, 10 and 15). The concatenated feature maps pass through two
    further convolutions, average pooling, dropout and two
    DenseFlipout layers.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D ones.
    num_classes : int
        Number of units of the output layer.
    kl_divergence_fn : callable
        Passed as ``kernel_divergence_fn`` to every Flipout layer.
    task : str
        "regression" (no output activation for a single output,
        sigmoid otherwise) or "classification" (softmax output).

    Raises
    ------
    ValueError
        If ``inputshape`` has an unsupported length or ``task`` is
        neither "regression" nor "classification".
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        kl_divergence_fn,
        task,
        ):
        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            strides = 1
            average_pool_layer = layers.AveragePooling1D

        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            strides = (1,1)
            average_pool_layer =  layers.AveragePooling2D

        else:
            # Fail early with a clear message instead of an
            # UnboundLocalError further down.
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        if task == "regression":
            if num_classes == 1:
                output_act = None
            else:
                output_act = "sigmoid"
        elif task == "classification":
            output_act = "softmax"
        else:
            raise ValueError(
                "task must be 'regression' or 'classification', "
                f"got {task!r}.")

        self.input_1 = layers.Input(shape = inputshape)
                
        # Three parallel branches looking at the input on different scales.
        self.conv_1_short = conv_layer(
            filters=12,
            kernel_size=5,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_short')(self.input_1)
        self.conv_1_medium = conv_layer(
            filters=12,
            kernel_size=10,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_medium')(self.input_1)
        self.conv_1_long = conv_layer(
            filters=12,
            kernel_size=15,
            strides=strides,
            padding='same',
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_long')(self.input_1)
        
        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_2')(merged_sublayers)
        self.conv_3 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name="conv_3")(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)
        
        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        self.drop_1 = layers.Dropout(rate=0.2,
                                     name='drop_1')(self.flatten_1)
        # The dense layer consumes the dropout output; feeding it
        # flatten_1 directly would leave drop_1 outside the model graph.
        self.dense_1 = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='dense_1')(self.drop_1)
                           
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_fn,
            activation=output_act,
            name='dense_2')(self.dense_1)

        # Number of parallel first-stage branches, forwarded to the base class.
        no_of_inputs = len(sublayers)

        super(BayesianCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianCNN')

#### Not used

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

tf.keras.backend.clear_session()
class ProbabilityCNNTest2D(models.EmptyModel):
    """
    Small 2D Bayesian test network.

    Two Flipout convolutions, each followed by 4x4 max pooling, feed a
    single DenseFlipout layer with ``num_classes`` units. The output
    layer is created without an explicit activation argument.
    """
    def __init__(self, inputshape, num_classes, kl_divergence_function):   
        def flipout_conv(name, filters, kernel_size):
            # The two convolutions differ only in name, width and kernel size.
            return tfp.layers.Convolution2DFlipout(
                filters=filters,
                kernel_size=kernel_size,
                strides=(1, 1),
                padding="same",
                activation="relu",
                name=name,
                kernel_divergence_fn=kl_divergence_function)

        def max_pool():
            # 4x4 pooling window moved in steps of 4.
            return layers.MaxPool2D(
                pool_size=(4, 4),
                strides=(4, 4),
                padding="same")

        self.input_1 = tf.keras.Input(shape=inputshape)
        self.conv_1 = flipout_conv("conv_1", 16, 5)(self.input_1)
        self.mp_1 = max_pool()(self.conv_1)
        self.conv_2 = flipout_conv("conv_2", 32, 3)(self.mp_1)
        self.mp_2 = max_pool()(self.conv_2)
        self.flatten = layers.Flatten()(self.mp_2)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_function)(self.flatten)

        # This network has a single input branch.
        super(ProbabilityCNNTest2D, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_1,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=1,
            name='ProbabilityCNNTest2D')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class ProbabilisticClassificationCNN(models.EmptyModel):
    """
    1D Flipout CNN for classification.

    The input is convolved in parallel with three kernel sizes
    (5, 10 and 15). The concatenated feature maps pass through two
    'valid' convolutions, average pooling, dropout and two DenseFlipout
    layers; the output layer uses a softmax activation.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis.
    num_classes : int
        Number of units of the softmax output layer.
    kl_divergence_function : callable
        Passed as ``kernel_divergence_fn`` and ``bias_divergence_fn``
        to every Flipout layer of the model.
    """

    def __init__(self,
                 inputshape, 
                 num_classes,
                 kl_divergence_function):

        self.input_1 = layers.Input(shape=inputshape)

        # Three parallel branches looking at the input on different scales.
        self.conv_1_short = tfp.layers.Convolution1DFlipout(
            filters=4,
            kernel_size=5,
            strides=1,
            padding="same",
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function,
            activation="relu",
            name="conv_1_short",
        )(self.input_1)
        self.conv_1_medium = tfp.layers.Convolution1DFlipout(
            filters=4,
            kernel_size=10,
            strides=1,
            padding="same",
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function,
            activation="relu",
            name="conv_1_medium",
        )(self.input_1)
        self.conv_1_long = tfp.layers.Convolution1DFlipout(
            filters=4,
            kernel_size=15,
            strides=1,
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function,
            padding="same",
            activation="relu",
            name="conv_1_long",
        )(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = tfp.layers.Convolution1DFlipout(
            filters=4,
            kernel_size=10,
            strides=1,
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function,
            padding="valid",
            activation="relu",
            name="conv_2",
        )(merged_sublayers)
        # conv_3 receives the same divergence functions as every other
        # Flipout layer; without them it would fall back to the library
        # default and contribute a differently scaled KL term to the loss.
        self.conv_3 = tfp.layers.Convolution1DFlipout(
            filters=10,
            kernel_size=10,
            strides=1,
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function,
            padding="valid",
            activation="relu",
            name="conv_3",
        )(self.conv_2)
        self.average_pool_1 = layers.AveragePooling1D(name="average_pool_1")(
            self.conv_3
        )
        self.flatten_1 = layers.Flatten(name="flatten1")(self.average_pool_1)
        self.drop_1 = layers.Dropout(rate=0.2, name="drop_1")(self.flatten_1)
        self.dense_1 = tfp.layers.DenseFlipout(
            units=1000,
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function, 
            activation="relu",
            name="dense_1"
        )(self.drop_1)
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function, 
            activation="softmax", 
            name="dense_2"
        )(self.dense_1)
        #self.dist_outputs = tfp.layers.OneHotCategorical(
        #    event_size=num_classes,
        #    convert_to_tensor_fn=tfp.distributions.Distribution.sample,
        #    )(self.dense_2)
        
        # Number of parallel first-stage branches, forwarded to the base class.
        no_of_inputs = len(sublayers)

        super(ProbabilisticClassificationCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name="ProbabilisticClassificationCNN",
        )

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class ProbabilisticClassificationCNN2D(models.EmptyModel):
    """
    2D Flipout CNN for classification (e.g. MNIST).

    Three parallel convolutions with kernel sizes 5, 10 and 15 are
    concatenated and followed by one 'valid' convolution, average
    pooling, dropout and two DenseFlipout layers. The output layer
    uses a softmax activation.
    """

    def __init__(self,
                 inputshape, 
                 num_classes,
                 kl_divergence_function):
        # Divergence arguments shared by every Flipout layer of the model.
        divergence_kwargs = dict(
            kernel_divergence_fn=kl_divergence_function,
            bias_divergence_fn=kl_divergence_function)

        def flipout_conv(name, kernel_size, padding="same", **extra):
            # All convolutions use 4 filters and a relu activation.
            return tfp.layers.Convolution2DFlipout(
                filters=4,
                kernel_size=kernel_size,
                padding=padding,
                activation="relu",
                name=name,
                **divergence_kwargs,
                **extra)

        def flipout_dense(name, units, activation):
            return tfp.layers.DenseFlipout(
                units=units,
                activation=activation,
                name=name,
                **divergence_kwargs)

        self.input_1 = layers.Input(shape=inputshape)

        # Only the short branch passes an explicit stride.
        self.conv_1_short = flipout_conv(
            "conv_1_short", 5, strides=1)(self.input_1)
        self.conv_1_medium = flipout_conv("conv_1_medium", 10)(self.input_1)
        self.conv_1_long = flipout_conv("conv_1_long", 15)(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = flipout_conv(
            "conv_2", 5, padding="valid")(merged_sublayers)

        pooling = layers.AveragePooling2D(name="average_pool_1")
        self.average_pool_1 = pooling(self.conv_2)

        self.flatten_1 = layers.Flatten(name="flatten1")(self.average_pool_1)
        self.drop_1 = layers.Dropout(rate=0.2, name="drop_1")(self.flatten_1)
        self.dense_1 = flipout_dense("dense_1", 1000, "relu")(self.drop_1)
        self.dense_2 = flipout_dense(
            "dense_2", num_classes, "softmax")(self.dense_1)

        # Number of parallel first-stage branches, forwarded to the base class.
        super(ProbabilisticClassificationCNN2D, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=len(sublayers),
            name="ProbabilisticClassificationCNN2D",
        )

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class ProbabilisticCNN2D(models.EmptyModel):
    """
    2D Flipout CNN with a multi-scale first stage.

    The input is convolved in parallel with kernel sizes 5, 10 and 15.
    The concatenated feature maps pass through two 'valid'
    convolutions, average pooling and two DenseFlipout layers. The
    output layer has ``num_classes`` units and no activation.
    """
    def __init__(self, 
                 inputshape,
                 num_classes,
                 kl_divergence_function,
                 ):
        def make_conv(name, filters, kernel_size, padding):
            # relu Flipout convolution with unit strides.
            return tfp.layers.Convolution2DFlipout(
                filters=filters,
                kernel_size=kernel_size,
                strides=(1, 1),
                padding=padding,
                kernel_divergence_fn=kl_divergence_function,
                activation='relu',
                name=name)

        def make_dense(name, units, activation):
            return tfp.layers.DenseFlipout(
                units=units,
                kernel_divergence_fn=kl_divergence_function,
                activation=activation,
                name=name)

        self.input_1 = layers.Input(shape=inputshape)

        # First stage: three branches on the same input.
        self.conv_1_short = make_conv(
            'conv_1_short', 12, 5, 'same')(self.input_1)
        self.conv_1_medium = make_conv(
            'conv_1_medium', 12, 10, 'same')(self.input_1)
        self.conv_1_long = make_conv(
            'conv_1_long', 12, 15, 'same')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        # Second stage: two stacked convolutions without padding.
        self.conv_2 = make_conv('conv_2', 10, 5, 'valid')(merged_sublayers)
        self.conv_3 = make_conv("conv_3", 10, 5, 'valid')(self.conv_2)

        pooling = layers.AveragePooling2D(name='average_pool_1')
        self.average_pool_1 = pooling(self.conv_3)

        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        #self.drop_1 = layers.Dropout(rate=0.2,
        #                             name='drop_1')(self.flatten_1)
        self.dense_1 = make_dense('dense_1', 4000, 'relu')(self.flatten_1)
        self.dense_2 = make_dense('dense_2', num_classes, None)(self.dense_1)

        #self.output_norm = layers.Lambda(
        #    lambda x: x/tf.reshape(K.sum(x, axis=-1),(-1,1)),
        #    name = 'output_normalization')(self.dense_2)

        # Number of parallel first-stage branches, forwarded to the base class.
        super(ProbabilisticCNN2D, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=len(sublayers),
            name='ProbabilisticCNN2D')
      

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class ProbabilisticCNNNoAct2D(models.EmptyModel):
    """
    2D Flipout CNN whose output layer has no activation.

    Three parallel convolutions (kernel sizes 5, 10 and 15) are
    concatenated and followed by two 'valid' convolutions, average
    pooling and two DenseFlipout layers with 4000 and ``num_classes``
    units.
    """
    def __init__(self, 
                 inputshape,
                 num_classes,
                 kl_divergence_function,
                 ):
        # Keyword arguments common to all five convolutions.
        conv_kwargs = dict(
            strides=(1, 1),
            kernel_divergence_fn=kl_divergence_function,
            activation='relu')

        self.input_1 = layers.Input(shape=inputshape)

        # First stage: one branch per kernel size, stored as
        # conv_1_short / conv_1_medium / conv_1_long.
        for suffix, kernel_size in (('short', 5), ('medium', 10), ('long', 15)):
            branch_name = 'conv_1_' + suffix
            branch = tfp.layers.Convolution2DFlipout(
                filters=12,
                kernel_size=kernel_size,
                padding='same',
                name=branch_name,
                **conv_kwargs)(self.input_1)
            setattr(self, branch_name, branch)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        # Second stage: two stacked convolutions without padding.
        self.conv_2 = tfp.layers.Convolution2DFlipout(
            filters=10, kernel_size=5, padding='valid',
            name='conv_2', **conv_kwargs)(merged_sublayers)
        self.conv_3 = tfp.layers.Convolution2DFlipout(
            filters=10, kernel_size=5, padding='valid',
            name="conv_3", **conv_kwargs)(self.conv_2)
        self.average_pool_1 = layers.AveragePooling2D(
            name='average_pool_1')(self.conv_3)

        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        #self.drop_1 = layers.Dropout(rate=0.2,
        #                             name='drop_1')(self.flatten_1)
        hidden_dense = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_function,
            activation='relu',
            name='dense_1')
        self.dense_1 = hidden_dense(self.flatten_1)

        output_dense = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_function,
            activation=None, # no activation in output layer
            name='dense_2')
        self.dense_2 = output_dense(self.dense_1)

        #self.output_norm = layers.Lambda(
        #    lambda x: x/tf.reshape(K.sum(x, axis=-1),(-1,1)),
        #    name = 'output_normalization')(self.dense_2)

        # Number of parallel first-stage branches, forwarded to the base class.
        no_of_inputs = len(sublayers)

        super(ProbabilisticCNNNoAct2D, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='ProbabilisticCNNNoAct2D')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K
from tensorflow_probability.python.layers import util as tfp_layers_util

class BayesianCNN(models.EmptyModel):
    """
    Bayesian CNN built from TFP Flipout layers with a softmax output.

    The input is convolved in parallel with three kernel sizes
    (5, 10 and 15). The concatenated feature maps pass through two
    further convolutions, average pooling, dropout and two
    DenseFlipout layers.

    Parameters
    ----------
    inputshape : tuple
        Shape of one example without the batch axis. Length 2 selects
        the 1D convolution/pooling layers, length 3 the 2D ones.
    num_classes : int
        Number of units of the softmax output layer.
    kl_divergence_fn : callable
        Passed as ``kernel_divergence_fn`` to every Flipout layer.
    kernel_prior_fn : callable, optional
        Passed as ``kernel_prior_fn`` to the convolutional layers only.
    kernel_posterior_fn : callable, optional
        Passed as ``kernel_posterior_fn`` to the convolutional layers
        only.

    Raises
    ------
    ValueError
        If ``inputshape`` has a length other than 2 or 3.
    """
    def __init__(
        self, 
        inputshape,
        num_classes,
        kl_divergence_fn,
        kernel_prior_fn=tfp.layers.default_multivariate_normal_fn,
        kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(),
        ):
        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            strides = 1
            average_pool_layer = layers.AveragePooling1D

        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            strides = (1,1)
            average_pool_layer =  layers.AveragePooling2D

        else:
            # Fail early with a clear message instead of an
            # UnboundLocalError further down.
            raise ValueError(
                "inputshape must have length 2 (1D data) or 3 (2D data), "
                f"got {inputshape!r}.")

        self.input_1 = layers.Input(shape = inputshape)
                
        # Three parallel branches looking at the input on different scales.
        self.conv_1_short = conv_layer(
            filters=12,
            kernel_size=5,
            strides=strides,
            padding='same',
            kernel_prior_fn=kernel_prior_fn,
            kernel_posterior_fn=kernel_posterior_fn,
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_short')(self.input_1)
        self.conv_1_medium = conv_layer(
            filters=12,
            kernel_size=10,
            strides=strides,
            padding='same',
            kernel_prior_fn=kernel_prior_fn,
            kernel_posterior_fn=kernel_posterior_fn,
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_medium')(self.input_1)
        self.conv_1_long = conv_layer(
            filters=12,
            kernel_size=15,
            strides=strides,
            padding='same',
            kernel_prior_fn=kernel_prior_fn,
            kernel_posterior_fn=kernel_posterior_fn,
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_1_long')(self.input_1)
        
        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_prior_fn=kernel_prior_fn,
            kernel_posterior_fn=kernel_posterior_fn,            
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='conv_2')(merged_sublayers)
        self.conv_3 = conv_layer(
            filters=10,
            kernel_size=5,
            strides=strides,
            padding='valid',
            kernel_prior_fn=kernel_prior_fn,
            kernel_posterior_fn=kernel_posterior_fn,            
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name="conv_3")(self.conv_2)
        self.average_pool_1 = average_pool_layer(
            name='average_pool_1')(self.conv_3)
        
        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        self.drop_1 = layers.Dropout(rate=0.2,
                                     name='drop_1')(self.flatten_1)
        # NOTE(review): the dense layers do not receive kernel_prior_fn /
        # kernel_posterior_fn -- confirm that this is intended.
        self.dense_1 = tfp.layers.DenseFlipout(
            units=4000,
            kernel_divergence_fn=kl_divergence_fn,
            activation='relu',
            name='dense_1')(self.drop_1)
                           
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            kernel_divergence_fn=kl_divergence_fn,
            activation="softmax", # softmax over the num_classes outputs
            name='dense_2')(self.dense_1)
        #self.outputs = tfp.layers.IndependentNormal(
        #    event_shape=num_classes,
        #    name = "output_dist")(self.dense_2)

        #self.output_norm = layers.Lambda(
        #    lambda x: x/tf.reshape(K.sum(x, axis=-1),(-1,1)),
        #    name = 'output_normalization')(self.dense_2)

        # Number of parallel first-stage branches, forwarded to the base class.
        no_of_inputs = len(sublayers)

        super(BayesianCNN, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianCNN')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K
from tensorflow_probability.python.layers import util as tfp_layers_util

class BayesianCNN2(models.EmptyModel):
    """
    Bayesian CNN built from three stacked Flipout convolutions, a global
    average pooling layer and two Flipout dense layers.

    The rank of ``inputshape`` selects the layer family: a length of 2
    uses the 1D convolution/pooling layers, a length of 3 uses the 2D
    ones.

    Parameters
    ----------
    inputshape : tuple
        Shape handed to ``layers.Input`` (without the batch axis).
    num_classes : int
        Number of units of the softmax output layer.
    kl_divergence_fn : callable
        Passed as ``kernel_divergence_fn`` to every Flipout layer.

    Raises
    ------
    ValueError
        If ``inputshape`` has neither 2 nor 3 entries.
    """
    def __init__(
        self,
        inputshape,
        num_classes,
        kl_divergence_fn,
        ):
        # Both branches must bind the SAME local names; the pooling layer
        # used below was previously only defined for 2D inputs.
        if len(inputshape) == 2:
            conv_layer = tfp.layers.Convolution1DFlipout
            global_avg_pool_layer = layers.GlobalAveragePooling1D
        elif len(inputshape) == 3:
            conv_layer = tfp.layers.Convolution2DFlipout
            global_avg_pool_layer = layers.GlobalAveragePooling2D
        else:
            raise ValueError(
                "inputshape must have 2 (1D data) or 3 (2D data) entries, "
                f"got {len(inputshape)}.")

        self.input_1 = layers.Input(shape=inputshape)

        self.conv_1 = conv_layer(
            filters=12,
            kernel_size=3,
            activation="relu",
            kernel_divergence_fn=kl_divergence_fn,
            name="conv_1")(self.input_1)

        self.conv_2 = conv_layer(
            filters=24,
            kernel_size=5,
            activation="relu",
            kernel_divergence_fn=kl_divergence_fn,
            name='conv_2')(self.conv_1)

        # Chain conv_3 after conv_2 so that conv_2 is part of the graph
        # between input and output instead of a dangling branch.
        self.conv_3 = conv_layer(
            filters=48,
            kernel_size=7,
            activation="relu",
            kernel_divergence_fn=kl_divergence_fn,
            name='conv_3')(self.conv_2)

        self.global_avg_pool_1 = global_avg_pool_layer(
            name="global_avg_pool_1"
        )(self.conv_3)

        self.dense_1 = tfp.layers.DenseFlipout(
            units=48,
            activation="relu",
            kernel_divergence_fn=kl_divergence_fn,
            name='dense_1')(self.global_avg_pool_1)
        self.dense_2 = tfp.layers.DenseFlipout(
            units=num_classes,
            activation="softmax",
            kernel_divergence_fn=kl_divergence_fn,
            name='dense_2')(self.dense_1)
        #self.outputs = tfp.layers.OneHotCategorical(
        #    event_size=num_classes,
        #    name="output_dist")(self.dense_2)

        # This architecture has a single input branch.
        no_of_inputs = 1

        super(BayesianCNN2, self).__init__(
            inputs=self.input_1,
            outputs=self.dense_2,
            inputshape=inputshape,
            num_classes=num_classes,
            no_of_inputs=no_of_inputs,
            name='BayesianCNN2')

#### Design prior, posterior and KL divergence function

In [None]:
def kl_divergence_fn(q, p, _):
    """
    Return KL(q || p) divided by the current number of training examples.

    The training-set size is read from ``clf.datahandler.X_train`` each
    time the function is called. The third argument is ignored.
    """
    divergence = tfp.distributions.kl_divergence(q, p)
    return divergence / tf.cast(clf.datahandler.X_train.shape[0],
                                dtype=tf.float32)

# =============================================================================
# #kernel_prior_fn=tfp.layers.default_multivariate_normal_fn,
# #kernel_posterior_fn=tfp_layers_util.default_mean_field_normal_fn(),
# 
# # Define the prior weight distribution as Normal of mean=0 and stddev=1.
# # Note that, in this example, the prior distribution is not trainable,
# # as we fix its parameters.
# def kernel_prior_fn(kernel_size, bias_size, dtype=None):
#     n = kernel_size + bias_size
#     prior_model = keras.Sequential(
#         [
#             tfp.layers.DistributionLambda(
#                 lambda t: tfp.distributions.MultivariateNormalDiag(
#                     loc=tf.zeros(n), scale_diag=tf.ones(n)
#                 )
#             )
#         ]
#     )
#     return prior_model
# 
# 
# # Define variational posterior weight distribution as multivariate Gaussian.
# # Note that the learnable parameters for this distribution are the means,
# # variances, and covariances.
# def kernel_posterior_fn(kernel_size, bias_size, dtype=None):
#     n = kernel_size + bias_size
#     posterior_model = keras.Sequential(
#         [
#             tfp.layers.VariableLayer(
#                 tfp.layers.MultivariateNormalTriL.params_size(n), dtype=dtype
#             ),
#             tfp.layers.MultivariateNormalTriL(n),
#         ]
#     )
#     return posterior_model
# =============================================================================

#### Build the model

In [None]:
# Instantiate the BayesianCNN class defined above and attach it to the
# classifier. Input shape, class count and task are read from `clf`;
# `kl_divergence_fn` comes from the previous cell.
clf.model = BayesianCNN(
    inputshape=clf.datahandler.input_shape,
    num_classes=clf.datahandler.num_classes,
    kl_divergence_fn=kl_divergence_fn,
    task=clf.task)
# Disabled custom prior/posterior arguments.
# NOTE(review): the first line passes kernel_posterior_fn as the prior --
# looks like a typo for kernel_prior_fn; confirm before re-enabling.
#    kernel_prior_fn=kernel_posterior_fn,
#    kernel_posterior_fn=kernel_posterior_fn)

# =============================================================================
# clf.model = ProbabilisticCNN(clf.datahandler.input_shape,
#                              clf.datahandler.num_classes,
#                              kl_divergence_fn)
# 
# clf.model = ProbabilisticClassificationCNN2D(clf.datahandler.input_shape,
#                                              clf.datahandler.num_classes,
#                                              kl_divergence_fn)
# clf.model = ProbabilisticClassificationMLP(
#     inputshape=clf.datahandler.input_shape,
#     num_classes=clf.datahandler.num_classes,
#     kl_divergence_fn=kl_divergence_fn)
# 
# clf.model = ProbabilisticRegressionMLP(
#     inputshape=clf.datahandler.input_shape,
#     num_classes=clf.datahandler.num_classes,
#     kl_divergence_fn=kl_divergence_fn)
# 
# clf.model = RegressionMLP(
#     inputshape=clf.datahandler.input_shape,
#     num_classes=clf.datahandler.num_classes)
# =============================================================================

# Alternative: Build model from available models in models.py
# =============================================================================
#clf.model = models.RegressionCNN(clf.datahandler.input_shape, 
#                                 clf.datahandler.num_classes)
# =============================================================================
# =============================================================================
# models.clf.model = ClassificationCNN(clf.datahandler.input_shape,
#                              clf.datahandler.num_classes)
# =============================================================================

# =============================================================================
#clf.model = models.ClassificationCNN2D(clf.datahandler.input_shape,
#                                       clf.datahandler.num_classes)
# =============================================================================

# =============================================================================
# clf.model = models.ProbabilisticClassificationCNN2D(
#     clf.datahandler.input_shape,
#     clf.datahandler.num_classes,
#     kl_divergence_fn,
#     bias_divergence_fn)
# =============================================================================

# =============================================================================
# clf.model = models.ResNet1D(clf.datahandler.input_shape,
#                             clf.datahandler.num_classes,
#                            ap=True)
# =============================================================================
# =============================================================================
# clf.model = models.ResNet1D(clf.datahandler.input_shape,
#                             clf.datahandler.num_classes,
#                             ap=True)
# =============================================================================


#### Design loss 

In [None]:
# =============================================================================
# def _neg_log_likelihood_bayesian(y_true, y_pred):
#     labels_distribution = tfp.distributions.OneHotCategorical(logits=y_pred)
#     return -tf.reduce_mean(labels_distribution.log_prob(y_true))
# =============================================================================

### Compile and summarize the model

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.losses import CategoricalCrossentropy

# Optimizer shared by both task types.
learning_rate = 1e-04
optimizer = Adam(learning_rate=learning_rate)

# Pick loss and metrics according to the task, then compile.
if clf.task == "classification":
    categorical_crossentropy = CategoricalCrossentropy()
    clf.model.compile(
        loss=categorical_crossentropy,
        optimizer=optimizer,
        metrics=["accuracy", "categorical_crossentropy"],
    )
elif clf.task == "regression":
    mae = MeanAbsoluteError()
    mse = MeanSquaredError()
    clf.model.compile(
        loss=mse,
        optimizer=optimizer,
        metrics=["mse"],
    )

# Print the model summary and save/show the model plot.
clf.summary()
clf.save_and_print_model_image()

### Show initial weight distributions

In [None]:
# Plot prior and posterior weight distributions before any training.
# to_file=True presumably also writes the figures to disk -- TODO confirm.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show initial predictions

In [None]:
# Predictions of the untrained model on the train and test sets.
pred_train_initial, pred_test_initial = clf.predict()

# Compare the first five labels (rounded to 3 decimals) with the predictions.
print("Train:")
for i in range(5):
    print(f"real: {np.round(y_train[i], 3)!s}",
          f"pred: {pred_train_initial[i]!s}")

print("Test:")
for i in range(5):
    print(f"real: {np.round(y_test[i], 3)!s}",
          f"pred: {pred_test_initial[i]!s}")

In [None]:
# Number of predictions requested per example.
no_of_predictions = 100

prob_pred_test_initial = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

# For the first ten test examples, report mean and standard deviation
# of the predictions (taken along axis 0) next to the rounded label.
for i, pred in enumerate(prob_pred_test_initial[:10]):
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i], 3)}, "
        f"Mean prediction: {np.mean(pred, axis=0)} +/- {np.std(pred, axis=0)}"
    )

### Train

In [None]:
# Training hyperparameters for the initial run.
epochs = 1000
batch_size = 32
validation_freq = 1

# Start training through the classifier wrapper; the boolean flags are
# forwarded to clf.train (checkpointing and the three loggers on, early
# stopping off). The returned value is kept in `hist`.
hist = clf.train(checkpoint=True,
                 early_stopping=False,
                 tb_log=True, 
                 csv_log=True,
                 hyperparam_log=True,
                 #cb_parameters={"es_patience":15,},
                 epochs=epochs, 
                 batch_size=batch_size,
                 validation_freq=validation_freq,
                 verbose=2)

# Optional audio notification once training has returned (Colab only);
# disabled unless `sound` is set to True.
sound = False
if sound:
    from google.colab import output
    output.eval_js('new Audio("http://soundbible.com/grab.php?id=1795&type=mp3").play()')

### Plot loss

In [None]:
# Build the plotting helper from the logged history and figure directory.
graph = clfutils.TrainingGraphs(clf.logging.history, clf.logging.fig_dir)
graph.plot_loss(to_file=True)

# Task-specific curves: MSE for regression, accuracy and categorical
# cross-entropy for classification.
if clf.task == "regression":
    graph.plot_mse(to_file=True)
elif clf.task == "classification":
    graph.plot_accuracy(to_file=True)
    graph.plot_metric("categorical_crossentropy", to_file=True)


### Evaluate on test data

In [None]:
# Evaluate on the test data and print the rounded results.
if clf.task == 'regression':
    test_loss = clf.evaluate()
    print(f"Test loss: {np.round(test_loss, decimals=8)!s}")
elif clf.task == 'classification':
    # The first two entries of the score are used as loss and accuracy.
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print(f"Test loss: {np.round(test_loss, decimals=8)!s}")
    print(f"Test accuracy: {np.round(test_accuracy, decimals=3)!s}")

### Predict on train and test data

In [None]:
# Predictions of the trained model; class predictions only for classification.
pred_train, pred_test = clf.predict()
if clf.task == 'classification':
    pred_train_classes, pred_test_classes = clf.predict_classes()

# Compare the first five labels (rounded to 3 decimals) with the predictions.
print("Train:")
for i in range(5):
    print(f"real: {np.round(y_train[i], 3)!s}",
          f"pred: {pred_train[i]!s}")

print("Test:")
for i in range(5):
    print(f"real: {np.round(y_test[i], 3)!s}",
          f"pred: {pred_test[i]!s}")

In [None]:
# Number of predictions requested per example.
no_of_predictions = 100

prob_pred_test = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

# For the first ten test examples, report mean and standard deviation
# of the predictions (taken along axis 0) next to the rounded label.
for i, pred in enumerate(prob_pred_test[:10]):
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i], 3)}, "
        f"Mean prediction: {np.mean(pred, axis=0)} +/- {np.std(pred, axis=0)}"
    )

### Show some predictions

#### 10 random training samples

In [None]:
# Plot ten randomly chosen training spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)

#### 10 random test samples

In [None]:
# Plot ten randomly chosen test spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)

### Show wrong/worst predictions

In [None]:
# Regression: show the 20 worst predictions; classification: show the
# misclassified examples.
if clf.task == 'regression':
    clf.show_worst_predictions(no_of_spectra=20)
elif clf.task == 'classification':
    clf.show_wrong_classification()

### Show posterior weight distribution after training update

In [None]:
# Plot prior and posterior weight distributions again, now after training,
# for comparison with the initial plots.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show distribution of probabilistic predictions

In [None]:
# Plot the distribution of probabilistic predictions for ten test spectra.
clf.plot_prob_predictions(dataset="test",
                          no_of_spectra=10,
                          to_file=True)

### Save model and results

In [None]:
# Saving the full model is disabled here; only the results are pickled.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report directory name is "<time>_<experiment name>" of this run.
dir_name = '_'.join((clf.time, clf.exp_name))
rep = clfutils.Report(dir_name)
rep.write()

## Continue training

### Load custom modules

In [None]:
# Reload the project modules if they are already bound in this kernel;
# otherwise import them for the first time.
# Only NameError (module names not defined yet) triggers the fallback, so
# genuine errors raised while reloading (e.g. a syntax error in the edited
# module) are no longer hidden behind a misleading "Modules were loaded."
try:
    import importlib
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print("\n Modules were reloaded.")
except NameError:
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print("Modules were loaded.")

### Reload classifier from previous run

In [None]:
# Folder of the previous run to continue from (absolute Google Drive path;
# adjust to the run that should be resumed).
runpath = r"/content/drive/My Drive/deepxps/runs/20210727_12h29m_MNIST_bayesian_classification"
# Rebuild the classifier object from the logs stored in that folder.
clf = classifier.restore_clf_from_logs(runpath)

### Load and inspect the data

In [None]:
# Reload the data with the same settings that were logged for the
# previous run.
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 aug_values_train, aug_values_val, aug_values_test) = clf.load_data_preprocess(
    input_filepath=clf.logging.hyperparams['input_filepath'],
    no_of_examples=clf.logging.hyperparams['no_of_examples'],
    train_test_split=clf.logging.hyperparams['train_test_split'],
    train_val_split=clf.logging.hyperparams['train_val_split'],
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='train')

In [None]:
### Only use classification data
if clf.task == "classification":
    # Select the rows whose label contains an exact 0.0 entry.
    # For 2-D label arrays np.where returns one row index per matching
    # element, i.e. a row with several zeros would be selected several
    # times; np.unique keeps every row once (and is a no-op for 1-D labels).
    indices_train = np.unique(np.where(clf.datahandler.y_train == 0.0)[0])
    indices_val = np.unique(np.where(clf.datahandler.y_val == 0.0)[0])
    indices_test = np.unique(np.where(clf.datahandler.y_test == 0.0)[0])

    X_train, y_train = clf.datahandler.X_train[indices_train], clf.datahandler.y_train[indices_train]
    X_val, y_val = clf.datahandler.X_val[indices_val], clf.datahandler.y_val[indices_val]
    X_test, y_test = clf.datahandler.X_test[indices_test], clf.datahandler.y_test[indices_test]

    clf.datahandler.X_train, clf.datahandler.y_train = X_train, y_train
    clf.datahandler.X_val, clf.datahandler.y_val = X_val, y_val
    clf.datahandler.X_test, clf.datahandler.y_test = X_test, y_test

    # Upper bounds for the number of examples kept per split.
    # NOTE(review): the original literals carried a trailing "#0"
    # (50#0, 10#0, 10#0), which suggests 500/100/100 were used before -- confirm.
    num_train = 50
    num_val = 10
    num_test = 10

    # Truncate each split from ITS OWN data. The validation and test sets
    # were previously cut from the training arrays, which made them
    # subsets of the training set (data leakage).
    clf.datahandler.X_train, clf.datahandler.y_train = clf.datahandler.X_train[0:num_train], clf.datahandler.y_train[0:num_train]
    clf.datahandler.X_val, clf.datahandler.y_val = clf.datahandler.X_val[0:num_val], clf.datahandler.y_val[0:num_val]
    clf.datahandler.X_test, clf.datahandler.y_test = clf.datahandler.X_test[0:num_test], clf.datahandler.y_test[0:num_test]
    clf.plot_random(no_of_spectra = 10, dataset = 'train')

    print(f"Remaining no. of training examples: {clf.datahandler.y_train.shape[0]}")
    print(f"Remaining no. of val examples: {clf.datahandler.y_val.shape[0]}")
    print(f"Remaining no. of test examples: {clf.datahandler.y_test.shape[0]}")

elif clf.task == "regression":
    print("Dataset was not changed.")

### Load the model

In [None]:
### Currently not working, does not load prior/posterior distributions ####
# clf.load_model(compile_model = True)
### Come back later to check on this ###

In [None]:
### Current alternative ### 
# Use the same model defined above and load the weights independently
# Need to run the cell with the definition of the model class above once
model_class = BayesianCNN # CHANGE HERE

# Scale the KL term by the current number of training examples.
kl_divergence_fn = (
    lambda q, p, _: (tfp.distributions.kl_divergence(q, p)) /
    tf.cast(clf.datahandler.X_train.shape[0], dtype=tf.float32)
    )

# NOTE(review): the initial-training cell additionally passes
# task=clf.task to BayesianCNN; here only three positional arguments are
# given -- confirm against the class signature.
clf.model = model_class(clf.datahandler.input_shape,
                        clf.datahandler.num_classes,
                        kl_divergence_fn)

# LOAD WEIGHTS
# The weights file is expected as "weights.h5" inside the run's model directory.
weights_file = os.path.join(clf.logging.model_dir,
                            "weights.h5")
clf.model.load_weights(weights_file)

# Compile and summarize the model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.losses import CategoricalCrossentropy

# Optimizer shared by both task types.
learning_rate = 1e-04
optimizer = Adam(learning_rate=learning_rate)

# Pick loss and metrics according to the task, then compile.
if clf.task == "classification":
    categorical_crossentropy = CategoricalCrossentropy()
    clf.model.compile(
        loss=categorical_crossentropy,
        optimizer=optimizer,
        metrics=["accuracy", "categorical_crossentropy"],
    )
elif clf.task == "regression":
    mae = MeanAbsoluteError()
    mse = MeanSquaredError()
    clf.model.compile(
        loss=mse,
        optimizer=optimizer,
        metrics=["mse"],
    )

### Summarize the model

In [None]:
# Print the summary of the restored model and save/show the model plot.
clf.summary()
clf.save_and_print_model_image()

### Show current weight distributions

In [None]:
# Plot prior and posterior weight distributions of the restored model,
# i.e. after loading the stored weights and before further training.
clf.plot_weight_distribution(kind="prior", to_file=True)
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show predictions with current model

In [None]:
# Predictions of the restored model before training continues.
pred_train_intermediate, pred_test_intermediate = clf.predict()

# Compare the first five labels (rounded to 3 decimals) with the predictions.
print("Train:")
for i in range(5):
    print(f"real: {np.round(y_train[i], 3)!s}",
          f"pred: {pred_train_intermediate[i]!s}")

print("Test:")
for i in range(5):
    print(f"real: {np.round(y_test[i], 3)!s}",
          f"pred: {pred_test_intermediate[i]!s}")

In [None]:
# Number of predictions requested per example.
no_of_predictions = 100

prob_pred_test_intermediate = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

# For the first ten test examples, report mean and standard deviation
# of the predictions (taken along axis 0) next to the rounded label.
for i, pred in enumerate(prob_pred_test_intermediate[:10]):
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i], 3)}, "
        f"Mean prediction: {np.mean(pred, axis=0)} +/- {np.std(pred, axis=0)}"
    )

### Train

In [None]:
# Number of additional epochs for the continued run.
epochs = 1000

# `new_learning_rate` is defined but currently not passed on: the
# corresponding keyword argument below is commented out.
new_learning_rate = 1e-03
validation_freq = 100

# Continue training; the batch size is taken from the hyperparameters
# logged for the previous run.
hist = clf.train(checkpoint=True,
                 early_stopping=False,
                 tb_log=True, 
                 csv_log=True,
                 hyperparam_log=True,
                 epochs=epochs, 
                 batch_size=clf.logging.hyperparams['batch_size'],
                 validation_freq=validation_freq,
                 verbose=2,)
                 #new_learning_rate=new_learning_rate)

### Plot loss

In [None]:
# Build the plotting helper from the logged history and figure directory.
graph = clfutils.TrainingGraphs(clf.logging.history, clf.logging.fig_dir)
graph.plot_loss(to_file=True)

# Plot only the metrics the model was compiled with for the given task
# (same selection as in the initial-training section). "mse" is a metric
# of the regression setup only, so plot_mse must not run unconditionally.
if clf.task == "classification":
    graph.plot_accuracy(to_file=True)
    graph.plot_metric("categorical_crossentropy",
                      to_file=True)
elif clf.task == "regression":
    graph.plot_mse(to_file=True)

### Evaluate on test data

In [None]:
# Evaluate on the test data and print the rounded results.
if clf.task == 'regression':
    test_loss = clf.evaluate()
    print(f"Test loss: {np.round(test_loss, decimals=8)!s}")
elif clf.task == 'classification':
    # The first two entries of the score are used as loss and accuracy.
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print(f"Test loss: {np.round(test_loss, decimals=8)!s}")
    print(f"Test accuracy: {np.round(test_accuracy, decimals=3)!s}")

### Predict on train and test data

In [None]:
# Predictions after continued training; class predictions only for
# classification.
pred_train, pred_test = clf.predict()
if clf.task == 'classification':
    pred_train_classes, pred_test_classes = clf.predict_classes()

# Compare the first five labels (rounded to 3 decimals) with the predictions.
print("Train:")
for i in range(5):
    print(f"real: {np.round(y_train[i], 3)!s}",
          f"pred: {pred_train[i]!s}")

print("Test:")
for i in range(5):
    print(f"real: {np.round(y_test[i], 3)!s}",
          f"pred: {pred_test[i]!s}")

In [None]:
# NOTE(review): `start` and `stop` are assigned but not used below; the
# loop always covers the first ten examples -- confirm the intended range.
start = 0
stop = 5
# Number of predictions requested per example.
no_of_predictions = 100

prob_pred_test = clf.predict_probabilistic(
    dataset="test", no_of_predictions=no_of_predictions)

# For the first ten test examples, report mean and standard deviation
# of the predictions (taken along axis 0) next to the rounded label.
for i, pred in enumerate(prob_pred_test[:10]):
    print(
        f"Ground truth: {np.round(clf.datahandler.y_test[i], 3)}, "
        f"Mean prediction: {np.mean(pred, axis=0)} +/- {np.std(pred, axis=0)}"
    )

### Show some predictions

#### 10 random training samples

In [None]:
# Plot ten randomly chosen training spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)

#### 10 random test samples

In [None]:
# Plot ten randomly chosen test spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)

### Show wrong/worst predictions

In [None]:
# Regression: show the 20 worst predictions; classification: show the
# misclassified examples.
if clf.task == 'regression':
    clf.show_worst_predictions(no_of_spectra=20)
elif clf.task == 'classification':
    clf.show_wrong_classification()

### Show posterior weight distribution after training update

In [None]:
# Plot the posterior weight distribution after the continued training.
clf.plot_weight_distribution(kind="posterior", to_file=True)

### Show distribution of probabilistic predictions

In [None]:
# Plot the distribution of probabilistic predictions for twenty test spectra.
clf.plot_prob_predictions(dataset="test",
                          no_of_spectra=20,
                          to_file=True)

### Save model and data

In [None]:
# Saving the full model is disabled here; only the results are pickled.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report directory name is "<time>_<experiment name>" of this run.
dir_name = '_'.join((clf.time, clf.exp_name))
rep = clfutils.Report(dir_name)
rep.write()

## Prepare website upload

In [None]:
from xpsdeeplearning.network.prepare_upload import Uploader

# Derive the metadata file path from the logged input file path: strip
# everything after the last "." and append "_metadata.json".
dataset_path = clf.logging.hyperparams["input_filepath"].rsplit(".",1)[0] + "_metadata.json"
# Collect and store the upload parameters for this run.
uploader = Uploader(clf.logging.root_dir, dataset_path)
uploader.prepare_upload_params()
uploader.save_upload_params()

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """Send a JavaScript save command for the current notebook to the front end."""
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """
    Convert a notebook file to HTML and write it to ``output_file``.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' file to read.
    output_file : str
        Path of the '.html' file to write (UTF-8 encoded).
    """
    import nbformat
    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    output, resources = exporter.from_notebook_node(output_notebook)
    # The context manager guarantees the file is flushed and closed even
    # if writing fails. newline="" disables newline translation, matching
    # the previous codecs.open() behaviour of writing the text unchanged.
    with open(output_file, 'w', encoding='utf-8', newline='') as html_file:
        html_file.write(output)

import time
import os

# Wait before triggering the save, then wait again before the notebook
# file is read for conversion.
# NOTE(review): the pauses presumably give pending output and the save
# request time to finish -- confirm the required durations.
time.sleep(20)
save_notebook()
print('Notebook saved!')
time.sleep(30)
# Absolute Google Drive path of this notebook; the HTML copy is written
# into the log directory of the current run.
current_file = '/content/drive/My Drive/deepxps/xpsdeeplearning/train_prob.ipynb'
output_file = os.path.join(clf.logging.log_dir,'train_prob_out.html')
output_HTML(current_file, output_file)
print('HTML file saved!')