## Mount the drive

In [1]:
# Make Google Drive reachable from this Colab runtime.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Removing previously trained models

In [2]:
import os
import shutil

# Folder that receives the saved deep-ensemble checkpoints of earlier runs.
dir_name = '/content/drive/MyDrive/MasterThesis/CaliforniaHousingDatasetTests/GBP_explanation/deep_ensemble/'

# Remove every leftover '*.h5' entry so the next training run starts clean.
# DeepEnsemble.save() creates a *folder* (the training cell names it '...h5') that
# holds one HDF5 file per ensemble member, so an entry may be a directory as well
# as a plain file; os.remove() alone fails on directories.
if os.path.isdir(dir_name):
    for item in os.listdir(dir_name):
        if not item.endswith('.h5'):
            continue

        item_path = os.path.join(dir_name, item)
        print(f'deleting {item}')

        if os.path.isdir(item_path) and not os.path.islink(item_path):
            shutil.rmtree(item_path)
        else:
            os.remove(item_path)
else:
    # Nothing has been saved yet (fresh Drive / first run): not an error.
    print(f'nothing to delete, {dir_name} does not exist')

## Imports

In [3]:
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt 
import tensorflow as tf

# Run TensorFlow in graph (non-eager) mode for the whole notebook.
# NOTE(review): the comments in the keras-uncertainty import cell quote a KerasTensor
# TypeError that appears when eager execution stays enabled - presumably that is the
# reason for this switch; confirm it is still required.
tf.compat.v1.disable_eager_execution()

## Installing keras uncertainty

In [4]:
# Install the keras-uncertainty library from GitHub.
#
# 1. Clone the repository into the current working directory. When the cell is
#    re-run in the same runtime the clone aborts with "destination path
#    'keras-uncertainty' already exists" (see the captured output of this cell).
# 2. Install the package for the current user directly from the Git URL.
# 3. Change the working directory to the cloned checkout.
#    NOTE(review): presumably done so that `import keras_uncertainty` resolves to the
#    local sources without restarting the runtime - confirm.

!git clone https://github.com/mvaldenegro/keras-uncertainty.git
!pip install --user git+https://github.com/mvaldenegro/keras-uncertainty.git

%cd keras-uncertainty

fatal: destination path 'keras-uncertainty' already exists and is not an empty directory.
Collecting git+https://github.com/mvaldenegro/keras-uncertainty.git
  Cloning https://github.com/mvaldenegro/keras-uncertainty.git to /tmp/pip-req-build-cypgv_xh
  Running command git clone -q https://github.com/mvaldenegro/keras-uncertainty.git /tmp/pip-req-build-cypgv_xh
/content/keras-uncertainty


## Keras uncertainty specific imports

In [5]:
import numpy as np 
import tensorflow as tf 
from tensorflow.keras.models import load_model
import random
import pandas as pd 
import os 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import math 
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input

import keras_uncertainty
#from keras_uncertainty.models import StochasticRegressor, TwoHeadStochasticRegressor
#from keras_uncertainty.models.DeepEnsembleClassifier import DeepEnsemble

#from keras_uncertainty.layers import DropConnectDense, VariationalDense, FlipoutDense, StochasticDropout
from keras_uncertainty.metrics import gaussian_interval_score
from keras_uncertainty.losses import regression_gaussian_nll_loss, regression_gaussian_beta_nll_loss
import matplotlib.pyplot as plt

np.set_printoptions(suppress=True) 

import tensorflow as tf
#tf.compat.v1.disable_eager_execution()

# 28022022: the code also works without disabling eager execution (reason unknown).

# If eager execution is not disabled, the following error occurs:
# TypeError: You are passing KerasTensor(type_spec=TensorSpec(shape=(), 
#dtype=tf.float32, name=None), name='Placeholder:0', description="created 
#by layer 'tf.cast_4'"), an intermediate Keras symbolic input/output, 
#to a TF API that does not allow registering custom dispatchers, 
#such as `tf.cond`, `tf.function`, gradient tapes, or `tf.map_fn`. 
#Keras Functional model construction only supports TF API calls that 
#*do* support dispatching, such as `tf.math.add` or `tf.reshape`. 
#Other APIs cannot be called directly on symbolic Kerasinputs/outputs. 
#You can work around this limitation by putting the operation in a custom 
#Keras layer `call` and calling that layer on this symbolic input/output.

Keras Uncertainty will use standalone Keras backend

## Load data

In [6]:
# Load the California housing data (CSV files shipped with Colab).
train_file = '/content/sample_data/california_housing_train.csv'
test_file = '/content/sample_data/california_housing_test.csv'

train_combined = pd.read_csv(train_file)
test = pd.read_csv(test_file)

# Hold out 25 % of the *training* CSV as validation set; the test CSV is used as is.
# The fixed seed keeps the split identical across "Restart & Run All".
train, val = train_test_split(train_combined, test_size=0.25, random_state=42)

# All column names of the CSV, including the target column.
feature_names = list(train_combined.columns)
print(feature_names)

# Column that the models have to predict.
target = 'median_house_value'

# Separate the target column from the input features in every split.
train_labels_df = train[target]
val_labels_df = val[target]
test_labels_df = test[target]

train_data_df = train.drop(columns=target)
val_data_df = val.drop(columns=target)
test_data_df = test.drop(columns=target)

train_data_unnormalized = train_data_df.to_numpy()
train_labels_unnormalized = train_labels_df.to_numpy()

val_data_unnormalized = val_data_df.to_numpy()
val_labels_unnormalized = val_labels_df.to_numpy()

test_data_unnormalized = test_data_df.to_numpy()
test_labels_unnormalized = test_labels_df.to_numpy()

# Min-max normalisation.
# Features and labels get their own scaler, and both are fitted on the training
# split ONLY. Re-fitting one shared scaler on every split (as this cell did before)
# maps train/validation/test with different min/max values, so the same raw value
# ends up with a different normalised value depending on the split, and the fitted
# parameters needed to undo the scaling are overwritten.
# Validation/test values may therefore fall slightly outside [0, 1].
feature_scaler = MinMaxScaler()
label_scaler = MinMaxScaler()

train_data = feature_scaler.fit_transform(train_data_unnormalized)
val_data = feature_scaler.transform(val_data_unnormalized)
test_data = feature_scaler.transform(test_data_unnormalized)

# The scaler expects 2-D input, hence the extra axis on the label vectors.
train_label_temp = np.expand_dims(train_labels_unnormalized, axis=1)
val_label_temp = np.expand_dims(val_labels_unnormalized, axis=1)
test_label_temp = np.expand_dims(test_labels_unnormalized, axis=1)

train_labels = label_scaler.fit_transform(train_label_temp)
val_labels = label_scaler.transform(val_label_temp)
test_labels = label_scaler.transform(test_label_temp)

# Backward-compatible alias: after this cell `minmax` used to be the scaler that was
# last fitted on labels, so keep the name pointing at the label scaler.
minmax = label_scaler

print('Training data shape \n', train_data.shape)
print('Training labels shape \n', train_labels.shape)

print('Validation data shape \n ', val_data.shape)
print('Validation labels shape \n ', val_labels.shape)

print('Test data shape \n ', test_data.shape)
print('Test labels shape \n ', test_labels.shape)

['longitude', 'latitude', 'housing_median_age', 'total_rooms', 'total_bedrooms', 'population', 'households', 'median_income', 'median_house_value']
Training data shape 
 (12750, 8)
Training labels shape 
 (12750, 1)
Validation data shape 
  (4250, 8)
Validation labels shape 
  (4250, 1)
Test data shape 
  (3000, 8)
Test labels shape 
  (3000, 1)


## DeepEnsemble and DeepEnsembleRegressor

In [7]:
import numpy as np

import os
import yaml
import keras_uncertainty.backend as K
from pydoc import locate

class AdversarialExampleGenerator:
    """Empty placeholder class; it defines no attributes or methods."""

# Name of the YAML index file that save() writes into the ensemble folder and load() reads back.
METADATA_FILENAME = "metadata.yml"

class DeepEnsemble:
    """
        Container for an ensemble of independently built models.
        Two parallel lists are kept: train_estimators (used for fitting) and test_estimators (used for
        prediction and saving). Unless needs_test_estimators is set, both lists hold the same objects.
    """
    def __init__(self, model_fn=None, num_estimators=None, models=None, needs_test_estimators=False):
        """
            Builds the ensemble either from a factory or from ready-made models.

            model_fn: callable without arguments that creates one ensemble member. With
                needs_test_estimators=True it must return a (train_model, test_model) tuple.
            num_estimators: number of members to create with model_fn (must be > 0).
            models: list of already built models; mutually exclusive with model_fn/num_estimators.
            needs_test_estimators: whether model_fn returns separate train and test models.

            Raises ValueError if model_fn does not return a 2-tuple when one is required.
        """
        self.needs_test_estimators = needs_test_estimators

        if models is None:
            assert model_fn is not None and num_estimators is not None
            assert num_estimators > 0

            self.num_estimators = num_estimators
            self.train_estimators = [None] * num_estimators
            self.test_estimators = [None] * num_estimators

            for i in range(self.num_estimators):
                if self.needs_test_estimators:
                    estimators = model_fn()

                    # isinstance() also accepts tuple subclasses such as namedtuples.
                    if not isinstance(estimators, tuple):
                        raise ValueError("model_fn should return a tuple")

                    if len(estimators) != 2:
                        raise ValueError("model_fn returned a tuple of unexpected size ({} vs 2)".format(len(estimators)))

                    train_est, test_est = estimators
                    self.train_estimators[i] = train_est
                    self.test_estimators[i] = test_est
                else:
                    est = model_fn()
                    self.train_estimators[i] = est
                    self.test_estimators[i] = est

        else:
            assert model_fn is None and num_estimators is None

            # Ready-made models serve for training and testing alike.
            self.train_estimators = models
            self.test_estimators = models

            self.num_estimators = len(models)

    def save(self, folder, filename_pattern="model-ensemble-{}.hdf5"):
        """
            Save a Deep Ensemble into a folder, using individual HDF5 files for each ensemble member.
            This allows for easily loading individual ensembles. Metadata is saved to allow loading of the whole ensemble.

            folder: target directory, created if it does not exist yet.
            filename_pattern: format string that receives the member index.
        """

        os.makedirs(folder, exist_ok=True)

        model_metadata = {}

        for i in range(self.num_estimators):
            member_name = filename_pattern.format(i)
            filename = os.path.join(folder, member_name)
            self.test_estimators[i].save(filename)

            print("Saved estimator {} to {}".format(i, filename))

            model_metadata[i] = member_name

        # Store the fully qualified *class* name. The module name alone cannot be turned
        # back into a class by load(); inside a notebook it is just "__main__".
        cls = type(self)
        metadata = {"models": model_metadata, "class": "{}.{}".format(cls.__module__, cls.__qualname__)}

        with open(os.path.join(folder, METADATA_FILENAME), 'w') as outfile:
            yaml.dump(metadata, outfile)


    @staticmethod
    def load(folder):
        """
            Load a Deep Ensemble model from a folder containing individual HDF5 files.

            folder: directory previously written by save().
            Returns an instance of the class recorded in the metadata, built with models=<loaded models>.
            Raises ValueError if the recorded class name does not resolve to a class.
        """
        # Imported locally: this cell defines no module-level `keras` name, so the
        # previous `keras.models.load_model` lookup could not succeed here.
        from tensorflow.keras.models import load_model

        with open(os.path.join(folder, METADATA_FILENAME)) as infile:
            # The metadata only contains plain strings and integers, so the restricted
            # loader is sufficient and avoids constructing arbitrary objects from a file.
            metadata = yaml.safe_load(infile)

        models = [load_model(os.path.join(folder, filename))
                  for _, filename in metadata["models"].items()]

        clazz = locate(metadata["class"])

        if not isinstance(clazz, type):
            raise ValueError("metadata entry 'class' ({!r}) does not name a class".format(metadata["class"]))

        return clazz(models=models)

class DeepEnsembleRegressor(DeepEnsemble):
    """
        Implementation of a Deep Ensemble for regression.
        Uses two models, one for training and another for inference/testing. The user has to provide a model function that returns
        the train and test models, and use the provided deep_ensemble_nll_loss for training.
    """
    def __init__(self, model_fn=None, num_estimators=None, models=None):
        """
            Builds a Deep Ensemble given a function to make model instances, and the number of estimators.
            For training it uses a model that only outputs the mean, while the loss uses both the mean and variance produced by the model.
            For testing, a model that shares weights with the training model is used, but the testing model outputs both mean and variance. The final
            prediction is made with a mixture of gaussians, where each gaussian is one trained model instance.
        """
        super().__init__(model_fn=model_fn, num_estimators=num_estimators, models=models,
                         needs_test_estimators=True)

    def fit(self, X, y, epochs=10, batch_size=32, **kwargs):
        """
            Fits the Deep Ensemble, each estimator is fit independently on the same data.
            Extra keyword arguments are forwarded to every estimator's fit().

            Returns a list with whatever each estimator's fit() returned (one entry per
            estimator, in ensemble order), so callers can inspect the training curves.
        """

        return [estimator.fit(X, y, epochs=epochs, batch_size=batch_size, **kwargs)
                for estimator in self.train_estimators]

    def fit_generator(self, generator, epochs=10, **kwargs):
        """
            Fits the Deep Ensemble, each estimator is fit independently on the same data.

            Returns a list with whatever each estimator's fit_generator() returned.
        """

        return [estimator.fit_generator(generator, epochs=epochs, **kwargs)
                for estimator in self.train_estimators]

    @staticmethod
    def _mixture_moments(means, variances):
        """
            Stacks the per-estimator means/variances along a new leading axis and returns
            (means, variances, mixture_mean, mixture_var) of the equally weighted gaussian mixture.
        """
        means = np.array(means)
        variances = np.array(variances)

        mixture_mean = np.mean(means, axis=0)
        mixture_var = np.mean(variances + np.square(means), axis=0) - np.square(mixture_mean)
        # Rounding can push the difference marginally below zero; clamp before any sqrt.
        mixture_var[mixture_var < 0.0] = 0.0

        return means, variances, mixture_mean, mixture_var

    def predict_output(self, X, batch_size=32, output_scaler=None, num_ensembles=None, disentangle_uncertainty=False, num_samples=10, **kwargs):
        """
            Makes a prediction. Predictions from each estimator are used to build a gaussian mixture and its mean and standard deviation returned.

            X: inputs passed to every estimator's predict().
            batch_size: forwarded to predict().
            output_scaler: optional fitted scaler; if given, means and standard deviations are mapped back to the original target scale.
            num_ensembles: use only the first num_ensembles estimators (all if None).
            disentangle_uncertainty: if True, return (mixture_mean, aleatoric_std, epistemic_std) instead.
            num_samples: number of draws from the predictive gaussian per input.

            Returns (samples, mixture_mean, mixture_std) where samples has shape
            (num_samples,) + mixture_mean.shape.
        """
        if num_ensembles is None:
            estimators = self.test_estimators
        else:
            estimators = self.test_estimators[:num_ensembles]

        kwargs.setdefault("verbose", 0)

        means = []
        variances = []

        for estimator in estimators:
            mean, var = estimator.predict(X, batch_size=batch_size, **kwargs)

            if output_scaler is not None:
                std = np.sqrt(var)

                # A standard deviation is a spread, not a location: for an affine scaler
                # y = a * x + b it scales with |a| only. Subtracting the image of zero
                # removes the offset b that inverse_transform() adds.
                # Assumes the scaler is affine (e.g. MinMaxScaler, StandardScaler).
                offset = output_scaler.inverse_transform(np.zeros_like(std))
                std = np.abs(output_scaler.inverse_transform(std) - offset)

                mean = output_scaler.inverse_transform(mean)
                var = np.square(std)

            means.append(mean)
            variances.append(var)

        means, variances, mixture_mean, mixture_var = self._mixture_moments(means, variances)

        if disentangle_uncertainty:
            # Epistemic: disagreement between members. Aleatoric: average predicted noise.
            epi_var = np.var(means, axis=0)
            ale_var = np.mean(variances, axis=0)

            return mixture_mean, np.sqrt(ale_var), np.sqrt(epi_var)

        mixture_std = np.sqrt(mixture_var)

        # Draw num_samples values per prediction. The sample shape must be spelled out:
        # a bare integer size cannot be broadcast against the (N, 1) mean/std arrays.
        sample = np.random.normal(mixture_mean, mixture_std, size=(num_samples,) + mixture_mean.shape)

        return sample, mixture_mean, mixture_std

    def predict_generator(self, generator, steps=None, num_ensembles=None, **kwargs):
        """
            Makes a prediction. Predictions from each estimator are used to build a gaussian mixture and its mean and standard deviation returned.

            Returns (mixture_mean, mixture_std).
        """
        if num_ensembles is None:
            estimators = self.test_estimators
        else:
            estimators = self.test_estimators[:num_ensembles]

        means = []
        variances = []

        for estimator in estimators:
            mean, var = estimator.predict_generator(generator, steps=steps, **kwargs)
            means.append(mean)
            variances.append(var)

        _, _, mixture_mean, mixture_var = self._mixture_moments(means, variances)

        return mixture_mean, np.sqrt(mixture_var)

## Define Ensemble model

In [8]:
# ENSEMBLE MODEL
def train_ensemble_model(x_train, y_train, x_val, y_val, x_test, epochs, num_estimators, num_samples=10):
    """
    Build, train, save and evaluate a deep ensemble of two-headed regressors.

    Args:
        x_train, y_train: training inputs and targets.
        x_val, y_val: validation inputs and targets, passed to fit() as validation_data.
        x_test: inputs for which predictions are produced after training.
        epochs: number of training epochs per ensemble member.
        num_estimators: number of ensemble members.
        num_samples: number of draws from the predictive distribution per test input.

    Returns:
        (pred_samples, pred_mean, pred_std, ens_model)
    """
    def model_fn():
        # Architecture obtained from hyperparameter optimization.
        inp = Input(shape=(x_train.shape[1],))
        x = Dense(32, activation='relu')(inp)
        x = Dense(32, activation='relu')(x)
        mean = Dense(1, activation='linear')(x)
        var = Dense(1, activation='softplus')(x)

        # Both models share the same layers: the training model outputs only the mean
        # (the variance enters through the loss), the prediction model outputs both.
        train_model = Model(inp, mean)
        pred_model = Model(inp, [mean, var])
        train_model.compile(loss=regression_gaussian_nll_loss(var), optimizer='adam', metrics=['mae'])

        return train_model, pred_model

    # Reset the Keras state once, before any member exists. Clearing it inside
    # model_fn() would discard the state of the members built in earlier calls.
    # NOTE(review): presumably the cause of the ValueError recorded in this cell's
    # output (graph mode is active) - confirm.
    K.clear_session()

    ens_model = DeepEnsembleRegressor(model_fn, num_estimators=num_estimators)

    # Train every ensemble member on the same data.
    histories = ens_model.fit(x_train, y_train, validation_data=(x_val, y_val), verbose=2, epochs=epochs)
    if histories is None:
        # The ensemble's fit() may not return anything. tf.keras normally keeps the
        # History of the most recent fit() on the model itself, so fall back to that.
        histories = [getattr(est, 'history', None) for est in ens_model.train_estimators]
    histories = [hist for hist in histories if hist is not None]

    # save() creates a folder with one HDF5 file per member plus a metadata file.
    ens_model.save('/content/drive/MyDrive/MasterThesis/CaliforniaHousingDatasetTests/GBP_explanation/deep_ensemble/ensemble_model_epochs_'+str(epochs)+'_num_estimators_'+str(num_estimators)+'.h5')

    # The ensemble object has no summary(); all members share one architecture,
    # so the summary of the first prediction model describes every member.
    ens_model.test_estimators[0].summary()

    # Training and validation curves, one pair of lines per ensemble member.
    for metric in ('loss', 'mae'):
        for member, hist in enumerate(histories):
            curves = hist.history
            if metric in curves:
                plt.plot(curves[metric], label='train {} (member {})'.format(metric, member))
            if 'val_' + metric in curves:
                plt.plot(curves['val_' + metric], label='val {} (member {})'.format(metric, member))
        plt.legend()
        plt.grid()
        plt.xlabel('epochs')
        plt.ylabel(metric)
        plt.title('{} curves'.format(metric))
        plt.show()

    pred_samples, pred_mean, pred_std = ens_model.predict_output(x_test, num_samples=num_samples)
    print('pred_samples shape ', pred_samples.shape)
    print('pred_mean shape ', pred_mean.shape)
    print('pred_std shape ', pred_std.shape)

    # return the predicted_samples, predicted_mean, predicted_std and model
    return pred_samples, pred_mean, pred_std, ens_model


# Training configuration for this run.
epochs = 3
num_estimators = 5

# Build and train the deep ensemble, then predict samples, mean and standard
# deviation for the complete test set.
(prediction_samples,
 prediction_mean,
 prediction_std,
 ensemble_model) = train_ensemble_model(
    train_data, train_labels,
    val_data, val_labels,
    test_data,
    epochs=epochs,
    num_estimators=num_estimators,
)

# Report every result array followed by its shape.
for caption, payload in (
    ('prediction mean :\n', prediction_mean),
    ('prediction mean shape ', prediction_mean.shape),
    ('prediction std :\n', prediction_std),
    ('prediction std shape ', prediction_std.shape),
    ('prediction samples :\n', prediction_samples),
    ('prediction samples shape ', prediction_samples.shape),
):
    print(caption, payload)

ValueError: ignored

In [None]:
# Analysis of the input: pick a random test sample and predict on the test set.
num_of_samples_to_be_explained = 1

# Upper bound chosen so that the slice below always holds the requested number of rows.
start_index = np.random.randint(0, test_data.shape[0] - num_of_samples_to_be_explained + 1)
print('start_index : ', start_index)

test_input = test_data[start_index:start_index+num_of_samples_to_be_explained]
print('test_input shape :', test_input.shape)

# Same rows with an extra trailing axis of length 1.
test_input_adj = np.expand_dims(test_input, axis=-1)
print('test_input_adj shape :', test_input_adj.shape)

# `ensemble_model` is the trained ensemble returned by train_ensemble_model();
# no object called `dropout_model` exists in this notebook.
pred_samples, pred_mean, pred_std = ensemble_model.predict_output(test_data)
print(pred_samples.shape)
print(pred_mean.shape)
print(pred_std.shape)

In [None]:
# function to visualize the ground truth with the predicted value and (corridor of uncertainty)

def plot(ground_truth, prediction_mean, prediction_std, path, indices_to_be_plotted,
         title='Ground Truth and Dropout Model Prediction'):
    """
    Plot ground truth against the predicted mean with a +/- 1 sigma band and save the figure.

    Args:
        ground_truth: array of target values; its first dimension sets the number of x positions.
        prediction_mean: predicted means, one per ground-truth value.
        prediction_std: predicted standard deviations, one per ground-truth value.
        path: file the figure is written to; missing parent folders are created.
        indices_to_be_plotted: labels for the x ticks (the original sample indices).
        title: figure title; the default keeps the previous hard-coded text.
    """
    positions = range(ground_truth.shape[0])

    # Flatten so that the band boundaries are 1-D, as fill_between expects.
    y_pred_mean = prediction_mean.reshape((-1,))
    y_pred_std = prediction_std.reshape((-1,))
    y_pred_up_1 = y_pred_mean + y_pred_std
    y_pred_down_1 = y_pred_mean - y_pred_std

    plt.figure(figsize=(30, 4))
    plt.plot(positions, ground_truth, color='k', label='ground truth', marker='o')
    plt.plot(positions, prediction_mean, color='r', label='prediction', marker='o')

    # Raw string: '\p' and '\s' are not valid escape sequences in a normal string literal.
    plt.fill_between(positions, y_pred_down_1, y_pred_up_1, color=(0, 0, 0.9, 0.7),
                     label=r'corridor of uncertainty ($\pm$ 1 $\sigma$) ', alpha=0.5)

    # A single legend call: a second plt.legend() replaces the first one anyway, so
    # only the default placement was ever visible.
    plt.legend()
    plt.grid()
    plt.xticks(range(len(indices_to_be_plotted)), indices_to_be_plotted, rotation=45)
    plt.xlabel('Input sample #')
    plt.ylabel('Target Variable (normalized)')
    plt.title(title)

    # savefig() does not create folders; make sure the target directory exists.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    plt.savefig(path)
    plt.show()

# Visualise a random window of 100 consecutive test samples.
start_index = np.random.randint(test_data.shape[0]-150)

# Named `plot_indices` rather than `random`, which would shadow the `random`
# module imported at the top of the notebook.
plot_indices = range(start_index, start_index+100)
print('indices to be plotted \n', plot_indices)

# Slice the window and flatten to 1-D float arrays. Slicing replaces the
# per-element float() conversion of one-element arrays.
window = slice(plot_indices.start, plot_indices.stop)
test_labels_plot = np.asarray(test_labels[window], dtype=float).reshape(-1)
mean_dropout_plot = np.asarray(pred_mean[window], dtype=float).reshape(-1)
std_dropout_plot = np.asarray(pred_std[window], dtype=float).reshape(-1)

plot(test_labels_plot, mean_dropout_plot, std_dropout_plot, '/content/drive/MyDrive/MasterThesis/CaliforniaHousingDatasetTests/GBP_explanation/dropout/output_plots/dropout_gt_vs_prediction.pdf', plot_indices)