# Hyperparameter optimization of a CNN for XPS data in Keras using Talos

In this notebook, we search the hyperparameter space of a convolutional neural network using Talos (https://autonomio.github.io/docs_talos/). The network is trained on photoemission spectra that are linear combinations of single reference spectra.

## Setup

### Mount google drive, change working directory

In [None]:
# Standard library first, then the Colab-specific helper.
import os
from google.colab import drive

# Make Google Drive available under /content/drive.
drive.mount('/content/drive')

# Use the project folder on Drive as the working directory.
os.chdir('/content/drive/My Drive/deepxps')

### Install packages and import modules

In [None]:
%%capture
# Install packages
#!pip install git+https://github.com/autonomio/talos@1.0

# Using forked Talos v1.0
!pip install git+git://github.com/lukaspie/talos.git@1.0#egg=talos 

# =============================================================================
# If forked repo is present on Google Drive.
# os.chdir('/content/drive/My Drive/app/talos')
# ! git checkout 1.0
# !pip install .
# os.chdir('/content/drive/My Drive/app')
# !pwd
# =============================================================================

!pip install python-docx
#!pip install tensorflow==2.3.0 as tf

# Import standard modules and magic commands
import tensorflow as tf
import datetime
import numpy as np
import pytz
import importlib

# Magic commands
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

### Set seeds and restart session to ensure reproducibility

In [None]:
def reset_seeds_and_session(seed=1):
    """Seed all random number generators and install a fresh TF session.

    Seeds Python's ``random`` module, NumPy and TensorFlow with ``seed`` and
    registers a new ``tf.compat.v1`` session that is restricted to a single
    intra-op and a single inter-op thread as the Keras backend session.

    Parameters
    ----------
    seed : int, optional
        Seed handed to every random number generator. The default is 1.

    Returns
    -------
    None
    """
    # Local import keeps this cell independent of the notebook's import cell.
    import random

    # NOTE: the hash seed of the running interpreter is fixed at start-up;
    # this assignment only reaches Python processes launched afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    np.random.seed(seed)

    # Restrict TensorFlow to one thread per op pool before creating the session.
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)

reset_seeds_and_session(seed=1)

### Check TensorFlow version

In [None]:
# Report the installed TensorFlow version.
try:
    tf_version = tf.__version__
except AttributeError:
    # Fallback kept from the original cell for builds that expose
    # ``tf.VERSION`` instead of ``tf.__version__``.
    tf_version = tf.VERSION

# Print explicitly so the output does not depend on the notebook's
# ``ast_node_interactivity`` setting.
print(f"TF version: {tf_version}.")

### Load custom modules

In [None]:
# Reload the project modules if they are already imported in this kernel;
# otherwise import them for the first time.
try:
    importlib.reload(classifier)
    importlib.reload(opt)
    importlib.reload(clfutils)
    print('Modules were reloaded.')
except NameError:
    # Only a missing name means "not imported yet". Any other error raised
    # while reloading (e.g. a broken module) must surface instead of being
    # masked by a stale import.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.optimization as opt
    import xpsdeeplearning.network.utils as clfutils
    print('Modules were loaded.')

### Set up the parameters & folder structure



In [None]:
# Timestamp of this run, expressed in the Europe/Berlin time zone.
berlin_tz = pytz.timezone('Europe/Berlin')
time = datetime.datetime.now().astimezone(berlin_tz).strftime("%Y%m%d_%Hh%Mm")
exp_name = 'Fe_4_classes_variable_linear_comb_gas_phase_combined_data_talos'

# Create the optimization object and its classifier for a regression task.
hyperopt = opt.Hyperoptimization(time=time, exp_name=exp_name)
hyperopt.initialize_clf(task='regression', intensity_only=False)

### Load and inspect the data

In [None]:
# Data set location and loading options.
input_filepath = r'/content/drive/My Drive/deepxps/datasets/20210222_Fe_linear_combination_small_gas_phase.h5'
train_test_split = 0.2
train_val_split = 0.2
no_of_examples = 2000

# Load the data; the loader returns features, labels and augmentation values
# for the train, validation and test sets (nine arrays in total).
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 aug_values_train, aug_values_val, aug_values_test) = hyperopt.clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split)

# Check how the examples are distributed across the classes.
class_distribution = hyperopt.clf.datahandler.check_class_distribution(
    hyperopt.clf.task)
hyperopt.clf.plot_class_distribution()

# Show ten randomly chosen training spectra.
hyperopt.clf.plot_random(no_of_spectra=10, dataset='train')

## Model design

In [None]:
# Reload the models module if it is already imported in this kernel;
# otherwise import it for the first time.
try:
    importlib.reload(models)
    print('Models module was reloaded.')
except NameError:
    # Only a missing name means "not imported yet"; genuine reload errors
    # are allowed to propagate.
    import xpsdeeplearning.network.models as models
    print('Models module was loaded.')

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Flatten, concatenate, Lambda
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import AveragePooling1D, MaxPooling1D
from tensorflow.keras.layers import LayerNormalization
from tensorflow.python.keras import backend as K

class CustomCNNTalos(models.EmptyModel):
    """CNN whose layer settings are read from a hyperparameter dictionary.

    Architecture: three parallel Conv1D layers ("short", "medium", "long")
    are applied to the same input and concatenated, followed by two further
    Conv1D layers, average pooling, flattening, dropout, one dense layer and
    a sigmoid output layer. The final Lambda layer divides each output
    vector by its sum, so the predictions of one example add up to one.

    Parameters
    ----------
    inputshape : tuple
        Shape of one input example, passed to ``Input``.
    num_classes : int
        Number of units in the output layer.
    params : dict
        Hyperparameters. Required keys:
        ``conv_1_{short,medium,long}_{filters,kernel_size,activation}``,
        ``conv_{2,3}_{filters,kernel_size,activation}``, ``drop_1_rate``,
        ``dense_1_units`` and ``dense_1_activation``. Values are cast to
        ``int``/``float``/``str`` before they are handed to Keras.
    """

    def __init__(self, inputshape, num_classes, params):
        input_1 = Input(shape = inputshape)

        # Three parallel convolutions over the same input.
        conv_1_short = Conv1D(
            int(params['conv_1_short_filters']),
            int(params['conv_1_short_kernel_size']),
            padding = 'same',
            activation = str(params['conv_1_short_activation']))(input_1)
        conv_1_medium = Conv1D(
            # Fixed: the filter count was read from an undefined name ``c``.
            int(params['conv_1_medium_filters']),
            int(params['conv_1_medium_kernel_size']),
            padding = 'same',
            activation = str(params['conv_1_medium_activation']))(input_1)
        conv_1_long = Conv1D(
            int(params['conv_1_long_filters']),
            int(params['conv_1_long_kernel_size']),
            padding = 'same',
            activation = str(params['conv_1_long_activation']))(input_1)
        sublayers = [conv_1_short, conv_1_medium, conv_1_long]
        merged_sublayers = concatenate(sublayers)

        conv_2 = Conv1D(
            int(params['conv_2_filters']),
            int(params['conv_2_kernel_size']),
            padding = 'same',
            activation = str(params['conv_2_activation']))(merged_sublayers)
        conv_3 = Conv1D(
            int(params['conv_3_filters']),
            int(params['conv_3_kernel_size']),
            padding = 'same',
            activation = str(params['conv_3_activation']))(conv_2)
        average_pool_1 = AveragePooling1D()(conv_3)
        flatten_1 = Flatten()(average_pool_1)
        drop_1 = Dropout(float(params['drop_1_rate']))(flatten_1)
        dense_1 = Dense(
            int(params['dense_1_units']),
            activation = str(params['dense_1_activation']))(drop_1)
        dense_2 = Dense(num_classes, activation = 'sigmoid')(dense_1)

        # Divide every output vector by its own sum (reshaped to a column so
        # that it broadcasts over the class axis).
        output = Lambda(lambda x: x/tf.reshape(K.sum(x, axis=-1),(-1,1)),
                        name = 'normalization')(dense_2)

        # NOTE(review): this counts the parallel branches, not the model's
        # input tensors (there is a single Input) -- confirm that this is
        # what EmptyModel expects.
        no_of_inputs = len(sublayers)

        super(CustomCNNTalos, self).__init__(inputs = input_1,
                                             outputs = output,
                                             inputshape = inputshape,
                                             num_classes = num_classes,
                                             no_of_inputs = no_of_inputs,
                                             name = 'Custom_CNN_Talos')

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError, MeanAbsoluteError

# Single set of hyperparameters used to build and inspect an initial model.
init_params = {
    'conv_1_short_filters': 12,
    'conv_1_short_kernel_size': 5,
    'conv_1_short_activation': 'relu',
    'conv_1_medium_filters': 12,
    'conv_1_medium_kernel_size': 10,
    'conv_1_medium_activation': 'relu',
    'conv_1_long_filters': 12,
    'conv_1_long_kernel_size': 15,
    'conv_1_long_activation': 'relu',
    'conv_2_filters': 10,
    'conv_2_kernel_size': 5,
    'conv_2_activation': 'relu',
    'conv_3_filters': 10,
    'conv_3_kernel_size': 5,
    'conv_3_activation': 'relu',
    'drop_1_rate': 0.2,
    'dense_1_units': 4000,
    'dense_1_activation': 'relu',
    'optimizer': Adam,
    'learning_rate': 1e-05,
    'loss_function': MeanAbsoluteError,
}

# Build the model from the data handler's input shape and class count.
hyperopt.clf.model = CustomCNNTalos(
    hyperopt.clf.datahandler.input_shape,
    hyperopt.clf.datahandler.num_classes,
    init_params)

# Compile model with loss function and optimizer from parameter dictionary.
# Both entries are classes and are instantiated here.
initial_loss = init_params['loss_function']()
initial_optimizer = init_params['optimizer'](init_params['learning_rate'])
hyperopt.clf.model.compile(loss=initial_loss, optimizer=initial_optimizer)

# Plot summary and save model plot.
hyperopt.clf.summary()
hyperopt.clf.save_and_print_model_image()

## Hyperparameter optimization

### Parameter Scan

In [None]:
# Full search space: every key maps to the list of candidate values.
opt_params = {
    # Parallel first-layer convolutions.
    'conv_1_short_filters': [6, 12, 18],
    'conv_1_short_kernel_size': [3, 5, 7],
    'conv_1_short_activation': ['relu'],
    'conv_1_medium_filters': [6, 12, 18],
    'conv_1_medium_kernel_size': [8, 10, 12],
    'conv_1_medium_activation': ['relu'],
    'conv_1_long_filters': [6, 12, 18],
    'conv_1_long_kernel_size': [13, 15, 17],
    'conv_1_long_activation': ['relu'],
    # Subsequent convolutions.
    'conv_2_filters': [5, 10, 15],
    'conv_2_kernel_size': [3, 5, 7],
    'conv_2_activation': ['relu'],
    'conv_3_filters': [5, 10, 15],
    'conv_3_kernel_size': [3, 5, 7],
    'conv_3_activation': ['relu'],
    # Dropout and dense layer.
    'drop_1_rate': [0.1, 0.2],
    'dense_1_units': [2000, 4000, 6000],
    'dense_1_activation': ['relu'],
    # Training settings.
    'optimizer': [Adam],
    'learning_rate': [1e-05, 3e-04, 1e-04],
    'loss_function': [MeanAbsoluteError],
    'epochs': [50],
    'batch_size': [16, 32],
}

In [None]:
# Minimal search space for a quick trial scan: only the batch size varies
# and each model is trained for two epochs.
test_params = {
    # Parallel first-layer convolutions.
    'conv_1_short_filters': [12],
    'conv_1_short_kernel_size': [5],
    'conv_1_short_activation': ['relu'],
    'conv_1_medium_filters': [12],
    'conv_1_medium_kernel_size': [10],
    'conv_1_medium_activation': ['relu'],
    'conv_1_long_filters': [12],
    'conv_1_long_kernel_size': [15],
    'conv_1_long_activation': ['relu'],
    # Subsequent convolutions.
    'conv_2_filters': [10],
    'conv_2_kernel_size': [5],
    'conv_2_activation': ['relu'],
    'conv_3_filters': [10],
    'conv_3_kernel_size': [5],
    'conv_3_activation': ['relu'],
    # Dropout and dense layer.
    'drop_1_rate': [0.14],
    'dense_1_units': [4000],
    'dense_1_activation': ['relu'],
    # Training settings.
    'optimizer': [Adam],
    'learning_rate': [1e-05],
    'loss_function': [MeanAbsoluteError],
    'epochs': [2],
    'batch_size': [16, 32],
}

In [None]:
# RANDOMNESS ARGUMENTS
random_method = 'quantum'
# Fixed: ``seed`` was never defined at notebook level, so the call below
# raised a NameError. The value matches the one passed to
# reset_seeds_and_session earlier in the notebook.
seed = 1

# LIMITER ARGUMENTS
performance_target = None #None or list [metric, threshold, loss or not]
fraction_limit = None #float
round_limit = None #int
# NOTE(review): this is a fixed calendar date; update it before starting a
# new scan -- confirm how the scan treats a limit that has already passed.
time_limit = '2021-02-25 18:00' #Format "%Y-%m-%d %H:%M". CET -4

# OPTIMIZER ARGUMENTS
reduction_method = 'correlation'
reduction_interval = 30
reduction_window = 20
reduction_threshold = 0.2

# NOTE: the scan runs on the small ``test_params`` space; pass ``opt_params``
# instead to search the full space.
hyperopt.scan_parameter_space(test_params,
                              random_method = random_method,
                              seed = seed,
                              performance_target = performance_target,
                              fraction_limit = fraction_limit,
                              round_limit = round_limit,
                              time_limit = time_limit,
                              reduction_method = reduction_method,
                              reduction_interval = reduction_interval,
                              reduction_window = reduction_window,
                              reduction_threshold = reduction_threshold)

### Analysis of scan results

In [None]:
hyperopt.initialize_analyzer()

def _highlight_best(row):
    """Return one CSS style per column for a row of the results frame.

    The row whose 'val_loss' equals the analyzer's minimum 'val_loss' gets a
    yellow background; every other row gets a white one.
    """
    is_best = row['val_loss'] == hyperopt.analyzer._minimum_value('val_loss')
    cell_style = ('background-color: yellow' if is_best
                  else 'background-color: white')
    return [cell_style] * hyperopt.analyzer.df.shape[1]

# Show the df with the best parameters highlighted.
hyperopt.analyzer.df.style.apply(_highlight_best, axis=1)

#### Line plot of a chosen metric

In [None]:
# Line plot of the validation loss; to_file receives the figure directory.
line_data = hyperopt.analyzer.create_line_data(metric='val_loss')
line_plot = opt.LinePlot(line_data)
line_plot.plot()
line_plot.to_file(hyperopt.fig_dir)

#### Histogram of a metric across all rounds

In [None]:
# Histogram of the validation loss; to_file receives the figure directory.
hist_data = hyperopt.analyzer.create_hist_data(metric='val_loss')
hist_plot = opt.HistPlot(hist_data)
hist_plot.plot()
hist_plot.to_file(hyperopt.fig_dir)

#### Correlation matrix

In [None]:
# Correlation data for the validation loss, plotted as a matrix.
corr_data = hyperopt.analyzer.correlate(metric='val_loss')
corr_plot = opt.CorrPlot(corr_data)
corr_plot.plot()
corr_plot.to_file(hyperopt.fig_dir)

#### Kernel density estimator plot for one metric

In [None]:
# Metric shown in the one-dimensional kernel density estimate.
x_kde_1 = 'val_loss'

kde_data_1 = hyperopt.analyzer.create_kde_data(x_kde_1)
kde_plot_1 = opt.KDEPlot(data=kde_data_1, x=x_kde_1)
kde_plot_1.plot()
kde_plot_1.to_file(hyperopt.fig_dir)

#### Kernel density estimator plot for two metrics

In [None]:
# Metrics shown on the two axes of the kernel density estimate.
x_kde_2 = 'val_loss'
y_kde_2 = 'loss'

kde_data_2 = hyperopt.analyzer.create_kde_data(x_kde_2, y_kde_2)
kde_plot_2 = opt.KDEPlot(data=kde_data_2, x=x_kde_2, y=y_kde_2)
kde_plot_2.plot()
kde_plot_2.to_file(hyperopt.fig_dir)

#### Bar plot with four parameters

In [None]:
# Columns of the results frame used for the four dimensions of the bar plot.
# Other candidates noted by the author: drop_1_rate, conv_3_kernel_size,
# dense_1_units.
x_bar = 'learning_rate'
y_bar = 'val_loss'
hue_bar = 'conv_3_filters'
col_bar = 'dense_1_units'

bar_data = hyperopt.analyzer.create_bar_data(x_bar, y_bar, hue_bar, col_bar)
bar_plot = opt.BarPlot(bar_data, x_bar, y_bar, hue_bar, col_bar)
bar_plot.plot()
bar_plot.to_file(hyperopt.fig_dir)

## Training and testing of best model from parameter space

### Load the model with the best performance

In [None]:
# Metric by which the scan rounds are ranked.
metric = 'val_loss'

best_params = hyperopt.get_best_params(metric=metric)
hyperopt.load_model_from_scans(best=True, metric=metric)

# Print the summary of the loaded model.
hyperopt.clf.summary()

In [None]:
# Copy batch size and epoch count of the best scan round to the data handler.
best_datahandler = hyperopt.clf.datahandler
best_datahandler.batch_size = hyperopt.best_params['batch_size']
best_datahandler.epochs = hyperopt.best_params['epochs']

### Train with best parameters

In [None]:
# The epoch count is fixed here; the batch size comes from the best round.
epochs = 25 #hyperopt.best_params['epochs']
batch_size = int(hyperopt.best_params['batch_size'])

# Train with checkpointing and all loggers enabled, without early stopping.
hist = hyperopt.clf.train(
    checkpoint=True,
    early_stopping=False,
    tb_log=True,
    csv_log=True,
    hyperparam_log=True,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1)

### Plot loss after training

In [None]:
# Plot the loss from the logged training history and write it to file.
graph = clfutils.TrainingGraphs(
    hyperopt.clf.logging.history,
    hyperopt.clf.logging.fig_dir)
graph.plot_loss(to_file=True)

### Evaluate on test data

In [None]:
# Evaluate the trained model and report the resulting loss.
test_loss = hyperopt.clf.evaluate()
print(f'Test loss: {test_loss!s}')

###  Predict on train and test data

In [None]:
# Run the classifier's predict step; it returns one result for the training
# data and one for the test data.
pred_train, pred_test = hyperopt.clf.predict()

###  Save the best model

In [None]:
# Saving the model itself is currently disabled; only the results are pickled.
#hyperopt.clf.save_model()
hyperopt.clf.pickle_results()

### Show some predictions

#### 10 random training samples

In [None]:
# Ten random training spectra, shown together with their predictions.
hyperopt.clf.plot_random(
    no_of_spectra=10,
    dataset='train',
    with_prediction=True)

#### 10 random test samples

In [None]:
# Ten random test spectra, shown together with their predictions.
hyperopt.clf.plot_random(
    no_of_spectra=10,
    dataset='test',
    with_prediction=True)

## Prepare website upload

In [None]:
from xpsdeeplearning.network.prepare_upload import Uploader

# Derive the metadata file name from the logged input file path by swapping
# the file extension for "_metadata.json".
logged_input_filepath = hyperopt.clf.logging.hyperparams["input_filepath"]
dataset_path = logged_input_filepath.rsplit(".", 1)[0] + "_metadata.json"

uploader = Uploader(hyperopt.clf.logging.root_dir, dataset_path)
uploader.prepare_upload_params()
uploader.save_upload_params()

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """Send a JavaScript save command for the notebook to the front end."""
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """Export a notebook file to HTML.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' file to read (parsed as nbformat version 4).
    output_file : str
        Path of the '.html' file to write, encoded as UTF-8.

    Returns
    -------
    None
    """
    import codecs
    import nbformat
    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    # Only the rendered HTML body is needed; the resources are discarded.
    output, _ = exporter.from_notebook_node(output_notebook)
    # The context manager closes the handle even if writing fails; the
    # previous version left the file open.
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)

# NOTE: from here on the name ``time`` refers to the standard-library module;
# it replaces the timestamp string assigned to ``time`` in the setup cell.
import time
# Wait before asking the front end to save the notebook.
time.sleep(30)
save_notebook()
print('Notebook saved!')
# Wait again before reading the notebook file from disk.
# NOTE(review): presumably this gives the save time to complete -- confirm.
time.sleep(30)
# Path of this notebook on Google Drive and of the HTML file to create in
# the classifier's log directory.
current_file = '/content/drive/My Drive/app/xpsdeeplearning/notebooks/talos_optimization.ipynb'
output_file = os.path.join(hyperopt.clf.log_dir,'talos_optimization_out.html')
output_HTML(current_file, output_file)
print('HTML file saved!')