# Train a deep CNN on XPS data on Google Colab

In this notebook, we will train a deep convolutional network on palladium (Pd) XPS spectra made up of linear combinations of single Pd reference spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
# Mount Google Drive at /content/drive. The google.colab import means this
# cell only runs inside Google Colab.
from google.colab import drive
import os

drive.mount('/content/drive')

# Make the project folder on Drive the working directory.
# NOTE(review): presumably later relative paths (logs, figures) resolve
# against this folder - confirm.
os.chdir('/content/drive/My Drive/deepxps')

### Install packages and import modules

In [None]:
%%capture
# Install packages
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib

# Magic commands
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

### Check TensorFlow version

In [None]:
# Display the version of the imported TensorFlow package.
tf.__version__

### Check TPU connection

In [None]:
from tensorflow.python.profiler import profiler_client

# COLAB_TPU_ADDR is read from the environment; when it is missing (no TPU
# runtime attached) report that instead of failing with a KeyError, so that
# "Run all" keeps working on other runtimes.
tpu_address = os.environ.get('COLAB_TPU_ADDR')
if tpu_address is None:
    print('COLAB_TPU_ADDR is not set - no TPU connected, skipping TPU check.')
else:
    # The profiler service address is derived from the TPU address by
    # swapping port 8470 for 8466.
    tpu_profile_service_address = tpu_address.replace('8470', '8466')
    print(profiler_client.monitor(tpu_profile_service_address, 100, 2))

## Initial training

### Load custom modules

In [None]:
try:
    # On a re-run the module names already exist: reload them so that edits
    # to the source files are picked up without restarting the kernel.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print('Modules were reloaded.')
except NameError:
    # First run in this kernel: the names are not bound yet, so import them.
    # Only NameError is caught so that a genuine failure while reloading
    # (e.g. a syntax error in the module) is reported instead of hidden.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print('Modules were loaded.')

### Set up the parameters & folder structure



In [None]:
# Seed NumPy's global random number generator.
np.random.seed(502)

# Time stamp (Europe/Berlin) and experiment name handed to the Classifier.
time = (
    datetime.datetime.now()
    .astimezone(pytz.timezone('Europe/Berlin'))
    .strftime("%Y%m%d_%Hh%Mm")
)
exp_name = 'Pd_2_classes_linear_comb_small_gas_phase'

clf = classifier.Classifier(
    time=time,
    exp_name=exp_name,
    task='regression',
    intensity_only=True,
)

# Alternative if the labels are not saved with the data:
# =============================================================================
# labels = ['Fe metal', 'FeO', 'Fe3O4', 'Fe2O3']
# clf = classifier.Classifier(time=time,
#                             exp_name=exp_name,
#                             task='regression',
#                             intensity_only=True,
#                             labels=labels)
# =============================================================================

### Load and inspect the data

In [None]:
input_filepath = r'/content/drive/My Drive/deepxps/datasets/20210308_Pd_linear_combination_small_gas_phase.h5'

# Split fractions and number of examples passed to the data loader.
train_test_split = 0.2
train_val_split = 0.2
no_of_examples = 200000

(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    aug_values_train, aug_values_val, aug_values_test,
) = clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split,
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='train')

### Design the model

In [None]:
try:
    # Re-run: the module is already bound, reload it to pick up source edits.
    importlib.reload(models)
    print('Models module was reloaded.')
except NameError:
    # First run: `models` is not bound yet. Only NameError is caught so that
    # a genuine failure while reloading is reported instead of hidden.
    import xpsdeeplearning.network.models as models
    print('Models module was loaded.')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class ResNet1D(models.EmptyModel):
    """
    One-dimensional residual network built with the Keras functional API.

    The layout follows the ResNet50 scheme of the original ResNet paper,
    but only the first three stages are built here:
    ZEROPAD -> CONV1D -> BATCHNORM -> RELU -> MAXPOOL -> CONVBLOCK ->
    IDBLOCK*2 -> CONVBLOCK -> IDBLOCK*3 -> AVGPOOL (optional) ->
    FLATTEN -> DENSE (sigmoid) -> OUTPUT NORMALIZATION
    """
    def __init__(self,
                 inputshape,
                 num_classes,
                 ap=False,
                 no_of_inputs=1):
        """
        Parameters
        ----------
        inputshape : tuple
            Shape of one input example, passed to layers.Input.
        num_classes : int
            Number of output classes (units of the output layer).
        ap : bool, optional
            If ap, then an AveragePooling1D layer is added after the
            residual blocks. The default is False.
        no_of_inputs : int, optional
            Forwarded to models.EmptyModel. Only a single input layer
            is built here regardless of this value. The default is 1.

        Returns
        -------
        None.

        """
        self.ap = ap

        self.input_1 = layers.Input(shape=inputshape,
                                    name='input_1')

        # Zero-Padding
        self.zero_pad_1 = layers.ZeroPadding1D(padding=3)(self.input_1)

        # Stage 1
        self.conv_1 = layers.Conv1D(filters=64,
                                    kernel_size=2,
                                    padding='valid',
                                    kernel_initializer=glorot_uniform(seed=0),
                                    name='stage1_conv')(self.zero_pad_1)
        # NOTE(review): axis=1 normalizes along the first non-batch axis of
        # the Conv1D output; confirm this is intended rather than the
        # channel axis (-1). Left unchanged to stay compatible with
        # previously saved weights.
        self.batch_1 = layers.BatchNormalization(
            axis=1,
            name='stage1_bn')(self.conv_1)
        self.act_1 = layers.Activation(
            activation='relu',
            name='stage1_act')(self.batch_1)
        # With pool_size=1 and strides=1 this layer passes values through
        # unchanged.
        self.max_pool_1 = layers.MaxPooling1D(
            pool_size=1,
            strides=1,
            name='stage1_max_pool')(self.act_1)

        # Stage 2
        self.conv_block_2a = models.ConvBlock(
            filters=[32,32,128],
            kernel_size_2=4,
            stage=2,
            block='a')(self.max_pool_1)
        self.id_block_2b = models.IdentityBlock(
            filters=[32,32,128],
            kernel_size_2=1,
            stage=2,
            block='b')(self.conv_block_2a)
        self.id_block_2c = models.IdentityBlock(
            filters=[32,32,128],
            kernel_size_2=1,
            stage=2,
            block='c')(self.id_block_2b)

        # Stage 3
        self.conv_block_3a = models.ConvBlock(
            filters=[64,64,256],
            kernel_size_2=3,
            stage=3,
            block='a')(self.id_block_2c)
        self.id_block_3b = models.IdentityBlock(
            filters=[64,64,256],
            kernel_size_2=3,
            stage=3,
            block='b')(self.conv_block_3a)
        self.id_block_3c = models.IdentityBlock(
            filters=[64,64,256],
            kernel_size_2=3,
            stage=3,
            block='c')(self.id_block_3b)
        self.id_block_3d = models.IdentityBlock(
            filters=[64,64,256],
            kernel_size_2=3,
            stage=3,
            block='d')(self.id_block_3c)

        # Stages 4 and 5 of the ResNet50 layout are disabled in this model.
        # For reference, they were:
        #   Stage 4: ConvBlock 'a' + IdentityBlocks 'b'-'f',
        #            filters=[128,128,1024], kernel_size_2=3
        #   Stage 5: ConvBlock 'a' + IdentityBlocks 'b'-'c',
        #            filters=[256,56,1024], kernel_size_2=1
        # each block taking the previous block's output as its input.

        # Average pooling
        if self.ap:
            self.avg_pool = layers.AveragePooling1D(
                pool_size=3,
                name='avg_pool')(self.id_block_3d)
            self.flatten = layers.Flatten(name='flatten')(self.avg_pool)

        else:
            self.flatten = layers.Flatten(name='flatten')(self.id_block_3d)

        # output layer
        self.dense = layers.Dense(units=num_classes,
                                  activation='sigmoid',
                                  kernel_initializer=glorot_uniform(seed=0),
                                  name='dense')(self.flatten)

        # output norm: rescale each example's outputs so that they sum to 1.
        # divide_no_nan gives 0 instead of NaN if all sigmoid outputs of an
        # example underflow to 0; for any non-zero sum the result equals
        # plain division by the per-example sum.
        self.output_norm = layers.Lambda(
            lambda x: tf.math.divide_no_nan(
                x, tf.reduce_sum(x, axis=-1, keepdims=True)),
            name='output_norm')(self.dense)

        super().__init__(inputs=self.input_1,
                         outputs=self.output_norm,
                         inputshape=inputshape,
                         num_classes=num_classes,
                         no_of_inputs=no_of_inputs,
                         name='ResNet1D')

In [None]:
from tensorflow.keras import layers
from tensorflow.keras.initializers import glorot_uniform
from tensorflow.python.keras import backend as K

class RegressionCNN(models.EmptyModel):
    """
    A CNN with three parallel convolutional layers of different kernel
    size (5, 10 and 15) at the beginning, all applied to the same input
    and concatenated. Works well for learning across scales.

    This is to be used for regression on all labels. -> sigmoid
    activation in the last layer, followed by a normalization so that
    the outputs of each example sum to 1.
    """
    def __init__(self, inputshape, num_classes):
        """
        Parameters
        ----------
        inputshape : tuple
            Shape of one input example, passed to layers.Input.
        num_classes : int
            Number of units in the output layer.

        Returns
        -------
        None.

        """
        self.input_1 = layers.Input(shape=inputshape)

        # Three convolutions with different kernel sizes on the same input.
        self.conv_1_short = layers.Conv1D(filters=12,
                                          kernel_size=5,
                                          strides=1,
                                          padding='same',
                                          activation='relu',
                                          name='conv_1_short')(self.input_1)
        self.conv_1_medium = layers.Conv1D(filters=12,
                                           kernel_size=10,
                                           strides=1,
                                           padding='same',
                                           activation='relu',
                                           name='conv_1_medium')(self.input_1)
        self.conv_1_long = layers.Conv1D(filters=12,
                                         kernel_size=15,
                                         strides=1,
                                         padding='same',
                                         activation='relu',
                                         name='conv_1_long')(self.input_1)

        sublayers = [self.conv_1_short, self.conv_1_medium, self.conv_1_long]
        merged_sublayers = layers.concatenate(sublayers)

        self.conv_2 = layers.Conv1D(filters=10,
                                    kernel_size=5,
                                    strides=1,
                                    padding='valid',
                                    activation='relu',
                                    name='conv_2')(merged_sublayers)
        self.conv_3 = layers.Conv1D(filters=10,
                                    kernel_size=5,
                                    strides=1,
                                    padding='valid',
                                    activation='relu',
                                    name="conv_3")(self.conv_2)
        self.average_pool_1 = layers.AveragePooling1D(
            name='average_pool_1')(self.conv_3)

        self.flatten_1 = layers.Flatten(name='flatten1')(self.average_pool_1)
        self.drop_1 = layers.Dropout(rate=0.2,
                                     name='drop_1')(self.flatten_1)
        self.dense_1 = layers.Dense(units=4000,
                                    activation='relu',
                                    name='dense_1')(self.drop_1)
        self.dense_2 = layers.Dense(units=num_classes,
                                    activation='sigmoid',
                                    name='dense_2')(self.dense_1)

        # Rescale each example's outputs so that they sum to 1.
        # divide_no_nan gives 0 instead of NaN if all sigmoid outputs of an
        # example underflow to 0; for any non-zero sum the result equals
        # plain division by the per-example sum.
        self.output_norm = layers.Lambda(
            lambda x: tf.math.divide_no_nan(
                x, tf.reduce_sum(x, axis=-1, keepdims=True)),
            name='output_normalization')(self.dense_2)

        # NOTE(review): this passes the number of parallel conv branches (3)
        # as no_of_inputs although the model has a single Input layer;
        # confirm how models.EmptyModel uses this value.
        no_of_inputs = len(sublayers)

        super().__init__(inputs=self.input_1,
                         outputs=self.output_norm,
                         inputshape=inputshape,
                         num_classes=num_classes,
                         no_of_inputs=no_of_inputs,
                         name='RegressionCNN')

#### Build the model

In [None]:
# Build the model from the RegressionCNN class defined in this notebook,
# using the input shape and number of classes held by the data handler.
clf.model = RegressionCNN(clf.datahandler.input_shape,
                          clf.datahandler.num_classes)
# Alternative: use the ResNet1D class defined in this notebook.
# =============================================================================
# clf.model = ResNet1D(clf.datahandler.input_shape,
#                      clf.datahandler.num_classes,
#                      ap=True)
# =============================================================================

# Alternative: Build model from available models in models.py
# =============================================================================
# clf.model = models.RegressionCNN(clf.datahandler.input_shape, 
#                                  clf.datahandler.num_classes)
# =============================================================================
# =============================================================================
# clf.model = models.ResNet1D(clf.datahandler.input_shape,
#                             clf.datahandler.num_classes,
#                             ap=True)
# =============================================================================

### Compile and summarize the model

In [None]:
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanAbsoluteError, MeanSquaredError
from tensorflow.keras.losses import CategoricalCrossentropy

learning_rate = 1e-05
optimizer = Adam(learning_rate=learning_rate)

# The loss depends on the task: mean absolute error for regression,
# categorical cross-entropy (with accuracy as a metric) for classification.
if clf.task == 'regression':
    mae = MeanAbsoluteError()
    clf.model.compile(loss=mae, optimizer=optimizer)
    # Alternative loss for regression:
    # =========================================================================
    # mse = MeanSquaredError()
    # clf.model.compile(loss=mse, optimizer=optimizer)
    # =========================================================================

elif clf.task == 'classification':
    categorical_crossentropy = CategoricalCrossentropy()
    clf.model.compile(
        loss=categorical_crossentropy,
        optimizer=optimizer,
        metrics=['accuracy'],
    )

# Plot summary and save model plot.
clf.summary()
clf.save_and_print_model_image()

### Show initial predictions

In [None]:
# Predictions of the model before any training in this session.
pred_train_initial, pred_test_initial = clf.predict()

# Print the first five labels next to the corresponding predictions.
print('Train:')
for i in range(5):
    real_label = np.round(y_train[i], 3)
    print(f'real: {real_label!s}', f'pred: {pred_train_initial[i]!s}')
print('Test:')
for i in range(5):
    real_label = np.round(y_test[i], 3)
    print(f'real: {real_label!s}', f'pred: {pred_test_initial[i]!s}')

### Train

In [None]:
epochs = 500
batch_size = 32

# Train with checkpointing and all logging options switched on;
# early stopping is switched off.
hist = clf.train(
    checkpoint=True,
    early_stopping=False,
    tb_log=True,
    csv_log=True,
    hyperparam_log=True,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
)

In [None]:
# Play a sound in the browser via Colab's JavaScript bridge
# (presumably as an audible signal that training has finished).
from google.colab import output
output.eval_js('new Audio("http://soundbible.com/grab.php?id=1795&type=mp3").play()')

### Plot loss

In [None]:
# Plot the training history; the loss plot is also written to file.
graph = clfutils.TrainingGraphs(clf.history, clf.logging.fig_dir)
graph.plot_loss(to_file=True)
if clf.task == 'classification':
    # Accuracy is only plotted for classification runs.
    graph.plot_accuracy(to_file=False)

### Evaluate on test data

In [None]:
# For classification the evaluation result is indexed as (loss, accuracy);
# for regression it is used directly as the loss.
if clf.task == 'classification':
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print(f'Test loss: {np.round(test_loss, decimals=8)!s}')
    print(f'Test accuracy: {np.round(test_accuracy, decimals=3)!s}')
elif clf.task == 'regression':
    test_loss = clf.evaluate()
    print(f'Test loss: {np.round(test_loss, decimals=8)!s}')

###  Predict on train and test data

In [None]:
# Predictions on the train and test sets with the trained model.
pred_train, pred_test = clf.predict()
if clf.task == 'classification':
    # Class predictions are only requested for classification runs.
    pred_train_classes, pred_test_classes = clf.predict_classes()

### Show some predictions

#### 10 random training samples

In [None]:
# Plot 10 random training spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)

#### 10 random test samples

In [None]:
# Plot 10 random test spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)

### Show wrong/worst predictions

In [None]:
# Show misclassified examples (classification) or the 20 worst
# predictions (regression).
if clf.task == 'classification':
    clf.show_wrong_classification()
elif clf.task == 'regression':
    clf.show_worst_predictions(no_of_spectra=20)

### Save model and results

In [None]:
# Saving the model is disabled here; only the results are pickled.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report is created for the directory named '<time>_<exp_name>'.
dir_name = '_'.join([clf.time, clf.exp_name])
rep = clfutils.Report(dir_name)
rep.write()

## Continue training

### Load custom modules

In [None]:
try:
    import importlib
    # Re-run or continued session: the modules are already bound, so reload
    # them to pick up source edits.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print('\n Modules were reloaded.')
except NameError:
    # Fresh kernel: the names are not bound yet, so import them. Only
    # NameError is caught so that a genuine failure while reloading is
    # reported instead of hidden.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print('\n Modules were loaded.')

### Reload classifier from logpath

In [None]:
# Log directory of the earlier run to continue from; the classifier is
# restored from it and replaces any existing `clf`.
logpath = r'/content/drive/My Drive/deepxps/logs/20210309_17h23m_Pd_2_classes_linear_comb_small_gas_phase'
clf = classifier.restore_clf_from_logs(logpath)

### Load and inspect the data

In [None]:
# Reload the data with the settings stored in the restored hyperparameters.
(
    X_train, X_val, X_test,
    y_train, y_val, y_test,
    aug_values_train, aug_values_val, aug_values_test,
) = clf.load_data_preprocess(
    input_filepath=clf.logging.hyperparams['input_filepath'],
    no_of_examples=clf.logging.hyperparams['no_of_examples'],
    train_test_split=clf.logging.hyperparams['train_test_split'],
    train_val_split=clf.logging.hyperparams['train_val_split'],
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='train')

### Load the model

In [None]:
# Load the model into the restored classifier, requesting compilation.
clf.load_model(compile_model=True)

### Summarize the model

In [None]:
# Print the summary of the loaded model, then save and display its
# architecture image.
clf.summary()
clf.save_and_print_model_image()

### Show predictions with current model

In [None]:
# Predictions of the restored model before training continues.
pred_train_intermediate, pred_test_intermediate = clf.predict()

# Print the first five labels next to the corresponding predictions.
print('Train:')
for i in range(5):
    real_label = np.round(y_train[i], 3)
    print(f'real: {real_label!s}', f'pred: {pred_train_intermediate[i]!s}')
print('Test:')
for i in range(5):
    real_label = np.round(y_test[i], 3)
    print(f'real: {real_label!s}', f'pred: {pred_test_intermediate[i]!s}')

### Train

In [None]:
epochs = 1000
# Reuse the batch size stored in the restored hyperparameters.
batch_size = clf.logging.hyperparams['batch_size']

new_learning_rate = 1e-05  # alternative: 5e-06

hist = clf.train(
    checkpoint=True,
    early_stopping=False,
    tb_log=True,
    csv_log=True,
    hyperparam_log=True,
    epochs=epochs,
    batch_size=batch_size,
    verbose=1,
    new_learning_rate=new_learning_rate,
)

### Plot loss

In [None]:
# Plot the training history and write the plots to file.
graph = clfutils.TrainingGraphs(clf.history, clf.logging.fig_dir)
graph.plot_loss(to_file=True)
if clf.task == 'classification':
    # Accuracy is only plotted for classification runs.
    graph.plot_accuracy(to_file=True)

### Evaluate on test data

In [None]:
# For classification the evaluation result is indexed as (loss, accuracy);
# for regression it is used directly as the loss.
if clf.task == 'classification':
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print(f'Test loss: {np.round(test_loss, decimals=8)!s}')
    print(f'Test accuracy: {np.round(test_accuracy, decimals=3)!s}')
elif clf.task == 'regression':
    test_loss = clf.evaluate()
    print(f'Test loss: {np.round(test_loss, decimals=8)!s}')

###  Predict on train and test data

In [None]:
# Predictions on the train and test sets after the continued training.
pred_train, pred_test = clf.predict()
if clf.task == 'classification':
    # Class predictions are only requested for classification runs.
    pred_train_classes, pred_test_classes = clf.predict_classes()

### Show some predictions

#### 10 random training samples

In [None]:
# Plot 10 random training spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='train', with_prediction=True)

#### 10 random test samples

In [None]:
# Plot 10 random test spectra together with their predictions.
clf.plot_random(no_of_spectra=10, dataset='test', with_prediction=True)

### Show wrong/worst predictions

In [None]:
# Show misclassified examples (classification) or the 20 worst
# predictions (regression).
if clf.task == 'classification':
    clf.show_wrong_classification()
elif clf.task == 'regression':
    clf.show_worst_predictions(no_of_spectra=20)

### Save model and results

In [None]:
# Saving the model is disabled here; only the results are pickled.
#clf.save_model()
clf.pickle_results()

### Generate report

In [None]:
# The report is created for the directory named '<time>_<exp_name>'.
dir_name = '_'.join([clf.time, clf.exp_name])
rep = clfutils.Report(dir_name)
rep.write()

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """
    Send a JavaScript snippet to the notebook front end that calls
    IPython.notebook.save_notebook().

    Returns
    -------
    None.

    """
    save_request = Javascript("IPython.notebook.save_notebook()")
    # Only the JavaScript representation of the object is displayed.
    display(save_request, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """
    Convert a notebook file to an HTML file.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' file to read.
    output_file : str
        Path of the '.html' file to write (UTF-8 encoded).

    Returns
    -------
    None.

    """
    import nbformat

    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    # from_notebook_node returns (body, resources); only the body is written.
    html_body, _ = exporter.from_notebook_node(output_notebook)
    # Use a context manager so the file is flushed and closed even if the
    # write fails. newline='' keeps line endings untranslated, matching the
    # previous codecs.open behaviour.
    with open(output_file, 'w', encoding='utf-8', newline='') as html_file:
        html_file.write(html_body)

# NOTE(review): `time` was bound to the run's time-stamp string in the
# parameter cell; this import rebinds the name to the standard module.
import time
import os

# Pause before and after the save request.
# NOTE(review): presumably the pauses give pending output and the save
# time to complete before the file is read - confirm they are sufficient.
time.sleep(20)
save_notebook()
print('Notebook saved!')
time.sleep(30)
# Hard-coded location of this notebook on Drive; the HTML export is written
# into the classifier's log directory.
current_file = '/content/drive/My Drive/deepxps/xpsdeeplearning/train.ipynb'
output_file = os.path.join(clf.logging.log_dir,'train_out.html')
output_HTML(current_file, output_file)
print('HTML file saved!')