# Predict on new XPS data with a trained CNN on Google Colab

In this notebook, we will use a trained convolutional network to predict on unseen iron XPS spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
# Make Google Drive available inside the Colab runtime.
import os
from google.colab import drive

drive.mount('/content/drive')

# Switch the working directory to the app folder on the mounted drive.
os.chdir('/content/drive/My Drive/app')

### Install packages and import modules

In [None]:
# Install packages
# python-docx is not imported directly in this notebook.
# NOTE(review): presumably it is required by the xpsdeeplearning modules loaded further down — confirm.
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib
import matplotlib.pyplot as plt

# Magic commands
# Render matplotlib figures inline in the cell output.
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression of a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
# The variable is set before the tensorflow import below so that it is already
# in place when tensorflow loads. `os` is imported in the Drive-mount cell above.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

## Predict on new data set

### Load custom modules

In [None]:
import importlib

try:
    # On a re-run of this cell the names already exist, so the modules are
    # reloaded to pick up edits made to their source files.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print('\n Modules were reloaded.')
except NameError:
    # First run in a fresh kernel: the names are not defined yet.
    # Only NameError is handled here; any error raised while reloading or
    # importing the modules themselves propagates instead of being hidden.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print('\n Modules were loaded.')

### Set up the parameters & folder structure

In [None]:
# Seed NumPy's global random number generator.
np.random.seed(502)

# Timestamp of this run, expressed in the Europe/Berlin time zone.
time = (
    datetime.datetime.now()
    .astimezone(pytz.timezone('Europe/Berlin'))
    .strftime("%Y%m%d_%Hh%Mm")
)
data_name = 'Fe_multiple_4_classes_Mark_predict_20200714_14h05m'

# One label per output value of the network.
label_values = ['Fe metal', 'FeO', 'Fe3O4', 'Fe2O3']

clf = classifier.ClassifierMultiple(
    time=time,
    data_name=data_name,
    labels=label_values,
)

### Load and inspect the data

In [None]:
# Location of the HDF5 data set and the split settings handed to the loader.
input_filepath = r'/content/drive/My Drive/app/datasets/20200714_iron_Mark_variable_linear_combination.h5'
train_test_split = 0.99
train_val_split = 0
no_of_examples = 100000

X_train, X_val, X_test, y_train, y_val, y_test = clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split,
)

# Inspect the class distribution and a handful of random test spectra.
class_distribution = clf.check_class_distribution()
clf.plot_class_distribution()
clf.plot_random(no_of_spectra=10, dataset='test')

### Load the model

In [None]:
# Load the saved model from its folder on the mounted drive.
clf.load_model(
    model_path='/content/drive/My Drive/app/saved_models/20200714_14h05m_Fe_multiple_4_classes_variable_linear_comb'
)

### Compile and summarize the model

In [None]:
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.optimizers import Adam

# Optimizer and loss used to compile the loaded model.
learning_rate = 1e-05
optimizer = Adam(learning_rate=learning_rate)
mse = MeanSquaredError()

# =============================================================================
# Alternative loss, currently disabled:
#
# def custom_loss(y_true, y_pred):
#     """
#     Custom loss for linear combination of XPS spectra.
#     """
#     squared_difference = tf.square(tf.subtract(y_true,y_pred))
#     squared_output = tf.square(y_pred)
#
#     return tf.reduce_sum(tf.multiply(squared_output,squared_difference))
# =============================================================================

# Compile the model with the built-in mean squared error loss.
clf.model.compile(optimizer=optimizer, loss=mse)

# Show the model summary, then save and print the model image.
clf.summary()
clf.save_and_print_model_image()

### Evaluate on test data

In [None]:
# Set the batch size on the classifier, then evaluate and report the test loss.
clf.batch_size = 32
test_loss = clf.evaluate()
print('Test loss: ', test_loss, sep='')

### Predict on train & test data

In [None]:
# Keep the predictions returned for the training and the test set.
(pred_train, pred_test) = clf.predict()

### Show some predictions on random test samples

In [None]:
# Plot 15 random test spectra with the prediction option switched on.
clf.plot_random(
    no_of_spectra=15,
    dataset='test',
    with_prediction=True,
)


### Show the worst predictions on the test samples

In [None]:
# Ask the classifier for its 20 worst predictions.
clf.show_worst_predictions(no_of_spectra=20)

### Save data

In [None]:
# Save the hyperparameters and shelve the results (full = False).
clf.save_hyperparams()
clf.shelve_results(full=False)

## Check where and why the predictions fail

### Calculate loss for each example in the test set

In [None]:
# Loss objects used to score every test example on its own.
mse = tf.keras.losses.MeanSquaredError()
mae = tf.keras.losses.MeanAbsoluteError()

# One loss value per test example, in test-set order.
mse_losses = [
    mse(clf.y_test[idx], clf.pred_test[idx]).numpy()
    for idx in range(clf.y_test.shape[0])
]
mae_losses = [
    mae(clf.y_test[idx], clf.pred_test[idx]).numpy()
    for idx in range(clf.y_test.shape[0])
]

In [None]:
def show_worst_of_one_kind(indices, spectra=(0, 20)):
    """
    Plot a window of test spectra with their labels, predictions and losses.

    The spectra are drawn on a grid with five columns. Each panel shows the
    spectrum from clf.X_test together with the rounded label from
    clf.y_test, the rounded prediction from clf.pred_test and the entries
    of mse_losses and mae_losses for that example.

    Parameters
    ----------
    indices : sequence of int
        Positions in the test set, in the order in which they are shown.
    spectra : sequence of two ints, optional
        Start (inclusive) and stop (exclusive) position within ``indices``.
        The window is clamped to the valid range of ``indices``: a negative
        start is treated as 0 and a stop beyond the end as len(indices).
        The default is (0, 20).

    Returns
    -------
    None
        The figure is created as a side effect. Nothing is drawn when the
        clamped window is empty.
    """
    # Clamp the window. Callers pass [len(indices) - 20, len(indices)], whose
    # start is negative whenever fewer than 20 indices are available.
    start = max(spectra[0], 0)
    stop = min(spectra[1], len(indices))
    no_of_spectra = stop - start
    if no_of_spectra <= 0:
        return

    no_of_cols = 5
    # Ceiling division: a partially filled last row still needs a row.
    no_of_rows = -(-no_of_spectra // no_of_cols)

    # squeeze=False keeps axs two-dimensional even for a single row, so
    # that axs[row, col] is always valid.
    _, axs = plt.subplots(nrows=no_of_rows, ncols=no_of_cols, squeeze=False)
    plt.subplots_adjust(left=0.125, bottom=0.5,
                        right=4.8, top=no_of_rows,
                        wspace=0.2, hspace=0.2)

    # Binding-energy axis, identical for every panel.
    # NOTE(review): assumes every spectrum has as many points as this axis — confirm.
    x = np.arange(694, 750.05, 0.05)

    for i in range(no_of_spectra):
        index = indices[start + i]
        y = clf.X_test[index]

        real = 'Real: ' + str(np.around(clf.y_test[index], decimals=3)) + '\n'
        pred = ('Prediction: ' +
                str(list(np.around(clf.pred_test[index], decimals=3))) + '\n')
        loss_text = ('MSE: ' + str(np.around(mse_losses[index], decimals=3)) + ', ' +
                     'MAE: ' + str(np.around(mae_losses[index], decimals=3)))

        row, col = divmod(i, no_of_cols)
        ax = axs[row, col]
        # The spectrum is plotted against the flipped axis, i.e. its first
        # value is drawn at the highest binding energy.
        ax.plot(np.flip(x), y)
        ax.invert_xaxis()
        ax.set_xlim(750.05, 694)
        ax.set_xlabel('Binding energy (eV)')
        ax.set_ylabel('Intensity (arb. units)')

        # Stack the three annotations in the lower left corner of the panel.
        for height, text in ((0.3, real), (0.2, pred), (0.1, loss_text)):
            ax.text(0.025, height, text,
                    horizontalalignment='left',
                    verticalalignment='top',
                    transform=ax.transAxes,
                    fontsize=11)

### Show worst predictions for loss threshold

In [None]:
threshold = 0.1

# Test-set indices whose mean absolute error is at least the threshold,
# ordered from the largest error to the smallest. The comparison is ">=" so
# that it matches the "at least" wording of the message printed below.
worst_indices = [index for (loss, index) in sorted(
                    [(loss, index) for (index, loss) in enumerate(mae_losses)
                     if loss >= threshold],
                    reverse=True)]

# Show the last (up to) 20 entries of that list, i.e. the examples with the
# smallest error that still reaches the threshold. The start is kept at or
# above 0 because fewer than 20 examples may qualify, and the plot is skipped
# when none do.
if worst_indices:
    show_worst_of_one_kind(worst_indices,
                           spectra=[max(len(worst_indices) - 20, 0),
                                    len(worst_indices)])

print('{0} of {1} test samples have a mean absolute error of at least {2} = {3}%.'.format(
    str(len(worst_indices)),
    str(clf.y_test.shape[0]),
    str(threshold),
    str(100*threshold)))

### Show worst predictions for loss threshold (single iron spectra)

In [None]:
threshold = 0.2

# Test-set indices of the examples whose label contains exactly three zeros
# (a single iron species), ordered from the largest MAE to the smallest.
indices_single = [index for (loss, index) in sorted(
                     [(loss, index) for (index, loss) in enumerate(mae_losses)
                      if len(np.where(clf.y_test[index] == 0.)[0]) == 3],
                     reverse=True)]
# Subset of those examples whose MAE is at least the threshold; filtering the
# sorted list keeps the descending order.
worst_indices_single = [index for index in indices_single
                        if mae_losses[index] >= threshold]

# Show the last (up to) 20 entries. The start is kept at or above 0 because
# fewer than 20 examples may qualify, and the plot is skipped when none do.
if worst_indices_single:
    show_worst_of_one_kind(worst_indices_single,
                           spectra=[max(len(worst_indices_single) - 20, 0),
                                    len(worst_indices_single)])

# The share is rounded after the conversion to percent, which avoids
# floating point artifacts in the printed value. max(..., 1) avoids a
# division by zero when no single-species example exists.
print('{0} of {1} test samples with single iron species ({2}%) have a mean absolute error of at least {3} = {4}%.'.format(
    str(len(worst_indices_single)),
    str(len(indices_single)),
    str(np.around(100 * len(worst_indices_single) / max(len(indices_single), 1), decimals=1)),
    str(threshold),
    str(100*threshold)))

### Show worst predictions for loss threshold (linearly combined iron spectra)

In [None]:
threshold = 0.2

# Test-set indices of the examples whose label does not contain exactly three
# zeros (linear combinations of several iron species), ordered from the
# largest MAE to the smallest.
indices_multiple = [index for (loss, index) in sorted(
                       [(loss, index) for (index, loss) in enumerate(mae_losses)
                        if len(np.where(clf.y_test[index] == 0.)[0]) != 3],
                       reverse=True)]
# Subset of those examples whose MAE is at least the threshold; filtering the
# sorted list keeps the descending order.
worst_indices_multiple = [index for index in indices_multiple
                          if mae_losses[index] >= threshold]

# Show the last (up to) 20 entries. The start is kept at or above 0 because
# fewer than 20 examples may qualify, and the plot is skipped when none do.
if worst_indices_multiple:
    show_worst_of_one_kind(worst_indices_multiple,
                           spectra=[max(len(worst_indices_multiple) - 20, 0),
                                    len(worst_indices_multiple)])

# The share is rounded after the conversion to percent, which avoids
# floating point artifacts in the printed value. max(..., 1) avoids a
# division by zero when no multi-species example exists.
print('{0} of {1} test samples with multiple iron species ({2}%) have a mean absolute error of at least {3} = {4}%.'.format(
    str(len(worst_indices_multiple)),
    str(len(indices_multiple)),
    str(np.around(100 * len(worst_indices_multiple) / max(len(indices_multiple), 1), decimals=1)),
    str(threshold),
    str(100*threshold)))

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """Display a JavaScript snippet that calls the front end's save_notebook()."""
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """
    Export a notebook file to HTML with nbconvert's HTMLExporter.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' file to read.
    output_file : str
        Path of the '.html' file to write (UTF-8 encoded).

    Returns
    -------
    None
    """
    import nbformat

    exporter = HTMLExporter()
    # Read the notebook as notebook format version 4.
    output_notebook = nbformat.read(read_file, as_version=4)
    # The exporter also returns a resources object, which is not needed here.
    output, _ = exporter.from_notebook_node(output_notebook)
    # The context manager guarantees that the file is flushed and closed.
    # newline='' writes the text without newline translation.
    with open(output_file, 'w', encoding='utf-8', newline='') as html_file:
        html_file.write(output)

# NOTE: this import rebinds the name `time`, which the parameter cell further
# up uses for the timestamp string of the run.
import time
import os

# NOTE(review): the fixed pauses presumably give the front end time to finish
# rendering the outputs before saving, and to finish saving before the
# notebook file is read for the export — confirm.
time.sleep(20)
save_notebook()
# Printed unconditionally; the success of the save itself is not checked.
print('Notebook saved!')
time.sleep(30)
# Notebook file to export (presumably this notebook — verify the path) and the
# destination of the HTML file inside the classifier's log directory.
current_file = '/content/drive/My Drive/app/xpsdeeplearning/predict_unknown_multiple.ipynb'
output_file = os.path.join(clf.log_dir,'predict_unknown_multiple_out.html')
output_HTML(current_file, output_file)