# Predict on new data using a trained CNN on XPS data on Google Colab

In this notebook, we will use a trained convolutional network to predict on unseen XPS spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
# Mount Google Drive into the Colab file system
from google.colab import drive
import os

drive.mount('/content/drive')

# Make the project folder on the mounted drive the working directory
# (presumably so that the project package and relative paths resolve
# from there -- TODO confirm)
os.chdir('/content/drive/My Drive/deepxps')

### Install packages and import modules

In [None]:
%%capture
# Install packages
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib
import matplotlib.pyplot as plt

# Magic commands
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

### Set seeds and restart session to ensure reproducibility

In [None]:
def reset_seeds_and_session(seed=1):
    """
    Seed the random number generators and install a fresh TF1-style session.

    Parameters
    ----------
    seed : int, optional
        Seed handed to Python's ``random`` module, NumPy and TensorFlow.
        The default is 1.

    Returns
    -------
    None.

    """
    # Local import keeps this cell independent of the import cell.
    import random

    # PYTHONHASHSEED is read when the interpreter starts, so setting it
    # here does not change hashing in the running kernel; it is only
    # inherited by processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # Seed every generator in use, including the stdlib one.
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Restrict TensorFlow to a single thread for intra- and inter-op
    # parallelism (presumably to avoid run-to-run differences caused by
    # parallel execution -- TODO confirm).
    session_conf = tf.compat.v1.ConfigProto(
        intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(
        graph=tf.compat.v1.get_default_graph(),
        config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)


reset_seeds_and_session(seed=1)

### Check TensorFlow version

In [None]:
# Bare expression: its value is displayed as the output of this cell.
f"TF version: {tf.__version__}."

## Predict on new data set

### Load custom modules

In [None]:
import importlib

try:
    # On a re-run the modules are already bound in the kernel namespace:
    # reload them so that edits to their source files are picked up.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print('\n Modules were reloaded.')
except NameError:
    # First run in this kernel: the names do not exist yet, so import.
    # Only NameError is caught, so genuine errors raised while reloading
    # (e.g. a syntax error in the module) are no longer hidden.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print('Modules were loaded.')

### Set up the parameters & folder structure

In [None]:
np.random.seed(502)

# Timestamp (Europe/Berlin local time) handed to the classifier together
# with the experiment name. It is deliberately not called `time`, so the
# standard-library module of that name is not shadowed.
timestamp = datetime.datetime.now().astimezone(
    pytz.timezone('Europe/Berlin')).strftime("%Y%m%d_%Hh%Mm")
exp_name = 'test'

clf = classifier.Classifier(time=timestamp,
                            exp_name=exp_name,
                            task='regression',
                            intensity_only=True)

### Load and inspect the data

In [None]:
# Location of the .h5 data set on the mounted drive.
input_filepath = r'/content/drive/My Drive/deepxps/datasets/20210903_CoFe_combined_without_auger_peaks.h5'

# Number of examples and split fractions handed to the data loader.
no_of_examples = 100  # other sizes appear to have been tried before
train_test_split = 0.99
train_val_split = 0

# Load the data; the loader returns features, labels and names for the
# train, validation and test subsets.
(X_train, X_val, X_test,
 y_train, y_val, y_test,
 names_train, names_val, names_test) = clf.load_data_preprocess(
    input_filepath=input_filepath,
    no_of_examples=no_of_examples,
    train_test_split=train_test_split,
    train_val_split=train_val_split,
)

# Check how the examples are distributed across the classes.
class_distribution = clf.datahandler.check_class_distribution(clf.task)
clf.plot_class_distribution()

# Plot ten randomly chosen test spectra.
clf.plot_random(no_of_spectra=10, dataset='test')

### Replace the outer 100 points on each side with the 10-point average of the adjacent values (cutoff: 5 eV on each side)

In [None]:
def flatten_spectrum_edges(spectra, cutoff=100, window=10):
    """
    Overwrite both ends of every spectrum with a constant value, in place.

    The first ``cutoff`` points of each spectrum are replaced by the
    average of the ``window`` points that follow them. The last
    ``cutoff`` points are replaced by the average of the ``window``
    points that precede them.

    Parameters
    ----------
    spectra : iterable of arrays
        Arrays that are sliced along their first axis and indexed with
        two indices (``arr[points, :]``). They are modified in place.
    cutoff : int, optional
        Number of points overwritten on each side. The default is 100.
    window : int, optional
        Number of adjacent points that are averaged. The default is 10.

    Raises
    ------
    ValueError
        If ``cutoff`` or ``window`` is smaller than 1.

    Returns
    -------
    None.

    """
    # cutoff == 0 would turn ``arr[-cutoff:]`` into the whole array.
    if cutoff < 1 or window < 1:
        raise ValueError('cutoff and window must both be at least 1.')

    for spectrum in spectra:
        spectrum[:cutoff, :] = np.average(
            spectrum[cutoff:cutoff + window, :], axis=0)
        spectrum[-cutoff:, :] = np.average(
            spectrum[-(cutoff + window):-cutoff, :], axis=0)


# Apply the same edge treatment to the full data set and to every subset.
for dataset in [clf.datahandler.X,
                clf.datahandler.X_train,
                clf.datahandler.X_val,
                clf.datahandler.X_test]:
    flatten_spectrum_edges(dataset, cutoff=100, window=10)

In [None]:
# Plot ten randomly chosen test spectra again to inspect the modified data.
clf.plot_random(no_of_spectra=10, dataset='test')

### Load and compile the model

In [None]:
# Load the trained model stored in the folder of an earlier run.
clf.load_model(
    model_path='/content/drive/My Drive/deepxps/runs/20210914_19h11m_FeCo_combined_without_auger_7_classes_no_window/model'
)

### Plot summary and save model plot.


In [None]:
# Print the model summary, then save and show an image of the model.
clf.summary()
clf.save_and_print_model_image()

### Evaluate on test data

In [None]:
# Batch size stored in the logged hyperparameters before evaluating
# (presumably read by clf.evaluate() -- TODO confirm).
clf.logging.hyperparams['batch_size'] = 32

if clf.task == 'regression':
    # For regression the evaluation result is reported as the loss only.
    test_loss = clf.evaluate()
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))
elif clf.task == 'classification':
    # For classification the first two entries are loss and accuracy.
    score = clf.evaluate()
    test_loss, test_accuracy = score[0], score[1]
    print('Test loss: ' + str(np.round(test_loss, decimals=8)))
    print('Test accuracy: ' + str(np.round(test_accuracy, decimals=3)))

### Predict on train & test data

In [None]:
# Predictions for the training and the test data.
pred_train, pred_test = clf.predict()
# Predicted classes are only requested for classification tasks.
if clf.task == 'classification':
    pred_train_classes, pred_test_classes = clf.predict_classes()

### Show some predictions on random test samples

In [None]:
# Plot 15 randomly chosen test spectra together with their predictions.
clf.plot_random(
    no_of_spectra=15,
    dataset='test',
    with_prediction=True,
)

In [None]:
# Plot the test spectra with indices 0-19 together with their predictions.
clf.datahandler.plot_spectra(
    no_of_spectra=20,
    dataset="test",
    indices=list(range(20)),
    with_prediction=True,
)


### Show the worst predictions on the test samples

In [None]:
# Show the ten worst predictions on the test samples.
clf.show_worst_predictions(no_of_spectra=10)

### Save data

In [None]:
# NOTE(review): saving the hyperparameters is disabled in this notebook --
# confirm that this is intended.
#clf.save_hyperparams()
# Pickle the results of this run.
clf.pickle_results()

## Check where and why the predictions fail

### Show worst predictions for single spectra

In [None]:
# Ten worst predictions, restricted to single spectra.
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
)

#### Show worst predictions for different loss thresholds (single spectra)


In [None]:
# Worst predictions for single spectra at a loss threshold of 0.2.
threshold = 0.2
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
    threshold=threshold,
)

In [None]:
# Worst predictions for single spectra at a loss threshold of 0.1.
threshold = 0.1
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
    threshold=threshold,
)

In [None]:
# Worst predictions for single spectra at a loss threshold of 0.05.
threshold = 0.05
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
    threshold=threshold,
)

In [None]:
# Worst predictions for single spectra at a loss threshold of 0.02.
threshold = 0.02
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
    threshold=threshold,
)

In [None]:
# Worst predictions for single spectra at a loss threshold of 0.01.
threshold = 0.01
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='single',
    threshold=threshold,
)

### Show worst predictions for linearly combined spectra

In [None]:
# Ten worst predictions, restricted to linearly combined spectra.
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
)

#### Show worst predictions for different loss thresholds (linearly combined spectra)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.3.
threshold = 0.3
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.2.
threshold = 0.2
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.1.
threshold = 0.1
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.05.
threshold = 0.05
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.025.
threshold = 0.025
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.01.
threshold = 0.01
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

In [None]:
# Worst predictions for linearly combined spectra at a loss threshold of 0.005.
threshold = 0.005
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='linear_comb',
    threshold=threshold,
)

### Show worst predictions for all

In [None]:
# Ten worst predictions across all spectra.
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
)

#### Show worst predictions for different loss thresholds (all spectra)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.3.
threshold = 0.3
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.2.
threshold = 0.2
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.1.
threshold = 0.1
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.05.
threshold = 0.05
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.025.
threshold = 0.025
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.01.
threshold = 0.01
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.005.
threshold = 0.005
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.001.
threshold = 0.001
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.0005.
threshold = 0.0005
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.00025.
threshold = 0.00025
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.0001.
threshold = 0.0001
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

In [None]:
# Worst predictions for all spectra at a loss threshold of 0.00001.
threshold = 0.00001
clf.show_worst_predictions(
    no_of_spectra=10,
    kind='all',
    threshold=threshold,
)

## Remove empty model directory

In [None]:
import os
import shutil

# The attribute is deleted at the end of this cell, so look it up
# defensively: running the cell a second time must not raise.
model_dir = getattr(clf.logging, 'model_dir', None)

if model_dir is not None:
    # NOTE: rmtree removes the directory together with anything inside it.
    # The directory may already have been removed by an earlier run.
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)

    # Drop the reference to the directory that no longer exists.
    del clf.logging.model_dir

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """
    Ask the notebook front end to save the current notebook.

    A JavaScript snippet is displayed that calls
    ``IPython.notebook.save_notebook()`` in the browser.
    NOTE(review): this relies on the front end exposing a JavaScript
    ``IPython`` object -- confirm that this holds in the target environment.

    Returns
    -------
    None.

    """
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """
    Convert a notebook file to HTML and write the result to disk.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' file to read.
    output_file : str
        Path of the '.html' file to write (UTF-8 encoded).

    Returns
    -------
    None.

    """
    import nbformat

    exporter = HTMLExporter()
    output_notebook = nbformat.read(read_file, as_version=4)
    # The exporter also returns a resources object, which is not needed.
    output, _ = exporter.from_notebook_node(output_notebook)

    # The context manager closes the file even if writing fails.
    # newline='' writes line endings unchanged (no platform translation).
    with open(output_file, 'w', encoding='utf-8', newline='') as html_file:
        html_file.write(output)

# NOTE: from here on the name `time` refers to the standard-library module.
import time
import os

# Wait before asking the front end to save the notebook.
time.sleep(20)
save_notebook()
print('Notebook saved!')
# Wait again before the notebook file is read for the export (presumably
# to give the save time to reach the file on the drive -- TODO confirm).
time.sleep(30)
# Path of this notebook on the mounted drive; input of the HTML export.
current_file = '/content/drive/My Drive/deepxps/xpsdeeplearning/notebooks/predict.ipynb'
# The HTML export is written into the log directory of the classifier.
output_file = os.path.join(clf.logging.log_dir,
                           'predict_out.html')
output_HTML(current_file, output_file)
print('HTML file saved!')