# Predict on new data using a trained CNN on XPS data on Google Colab

In this notebook, we will use a trained convolutional network to predict on unseen XPS spectra.

## Setup

### Mount Google Drive and change the working directory

In [None]:
from google.colab import drive
import os

# Expose the user's Google Drive inside the Colab file system.
mount_point = '/content/drive'
drive.mount(mount_point)

# Work from the project folder on the mounted drive from here on.
project_dir = '/content/drive/My Drive/deepxps'
os.chdir(project_dir)

### Install packages and import modules

In [None]:
%%capture
# The %%capture cell magic above must remain the first line of the cell; it
# captures the cell's output (for example the pip install log) instead of
# showing it.
# Install packages
!pip install python-docx

# Import standard modules and magic commands
import datetime
import numpy as np
import pytz
import importlib
import matplotlib.pyplot as plt

# Magic commands
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Disable tf warnings
# `os` is imported in the Drive-mount cell. The variable is set before the
# tensorflow import below — presumably so that it is already in place when
# TensorFlow initialises its logging (confirm against the TensorFlow docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf

### Set seeds and restart session to ensure reproducibility

In [None]:
def reset_seeds_and_session(seed=1):
    """
    Seed the random number generators and install a fresh TF1-style session.

    Seeds Python's ``random`` module, NumPy's global generator and
    TensorFlow's global generator with ``seed``, then creates a
    ``tf.compat.v1.Session`` on the current default graph that is limited
    to one intra-op and one inter-op thread and registers it as the Keras
    backend session.

    Parameters
    ----------
    seed : int, optional
        Seed value shared by all generators. The default is 1.

    Returns
    -------
    None.
    """
    # Local import: the notebook's import cell does not import `random`.
    import random

    # PYTHONHASHSEED is read by the interpreter at start-up, so this
    # assignment cannot change hashing in the already running kernel; it
    # is only inherited by processes started afterwards.
    os.environ['PYTHONHASHSEED'] = str(seed)

    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # Single-threaded execution — presumably chosen to reduce run-to-run
    # nondeterminism from parallel op scheduling (TODO confirm).
    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
                                            inter_op_parallelism_threads=1)
    sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
                                config=session_conf)
    tf.compat.v1.keras.backend.set_session(sess)


reset_seeds_and_session(seed=1)

### Check TensorFlow version

In [None]:
# Bare expression: the notebook shows the resulting string as the cell output.
f"TF version: {tf.__version__}."

## Predict on new data set

### Load custom modules

In [None]:
import importlib

# Decide explicitly between "first import" and "reload" instead of relying on
# a bare `except:`. That way an error raised while reloading an edited module
# surfaces instead of being hidden behind a silent fall-back to the stale,
# already cached module.
if 'classifier' in globals() and 'clfutils' in globals():
    # Re-run of this cell: pick up edits made to the modules on disk.
    importlib.reload(classifier)
    importlib.reload(clfutils)
    print('\n Modules were reloaded.')
else:
    # First run in this kernel: the modules have not been imported yet.
    import xpsdeeplearning.network.classifier as classifier
    import xpsdeeplearning.network.utils as clfutils
    print('Modules were loaded.')

### Set up the parameters & folder structure

In [None]:
# Re-seed NumPy's global generator; this replaces the seed set earlier by
# reset_seeds_and_session(seed=1).
np.random.seed(502)

# Timestamp of this run, expressed in Berlin local time.
berlin_tz = pytz.timezone('Europe/Berlin')
time = datetime.datetime.now().astimezone(berlin_tz).strftime("%Y%m%d_%Hh%Mm")
exp_name = 'MgFeCoO4_chip2_Co2p_using_20221107_09h31m'

# Timestamp and experiment name are handed to the classifier — presumably
# they determine the name of the run's log directory (confirm in Classifier).
clf = classifier.Classifier(
    time=time,
    exp_name=exp_name,
    task='regression',
    intensity_only=True,
)

# Names of the model outputs; they are reused as column headers when the
# predictions are saved.
clf.datahandler.labels = ["Co metal", "Co", "Co3O4"]

### Load and inspect the data

In [None]:
import h5py
from xpsdeeplearning.simulation.base_model.figures import Figure

input_filepath = r'/content/drive/My Drive/deepxps/datasets/MgFeCoO4 (chip2)_Co 2p.h5'

# Copy everything that is needed into memory so the HDF5 file can be closed
# as soon as the `with` block ends.
with h5py.File(input_filepath, "r") as hf:
    size = hf["X"].shape
    X = hf["X"][:, :, :]  # three-axis slice: X is read as a 3-D array
    energies = hf["energies"][:]
    names = [str(name.decode('utf-8')) for name in hf["names"][:]]

from xpsdeeplearning.network.utils import SpectraPlot

texts = []

# The energy axis is the same for every spectrum, so shape it into a column
# vector once instead of once per loop iteration.
new_energies = np.reshape(np.array(energies), (-1, 1))

# For each spectrum, put the energy column in front of its intensity
# column(s); the per-spectrum arrays are then stacked along a new first axis.
data = np.array([np.hstack((new_energies, spectrum)) for spectrum in X])

graphic = SpectraPlot(data=data, annots=names)
fig, axs = graphic.plot()

### Load and compile the model

In [None]:
from tensorflow.keras import backend as K

# NOTE(review): `data` holds each spectrum with the energy column prepended,
# while the prediction cell feeds the raw `X` to the model. Confirm that the
# shape of `data` (rather than of `X`) is what the data handler should record.
clf.datahandler.input_shape = data.shape[1:]

# Restore the trained network from the run directory of an earlier training.
model = clf.load_model(
    model_path='/content/drive/My Drive/deepxps/runs/20221103_17h06m_Co_linear_combination_normalized_inputs_small_gas_phase_shortened/model'
)

In [None]:
# Presumably reports the architecture of the model loaded above — confirm in
# Classifier.summary.
clf.summary()

### Predict on new data

In [None]:
# Run the loaded model on the raw spectra `X` (not on `data`, which also
# carries the energy column used for plotting).
pred = clf.model.predict(X)

In [None]:
# Show the predictions rounded to three decimals.
print(np.round(pred,3))

### Show the predictions for all spectra

In [None]:
texts = []

# The energy axis is shared by all spectra: build the column vector once
# rather than inside the loop.
new_energies = np.reshape(np.array(energies), (-1, 1))

# Energy column followed by the intensity column(s), one array per spectrum.
data = np.array([np.hstack((new_energies, spectrum)) for spectrum in X])

# Annotate each spectrum with its name and, on a second line, its prediction
# rounded to two decimals.
annots = [
    name + "\n" + str(np.round(pred[i], 2))
    for i, name in enumerate(names)
]

graphic = SpectraPlot(data=data, annots=annots)
fig, axs = graphic.plot()

### Save predictions

In [None]:
import pandas as pd

# Left half of the table: predictions rounded to two decimals.
# Right half: the same predictions scaled by 100 and rounded to one decimal.
pred_rounded = np.round(pred, 2)
pred_percent = np.round(pred*100, 1)
pred_array = np.hstack([pred_rounded, pred_percent])

# One column per label, followed by one " %" column per label.
percent_columns = [label + " %" for label in clf.datahandler.labels]
columns = clf.datahandler.labels + percent_columns

# One row per spectrum, indexed by the spectrum name.
df = pd.DataFrame(pred_array, index=names, columns=columns)

output_file = os.path.join(clf.logging.log_dir, 'pred.xlsx')
df.to_excel(output_file, index=True)

### Save data

In [None]:
# NOTE(review): saving the hyperparameters is disabled in this notebook —
# presumably because nothing is trained here; confirm, then delete the
# commented-out call.
#clf.save_hyperparams()
# Persist the results of this run through the classifier's own pickling helper.
clf.pickle_results()

## Remove empty model and figures directories

In [None]:
import os
import shutil

# Remove the model and figure directories of this run and drop the matching
# attributes from the logging object. shutil.rmtree deletes a directory with
# everything inside it, so anything stored there is lost.
#
# The checks make the cell safe to run more than once: on a repeated run the
# attributes and directories are already gone, which would otherwise raise
# AttributeError / FileNotFoundError.
for dir_attr in ('model_dir', 'fig_dir'):
    dir_path = getattr(clf.logging, dir_attr, None)
    if dir_path is None:
        # Attribute already removed by an earlier run of this cell.
        continue
    if os.path.isdir(dir_path):
        shutil.rmtree(dir_path)
    delattr(clf.logging, dir_attr)

## Save output of notebook

In [None]:
from IPython.display import Javascript, display
from nbconvert import HTMLExporter

def save_notebook():
    """
    Ask the notebook front end to save the current notebook.

    Sends a JavaScript snippet to the browser that calls
    ``IPython.notebook.save_notebook()``. Only the
    'application/javascript' representation is displayed.

    NOTE(review): the ``IPython.notebook`` JavaScript object belongs to the
    classic Jupyter Notebook front end — verify that it is available in
    the front end this notebook runs in (e.g. Colab).
    """
    save_command = Javascript("IPython.notebook.save_notebook()")
    display(save_command, include=['application/javascript'])

def output_HTML(read_file, output_file):
    """
    Convert a notebook file to HTML and write the result to disk.

    Parameters
    ----------
    read_file : str
        Path of the '.ipynb' notebook to convert.
    output_file : str
        Path of the '.html' file to write (UTF-8 encoded).

    Returns
    -------
    None.
    """
    import codecs
    import nbformat

    exporter = HTMLExporter()
    # read_file is '.ipynb', output_file is '.html'
    output_notebook = nbformat.read(read_file, as_version=4)
    # The second return value (export resources) is not needed here.
    output, _ = exporter.from_notebook_node(output_notebook)

    # Use a context manager so the file is flushed and closed deterministically
    # instead of leaving the handle open until garbage collection.
    with codecs.open(output_file, 'w', encoding='utf-8') as html_file:
        html_file.write(output)

# NOTE: this import rebinds the notebook-level name `time`, which the
# parameter cell had assigned the run's timestamp string.
import time
import os

# Wait before and after triggering the save — presumably to let pending output
# settle and to give the save time to reach the file on Drive (TODO confirm
# that these delays are sufficient).
time.sleep(20)
save_notebook()
print('Notebook saved!')
time.sleep(30)

# Export the saved notebook file as HTML into this run's log directory.
current_file = '/content/drive/My Drive/deepxps/xpsdeeplearning/notebooks/predict_without_ground_truth.ipynb'
output_file = os.path.join(clf.logging.log_dir, 'predict_out.html')
output_HTML(current_file, output_file)
print('HTML file saved!')