In [1]:
import heapq as hq
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
from scipy.stats import wasserstein_distance
from bokeh.plotting import figure
from bokeh.io import output_notebook, show, export_svg, export_png
from bokeh.palettes import HighContrast3
output_notebook()

In [2]:
def bokeh_spectra(ml_spectra, true_spectra, energy_range=(280, 300)):
    """Build a Bokeh figure overlaying an ML-predicted spectrum on the true one.

    Parameters
    ----------
    ml_spectra : sequence of float
        Intensities of the predicted spectrum, drawn with the label 'ML Model'.
    true_spectra : sequence of float
        Intensities of the reference spectrum, drawn with the label 'True'.
    energy_range : tuple of (float, float), optional
        Lower and upper photon energy (eV) used both as the x-axis range and
        as the end points of the energy grid. Defaults to ``(280, 300)``.

    Returns
    -------
    The configured Bokeh figure. The figure is not displayed here; pass it
    to ``show`` (or an export function) to render it.

    Notes
    -----
    Each curve is plotted on an evenly spaced grid over ``energy_range`` with
    as many points as the curve has values, so the spectra are not required
    to contain exactly 200 points.
    """
    e_min, e_max = energy_range

    p = figure(
        x_axis_label='Photon Energy (eV)', y_axis_label='arb. units',
        x_range=(e_min, e_max),
        width=400, height=400,
        outline_line_color='black', outline_line_width=2,
    )

    # hide the toolbar and its logo, keep a small margin around the plot
    p.toolbar.logo = None
    p.toolbar_location = None
    p.min_border = 25

    # x-axis settings: few, large, outward-pointing ticks
    p.xaxis.ticker.desired_num_ticks = 3
    p.xaxis.axis_label_text_font_size = '24px'
    p.xaxis.major_label_text_font_size = '24px'
    p.xaxis.major_tick_in = 0
    p.xaxis.major_tick_out = 10
    p.xaxis.minor_tick_out = 6
    p.xaxis.major_tick_line_width = 2
    p.xaxis.minor_tick_line_width = 2
    p.xaxis.major_tick_line_color = 'black'
    p.xaxis.minor_tick_line_color = 'black'

    # y-axis settings: keep the axis label, drop ticks and tick labels
    p.yaxis.axis_label_text_font_size = '24px'
    p.yaxis.major_tick_line_color = None
    p.yaxis.minor_tick_line_color = None
    p.yaxis.major_label_text_color = None

    # grid settings
    p.grid.grid_line_color = 'grey'
    p.grid.grid_line_alpha = 0.3
    p.grid.grid_line_width = 1.5
    p.grid.grid_line_dash = "dashed"

    # plot data: the true curve first, then the ML curve, each on a grid
    # sized from its own length so x and y always have matching lengths
    curves = (
        (true_spectra, HighContrast3[0], 'True'),
        (ml_spectra, HighContrast3[1], 'ML Model'),
    )
    for values, color, label in curves:
        x = np.linspace(e_min, e_max, len(values))
        p.line(x, values, line_width=3, line_color=color, legend_label=label)

    # legend settings
    p.legend.location = 'bottom_right'
    p.legend.label_text_font_size = '20px'

    return p

In [13]:
def calculate_rse(prediction, true_result, energy_width=20):
    """Return the RSE score between a predicted and a reference spectrum.

    The value computed is::

        sqrt(sum(dE * (true_result - prediction)**2)) / sum(dE * true_result)

    with ``dE = energy_width / len(prediction)``.

    Parameters
    ----------
    prediction : array_like
        Predicted spectrum values.
    true_result : array_like
        Reference spectrum values; must broadcast against ``prediction``.
    energy_width : float, optional
        Total width that is divided by the number of points to obtain the
        step ``dE``. Defaults to 20.

    Returns
    -------
    float
        The RSE score. If ``sum(dE * true_result)`` is zero the result
        follows NumPy division semantics (``inf``/``nan`` with a warning).

    Raises
    ------
    ZeroDivisionError
        If ``prediction`` is empty.
    """
    # Accept plain lists/tuples as well as ndarrays: list - list is not
    # defined, so coerce before doing element-wise arithmetic.
    prediction = np.asarray(prediction)
    true_result = np.asarray(true_result)

    del_E = energy_width / len(prediction)

    numerator = np.sum(del_E * np.power((true_result - prediction), 2))
    denominator = np.sum(del_E * true_result)

    return np.sqrt(numerator) / denominator

In [5]:
# Load the pickled results. The context manager closes the file handle as
# soon as loading finishes (the bare open() call left it open).
# NOTE: unpickling can execute arbitrary code; only load result files that
# come from a trusted source.
with open('spectra_results/spectra_ml_14.pkl', 'rb') as file:
    data = pkl.load(file)

In [6]:
# Split the loaded object into its first two entries.
# presumably data[0] holds the model predictions and data[1] the matching
# reference spectra — confirm against the code that wrote the pickle
predict, true = data[0], data[1]

In [14]:
# Per-spectrum error metrics; entry i of each list belongs to predict[i] / true[i].
wasser, mse, rse = [], [], []

for idx in range(len(predict)):
    pred_i, true_i = predict[idx], true[idx]

    # Wasserstein metric
    # NOTE(review): called without weights, so both arrays are treated as
    # samples of values rather than as intensities on an energy grid —
    # confirm this is the intended comparison.
    wasser.append(wasserstein_distance(pred_i, true_i))

    # Mean squared error (symmetric in its two arguments, so the
    # prediction-first order does not change the value)
    mse.append(mean_squared_error(pred_i, true_i))

    # RSE
    rse.append(calculate_rse(pred_i, true_i))

# Report the arithmetic mean of every metric over all spectra
print(f"Average Wasserstein distance = {sum(wasser) / len(wasser)}")
print(f"Average MSE = {sum(mse) / len(mse)}")
print(f'Average RSE = {sum(rse) / len(rse)}')

Average Wasserstein distance = 0.04441475810940467
Average MSE = 0.006931648395881515
Average RSE = 0.0391998097010462


In [9]:
# Rank the spectra by RSE. The *indices* are ranked directly (keyed on their
# RSE value) instead of ranking the values and looking each one up again with
# rse.index(): index() always returns the first match, so two spectra with an
# identical RSE would have been reported under the same graph number.
top_n = min(5, len(rse))  # never ask for more entries than exist
all_idx = range(len(rse))

best = hq.nsmallest(top_n, all_idx, key=rse.__getitem__)   # ascending RSE
worst = hq.nlargest(top_n, all_idx, key=rse.__getitem__)   # descending RSE

# RSE values in the same order as the index lists above
five_best = [rse[i] for i in best]
five_worst = [rse[i] for i in worst]

print(f'The {top_n} best RSE values are:')
for value, idx in zip(five_best, best):
    print(f'RSE = {value:.3f}, graph number = {idx}')

print('')
print(f'The {top_n} worst RSE values are:')
for value, idx in zip(five_worst, worst):
    print(f'RSE = {value:.3f}, graph number = {idx}')


The 5 best RSE values are:
RSE = 0.021, graph number = 33
RSE = 0.025, graph number = 12
RSE = 0.026, graph number = 11
RSE = 0.026, graph number = 16
RSE = 0.027, graph number = 37

The 5 worst RSE values are:
RSE = 0.070, graph number = 36
RSE = 0.060, graph number = 19
RSE = 0.060, graph number = 26
RSE = 0.054, graph number = 4
RSE = 0.050, graph number = 25


In [10]:
# Show the prediction with the lowest RSE next to its true spectrum
top_idx = best[0]
p = bokeh_spectra(predict[top_idx], true[top_idx])
show(p)

In [115]:
# Show the prediction with the highest RSE next to its true spectrum
bottom_idx = worst[0]
p = bokeh_spectra(predict[bottom_idx], true[bottom_idx])
show(p)

In [104]:
# Build the comparison figure for spectrum number 4
spectrum_idx = 4
p = bokeh_spectra(predict[spectrum_idx], true[spectrum_idx])

# Switch the figure to the SVG backend, write it to test.svg, then display it
p.output_backend = 'svg'
export_svg(p, filename='test.svg')
show(p)