# Evaluation of CNN classification performance

In [None]:
# Channel switch for the notebook; it is read further down by the yield
# scaling in `metric` (frac_muon / nu_mu_yield when True, frac_hadr /
# nu_e_yield when False).
muonic = False
# Derived flag, always the opposite of `muonic`; used for the plot labels and
# for choosing the scaling array of the confusion matrices.
hadronic = not muonic

In [None]:
import numpy as np

In [None]:
import tensorflow as tf

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Label encoder shared by the notebook: fitted on the test labels further down
# and reused for every conversion between raw and encoded labels.
le = LabelEncoder()

In [None]:
from preprocessing import reshape_data

In [None]:
import uproot

In [None]:
import matplotlib.pyplot as plt

# plt.style.use(["science", "notebook"])

In [None]:
# Global matplotlib defaults applied to every figure in this notebook.
plt.rcParams.update(
    {
        "font.size": 14,
        "axes.formatter.limits": (-5, 4),
        "figure.figsize": (6, 4),
    }
)
# Colours of the active property cycle, in cycle order.
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]

In [None]:
# Choose the test sample from the `muonic` flag so that the input file follows
# the same channel switch as the yield scaling in `metric`. Two unconditional
# assignments would always leave the second file selected.
filename_test = (
    "df_nu_CC_muonic_equal_test.root:df"
    if muonic
    else "df_nu_CC_non-muonic_equal_new_test.root:df"
)

In [None]:
# Open the test sample.
# NOTE(review): the ":df" suffix presumably selects the object named "df"
# inside the ROOT file (uproot path convention) — confirm.
events_test = uproot.open(filename_test)

In [None]:
# Name of the branch that holds the truth label.
target = "nu_flavour"

# Alternative spellings of the target; not used in the visible cells.
target_pretty = "flavour"
target_LaTeX = "flavour"

In [None]:
# Number of entries in the label branch (displayed as cell output).
events_test[target].num_entries

In [None]:
# Absolute value of the raw labels.
# NOTE(review): presumably folds negative (antiparticle) codes onto the
# positive ones — confirm against the label definition.
y = np.abs(events_test[target])

In [None]:
# Fit the encoder on the labels present in the test sample.
le.fit(y)

In [None]:
# Classes known to the encoder, in encoded order (displayed as cell output).
le.classes_

In [None]:
def event_generator():
    """Yield the test events one at a time as ``(X, X_mufilter, label)``.

    ``events_test`` is read in chunks of one entry. The ``X`` and
    ``X_mufilter`` arrays are cast to ``float16``; the label is the absolute
    value of the ``target`` branch encoded with the fitted ``le``.

    Yields:
        tuple: ``(X[i], X_mufilter[i], encoded_label[i])`` for every event.
    """
    # The per-chunk report is not requested because it was never used.
    for batch in events_test.iterate(step_size=1, library="np"):
        # Cast once per chunk instead of once per yielded event.
        features = batch["X"].astype(np.float16)
        features_mufilter = batch["X_mufilter"].astype(np.float16)
        labels = le.transform(np.abs(batch[target]))
        # Iterating the arrays walks their first axis, i.e. event by event.
        yield from zip(features, features_mufilter, labels)

In [None]:
gen = event_generator()

In [None]:
sample = gen.__next__()

In [None]:
generator_spec_0 = tf.type_spec_from_value(gen.__next__()[0])
generator_spec_1 = tf.type_spec_from_value(gen.__next__()[1])
generator_spec_2 = tf.type_spec_from_value(gen.__next__()[2])

In [None]:
print(generator_spec_0)

In [None]:
print(generator_spec_1)

In [None]:
print(generator_spec_2)

In [None]:
# Element structure produced by `event_generator`: (X, X_mufilter, label).
output_signature = (generator_spec_0, generator_spec_1, generator_spec_2)

# Stream the events from the generator ...
raw_ds_test = tf.data.Dataset.from_generator(
    event_generator,
    output_signature=output_signature,
)
# ... bring them into the layout produced by `reshape_data` ...
reshaped_ds_test = raw_ds_test.map(reshape_data)
# ... and declare the dataset length, which a generator-backed dataset does not
# know by itself.
ds_test = reshaped_ds_test.apply(
    tf.data.experimental.assert_cardinality(events_test.num_entries)
)

In [None]:
# Encoded truth labels of the whole test sample, built the same way as the
# labels yielded by `event_generator` (absolute value, then `le.transform`).
y_test = le.transform(np.abs(events_test[target]))

In [None]:
# Number of events whose encoded label is non-zero (displayed as cell output).
np.count_nonzero(y_test)

In [None]:
# Group the events into batches of 30 for prediction.
batched_ds_test = ds_test.batch(30)

In [None]:
from tensorflow.keras.models import load_model

In [None]:
import tensorflow.keras.backend as K

# Select the "channels_last" image layout before the model is loaded below.
K.set_image_data_format("channels_last")

In [None]:
# Load only the model that belongs to the selected channel ("muvtau" for the
# muonic channel, "evtau" otherwise). Loading both in turn would waste the
# first load and always leave the second model active, whatever `muonic` says.
model = load_model(
    "CNN_jannu_logit_focus_nu_flavour_muvtau_n7465_e200.keras"
    if muonic
    else "CNN_jannu_logit_focus_nu_flavour_evtau_n26824_e100.keras"
)

In [None]:
# Name stored in the model; used below in the file names of the saved plots.
model_name = model.name

In [None]:
# test=retoy_model.predict(x=[x_test['scifi_h'], x_test['scifi_v'], x_test['us'], x_test['ds']])
# Model output for every event of the batched test dataset.
y_pred = model.predict(batched_ds_test)

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
from config import nu_mu_yield, nu_tau_yield, nu_e_yield, frac_muon, frac_hadr

In [None]:
scaling_muon = np.array([nu_mu_yield, nu_tau_yield * frac_muon]).reshape(-1, 1)

In [None]:
scaling_hadr = np.array([nu_e_yield, nu_tau_yield * frac_hadr]).reshape(-1, 1)

In [None]:
def metric(threshold=0.5):
    """Return minus ``S / sqrt(S + B)`` for a cut on the model output.

    The predictions ``y_pred`` are binarised at ``threshold`` and compared
    with ``y_test``. The signal efficiency is scaled by the tau-neutrino yield
    times the channel fraction, and the false-positive rate by the background
    yield of the channel selected with ``muonic``. The sign is flipped so that
    the function can be handed to a minimiser.

    Args:
        threshold: Cut value; outputs greater than or equal to it count as
            positive predictions.

    Returns:
        The negated figure of merit, or ``0.0`` when no event passes the cut.
    """
    TN, FP, FN, TP = confusion_matrix(y_test, (y_pred >= threshold).astype(int)).ravel()
    signal = TP / (TP + FN) * nu_tau_yield * (frac_muon if muonic else frac_hadr)
    background = FP / (FP + TN) * (nu_mu_yield if muonic else nu_e_yield)
    expected_total = signal + background
    if expected_total == 0:
        # A threshold above every score selects nothing; 0 / sqrt(0) would be
        # NaN and derail the optimiser, so report "no significance" instead.
        return 0.0
    return -signal / np.sqrt(expected_total)

In [None]:
from scipy.optimize import basinhopping

In [None]:
# Minimise `metric` over the threshold, starting from 0.2.
# NOTE(review): `metric` only changes when the threshold crosses one of the
# model outputs, so it looks piecewise constant, and no seed is passed —
# confirm that the optimum found here is stable from run to run.
res = basinhopping(metric, 0.2)

In [None]:
# Threshold at the minimum found (the only optimisation variable).
optimum_threshold = res.x[0]

In [None]:
# optimum_threshold = 0.5

In [None]:
# Binarise the model output at the chosen threshold.
y_pred_bool = (y_pred >= optimum_threshold).astype(int)

In [None]:
# Draw the predicted classes and the true classes as two histograms on the
# same axes.
plt.hist(y_pred_bool)
plt.hist(y_test)

In [None]:
# Per-class summary of the thresholded predictions.
print(classification_report(y_test, y_pred_bool))

In [None]:
from sklearn.metrics import ConfusionMatrixDisplay

In [None]:
from plotting import watermark

In [None]:
fig, ax = plt.subplots(figsize=(6, 4))
# Confusion matrix of raw event counts. The integer matrix is passed on
# unchanged: multiplying it by a float would make the display annotate the
# cells with two significant digits instead of the exact counts.
disp = ConfusionMatrixDisplay(
    confusion_matrix(
        le.inverse_transform(y_test.ravel()),
        le.inverse_transform(y_pred_bool.ravel()),
    ),
    display_labels=[r"$\nu_e$" if hadronic else r"$\nu_\mu$", r"$\nu_\tau$"],
)
disp.plot(cmap=plt.cm.Blues, ax=ax)

watermark()

plt.savefig(f"plots/confusion_matrix_balanced_{model_name}.png")
plt.savefig(f"plots/confusion_matrix_balanced_{model_name}.pdf")

In [None]:
fig, ax = plt.subplots(figsize=(6, 4))
# Confusion matrix normalised over the true labels (normalize="true").
disp = ConfusionMatrixDisplay(
    confusion_matrix(
        le.inverse_transform(y_test.ravel()),
        le.inverse_transform(y_pred_bool.ravel()),
        normalize="true",
    ),
    display_labels=[r"$\nu_e$" if hadronic else r"$\nu_\mu$", r"$\nu_\tau$"],
)
disp.plot(cmap=plt.cm.Blues, ax=ax)

watermark()

plt.savefig(f"plots/confusion_matrix_normalised_{model_name}.png")
plt.savefig(f"plots/confusion_matrix_normalised_{model_name}.pdf")

In [None]:
fig, ax = plt.subplots(figsize=(6, 4))
# Confusion matrix normalised over the true labels, with every true-class row
# then multiplied by the yield stored for that class in the scaling column of
# the active channel.
scale = scaling_hadr if hadronic else scaling_muon
disp = ConfusionMatrixDisplay(
    confusion_matrix(
        le.inverse_transform(y_test.ravel()),
        le.inverse_transform(y_pred_bool.ravel()),
        normalize="true",
    )
    * scale,
    display_labels=[r"$\nu_e$" if hadronic else r"$\nu_\mu$", r"$\nu_\tau$"],
)
disp.plot(cmap=plt.cm.Blues, ax=ax)

watermark()

plt.savefig(f"plots/confusion_matrix_scaled_{model_name}.png")
plt.savefig(f"plots/confusion_matrix_scaled_{model_name}.pdf")

In [None]:
from sklearn.metrics import RocCurveDisplay

In [None]:
# ROC curve built from the raw (un-thresholded) model output.
RocCurveDisplay.from_predictions(y_test, y_pred)