# CNN for AdvSND target energy reconstruction

In [None]:
import numpy as np

In [None]:
import pandas as pd

In [None]:
import uproot

In [None]:
import tensorflow as tf

In [None]:
from preprocessing import reshape_data

In [None]:
import matplotlib.pyplot as plt

# Apply the "science" and "notebook" style sheets to all following figures.
# NOTE(review): these names are presumably provided by an external style
# package rather than by matplotlib itself — confirm it is installed/imported.
plt.style.use(["science", "notebook"])

In [None]:
# Global matplotlib defaults for every figure produced below.
plt.rcParams.update(
    {
        "font.size": 14,
        "axes.formatter.limits": (-5, 4),
        "figure.figsize": (6, 4),
    }
)
# Colours of the active property cycle, reused for the twin-axis plot.
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]

In [None]:
# Both inputs share one naming scheme; only the split name differs.
# The ":df" suffix is passed on unchanged to uproot.open().
_DATASET_TEMPLATE = "dataframe_CC_saturation5_{}.root:df"
filename_train = _DATASET_TEMPLATE.format("train")
filename_test = _DATASET_TEMPLATE.format("test")

In [None]:
# Open the training and test inputs; both handles stay open because the
# event generator reads from them lazily.
events_train, events_test = (
    uproot.open(path) for path in (filename_train, filename_test)
)

In [None]:
# Total number of events across both splits (used in output file names).
n_events = sum(tree.num_entries for tree in (events_train, events_test))

In [None]:
# Target selection: lepton energy.
target = "lepton_energy"

target_pretty = "Lepton Energy"
# Raw string: "\e" is not a valid escape sequence and triggers a warning
# (a future error) in a normal string literal.
target_LaTeX = r"E_\ell"

In [None]:
# Target selection: hadron energy (branch name, display label, LaTeX symbol).
target, target_pretty, target_LaTeX = (
    "hadron_energy",
    "Hadron Energy",
    "E_h",
)

In [None]:
# Target selection: neutrino energy.
target = "nu_energy"

target_pretty = "Neutrino energy"
# Raw string is required here: in a normal literal "\n" is a newline, which
# would turn the LaTeX symbol \nu into "E_<newline>u".
target_LaTeX = r"E_\nu"

In [None]:
# Alternative target selection; overrides whatever `target` was set before.
# NOTE(review): target_pretty / target_LaTeX are not updated by this cell.
target = "start_z"

In [None]:
# Alternative target selection; overrides whatever `target` was set before.
# NOTE(review): event_generator reads batch[target], so this presumably only
# works if the input contains a branch literally named "both" — confirm.
target = "both"

In [None]:
# Alternative target selection; overrides whatever `target` was set before.
# Since "energy" is not part of this name, no log transform is applied to it.
target = "deps"
# Small offset; in this file it is only referenced from commented-out code
# (the alternative y_test definition).
edep_correction = 1e-9

In [None]:
def event_generator(train=True):
    """Yield one event at a time as ``(X, X_mufilter, target_value)``.

    Reads from the module-level ``events_train`` (``train`` truthy) or
    ``events_test`` (``train`` falsy) and iterates it in steps of one entry.
    The ``"X"`` and ``"X_mufilter"`` branches are cast to ``float16``; the
    branch named by the module-level ``target`` is yielded as-is, or as its
    natural logarithm when ``"energy"`` occurs in the target name.

    Args:
        train: Selects the training input when truthy, the test input
            otherwise.

    Yields:
        Tuple of (``X`` row, ``X_mufilter`` row, target value) per event.
    """
    events = events_train if train else events_test
    log = "energy" in target
    for batch in events.iterate(step_size=1, library="np"):
        # Cast once per batch instead of re-casting the whole batch for
        # every event inside the inner loop.
        x_target = batch["X"].astype(np.float16)
        x_mufilter = batch["X_mufilter"].astype(np.float16)
        y = batch[target]
        for i in range(x_target.shape[0]):
            yield (
                x_target[i],
                x_mufilter[i],
                (np.log(y[i]) if log else y[i]),
            )

In [None]:
# Generator over the training input, used below to inspect single events.
gen = event_generator(train=True)

In [None]:
# Shape given to the "target_h_in" and "target_v_in" Input layers of the model.
input_shape = (100, 3072, 1)

In [None]:
# Pull one event from the generator for visual inspection.
sample = next(gen)

In [None]:
# Show the two detector arrays of the sample event, one figure each, with
# an aspect ratio chosen per array.
for image, aspect_ratio in ((sample[0], 0.05), (sample[1], 0.01)):
    plt.figure()
    plt.imshow(image, aspect=aspect_ratio)

In [None]:
# Display the third element of the sample: the target value as yielded by
# event_generator (log-transformed when "energy" is in `target`).
sample[2]

In [None]:
# Derive the three element specs from ONE event, so the components describe
# the same event and only a single entry is read (previously three separate
# events were consumed, one per spec).
_spec_sample = next(gen)
generator_spec_0, generator_spec_1, generator_spec_2 = (
    tf.type_spec_from_value(component) for component in _spec_sample
)

In [None]:
# TODO reshape data only once

In [None]:
# Training dataset: stream events from the generator (default train=True),
# reshape each element, and declare the known number of entries.
_train_signature = (generator_spec_0, generator_spec_1, generator_spec_2)
_raw_train = tf.data.Dataset.from_generator(
    event_generator,
    output_signature=_train_signature,
)
_train_cardinality = tf.data.experimental.assert_cardinality(
    events_train.num_entries
)
ds_train = _raw_train.map(reshape_data).apply(_train_cardinality)

In [None]:
# Test dataset: same pipeline as for training, but the generator is called
# with train=False (passed through `args`).
_test_signature = (generator_spec_0, generator_spec_1, generator_spec_2)
_raw_test = tf.data.Dataset.from_generator(
    event_generator,
    args=[False],
    output_signature=_test_signature,
)
_test_cardinality = tf.data.experimental.assert_cardinality(
    events_test.num_entries
)
ds_test = _raw_test.map(reshape_data).apply(_test_cardinality)

In [None]:
# y_test = events_test["energy_dep_target"].array() + edep_correction, events_test["energy_dep_mufilter"].array()+edep_correction
# Ground truth for the test split, on the same scale the generator yields:
# log-transformed when "energy" is in `target`, raw otherwise.
y_test = events_test[target].array()
if "energy" in target:
    y_test = np.log(y_test)

In [None]:
# Number of events per batch, used for both the training and test datasets.
batch_size = 30

In [None]:
# Group training events into batches.
batched_ds_train = ds_train.batch(batch_size=batch_size)

In [None]:
# Group test events into batches.
batched_ds_test = ds_test.batch(batch_size=batch_size)

In [None]:
import tensorflow.keras
from tensorflow.keras.layers import Input, Dense, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Concatenate
import tensorflow.keras.optimizers
import tensorflow.keras.metrics
import tensorflow.keras.losses
from tensorflow.keras.models import Model

import tensorflow.keras.backend as K

# Channel axis last; the Input layers in this file are declared with a
# trailing dimension of 1, e.g. input_shape = (100, 3072, 1).
K.set_image_data_format("channels_last")

In [None]:
from tensorflow.keras.layers import BatchNormalization

In [None]:
from tensorflow.keras.layers import ReLU

In [None]:
# Accumulated training history; starts empty and is extended via pd.concat
# after each model.fit call.
history_df = None

In [None]:
# Name given to the Keras model and embedded in every output file name
# (history CSV, saved model, plots, predictions); includes the current target.
model_name = f"CNN_3dSat5_grandjorasses_{target}"

In [None]:
# Dropout rate applied after every pooling stage except the last one of each
# convolutional branch.
drop_middle = 0.25

# NOTE(review): `lr` is never handed to the optimizer — compile() below is
# given the string "Adam", so the optimizer runs with its default settings.
# Confirm whether Adam(learning_rate=lr) was intended before wiring it in.
lr = 2e-4  # ; betaa1=30; betaa2=100; decay=1e-3

# Reset Keras' global state BEFORE any layer is created, so that re-running
# this cell releases the previous model first and layer names start fresh.
# (Previously this ran after the model was built, just before compile.)
K.clear_session()


def _conv_branch(shape, name, kernel_sizes, pool_sizes):
    """Build one convolutional branch and return ``(input, flat_features)``.

    Each stage is Conv2D(16, padding="same") -> BatchNormalization -> ReLU
    -> MaxPooling2D. All stages but the last pool with padding="valid" and
    are followed by Dropout(drop_middle); the last stage pools with
    padding="same" and has no dropout. The result is flattened.

    Args:
        shape: Shape of the branch's Input layer.
        name: Name of the branch's Input layer.
        kernel_sizes: One Conv2D kernel size per stage.
        pool_sizes: One MaxPooling2D pool size per stage.
    """
    branch_input = Input(shape, name=name)
    x = branch_input
    last_stage = len(kernel_sizes) - 1
    for stage, (kernel, pool) in enumerate(zip(kernel_sizes, pool_sizes)):
        is_last = stage == last_stage
        x = Conv2D(16, kernel_size=kernel, padding="same")(x)
        x = BatchNormalization()(x)
        x = ReLU()(x)
        x = MaxPooling2D(pool_size=pool, padding="same" if is_last else "valid")(x)
        if not is_last:
            x = Dropout(rate=drop_middle)(x)
    return branch_input, Flatten()(x)


# AdvTarget

# TODO bigger kernels/windows in strip dimension (axis=1)?

_target_kernels = [(1, 9), (3, 3), (3, 3), (3, 3)]
_target_pools = [(2, 4), (2, 4), (2, 4), (2, 2)]

target_h_input, X_h = _conv_branch(
    input_shape, "target_h_in", _target_kernels, _target_pools
)
target_v_input, X_v = _conv_branch(
    input_shape, "target_v_in", _target_kernels, _target_pools
)

# MuFilter (the first stage pools only along axis 1)

mufilter_h_input, X_mf_h = _conv_branch(
    (21, 4608, 1),
    "mufilter_h_in",
    [(1, 3), (3, 3), (3, 3), (3, 3)],
    [(1, 4), (2, 4), (2, 4), (2, 2)],
)
mufilter_v_input, X_mf_v = _conv_branch(
    (5, 4608, 1),
    "mufilter_v_in",
    # Only 5 rows in this input, hence the shorter (2, 3) kernels mid-stack.
    [(1, 3), (2, 3), (2, 3), (3, 3)],
    [(1, 4), (2, 4), (2, 4), (2, 2)],
)

# Regression head: merge the four flattened branches and reduce to a single
# output value.
X = Concatenate()([X_h, X_v, X_mf_h, X_mf_v])
X = Dense(4)(X)
X = BatchNormalization()(X)
X = ReLU()(X)
X = Dense(20)(X)
X = BatchNormalization()(X)
X = ReLU()(X)
X = Dropout(rate=0.2)(X)
# TODO Add dropout?
# X_ell = Dense(1)(X)
# X_had = Dense(1)(X)
X = Dense(1)(X)

model = Model(
    inputs=[target_h_input, target_v_input, mufilter_h_input, mufilter_v_input],
    outputs=X,
    name=model_name,
)

model.compile(optimizer="Adam", loss="mse", metrics=["mae"])

In [None]:
# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# TODO activation for max pooling?
# TODO Reduce number of convolutional layers?
# TODO Add hidden hidden layer (or two?) before outputs?
# TODO predict independently?

In [None]:
# Train for five epochs on the batched training stream, with prefetching so
# data loading can overlap with training.
_train_stream = batched_ds_train.prefetch(tf.data.AUTOTUNE)
fit_result = model.fit(_train_stream, epochs=5)

In [None]:
# Append the metrics of the latest fit to the accumulated history.
# ignore_index=True keeps one continuous epoch index across repeated fits
# instead of restarting at 0 for every appended chunk.
history_df = pd.concat(
    [history_df, pd.DataFrame(fit_result.history)], ignore_index=True
)

In [None]:
# Persist the accumulated history; the file name encodes the model, the
# total event count and the number of epochs trained so far.
_history_path = f"history_{model_name}_n{n_events}_e{len(history_df)}.csv"
history_df.to_csv(_history_path)

In [None]:
# Save the trained model under a name that encodes the model, the total
# event count and the number of epochs trained so far.
_model_path = f"{model_name}_n{n_events}_e{len(history_df)}.keras"
model.save(_model_path)

In [None]:
import os

# Convergence plot: loss on the left axis, error metric on the right axis.
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
plt.title("CNN lepton + hadron energy")
ax1.plot(history_df["loss"].values, color=colors[0])
ax1.set_xlabel("Epochs")
ax1.set_ylabel("Loss Function", color=colors[0])
try:
    ax2.plot(history_df["mae"].values, color=colors[1])
except KeyError:
    # Fallback for histories that carry one MAE column per output layer.
    ax2.plot(history_df["dense_2_mae"].values, color=colors[1])
    ax2.plot(history_df["dense_3_mae"].values, color=colors[1])
ax2.set_ylabel("Error", color=colors[1])
plt.text(
    0.3,
    0.7,
    f"Training dataset: {events_train.num_entries} events\n"
    f"Test dataset: {events_test.num_entries} events\n"
    f"Training duration: {len(history_df)} epochs\n{model_name}",
    transform=ax1.transAxes,
)

# savefig does not create missing directories; make sure "plots" exists so a
# finished training run is not lost to a FileNotFoundError at this point.
os.makedirs("plots", exist_ok=True)
_convergence_stem = f"plots/convergence_{model_name}_n{n_events}_e{len(history_df)}"
for _extension in ("pdf", "png"):
    plt.savefig(f"{_convergence_stem}.{_extension}")

In [None]:
# test=retoy_model.predict(x=[x_test['scifi_h'], x_test['scifi_v'], x_test['us'], x_test['ds']])
# Run the trained model over the batched test dataset.
y_pred = model.predict(x=batched_ds_test)

In [None]:
# df = pd.DataFrame({"lepton_energy_pred" : np.squeeze(np.exp(y_pred)[0]), "lepton_energy_test" : np.squeeze(np.exp(y_test)[0]),
#                  "hadron_energy_pred" : np.squeeze(np.exp(y_pred)[1]), "hadron_energy_test" : np.squeeze(np.exp(y_test)[1])})

# Collect predictions and ground truth side by side. Energy targets were
# trained on a log scale and are transformed back with exp; every other
# target is stored unchanged. Branching first avoids exponentiating values
# that were never log-transformed (wasted work, possible overflow/failure).
if "energy" in target:
    df = pd.DataFrame(
        {
            f"{target}_pred": np.squeeze(np.exp(y_pred)),
            f"{target}_test": np.squeeze(np.exp(y_test)),
        }
    )
else:
    df = pd.DataFrame(
        {f"{target}_pred": np.squeeze(y_pred), f"{target}_test": np.squeeze(y_test)}
    )

In [None]:
# Write predictions next to the ground truth; the file name encodes the
# model, the total event count and the number of epochs trained.
_predictions_path = f"{model_name}_n{n_events}_e{len(history_df)}.csv"
df.to_csv(_predictions_path)

In [None]:
# Display the file name pattern used by model.save for the current state.
f"{model_name}_n{n_events}_e{len(history_df)}.keras"