In [None]:
import pandas as pd

In [None]:
import matplotlib.pyplot as plt

In [None]:
import numpy as np

In [None]:
import plotting

In [None]:
import scipy
from iminuit import cost
from iminuit import Minuit

In [None]:
import hist

In [None]:
# Apply the "science" and "notebook" matplotlib style sheets to all later figures.
# NOTE(review): these are not built-in matplotlib styles — presumably they come
# from the SciencePlots package; confirm it is installed and registered here.
plt.style.use(["science", "notebook"])

In [None]:
# Global figure defaults, set in one place: base font size, the exponent range
# outside which tick labels switch to scientific notation, and the figure size.
plt.rcParams.update(
    {
        "font.size": 14,
        "axes.formatter.limits": (-5, 4),
        "figure.figsize": (6, 4),
    }
)
# Colour list of the active property cycle, kept for manual colouring.
colors = plt.rcParams["axes.prop_cycle"].by_key()["color"]

In [None]:
# Per-event feature table; its "start_z" column is used as the regression target below.
df = pd.read_csv("features_CNN_1d_99987.csv")

In [None]:
# Input array used as the feature matrix for the regressor.
# NOTE(review): presumably row-aligned with df (one row per event) — confirm.
X = np.load("images_1d_99987.npy")

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Scaler for the input features (fitted and applied in the next cells).
scaler = StandardScaler()

In [None]:
# Learn the per-column scaling parameters from X.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test
# split further down, so test-set statistics enter the preprocessing.
scaler.fit(X)

In [None]:
# Replace X by its scaled version. This overwrites the raw array, so re-running
# only this cell scales the data a second time.
X = scaler.transform(X)

In [None]:
# Show the scaled inputs of the first 100 events as an image, one row per event.
plt.imshow(X[:100, :])
plotting.watermark()
plt.gca().set(
    xlabel=r"$2\times \mathrm{station} + \mathrm{plane}$",
    ylabel="Event",
)
# Write the same figure as raster and vector graphics.
for target in ("plots/1d_data.png", "plots/1d_data.pdf"):
    plt.savefig(target)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
from sklearn.ensemble import AdaBoostRegressor

In [None]:
from skopt.space import Real, Integer
from skopt import BayesSearchCV

In [None]:
# Regression target: the "start_z" column as a NumPy array.
y = df["start_z"].values

In [None]:
# Separate scaler for the regression target, so it can be inverted independently of X.
y_scaler = StandardScaler()

In [None]:
# Turn the target into a single-column 2-D array, the layout the scaler works on.
y = y.reshape(-1, 1)

In [None]:
# Learn the scaling parameters of the target column.
y_scaler.fit(y)

In [None]:
# Replace the target by its scaled version. This overwrites y, so re-running
# only this cell applies the scaling twice.
y = y_scaler.transform(y)

In [None]:
# Second, affine rescaling applied on top of the already scaled target;
# y_to_z undoes this step only.
# NOTE(review): the constants 235 and 155 look like they were chosen for raw
# start_z values rather than scaled ones — confirm that stacking both
# transforms is intended.
y = (y + 235) / 155

In [None]:
def y_to_z(y, scale=155, offset=235):
    """Undo the affine rescaling ``(value + offset) / scale``.

    With the default constants this is the exact inverse of the
    ``y = (y + 235) / 155`` step applied to the target in this notebook.
    It does not undo any other transform applied to the target.

    Parameters
    ----------
    y : scalar or array
        Rescaled value(s); anything supporting ``*`` and ``-`` with numbers.
    scale : float, optional
        Divisor used by the forward transform (default 155).
    offset : float, optional
        Shift used by the forward transform (default 235).

    Returns
    -------
    Same kind of object as ``y``, holding ``y * scale - offset``.
    """
    return (y * scale) - offset

In [None]:
# Quick look at the affine inverse applied to the full target (shown as cell output).
y_to_z(y)

In [None]:
# Recover the original start_z values. The target went through two transforms
# (y_scaler, then the affine rescaling), so both are undone in reverse order:
# y_to_z first, then y_scaler.inverse_transform.
z = y_scaler.inverse_transform(y_to_z(y))

In [None]:
# Sanity check: shape of the flattened target (shown as cell output).
y.ravel().shape

In [None]:
# Split into training and test sets; random_state=0 makes the shuffle reproducible.
# The target is flattened to 1-D before splitting.
X_train, X_test, y_train, y_test = train_test_split(X, y.ravel(), random_state=0)

In [None]:
# Boosted regressor with fixed hyper-parameters and a fixed seed.
estimator = AdaBoostRegressor(
    n_estimators=466,
    learning_rate=0.1,
    random_state=0,
)

# Train on the training split only; the fitted estimator is the cell output.
estimator.fit(X_train, y_train)

In [None]:
# Bayesian hyper-parameter search over the learning rate (log-uniform prior)
# and the number of boosting stages, scored with 5-fold cross-validation.
# Both the base estimator and the search itself are seeded so that a re-run
# explores the same points and reports the same best parameters.
bdt_opt = BayesSearchCV(
    AdaBoostRegressor(random_state=0),
    {
        "learning_rate": Real(0.005, 0.9, prior="log-uniform"),
        "n_estimators": Integer(1, 1000),
    },
    n_iter=100,
    cv=5,
    random_state=0,
)

# 100 iterations x 5 folds of boosted-model fits: this is the expensive cell.
bdt_opt.fit(X_train, y_train)

In [None]:
# Best hyper-parameter combination found by the search (shown as cell output).
# NOTE(review): the estimator fitted earlier uses hard-coded hyper-parameters,
# presumably copied from a previous run of this search — keep them in sync.
bdt_opt.best_params_

In [None]:
# Predict the (transformed) target for the held-out events.
y_pred = estimator.predict(X_test)

# Score on the test split (R^2 for scikit-learn regressors), shown as cell output.
estimator.score(X_test, y_test)

In [None]:
# The regression target was transformed twice before training: scaled with
# y_scaler, then rescaled via (y + 235) / 155. To get back to start_z both
# steps are undone in reverse order — y_to_z first, then
# y_scaler.inverse_transform — and identically for prediction and truth, so
# that their difference is a residual in the original units.
z_pred = y_scaler.inverse_transform(y_to_z(y_pred).reshape(-1, 1)).ravel()
z_test = y_scaler.inverse_transform(y_to_z(y_test).reshape(-1, 1)).ravel()

In [None]:
# Residual histogram: 20 regular bins from -10 to +10 with double-precision storage.
h = hist.Hist.new.Regular(20, -10, +10, name=r"𝛥z [cm]").Double()

In [None]:
# Fill with the per-event residual (prediction minus truth); squeeze drops any
# length-1 axes so both operands have matching shapes before subtracting.
h.fill(np.squeeze(z_pred) - np.squeeze(z_test))

In [None]:
# Bin contents and bin edges as NumPy arrays — the inputs of the binned fit below.
entries, edges = h.to_numpy()

In [None]:
def residual_model(x, mu, sigma):
    """Cumulative distribution function of a normal distribution.

    Used as the model of the binned likelihood fit in this notebook, where it
    is handed to ``cost.BinnedNLL`` together with the histogram contents and
    bin edges; that cost function takes the model as a CDF evaluated at the
    bin edges.

    Parameters
    ----------
    x : float or array
        Point(s) at which to evaluate the CDF.
    mu : float
        Mean of the normal distribution.
    sigma : float
        Standard deviation of the normal distribution.

    Returns
    -------
    float or numpy.ndarray
        CDF value(s) at ``x``.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # guarantee that ``scipy.stats`` is loaded on every SciPy version.
    from scipy.stats import norm

    return norm.cdf(x, mu, sigma)

In [None]:
# Binned negative-log-likelihood fit of residual_model to the histogram.
# Start values are passed by parameter name so they cannot be swapped silently.
m = Minuit(cost.BinnedNLL(entries, edges, residual_model), mu=0, sigma=25)
# A width must be positive; without this bound the minimiser may step to
# sigma <= 0, where the normal CDF is undefined.
m.limits["sigma"] = (0, None)

In [None]:
# Run the MIGRAD minimiser; later cells read the fitted values and errors from res.params.
res = m.migrad()

In [None]:
# Display the fit result as the cell output.
res

In [None]:
# Residual histogram with the fitted normal distribution drawn on top.
h.plot()
ax = plt.gca()
ax.set_xlabel(r"$\Delta z\;[\mathrm{cm}]$")

# Fitted parameters: index 0 is mu, index 1 is sigma (order of the model arguments).
mu_fit, sigma_fit = res.params[0], res.params[1]
plot_range = edges[0], edges[-1]
x = np.linspace(plot_range[0], plot_range[1], 100)
best_fit = scipy.stats.norm(mu_fit.value, sigma_fit.value)

# Convert the unit-area PDF into counts per bin: normalise the histogram total
# to the fraction of the fitted distribution inside the plotted range, then
# multiply by the bin width.
n_bins = len(entries)
binsize = (plot_range[1] - plot_range[0]) / n_bins
in_range_fraction = best_fit.cdf(plot_range[1]) - best_fit.cdf(plot_range[0])
scale = h.sum() / in_range_fraction * binsize
ax.plot(x, scale * best_fit.pdf(x))


def _annotate(x_pos, y_pos, text):
    """Place LaTeX-rendered text at the given axes-fraction coordinates."""
    ax.text(x_pos, y_pos, text, transform=ax.transAxes, usetex=True)


_annotate(0.6, 0.9, rf"$\mu = {mu_fit.value:.2f} \pm {mu_fit.error:.2f}$\;cm")
_annotate(
    0.25,
    0.1,
    f"Training dataset: {len(y_train)} events\n"
    f"Test dataset: {len(y_test)} events\nAdaBoost BDT",
)
_annotate(0.6, 0.81, rf"$\sigma = {sigma_fit.value:.2f} \pm {sigma_fit.error:.2f}$\;cm")

plotting.watermark()
# The total number of events is encoded in the output file name.
plt.savefig(f"plots/h_dz_AdaBoost_n{len(y)}.pdf")
plt.savefig(f"plots/h_dz_AdaBoost_n{len(y)}.png")

In [None]:
# Compare the target distribution of the training and test splits in the
# original start_z units. The target was transformed twice (y_scaler, then the
# affine rescaling), so both steps are undone in reverse order before plotting.
fig, ax = plt.subplots()
for split_label, y_split in (("Training", y_train), ("Test", y_test)):
    z_split = y_scaler.inverse_transform(y_to_z(y_split).reshape(-1, 1)).ravel()
    # Semi-transparent so the smaller sample stays visible behind the larger one.
    ax.hist(z_split, alpha=0.6, label=f"{split_label} ({len(z_split)} events)")
# Unit taken from the residual plot of this notebook, which labels delta z in cm.
ax.set_xlabel(r"$z\;[\mathrm{cm}]$")
ax.set_ylabel("Events")
ax.legend();