In [None]:
from IceCube.Essential import *
from IceCube.Model import *
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import pdb


In [None]:
def draw_hist(title, x, density=True, nbins=30, kappa=None, threshold=1):
    """Overlay step histograms of ``x`` for low-kappa and high-kappa events.

    A new figure is opened and two ``histtype='step'`` histograms are drawn:
    one for the entries of ``x`` where ``kappa < threshold`` (blue) and one
    for the entries where ``kappa >= threshold`` (orange).

    Parameters
    ----------
    title : str
        Title of the figure.
    x : array-like
        Values to histogram. Must accept the boolean mask built from
        ``kappa`` as an index (the notebook passes pandas Series and
        NumPy arrays).
    density : bool, default True
        Forwarded to ``plt.hist``.
    nbins : int, default 30
        Forwarded to ``plt.hist`` as ``bins``.
    kappa : array-like, optional
        Per-event kappa values used to split ``x``. When omitted, the
        notebook-level ``reco_df.kappa`` is used, which is what the
        original implementation always did.
    threshold : float, default 1
        Boundary between the "low" and "high" kappa groups.
    """
    if kappa is None:
        # Backward-compatible default: fall back to the notebook-global table.
        kappa = reco_df.kappa

    # Two explicit comparisons (rather than negating one mask) so that NaN
    # kappa values stay excluded from both groups.
    kappa_lo = x[kappa < threshold]
    kappa_hi = x[kappa >= threshold]

    plt.figure()
    plt.hist(kappa_lo, bins=nbins, color='blue',
             label=f'Kappa < {threshold:g}', histtype='step', density=density)
    # The label now matches the ">=" mask (it previously read "Kappa > 1").
    plt.hist(kappa_hi, bins=nbins, color='orange',
             label=f'Kappa >= {threshold:g}', histtype='step', density=density)
    plt.title(title)
    plt.legend()


In [None]:
# Batches evaluated in this notebook (use [81] for a quick single-batch run).
BATCHES_TEST = [*range(81, 86)]

# Ground truth: target angles converted to (nx, ny, nz) vector components.
true_df = angles2vector(get_target_angles(BATCHES_TEST))
print(true_df.head(5))
n = true_df[["nx", "ny", "nz"]].to_numpy()

# Reconstruction output: (x, y, z) direction columns plus per-event features.
reco_df = get_reco_angles(BATCHES_TEST)
print(reco_df.head(5))
n_hat = reco_df[["x", "y", "z"]].to_numpy()

# Mean total / azimuth / zenith error of the reconstruction against truth.
error, az_error, ze_error = angle_errors(n, n_hat)
print(
    "error, az_error, ze_error = "
    + ", ".join(str(err.mean()) for err in (error, az_error, ze_error))
)

# Boolean per-event flag: True where kappa is below 0.5.
idx = reco_df.kappa < 0.5


In [None]:
# The fit "error" column can be read as a goodness-of-fit measure.
# Every call below splits the plotted quantity by kappa via draw_hist.
Nbins = 100

draw_hist("log10(error)",
          np.log10(np.sqrt(reco_df["error"]) + 1e-6),
          nbins=Nbins)

# Columns plotted as-is at full bin resolution.
for feat in ("hits", "zenith"):
    draw_hist(feat, reco_df[feat], nbins=Nbins)

# Columns plotted on a log10 scale; the 1e-3 offset keeps log10 finite at 0.
for feat in ("sumq", "dt_15", "dt_50", "dt_85"):
    draw_hist(f"log10({feat})", np.log10(reco_df[feat] + 1e-3), nbins=Nbins)

# Element-wise minimum of the three log-scaled dt columns.
draw_hist(
    "min dt",
    np.min(
        [np.log10(reco_df[feat] + 1e-3)
         for feat in ("dt_15", "dt_50", "dt_85")],
        axis=0,
    ),
    nbins=Nbins,
)

# The remaining quantities use half as many bins.
draw_hist("log10(meanq)", np.log10(reco_df["meanq"] + 1e-3), nbins=Nbins // 2)
draw_hist("bratio", np.clip(reco_df["bratio"], 0, 1e-5), nbins=Nbins // 2)
for feat in ("uniq_x", "uniq_y", "uniq_z", "qx", "qy", "qz"):
    draw_hist(feat, reco_df[feat], nbins=Nbins // 2)

draw_hist("ez", reco_df["ez"], nbins=Nbins)
draw_hist("log10(kappa)", np.log10(reco_df.kappa + 1e-3), nbins=Nbins)


In [None]:
# Columns of reco_df that make up the feature matrix X, in this order.
columns = [
    "error", "hits", "sumq", "qz",
    "dt_15", "dt_50", "dt_85",
    "ez", "uniq_x",
]

# One row per event, one column per entry of `columns`.
X = reco_df[columns].to_numpy()
LOGGER.info(f"input shape = {X.shape}")


In [None]:
# Load the pickled classifier and run it on the feature matrix X.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only point MODEL_PATH at model files from a trusted source.
LOGGER.info("Loading BDT model...")
# The context manager guarantees the file handle is closed after loading
# (the previous bare open() left it to the garbage collector).
with open(os.path.join(MODEL_PATH, 'EventCat_clf.Tree.10.sklearn'), 'rb') as model_file:
    clf = pickle.load(model_file)

LOGGER.info("Predicting...")
y_hat = clf.predict(X)               # predicted class per event
score = clf.decision_function(X)     # continuous classifier score per event

# Evaluate accuracy, treating `idx` as the reference labels.
accuracy = accuracy_score(idx, y_hat)
LOGGER.info(f"Test accuracy: {accuracy * 100:.2f}%")


In [None]:
# Log each input feature next to its importance in the loaded classifier.
# The attribute is read once up front: in scikit-learn's tree and boosting
# estimators it is a computed property, so reading it inside the loop would
# redo the same computation for every feature.
importances = clf.feature_importances_
for c, importance in zip(columns, importances):
    LOGGER.info(f"{c}\t{importance}")


In [None]:
# Angular errors of the reconstruction against truth, reported via the logger.
error, az_error, ze_error = angle_errors(n, n_hat)
LOGGER.info(
    f"error, az_error, ze_error = {error.mean()}, {az_error.mean()}, {ze_error.mean()}")

# Classifier score, clipped to [-0.05, 0.05] so the tails do not stretch the axis.
draw_hist("score", np.clip(score, -0.05, 0.05), density=True, nbins=100)

# Encode both label sets as 0.0 / 1.0 floats for plotting.
idx_num = idx.values.astype(bool).astype(float)
# Convert through bool explicitly: indexing with the raw predictions
# (`arr[y_hat] = 1`) is a boolean mask only if predict() returns a bool array.
# With integer 0/1 labels it becomes fancy indexing and marks just elements
# 0 and 1.
y_hat_num = np.asarray(y_hat).astype(bool).astype(float)

# Distinct titles so the reference and predicted category plots can be told apart.
draw_hist("cat (kappa < 0.5)", idx_num, density=False, nbins=100)
draw_hist("cat (predicted)", y_hat_num, density=False, nbins=100)
