In [None]:
from IceCube.Essential import *
from IceCube.Model import *
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pickle
import pdb


In [None]:
def draw_hist(title, x, density=True, nbins=30):
    """Overlay step histograms of ``x`` for the two event populations.

    The split uses the module-level ``error`` and ``errorx`` arrays: entries
    with ``error < errorx`` are drawn in blue and labelled 'GNN', entries with
    ``error > errorx`` in orange and labelled 'Fit'. Entries where the two
    errors are equal appear in neither histogram.

    Args:
        title: Title placed on the figure.
        x: Values to histogram; must be indexable by the boolean masks built
            from ``error`` and ``errorx``.
        density: Forwarded to ``hist``; normalises each histogram when true.
        nbins: Number of bins used for each histogram.
    """
    populations = (
        (x[error < errorx], 'blue', 'GNN'),
        (x[error > errorx], 'orange', 'Fit'),
    )

    _, ax = plt.subplots()
    for values, colour, tag in populations:
        ax.hist(values, bins=nbins, color=colour,
                label=tag, histtype='step', density=density)
    ax.set_title(title)
    ax.legend()


In [None]:
# Batches under evaluation (a wider sweep would be list(range(1, 21))).
BATCHES_TEST = [81]

# Ground truth: target angles passed through angles2vector, which supplies
# the nx/ny/nz columns read below.
true_df = angles2vector(get_target_angles(BATCHES_TEST))
print(true_df.head(5))
n = true_df[["nx", "ny", "nz"]].to_numpy()

# Reconstructed directions (x, y, z) and the per-event vector (ex, ey, ez).
reco_df = get_reco_angles(BATCHES_TEST)
print(reco_df.head(5))
n_hat = reco_df[["x", "y", "z"]].to_numpy()
e = reco_df[["ex", "ey", "ez"]].to_numpy()

# Row-wise dot product of n_hat and e, kept as a column so it broadcasts.
xe = (n_hat * e).sum(axis=1, keepdims=True)
print(xe.shape)

# Subtract the e-aligned part from n_hat and rescale what is left; the 1e-8
# keeps the division finite when the remainder has zero length.
proj = n_hat - xe * e
proj /= np.linalg.norm(proj, axis=1, keepdims=True) + 1e-8

# Errors of the reconstructed direction against the truth.
error, az_error, ze_error = angle_errors(n, n_hat)
print(f"error, az_error, ze_error = {error.mean()}, {az_error.mean()}, {ze_error.mean()}")

# Errors of the projected direction against the truth.
errorx, az_errorx, ze_errorx = angle_errors(n, proj)
print(f"error, az_error, ze_error = {errorx.mean()}, {az_errorx.mean()}, {ze_errorx.mean()}")

# True where the projected direction has the smaller error.
idx = errorx < error


In [None]:
# plot errors
# All four curves share one set of bin edges. With a bare bins=30 every call
# derives its edges from its own data range, so the "better" subsets end up on
# different bins than their parent distribution and the raw counts cannot be
# compared bin by bin.
error_bins = np.histogram_bin_edges(np.concatenate([error, errorx]), bins=30)

plt.figure()
plt.hist(error, bins=error_bins, color='blue', label='GNN',
         histtype='step', density=False)
plt.hist(error[error < errorx], bins=error_bins, color='black',
         label='GNN better', histtype='step', density=False)
plt.hist(errorx, bins=error_bins, color='orange',
         label='Fit', histtype='step', density=False)
plt.hist(errorx[error > errorx], bins=error_bins, color='red',
         label='Fit better', histtype='step', density=False)
plt.xlabel("angular error")
plt.ylabel("events")
plt.legend()


In [None]:
# The fit error can be read as a goodness-of-fit measure.
# Each entry is (title, values, number of bins); the histograms are drawn in
# list order by draw_hist, split into the GNN-better / Fit-better populations.
Nbins = 100

hist_specs = [
    ("log10(error)", np.log10(np.sqrt(reco_df["error"]) + 1e-6), Nbins),
    ("hits", reco_df["hits"], Nbins),
    ("zenith", reco_df["zenith"], Nbins),
    ("log10(sumq)", np.log10(reco_df["sumq"] + 1e-3), Nbins),
    ("log10(dt_15)", np.log10(reco_df["dt_15"] + 1e-3), Nbins),
    ("log10(dt_50)", np.log10(reco_df["dt_50"] + 1e-3), Nbins),
    ("log10(dt_85)", np.log10(reco_df["dt_85"] + 1e-3), Nbins),
    # The remaining coarse features use half as many bins (Nbins >> 1).
    ("log10(meanq)", np.log10(reco_df["meanq"] + 1e-3), Nbins >> 1),
    ("bratio", np.clip(reco_df["bratio"], 0, 0.00001), Nbins >> 1),
    ("uniq_x", reco_df["uniq_x"], Nbins >> 1),
    ("uniq_y", reco_df["uniq_y"], Nbins >> 1),
    ("uniq_z", reco_df["uniq_z"], Nbins >> 1),
    ("qx", reco_df["qx"], Nbins >> 1),
    ("qy", reco_df["qy"], Nbins >> 1),
    ("qz", reco_df["qz"], Nbins >> 1),
    ("ez", reco_df["ez"], Nbins),
    ("arccos(xe)", np.arccos(xe), Nbins),
    ("log10(kappa)", np.log10(reco_df["kappa"] + 1e-3), Nbins),
]

for hist_title, hist_values, hist_bins in hist_specs:
    draw_hist(hist_title, hist_values, nbins=hist_bins)


In [None]:
# trajectory display
# Column names x0, y0, z0, t0, ..., x3, y3, z3, t3: four (x, y, z, t) points
# per event, reshaped below to (events, point, coordinate).
col_xyzt = [f"{axis}{point}" for point in range(4) for axis in "xyzt"]
traj = reco_df[col_xyzt].to_numpy().reshape(-1, 4, 4)
traj


In [None]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np


def draw_trajectory(eid):
    """Draw the trajectory points of one event in 3D, coloured by time.

    Args:
        eid: Index of the event along the first axis of the module-level
            ``traj`` array. Each row of the selected event is read as
            (x, y, z, t).
    """
    event = traj[eid]
    xs, ys, zs, times = event[:, 0], event[:, 1], event[:, 2], event[:, 3]

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')
    ax.set_xlim(-0.5, 0.5)
    ax.set_ylim(-0.5, 0.5)
    ax.set_zlim(-0.5, 0.5)

    # Map each time onto the colormap, scaled to this event's own time range.
    # pyplot.get_cmap is used because plt.cm.get_cmap is deprecated and absent
    # from recent matplotlib releases.
    norm = plt.Normalize(times.min(), times.max())
    colors = plt.get_cmap('RdYlBu')(norm(times))

    # Draw the points once, already coloured; an additional uncoloured scatter
    # at the same positions would only show through the coloured markers.
    ax.scatter(xs, ys, zs, c=colors)


In [None]:
# Draw a fixed-stride sample of events. The stop is clamped to the number of
# loaded events so a smaller batch does not raise IndexError inside
# draw_trajectory; with enough events the indices are unchanged.
for i in range(10000, min(99900, len(traj)), 9900):
    draw_trajectory(i)


In [None]:
# reco_df inputs
# The log10 transforms are applied to a copy (DataFrame.assign) instead of
# being written back into reco_df. Overwriting reco_df made this cell
# non-idempotent: a second run fed log10(log10(...)) to the model, and any
# histogram cell re-run afterwards saw already-transformed columns.
reco_feat = reco_df.assign(
    error=np.log10(reco_df["error"] + 1e-6),
    sumq=np.log10(reco_df["sumq"] + 1e-3),
    dt_50=np.log10(reco_df["dt_50"] + 1e-3),
    dt_85=np.log10(reco_df["dt_85"] + 1e-3),
    kappa=np.log10(reco_df["kappa"] + 1e-3),
)
columns = ["kappa", "zenith", "error", "sumq", "qz", "dt_50", "dt_85", "ez"]
reco = reco_feat[columns].to_numpy()

# arccos of the n_hat . e dot product. Stored under a new name so `xe` keeps
# the raw dot product; overwriting it meant a re-run computed
# arccos(arccos(xe)), whose NaNs were then silently zeroed below.
xe_angle = np.arccos(xe)

# trajectory display
col_xyzt = [
    "x0", "y0", "z0", "t0",
    "x1", "y1", "z1", "t1",
    "x2", "y2", "z2", "t2",
    "x3", "y3", "z3", "t3", ]
traj = reco_df[col_xyzt].values
traj = traj.reshape(-1, 4, 4)

# Displacement between consecutive trajectory points divided by their time
# difference, scaled by 1e3. The "+ 1" in the denominator presumably guards
# against a zero time difference -- TODO confirm.
v1 = 1e3 * (traj[:, 1, :3] - traj[:, 0, :3]) / \
    (traj[:, 1, 3] - traj[:, 0, 3] + 1)[:, np.newaxis]
v2 = 1e3 * (traj[:, 2, :3] - traj[:, 1, :3]) / \
    (traj[:, 2, 3] - traj[:, 1, 3] + 1)[:, np.newaxis]
v3 = 1e3 * (traj[:, 3, :3] - traj[:, 2, :3]) / \
    (traj[:, 3, 3] - traj[:, 2, 3] + 1)[:, np.newaxis]

# Segment speeds; the 1e-1 offset keeps the normalisations below finite.
v1scale = np.linalg.norm(v1, axis=1, keepdims=True) + 1e-1
v2scale = np.linalg.norm(v2, axis=1, keepdims=True) + 1e-1
v3scale = np.linalg.norm(v3, axis=1, keepdims=True) + 1e-1

# Dot product of each reversed, normalised segment velocity with e ...
ev1 = np.sum(-v1 * e / v1scale, axis=1, keepdims=True)
ev2 = np.sum(-v2 * e / v2scale, axis=1, keepdims=True)
ev3 = np.sum(-v3 * e / v3scale, axis=1, keepdims=True)

# ... converted to angles.
ev1 = np.arccos(ev1)
ev2 = np.arccos(ev2)
ev3 = np.arccos(ev3)

# Dot products between the normalised segment velocities.
vv12 = np.sum(v1 * v2 / v1scale / v2scale, axis=1, keepdims=True)
vv23 = np.sum(v2 * v3 / v2scale / v3scale, axis=1, keepdims=True)
vv31 = np.sum(v3 * v1 / v3scale / v1scale, axis=1, keepdims=True)

# Per-event averages: log10 of the mean speed, the mean of the three angles
# above, and the mean of the three velocity dot products.
vavg = np.log10(np.mean((v1scale, v2scale, v3scale), axis=0))
evvv = np.mean((ev1, ev2, ev3), axis=0)
vvvv = np.mean((vv12, vv23, vv31), axis=0)

# Distance between the mean trajectory position and the (qx, qy, qz) point.
pos = np.mean(traj[:, :, :3], axis=1)
xyzq = reco_df[["qx", "qy", "qz"]].to_numpy()
distq = pos - xyzq
distq = np.linalg.norm(distq, axis=1, keepdims=True) + 1e-3

# input
# Column order here must match the names appended to `columns` below.
X = np.concatenate(
    [reco, xe_angle, ev1, ev2, ev3, vavg, evvv, vvvv, distq], axis=1)
X[np.isnan(X)] = 0  # NaNs (e.g. arccos outside [-1, 1]) are replaced by 0
columns += ["xe", "ev1", "ev2", "ev3", "vavg", "evvv", "vvvv", "distq"]
LOGGER.info(f"input shape = {X.shape}")


In [None]:
# inputs
# load the model and predict
LOGGER.info("Loading BDT model...")
# NOTE(security): pickle.load can execute arbitrary code from the file; only
# load model files that come from a trusted source.
# The context manager closes the file handle, which a bare open() inside the
# pickle.load call left open.
with open(os.path.join(MODEL_PATH, 'BDT_clf.Baseline.0414.sklearn'), 'rb') as model_file:
    clf = pickle.load(model_file)
LOGGER.info("Predicting...")
y_hat = clf.predict(X)
score = clf.decision_function(X)

# Evaluate accuracy
accuracy = accuracy_score(idx, y_hat)
LOGGER.info(f"Test accuracy: {accuracy * 100:.2f}%")

# Use the projected-direction error wherever the classifier predicts it is the
# better one. The prediction is cast to a boolean mask first: if predict()
# returns integer labels, error[y_hat] would be positional indexing and touch
# only elements 0 and 1. Assumes the positive class means "errorx is smaller",
# matching `idx` -- TODO confirm against the training labels.
# The result goes into a new array so `error` itself is left intact.
use_fit = np.asarray(y_hat).astype(bool)
error_bdt = np.where(use_fit, errorx, error)
LOGGER.info(f"error -> {error_bdt.mean()}")


In [None]:
# Log one "<feature name>\t<importance>" line per input column.
# feature_importances_ is read once up front: on boosted ensembles it is a
# computed property, so reading it inside the loop repeats the work per column.
importances = clf.feature_importances_
for i, c in enumerate(columns):
    LOGGER.info(f"{c}\t{importances[i]}")


In [None]:
# Recompute both sets of errors from the truth and the two direction estimates.
error, az_error, ze_error = angle_errors(n, n_hat)
LOGGER.info(
    f"error, az_error, ze_error = {error.mean()}, {az_error.mean()}, {ze_error.mean()}")

errorx, az_errorx, ze_errorx = angle_errors(n, proj)
LOGGER.info(
    f"error, az_error, ze_error = {errorx.mean()}, {az_errorx.mean()}, {ze_errorx.mean()}")

# Take the projected-direction error wherever the BDT score exceeds the
# threshold. The combination is written to a new array: draw_hist reads the
# global `error` to split events, and overwriting it made error == errorx for
# every selected event, so those events dropped out of both populations in
# this and all later histograms.
criteria = score > -0.00001
error_selected = np.where(criteria, errorx, error)
LOGGER.info(f"error -> {error_selected.mean()}")

draw_hist("score", np.clip(score, -0.01, 0), density=True)


In [None]:
# trajectory display
col_xyzt = [
    "x0", "y0", "z0", "t0",
    "x1", "y1", "z1", "t1",
    "x2", "y2", "z2", "t2",
    "x3", "y3", "z3", "t3", ]
traj = reco_df[col_xyzt].values
traj = traj.reshape(-1, 4, 4)

# Displacement between consecutive trajectory points divided by their time
# difference, scaled by 1e3. The "+ 1" in the denominator presumably guards
# against a zero time difference -- TODO confirm.
v1 = 1e3 * (traj[:, 1, :3] - traj[:, 0, :3]) / \
    (traj[:, 1, 3] - traj[:, 0, 3] + 1)[:, np.newaxis]
v2 = 1e3 * (traj[:, 2, :3] - traj[:, 1, :3]) / \
    (traj[:, 2, 3] - traj[:, 1, 3] + 1)[:, np.newaxis]
v3 = 1e3 * (traj[:, 3, :3] - traj[:, 2, :3]) / \
    (traj[:, 3, 3] - traj[:, 2, 3] + 1)[:, np.newaxis]

# Segment speeds; the 1e-1 offset keeps the normalisations below finite.
v1scale = np.linalg.norm(v1, axis=1, keepdims=True) + 1e-1
v2scale = np.linalg.norm(v2, axis=1, keepdims=True) + 1e-1
v3scale = np.linalg.norm(v3, axis=1, keepdims=True) + 1e-1

# Angle derived from the dot product of each reversed, normalised segment
# velocity with e.
ev1 = np.arccos(np.sum(-v1 * e / v1scale, axis=1, keepdims=True))
ev2 = np.arccos(np.sum(-v2 * e / v2scale, axis=1, keepdims=True))
ev3 = np.arccos(np.sum(-v3 * e / v3scale, axis=1, keepdims=True))

# Dot products between the normalised segment velocities.
vv12 = np.sum(v1 * v2 / v1scale / v2scale, axis=1, keepdims=True)
vv23 = np.sum(v2 * v3 / v2scale / v3scale, axis=1, keepdims=True)
vv31 = np.sum(v3 * v1 / v3scale / v1scale, axis=1, keepdims=True)

# Per-event averages. evvv is taken over the angles (after arccos) so the
# histogram shows the same quantity as the "evvv" column of the BDT input,
# rather than the mean of the raw dot products.
vavg = np.log10(np.mean((v1scale, v2scale, v3scale), axis=0))
evvv = np.mean((ev1, ev2, ev3), axis=0)
vvvv = np.mean((vv12, vv23, vv31), axis=0)

# The bin count is passed by keyword: draw_hist's third positional parameter
# is `density`, so a positional Nbins was taken as the density flag and the
# histograms silently fell back to the default 30 bins.
draw_hist("arccos(ev1)", ev1, nbins=Nbins)
draw_hist("arccos(ev2)", ev2, nbins=Nbins)
draw_hist("arccos(ev3)", ev3, nbins=Nbins)
draw_hist("vv12", np.arccos(vv12), nbins=Nbins)
draw_hist("vv23", np.arccos(vv23), nbins=Nbins)
draw_hist("vv31", np.arccos(vv31), nbins=Nbins)
draw_hist("vmax", vavg, nbins=Nbins)
draw_hist("evvv", evvv, nbins=Nbins)
draw_hist("vvvv", vvvv, nbins=Nbins)
