In [1]:
import uproot
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import os

from tqdm.notebook import tqdm

from particle import Particle # https://github.com/scikit-hep/particle

import json


In [2]:
# reco BEE link: https://www.phy.bnl.gov/twister/bee/set/f51043f3-10d4-49b3-9866-bc7fbcf5d4fe/event/list/
# truth BEE link: https://www.phy.bnl.gov/twister/bee/set/7ad1b4aa-a181-435a-9117-d37c0c9ca677/event/list/


In [3]:
from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

def fps_sampling(points, n_samples, random_state=None):
    """
    Perform Farthest Point Sampling on the input point cloud.

    The first point is drawn at random; each following point is the one whose
    squared Euclidean distance to the already selected points is largest.
    If n_samples exceeds the number of distinct points, indices repeat.

    :param points: numpy array of shape (N, 3) representing the point cloud
    :param n_samples: number of points to sample
    :param random_state: optional seed (or numpy Generator) used to pick the first
        point; when None the global numpy random state is used
    :return: integer array of length n_samples with the indices of sampled points
    :raises ValueError: if n_samples is negative, or if points is empty while
        n_samples is positive
    """
    points = np.asarray(points)
    N = points.shape[0]

    if n_samples < 0:
        raise ValueError(f"n_samples must be non-negative, got {n_samples}")

    sampled_indices = np.zeros(n_samples, dtype=int)
    if n_samples == 0:
        # Nothing requested: return the empty index array instead of failing
        # on the assignment of the first index below.
        return sampled_indices
    if N == 0:
        raise ValueError("cannot sample from an empty point cloud")

    # Squared distance from every point to its nearest already-selected point.
    distances = np.full(N, np.inf)

    # Randomly choose the first point
    if random_state is None:
        sampled_indices[0] = np.random.randint(N)
    else:
        sampled_indices[0] = np.random.default_rng(random_state).integers(N)

    for i in range(1, n_samples):
        offsets = points - points[sampled_indices[i - 1]]
        distances = np.minimum(distances, np.sum(offsets**2, axis=1))
        sampled_indices[i] = np.argmax(distances)

    return sampled_indices

def fps_clustering_downsample(points, n_samples):
    """
    Downsample the point cloud using FPS and clustering.

    Farthest Point Sampling provides n_samples seed points, K-means (seeded with
    those points) assigns every input point to a cluster, and each output point
    is the mean of the input points assigned to one cluster.

    :param points: numpy array of shape (N, 3) representing the point cloud
    :param n_samples: number of points in the downsampled cloud
    :return: float array of shape (n_samples, D) with the downsampled point cloud,
        where D is the number of columns of points
    """
    points = np.asarray(points)

    # Perform FPS to get initial samples
    sampled_indices = fps_sampling(points, n_samples)
    sampled_points = points[sampled_indices]

    # Use K-means clustering to associate other points with the samples
    kmeans = KMeans(n_clusters=n_samples, init=sampled_points, n_init=1, max_iter=100)
    kmeans.fit(points)

    # Compute the new point positions as the mean of each cluster.
    # Per-cluster sums are accumulated in one pass per coordinate instead of
    # building one boolean mask over all points for every cluster.
    labels = kmeans.labels_
    counts = np.bincount(labels, minlength=n_samples)
    sums = np.column_stack([
        np.bincount(labels, weights=points[:, dim], minlength=n_samples)
        for dim in range(points.shape[1])
    ])

    # Start from the K-means centres so that a cluster with no assigned points
    # keeps a finite position rather than becoming a NaN row.
    new_points = np.array(kmeans.cluster_centers_, dtype=float)
    occupied = counts > 0
    new_points[occupied] = sums[occupied] / counts[occupied, np.newaxis]

    return new_points

def get_min_dists(points_A, points_B):
    """
    For every point in points_A, find the distance to its nearest neighbour in points_B.

    :param points_A: query points, one row per point
    :param points_B: reference points the nearest-neighbour index is built from
    :return: array with one row per point in points_A and a single column holding
        the nearest-neighbour distance
    """
    nearest = NearestNeighbors(n_neighbors=1, algorithm='ball_tree')
    nearest.fit(points_B)
    # kneighbors returns (distances, indices); only the distances are needed.
    return nearest.kneighbors(points_A, return_distance=True)[0]


In [4]:
# loading event 13779 223 11160 from json truth file

truth_bee_dir = "input_files/misclustering_candidate_truth_outputs/bee/data/"
target_rse = (13779, 223, 11160)  # (run, subrun, event)

for entry_name in tqdm(os.listdir(truth_bee_dir)):
    json_file = f"{truth_bee_dir}{entry_name}/{entry_name}-truthDepo.json"

    # Skip stray directory entries that do not hold a truthDepo file.
    if not os.path.isfile(json_file):
        continue

    with open(json_file, "r") as truth_json:
        truth_data = json.load(truth_json)

    run = int(truth_data["runNo"])
    subrun = int(truth_data["subRunNo"])
    event = int(truth_data["eventNo"])

    if (run, subrun, event) == target_rse:
        print("found the right truth event!")
        break
else:
    # Without this, a missing event would silently leave the last file read in
    # truth_data and every later cell would use the wrong event.
    raise FileNotFoundError(f"no truthDepo file for run/subrun/event {target_rse} in {truth_bee_dir}")

# NOTE(review): the +2 shift on x is presumably an offset aligning truth depositions
# with the reco spacepoint coordinates — confirm its origin.
truth_spacepoints = np.column_stack((np.array(truth_data["x"]) + 2, np.array(truth_data["y"]), np.array(truth_data["z"])))


  0%|          | 0/83 [00:00<?, ?it/s]

found the right truth event!


In [5]:
# loading event 13779 223 11160 from reco file

reco_dfs = []  # NOTE(review): never filled in this cell; kept in case other cells rely on it

reco_dir = "input_files/misclustering_candidate_nue_files"

for file in tqdm(os.listdir(reco_dir)):
    if not file.endswith(".root"):
        continue

    f = uproot.open(f"{reco_dir}/{file}")

    rse_df = f["Trun"].arrays(["runNo", "subRunNo", "eventNo"], library="pd")

    if rse_df["runNo"].iloc[0] == 13779 and rse_df["subRunNo"].iloc[0] == 223 and rse_df["eventNo"].iloc[0] == 11160:
        print("found the right reco event!")
        # `f` is intentionally left open: the following cell reads more trees from it.
        break

    # Not the event we want: release the file handle before opening the next one.
    f.close()
else:
    # Without this, `f` would silently refer to the last file that was read.
    raise FileNotFoundError(f"no reco file for run/subrun/event (13779, 223, 11160) in {reco_dir}")



  0%|          | 0/84 [00:00<?, ?it/s]

found the right reco event!


In [6]:
# Read the reconstructed spacepoints (position + charge) from the selected reco file.
track_shower_spacepoints_df = f["T_rec"].arrays(["x", "y", "z", "q"], library="pd")
nu_trajectory_spacepoints_blob_df = f["T_rec_charge_blob"].arrays(["x", "y", "z", "q"], library="pd")
cosmic_spacepoints_df = f["T_cluster"].arrays(["x", "y", "z", "q"], library="pd")

# Tunable parameters of this step.
n_downsampled_cosmic_points = 5000
close_to_truth_max_dist = 5.0  # same units as the spacepoint coordinates (presumably cm — TODO confirm)

cosmic_spacepoints_xyz = np.column_stack((cosmic_spacepoints_df["x"], cosmic_spacepoints_df["y"], cosmic_spacepoints_df["z"]))

# K-means cannot form more clusters than there are points, so cap the target
# size for events with fewer cosmic spacepoints than requested.
n_cosmic_targets = min(n_downsampled_cosmic_points, len(cosmic_spacepoints_xyz))

print("downsampling cosmic spacepoints...", end="")
downsampled_cosmic_spacepoints = fps_clustering_downsample(cosmic_spacepoints_xyz, n_cosmic_targets)
print("done")

downsampled_cosmic_spacepoints_df = pd.DataFrame(downsampled_cosmic_spacepoints, columns=["x", "y", "z"])

print("calculating minimum distances...", end="")
min_dists = get_min_dists(downsampled_cosmic_spacepoints, truth_spacepoints)
print("done")

# get_min_dists returns one column per requested neighbour (here a single one);
# flatten it so the DataFrame column is explicitly one-dimensional.
downsampled_cosmic_spacepoints_df["min_dist"] = np.asarray(min_dists).ravel()

# Split the downsampled cosmic points by their distance to the nearest truth deposition.
close_to_truth_downsampled_cosmic_spacepoints_df = downsampled_cosmic_spacepoints_df[downsampled_cosmic_spacepoints_df["min_dist"] <= close_to_truth_max_dist]
far_from_truth_downsampled_cosmic_spacepoints_df = downsampled_cosmic_spacepoints_df[downsampled_cosmic_spacepoints_df["min_dist"] > close_to_truth_max_dist]


downsampling cosmic spacepoints...done
calculating minimum distances...done


In [7]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def _add_spacepoint_trace(fig, x, y, z, name, **marker):
    """Add one 3D marker trace, drawing detector (z, x, y) on the plot's (x, y, z) axes."""
    fig.add_trace(go.Scatter3d(
        x=z,
        y=x,
        z=y,
        mode='markers',
        marker=marker,
        name=name
    ))


# Set to True to also draw the full (not downsampled) cosmic cloud; it is large, so off by default.
show_all_cosmic_spacepoints = False

fig = make_subplots(rows=1, cols=1, specs=[[{'type': 'scene'}]])

_add_spacepoint_trace(
    fig,
    truth_spacepoints[:, 0],
    truth_spacepoints[:, 1],
    truth_spacepoints[:, 2],
    'BEE Truth Spacepoints',
    size=2, color='orange', opacity=0.8
)

_add_spacepoint_trace(
    fig,
    nu_trajectory_spacepoints_blob_df["x"],
    nu_trajectory_spacepoints_blob_df["y"],
    nu_trajectory_spacepoints_blob_df["z"],
    'Neutrino Cluster Spacepoints',
    size=2, color=nu_trajectory_spacepoints_blob_df["q"], colorscale='Jet', opacity=0.8
)

if show_all_cosmic_spacepoints:
    _add_spacepoint_trace(
        fig,
        cosmic_spacepoints_df["x"],
        cosmic_spacepoints_df["y"],
        cosmic_spacepoints_df["z"],
        'Cosmic Spacepoints',
        size=1, color='black', opacity=0.5
    )

_add_spacepoint_trace(
    fig,
    close_to_truth_downsampled_cosmic_spacepoints_df["x"],
    close_to_truth_downsampled_cosmic_spacepoints_df["y"],
    close_to_truth_downsampled_cosmic_spacepoints_df["z"],
    'Downsampled Cosmic Spacepoints, close to truth depositions',
    size=2, color='red', opacity=0.8
)

_add_spacepoint_trace(
    fig,
    far_from_truth_downsampled_cosmic_spacepoints_df["x"],
    far_from_truth_downsampled_cosmic_spacepoints_df["y"],
    far_from_truth_downsampled_cosmic_spacepoints_df["z"],
    'Downsampled Cosmic Spacepoints, far from truth depositions',
    size=2, color='blue', opacity=0.8
)

# Axis limits used for the plot (named as TPC bounds by the original author).
tpc_min_x = -1.
tpc_max_x = 254.3
tpc_min_y = -115.
tpc_max_y = 117.
tpc_min_z = 0.6
tpc_max_z = 1036.4

tpc_extent_x = tpc_max_x - tpc_min_x
tpc_extent_y = tpc_max_y - tpc_min_y
tpc_extent_z = tpc_max_z - tpc_min_z

# aspectratio is only honoured with aspectmode="manual"; with "auto" plotly
# ignores it and falls back to a cube when one axis is more than four times
# the others, which is the case for the z axis here. The ratios are normalised
# by their geometric mean so the overall scene size stays comparable to the
# default unit cube while the proportions follow the axis extents.
aspect_norm = (tpc_extent_x * tpc_extent_y * tpc_extent_z) ** (1.0 / 3.0)

# First do the layout without camera settings
fig.update_layout(
    scene=dict(
        xaxis_title='z',
        yaxis_title='x',
        zaxis_title='y',
        aspectmode="manual",
        aspectratio=dict(
            x=tpc_extent_z / aspect_norm,
            y=tpc_extent_x / aspect_norm,
            z=tpc_extent_y / aspect_norm
        ),
        xaxis=dict(
            range=[tpc_min_z, tpc_max_z],
            autorange=False
        ),
        yaxis=dict(
            range=[tpc_min_x, tpc_max_x],
            autorange=False
        ),
        zaxis=dict(
            range=[tpc_min_y, tpc_max_y],
            autorange=False
        )
    ),
    width=2000,
    height=1000,
    autosize=False
)

fig.show(renderer="browser")
