# Data exploration playground

https://zenodo.org/records/17045562

* $N=99942$: number of events
* $V$: total number of voxels (e.g. 9x16x45 = 6480)
* $E_i,\quad i=1,\dots,N$: incident energy
* $\alpha_i$: angle of entrance
* $\pmb{x}_{i}\in \mathbb{R}^V$ energy deposits across voxels
* `showers.hdf5` numpy array of shape (N, V)
* `incident_energes.hdf5` numpy array of shape (N, 1)

$$
\pmb{E} = 
\begin{bmatrix}
E_1\\
E_2\\
\vdots\\
E_N
\end{bmatrix},
\ \ \ 
X = 
\begin{bmatrix}
\ \pmb{x}^t_1 \\
\ \pmb{x}^t_2 \\
\vdots\\
\ \pmb{x}^t_N 
\end{bmatrix}
$$

$$
\pmb{x}_i = f(E_i;\alpha_i ,\ pid_j,\ geometry),\quad i=1,..,N
$$

In [16]:
import pandas as pd
import numpy as np
import h5py
import matplotlib.pyplot as plt
import sys
sys.path.append('../CaloChallenge/code/')
from HighLevelFeatures import HighLevelFeatures as HLF
import os

filename = "../FCCeeALLEGRO/LEMURS_FCCeeALLEGRO_gamma_100kEvents_1GeV100GeV_GPSflat_part1.h5"
# filename = "/home/panos/turing/diffusion4sim/simulations/generated_100events_Geo_SiW_E_50GeV_Phi_0.0_Theta_1.57.h5"

i_idcs = None
# i_idcs = np.arange(0,100)


# === Open the HDF5 file ===
with h5py.File(filename, "r") as f:
    print("Features in " + os.path.basename(filename) + ":", list(f.keys()))
    
    showers = f["showers"][i_idcs,:] if i_idcs is not None else f["showers"]
    showers = np.array(showers)
    # showers = showers.reshape(showers.shape[0],-1)
    incident_energies = f["incident_energy"][i_idcs] if i_idcs is not None else f["incident_energy"]
    incient_energies = np.array(incident_energies).reshape(-1,1)

print("Showers shape:", showers.shape)
print("Incident energies shape:", incident_energies.shape)

Features in LEMURS_FCCeeALLEGRO_gamma_100kEvents_1GeV100GeV_GPSflat_part1.h5: ['incident_energy', 'incident_phi', 'incident_theta', 'showers']
Showers shape: (99942, 9, 16, 45)
Incident energies shape: (99942,)


## 3D scatter of a single shower

In [4]:
"""
requires
pip install --upgrade nbformat
pip install plotly
"""

import plotly.graph_objects as go

i_idx = 99

shower3d = showers[i_idx, :, :, :]

num_rad_splits, num_ang_splits, num_layers = shower3d.shape

# bin centers
R   = np.arange(num_rad_splits)
PHI = np.linspace(0, 2*np.pi, num_ang_splits, endpoint=False)
Z   = np.arange(num_layers)

r, phi, z = np.meshgrid(R, PHI, Z, indexing='ij')

# Cartesian conversion
x = r * np.cos(phi)
y = r * np.sin(phi)


fig = go.Figure(
    data=[
        go.Scatter3d(
            x = x.flatten(),
            y = y.flatten(),
            z = z.flatten(),
            mode='markers',
            marker=dict(
                size=2,
                opacity=0.8,
                colorscale='Greens',
                color = shower3d.flatten()
            )
        )
    ]
)

fig.update_layout(
    scene=dict(
        xaxis_title="X",
        yaxis_title="Y",
        zaxis_title="Z",
    ),
    height=800,
    width=1000,
    scene_camera=dict(
        eye=dict(x=1.8, y=1.2, z=-1.8),
        up=dict(x=0, y=1, z=0)
    )
)

fig.show()