In [None]:
import ROOT

In [None]:
# Switch on ROOT's implicit multi-threading before any RDataFrame is created.
# NOTE(review): with implicit MT the order in which entries are processed is
# presumably not guaranteed, so the row order of the exported arrays may vary
# between runs - confirm this is acceptable for downstream consumers.
ROOT.EnableImplicitMT()

In [None]:
import pandas as pd

In [None]:
import numpy as np

In [None]:
from tqdm import tqdm

In [None]:
# Dataset size configuration: number of input files to chain and the number
# of events each file is assumed to contain.
n_files, n_events_per_file = 10, 100

# Nominal total event count; used below for array sizing and output file names.
n_event = n_events_per_file * n_files

In [None]:
# One digitised simulation file per numbered sub-directory (1 .. n_files),
# addressed through root:// URLs.
input_files = [
    f"root://eospublic.cern.ch//eos/experiment/sndlhc/users/olantwin/advsnd/2024/07/nu12/CCDIS/{i + 1}/sndLHC.Genie-TGeant4_dig.root"
    for i in range(n_files)
]

# Chain the "cbmsim" tree of all input files into a single data frame.
df = ROOT.ROOT.RDataFrame("cbmsim", input_files)

In [None]:
# Keep an event only if at least one of the two digitised hit collections
# (AdvMuFilter or AdvTarget) reports a non-zero number of entries.
has_any_hit = "Digi_AdvMuFilterHits.GetEntries() || Digi_AdvTargetHits.GetEntries()"
df = df.Filter(has_any_hit)

In [None]:
# Make the experiment's class headers known to the interpreter so that the
# JIT-compiled RDataFrame expressions below can use these types.
for header in ("ShipMCTrack.h", "AdvTargetHit.h", "AdvMuFilterHit.h"):
    ROOT.gInterpreter.ProcessLine(f'#include "{header}"')

In [None]:
# C++ helpers used inside the RDataFrame expression strings below.
#
# All helpers are declared in a single translation unit protected by an
# include guard: declaring the same function twice makes cling report
# redefinition errors, so without the guard this cell cannot be re-executed
# cleanly in a live kernel.
ROOT.gInterpreter.Declare("""
#ifndef ADVSND_HITMAP_NOTEBOOK_HELPERS
#define ADVSND_HITMAP_NOTEBOOK_HELPERS

#include <algorithm>
#include "ROOT/RVec.hxx"

// Station number: everything above the low 17 bits of the detector ID.
int station_from_id(int id) {
    return id >> 17;
}

// Column number: the two bits starting at bit 11 (values 0..3 for id >= 0).
int column_from_id(int id) {
    return (id >> 11) % 4;
}

// Sensor number: bit 10 of the detector ID.
int sensor_from_id(int id) {
    return (id >> 10) % 2;
}

// Strip number: the low 10 bits of the detector ID.
int strip_from_id(int id) {
    return (id) % 1024;
}

// Plane number: bit 16 of the detector ID.
int plane_from_id(int id) {
    return (id >> 16) % 2;
}

// Return a sorted copy of v with duplicate values removed.
// The argument is taken by value, so the caller's vector is left untouched.
template<typename T>
ROOT::RVec<T> Deduplicate(ROOT::RVec<T> v) {
    std::sort(v.begin(), v.end());
    auto last = std::unique(v.begin(), v.end());
    v.erase(last, v.end());
    return v;
}

#endif  // ADVSND_HITMAP_NOTEBOOK_HELPERS
""")

In [None]:
# Per-event scalar columns derived from the Monte Carlo branches, registered
# in the order listed here.
truth_columns = {
    # Start z of the MC track at index 1.
    "start_z": "dynamic_cast<ShipMCTrack*>(MCTrack[1])->GetStartZ()",
    # Energy of the MC track at index 0 (presumably the incoming neutrino,
    # going by the column name - TODO confirm).
    "nu_energy": "dynamic_cast<ShipMCTrack*>(MCTrack[0])->GetEnergy()",
    # Summed energy loss over all AdvTarget / AdvMuFilter MC points.
    "energy_dep_target": "Sum(AdvTargetPoint.fELoss)",
    "energy_dep_mufilter": "Sum(AdvMuFilterPoint.fELoss)",
}
for column_name, expression in truth_columns.items():
    df = df.Define(column_name, expression)

In [None]:
# Decode the detector ID of every digitised AdvTarget hit into one per-hit
# vector column per component, using the C++ helpers declared earlier.
id_decoders = (
    ("stations", "station_from_id"),
    ("columns", "column_from_id"),
    ("sensors", "sensor_from_id"),
    ("strips", "strip_from_id"),
    ("planes", "plane_from_id"),
)
for hit_column, decoder in id_decoders:
    df = df.Define(hit_column, f"Map(Digi_AdvTargetHits.fDetectorID, {decoder})")

# Flattened per-hit index built from column, sensor and strip. For odd
# columns the strip term enters with a negative sign (pow(-1, columns)), and
# the column number is subtracted as an offset.
index_expression = "(4 * columns + sensors - 2 * columns * sensors) * 768 + pow(-1, columns) * strips - 1 * columns"
df = df.Define("indices", index_expression)

In [None]:
# Columns to export from the data frame: first the per-event scalars, then
# the per-hit vector columns.
scalar_columns = ["start_z", "nu_energy", "energy_dep_target", "energy_dep_mufilter"]
per_hit_columns = ["indices", "stations", "planes"]
col_names = scalar_columns + per_hit_columns

In [None]:
# Export the requested columns from the data frame; the result is indexed by
# column name and then by event (see its use in the cells below).
# NOTE(review): this is presumably the call that triggers the actual event
# loop over the remote files - expect it to be the slow step.
cols = df.AsNumpy(col_names)

In [None]:
# One 3072 x 200 hit map per exported event.
# The number of rows is taken from the exported columns rather than from
# n_event: n_event is only the nominal count (n_files * n_events_per_file),
# whereas `cols` holds exactly the events that passed the Filter. Sizing from
# `cols` keeps the image array aligned row-for-row with the feature table
# that is built from the same `cols`.
# NOTE(review): the default float64 dtype makes this array very large
# (~4.9 GB for 1000 events); a smaller dtype would do for 0/1 values if
# downstream consumers allow it.
hitmaps = np.zeros((len(cols["indices"]), 3072, 200))

In [None]:
# Fill the binary hit maps: for every hit of an event, set the pixel at
# (flattened strip index, 2 * station + plane) to 1.
#
# The loop is bounded by the number of events actually present in `cols`
# (and by the rows available in `hitmaps`), not by the nominal n_event:
# events rejected by the Filter are absent from `cols`, so iterating over
# range(n_event) would raise an IndexError as soon as any event was dropped.
n_filled = min(len(hitmaps), len(cols["indices"]))
for event in tqdm(range(n_filled)):
    # The per-hit columns hold one vector per event; convert each to an
    # integer array so it can be used for NumPy fancy indexing.
    indices = np.array(cols["indices"][event], dtype=int)
    stations = np.array(cols["stations"][event], dtype=int)
    planes = np.array(cols["planes"][event], dtype=int)
    hitmaps[event, indices, 2 * stations + planes] = 1

In [None]:
# Write the hit-map array to disk; the file name carries the nominal event count.
images_path = f"images_{n_event}.npy"
np.save(images_path, hitmaps)

In [None]:
# Build a table with one row per exported event and one column per entry of `cols`.
pandas_df = pd.DataFrame(data=cols)

In [None]:
# Remove the per-hit vector columns so that only the per-event scalar
# features remain in the table.
# drop(..., errors="ignore") is used instead of three in-place pop() calls so
# that the cell is idempotent: re-running it after the columns are already
# gone no longer raises a KeyError.
pandas_df = pandas_df.drop(columns=["stations", "indices", "planes"], errors="ignore")

In [None]:
# Write the feature table to CSV (row index included, as per the pandas
# default); the file name carries the nominal event count.
features_path = f"features_CNN_{n_event}.csv"
pandas_df.to_csv(features_path)