In [None]:
import ROOT

In [None]:
# Turn on ROOT's implicit multi-threading before the RDataFrame is created.
# NOTE(review): with multi-threading enabled the order in which entries are
# processed (and therefore the row order of the exported table) is presumably
# not guaranteed -- confirm if row order matters downstream.
ROOT.EnableImplicitMT()

In [None]:
import pandas as pd

In [None]:
# Number of input files to read; the file list is built from the numbered
# sub-directories 1..n_files.
n_files = 10

In [None]:
# Build the data frame over the "cbmsim" tree, read through the root://
# protocol from the first n_files numbered sub-directories of the CCDIS
# sample. Directory numbering is 1-based, hence `i+1`.
df = ROOT.ROOT.RDataFrame(
    "cbmsim",
    [
        f"root://eospublic.cern.ch//eos/experiment/sndlhc/users/olantwin/advsnd/2024/07/nu12/CCDIS/{i+1}/sndLHC.Genie-TGeant4_dig.root"
        for i in range(n_files)
    ],
)

In [None]:
# Keep only events in which at least one of the two digitised hit collections
# (AdvMuFilter or AdvTarget) reports a non-zero entry count.
df = df.Filter("Digi_AdvMuFilterHits.GetEntries() || Digi_AdvTargetHits.GetEntries()")

In [None]:
# Load the ShipMCTrack declaration into ROOT's interpreter; the
# dynamic_cast<ShipMCTrack*> expressions in the later Define calls need it.
ROOT.gInterpreter.ProcessLine('#include "ShipMCTrack.h"')

In [None]:
# Load the AdvTargetHit header into ROOT's interpreter.
# NOTE(review): presumably required so the Digi_AdvTargetHits branch can be
# accessed in the JIT-compiled expressions -- confirm.
ROOT.gInterpreter.ProcessLine('#include "AdvTargetHit.h"')

In [None]:
# Load the AdvMuFilterHit header into ROOT's interpreter.
# NOTE(review): presumably required so the Digi_AdvMuFilterHits branch can be
# accessed in the JIT-compiled expressions -- confirm.
ROOT.gInterpreter.ProcessLine('#include "AdvMuFilterHit.h"')

In [None]:
# start_z: value of GetStartZ() for the MC track at index 1.
# NOTE(review): presumably track 1 is the first outgoing particle, making this
# the z position of the interaction vertex -- confirm.
# NOTE(review): the dynamic_cast result is dereferenced without a null check;
# this assumes every selected event has at least two ShipMCTrack entries.
df = df.Define("start_z", "dynamic_cast<ShipMCTrack*>(MCTrack[1])->GetStartZ()")

In [None]:
# nu_energy: value of GetEnergy() for the MC track at index 0.
# NOTE(review): presumably track 0 is the incoming neutrino -- confirm.
# NOTE(review): the dynamic_cast result is dereferenced without a null check;
# this assumes every selected event has at least one ShipMCTrack entry.
df = df.Define("nu_energy", "dynamic_cast<ShipMCTrack*>(MCTrack[0])->GetEnergy()")

In [None]:
# energy_dep_target: sum of fELoss over all AdvTargetPoint entries of the event.
df = df.Define("energy_dep_target", "Sum(AdvTargetPoint.fELoss)")

In [None]:
# energy_dep_mufilter: sum of fELoss over all AdvMuFilterPoint entries of the event.
df = df.Define("energy_dep_mufilter", "Sum(AdvMuFilterPoint.fELoss)")

In [None]:
# Declare a C++ helper for the JIT-compiled column expressions:
# station_from_id(id) returns the detector ID shifted right by 17 bits.
# NOTE(review): presumably the station number is encoded in the bits above
# bit 16 of the detector ID -- confirm against the detector ID scheme.
ROOT.gInterpreter.Declare("""
int station_from_id(int id) {
    return id >>17;
}
""")

In [None]:
# Declare a C++ helper for the JIT-compiled column expressions:
# Deduplicate(v) returns the distinct values of v in ascending order.
# The vector is taken by value, so sorting and erasing act on a copy and the
# caller's column is left untouched.
ROOT.gInterpreter.Declare("""
 template<typename T>
 ROOT::RVec<T> Deduplicate (ROOT::RVec<T> v){
    std::sort(v.begin(), v.end());
    auto last = std::unique(v.begin(), v.end());
    v.erase(last, v.end());
    return v;
}
""")

In [None]:
# target_stations: per-hit station index, obtained by applying
# station_from_id to every fDetectorID in Digi_AdvTargetHits.
df = df.Define(
    "target_stations", "Map(Digi_AdvTargetHits.fDetectorID, station_from_id)"
)

In [None]:
# mufilter_stations: per-hit station index, obtained by applying
# station_from_id to every fDetectorID in Digi_AdvMuFilterHits.
df = df.Define(
    "mufilter_stations", "Map(Digi_AdvMuFilterHits.fDetectorID, station_from_id)"
)

In [None]:
# target_n_stations: number of distinct station indices among the target hits.
df = df.Define("target_n_stations", "Deduplicate(target_stations).size()")

In [None]:
# mufilter_n_stations: number of distinct station indices among the mufilter hits.
df = df.Define("mufilter_n_stations", "Deduplicate(mufilter_stations).size()")

In [None]:
# target_n_hits: entry count of the Digi_AdvTargetHits collection.
df = df.Define("target_n_hits", "Digi_AdvTargetHits.GetEntries()")

In [None]:
# mufilter_n_hits: entry count of the Digi_AdvMuFilterHits collection.
df = df.Define("mufilter_n_hits", "Digi_AdvMuFilterHits.GetEntries()")

In [None]:
for i in range(100):
    df = df.Define(
        f"target_n_hits_station_{i}",
        f"std::count(target_stations.begin(), target_stations.end(), {i})",
    )

In [None]:
for i in range(20):
    df = df.Define(
        f"mufilter_n_hits_station_{i}",
        f"std::count(mufilter_stations.begin(), mufilter_stations.end(), {i})",
    )

In [None]:
col_names = (
    [
        "start_z",
        "nu_energy",
        "energy_dep_target",
        "energy_dep_mufilter",
        "target_n_hits",
        "target_n_stations",
        "mufilter_n_hits",
        "mufilter_n_stations",
    ]
    + [f"target_n_hits_station_{i}" for i in range(100)]
    + [f"mufilter_n_hits_station_{i}" for i in range(20)]
)

In [None]:
# Materialise the selected columns as NumPy arrays, keyed by column name.
# NOTE(review): this is presumably where the event loop over the remote files
# actually runs, so expect it to be the slow step -- confirm.
cols = df.AsNumpy(col_names)

In [None]:
# Wrap the extracted arrays in a pandas DataFrame, one column per key of cols.
pandas_df = pd.DataFrame(cols)

In [None]:
# Write the feature table to features.csv in the current working directory.
# With to_csv's defaults the DataFrame index is written as well, as an
# unnamed first column.
pandas_df.to_csv("features.csv")