## Next additions for TESSSA -> Getting the masses and the number of beamOn events directly from the .root files

### Modules

In [1]:
import uproot
import pandas as pd
from typing import Optional

### First part: Getting the geometryTable->Print() data

In [2]:
def load_geometry_table_uproot(root_path: str,
                               objname: str = "geometryTable",
                               header: bool = True) -> pd.DataFrame:
    """
    Read a TMacro (e.g. geometryTable) from a ROOT file using uproot and return a pandas.DataFrame.

    Every non-blank line of the macro becomes one row; the line is split on
    whitespace and each token becomes one cell. Rows with fewer tokens than the
    longest row are padded with missing values by pandas.

    - root_path: path to the .root file
    - objname: key name (or path) of the TMacro inside the file (default: "geometryTable")
    - header: use first non-empty line as header if True

    Returns:
    - DataFrame of string tokens (empty DataFrame if the macro has no non-blank line).

    Raises:
    - KeyError: if `objname` is not in the file, or the object has no "fLines" member.

    Notes:
    - If the object isn't at top-level, run `f.classnames()` first to find the exact key
      (use that as objname; e.g. "dir/subdir/geometryTable").
    """
    # Open through a context manager so the file handle is released on every
    # path, including the KeyError exits below.
    with uproot.open(root_path) as f:
        # get the object (raise a clear error if missing)
        if objname not in f:
            raise KeyError(f"'{objname}' not found in file. Run f.classnames() to locate it.")
        obj = f[objname]

        # TMacro stores its lines in member named "fLines" (a TList of TObjString)
        if "fLines" not in obj.all_members:
            raise KeyError("object has no member 'fLines' — not a TMacro-like object? "
                           "Check obj.all_members to see available members.")

        # Convert every entry to a plain Python str while the file is still
        # open; items may already behave like str, or arrive as raw bytes.
        lines = []
        for item in obj.member("fLines"):
            if isinstance(item, (bytes, bytearray)):
                lines.append(item.decode("utf-8", errors="ignore"))
            else:
                lines.append(str(item))

    # str.split() without arguments ignores leading/trailing whitespace and
    # returns [] for a blank line, so blank lines are dropped here.
    # (adjust split logic if columns are fixed-width or CSV-like)
    rows = [tokens for tokens in (ln.split() for ln in lines) if tokens]

    if not rows:
        return pd.DataFrame()  # empty

    df = pd.DataFrame(rows)

    if header:
        # promote the first non-empty row to column names and drop it from the data
        df.columns = df.iloc[0].astype(str).tolist()
        df = df.iloc[1:].reset_index(drop=True)

    return df


In [4]:
# Parse the geometryTable macro; header=False keeps the first line as data,
# so the columns are simply numbered 0, 1, 2.
df = load_geometry_table_uproot(
    "/disk/data1/lze/ljuign/biasing/data_test_long/layer_10/B_Off_Concrete_Gammas_K40_0.root",
    objname="geometryTable",
    header=False,
)


In [5]:
# Display the parsed geometry table as the cell output.
df

Unnamed: 0,0,1,2
0,world,7.99576e-19,G4_Galactic
1,ShieldRomanBox,64.9264,PE
2,ShieldRomanNeck,6.95077,PE
3,ShieldPEITube,380.576,PE
4,ShieldPEINeck,20.8523,PE
5,ShieldPbBox,13583.4,Pb
6,ShieldPbNeck,326.976,Pb
7,ShieldSSBox,3709.76,SS
8,ShieldSSNeck,56.475,SS
9,ShieldPEOBox,1801.2,PEext


### Second part: Getting the number given to /run/beamOn

In [None]:
# The same loader works for the runMacro object: one row per macro line,
# one column per whitespace-separated token. This rebinds `df`.
df = load_geometry_table_uproot(
    "/disk/data1/lze/ljuign/biasing/data_test_long/layer_10/B_Off_Concrete_Gammas_K40_0.root",
    objname="runMacro",
    header=False,
)

In [7]:
# Display the tokenised run macro as the cell output.
df

Unnamed: 0,0,1,2,3,4,5,6,7
0,#,Macro,file,for,the,initialization,,
1,#,in,interactive,session,,,,
2,#Set,geometry,,,,,,
3,#,100,TESSERACT,with,virtual,detector,,
4,#,101,with,HeRALD,detector,(not,implemented,yet)
5,#,102,with,SPICE,detector,(not,implemented,yet)
6,#,200,Rock,,,,,
7,/geometry/type,100,,,,,,
8,/geometry/setByVersion,Hybrid_V4_5,,,,,,
9,/geometry/construct,,,,,,,


In [None]:
def get_runbeamon_number(df: pd.DataFrame) -> int:
    """
    Given a pandas DataFrame from a runMacro TMacro,
    find the number after '/run/beamOn'.

    The DataFrame is expected to hold one macro line per row, with the command
    in the first column and its first argument in the second column. If the
    command appears on several rows, the first one is used.

    Returns:
    - the argument of the first '/run/beamOn' row, converted with int().

    Raises:
    - ValueError: if no row starts with '/run/beamOn', or if that row has no
      argument, or if the argument cannot be converted to an integer.
    """
    # An empty table has no first column to search; report it like any other
    # table that lacks the command instead of failing with an IndexError.
    if df.shape[1] == 0:
        raise ValueError("No '/run/beamOn' command found in DataFrame")

    # search for the row where first column is '/run/beamOn'
    mask = df.iloc[:, 0] == '/run/beamOn'
    if not mask.any():
        raise ValueError("No '/run/beamOn' command found in DataFrame")

    # A table with a single column cannot carry the argument.
    if df.shape[1] < 2:
        raise ValueError("'/run/beamOn' command has no event count argument")

    # Positional access (row 0 of the matches, column 1) stays a scalar even
    # when column labels are duplicated.
    val = df.loc[mask].iloc[0, 1]
    if pd.isna(val):
        raise ValueError("'/run/beamOn' command has no event count argument")

    try:
        return int(val)
    except (TypeError, ValueError) as err:
        raise ValueError(
            f"Cannot interpret '/run/beamOn' argument {val!r} as an integer"
        ) from err


In [9]:
# Pull the /run/beamOn argument out of `df` and print it.
# NOTE: `df` must currently hold the runMacro table, not the geometry table.
beamon_number = get_runbeamon_number(df)
print("Number after /run/beamOn:", beamon_number)

Number after /run/beamOn: 1000000000


### Third part : Using the class and optimisation

In [3]:
from get_norm_param import GetNormParam

In [5]:
# One ROOT file, read twice: once per object stored in it.
file = "/disk/data1/lze/ljuign/run2_g4sim/Sim_Hybrid-v4.5/test_filtering/Rock_Gammas_K40_0_filtered.root"

# runMacro object: report the beamOn number exposed by the class.
macro = GetNormParam(file, objname="runMacro")
print("BeamOn number:", macro.beamon_number)

# geometryTable object: preview the first rows of the parsed table.
table = GetNormParam(file, objname="geometryTable")
print(table.df.head())


BeamOn number: 1000000000
                 0            1            2
0            world  7.99576e-19  G4_Galactic
1   ShieldRomanBox      64.9271           PE
2  ShieldRomanNeck      6.95077           PE
3    ShieldPEITube      380.575           PE
4    ShieldPEINeck      20.8523           PE


In [6]:
import pandas as pd
from collections import defaultdict

def build_total_mass_dict(df: pd.DataFrame) -> dict:
    """
    Sum the masses of a geometry table per material.

    Expected layout of the DataFrame:
    [name | mass | material]
    i.e. the mass is read from the second column and the material from the
    last column.

    Returns a dictionary mapping material name (as str) to the summed mass
    (as float), in order of first appearance. An empty DataFrame gives {}.
    Rows whose mass cannot be converted with float() are skipped.
    """
    if df.empty:
        return {}

    mass_key = df.columns[1]
    material_key = df.columns[-1]

    totals = {}
    for _, record in df.iterrows():
        try:
            mass_value = float(record[mass_key])
            material_name = str(record[material_key])
        except (ValueError, TypeError):
            # unparsable or missing mass: leave this row out of the totals
            continue
        # start each material at 0.0 so the running sum is always a float
        totals[material_name] = totals.get(material_name, 0.0) + mass_value

    return totals


In [3]:
file = "/disk/data1/lze/ljuign/biasing/data_test_long/layer_10/B_Off_Concrete_Gammas_K40_0.root"

# Read as runMacro: only the beamOn number is filled, total_mass stays None.
macro_run = GetNormParam(file, objname="runMacro")
print("BeamOn number:", macro_run.beamon_number)
print("Total mass:", macro_run.total_mass)

# Read as geometryTable: beamon_number is None, total_mass is a dict
# holding the total mass per material.
macro_geom = GetNormParam(file, objname="geometryTable")
print("BeamOn number:", macro_geom.beamon_number)
print("Total mass:", macro_geom.total_mass)


BeamOn number: 1000000000
Total mass: None
BeamOn number: None
Total mass: {'G4_Galactic': 7.99576e-19, 'PE': 473.30547, 'Pb': 13989.6789, 'SS': 3766.235, 'PEext': 1892.4017000000001, 'Ti': 38.6849, 'Cu': 189.93327430000002, 'PureCu': 133.6185, 'SSi': 0.47812229999999994, 'LHe': 0.797336}
