In [7]:
import ROOT
from collections import defaultdict
import pandas as pd

In [8]:
# Input location. `filepath` is the ROOT file to read and `treepath` is the
# name (including its directory) of the tree inside that file; both are handed
# to ROOT.RDataFrame further down.
filepath = "rootfiles_monopoles/Combined_25370_filtered_files_21039_52306_fastmonopole_data.root"
treepath = "fmmtrackerana/RC_Tree"

In [9]:
# Build an RDataFrame over the tree named `treepath` stored in the file `filepath`.
# Later cells filter this frame by run number and write a snapshot of it.
source_df = ROOT.RDataFrame(treepath, filepath)

In [10]:
# Inspect which columns the input tree provides; the returned vector<string>
# is displayed as this cell's output.
source_df.GetColumnNames()

vector<string>{ "ADC_tot", "Chi2_per_ndf", "Chisqr_TN", "Chisqr_TS", "GeV_tot", "MIP_tot", "Run", "Tf", "Ti", "Xf", "Xi", "Yf", "Yi", "Zf", "Zi", "adc_W", "docasqrx", "docasqrx_cells_on_track", "docasqrx_max", "docasqrx_min", "docasqry", "docasqry_cells_on_track", "docasqry_max", "docasqry_min", "event", "lxsqr", "lysqr", "max_gap", "miss_plane", "nhits_sat", "nxhits", "nyhits", "p_cross", "p_overlap", "subRun", "tSat_mean", "tSat_rms", "t_max", "t_min", "tfit_mean", "tfit_rms", "tnonSat_mean", "tnonSat_rms", "tns_max", "tns_mean", "tns_min", "tns_rms", "vSat_err", "vnonSat_err", "vrc_fit", "vrc_nonsat", "vrc_sat", "x_tmax", "x_tmin", "y_tmax", "y_tmin", "z_tmax", "z_tmin" }

In [None]:
"""
For the monopole data we need a number of events equivalent to one year of
detector running. Each subRun of the detector runs for 160 (presumably seconds:
197230 x 160 s is approximately one year), so the total number of subRuns
needed is 197230.
We took runs 21039 to 25366 as the cosmic background data.

So, for the monopole data we take runs from 27000 onwards, up to the one-year equivalent.
Answer: runs 27000 - 31340
"""

# Purpose: starting at `start_run`, find the smallest run range whose number of
# unique (Run, subRun) pairs reaches `target_subruns`, and report it.
# Defines: start_run, target_subruns, max_run, total (plus the intermediate
# pandas objects) for use by later cells.

start_run = 27000  # <-- Change this to your desired starting run number
# Number of subRuns to accumulate: the one-year-equivalent figure quoted in the
# notes at the top of this cell.
target_subruns = 197230

rdf = ROOT.RDataFrame(treepath, filepath)

# Step 1: Extract small dataframe with just Run and subRun
# (only these two columns are materialised, not the whole tree).
df = pd.DataFrame(rdf.AsNumpy(columns=["Run", "subRun"]))

# Step 2: Drop duplicates to get unique (Run, subRun) pairs
unique_pairs = df.drop_duplicates(subset=["Run", "subRun"])

# Step 3: Count subRuns per Run, ordered by increasing run number
counts = unique_pairs.groupby("Run").size().sort_index()

# Step 4: Filter only runs >= start_run
filtered_counts = counts[counts.index >= start_run]

# Step 5: Accumulate until the running total reaches target_subruns.
# The cumulative sum is non-decreasing, so the first entry at or above the
# target identifies the last run that has to be included.
cumulative_subruns = filtered_counts.cumsum()
reached = cumulative_subruns[cumulative_subruns >= target_subruns]

if reached.empty:
    # Fail loudly instead of leaving max_run undefined (NameError on a fresh
    # kernel) or silently reusing a stale value from an earlier execution.
    available = int(cumulative_subruns.iloc[-1]) if len(cumulative_subruns) else 0
    raise ValueError(
        f"Only {available} subRuns available from run {start_run} onwards, "
        f"but {target_subruns} are required; lower start_run or add more input data."
    )

max_run = reached.index[0]
total = reached.iloc[0]

print(f"Use Runs from {start_run} to {max_run} with total subRuns: {total}")




In [11]:
# Upper edge of the run window. It is hard-coded here, so it takes precedence
# over any max_run computed earlier in the session; the notes in the selection
# cell quote the same range (27000 - 31340).
max_run = 31340

# Step 5: Filter events and Snapshot
# Keep only events with start_run <= Run <= max_run, applied as two chained cuts.
# NOTE: start_run must already be defined by the run-selection cell.
source_df = (
    source_df
    .Filter(f"Run >= {start_run}")
    .Filter(f"Run <= {max_run}")
)

In [12]:
# Columns copied into the output tree, in the order they are requested.
snapshot_columns = [
    "Xi", "Xf", "Yi", "Yf", "Zi", "Zf", "Ti", "Tf",
    "GeV_tot", "nxhits", "nyhits", "ADC_tot", "vrc_fit", "p_cross",
    "docasqrx", "docasqrx_max", "docasqrx_min",
    "docasqry", "docasqry_max", "docasqry_min",
]

# Write the run-filtered events to tree "MonopoleTree" in monopoleData.root.
# The call is kept as the last expression so its result is shown as the cell output.
source_df.Snapshot("MonopoleTree", "monopoleData.root", snapshot_columns)

<cppyy.gbl.ROOT.RDF.RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager,void> > object at 0x5d3af9bee650>