In [1]:
import sys, os, glob, re

repo_root = os.path.abspath("..")
sys.path.append(repo_root)

from MyClasses.reader import PONE_Reader
from MyClasses.feature_extractor import I3FeatureExtractorPONE
from MyClasses.truth_extractor import I3TruthExtractorPONE

  from .autonotebook import tqdm as notebook_tqdm




In [2]:
INPUT_GLOB = "/project/def-nahee/kbas/POM_Response_GZ/*.i3.gz"
OUTDIR     = "/project/def-nahee/kbas/POM_Response_Parquet"
GCD_RESCUE = "/project/6008051/pone_simulation/GCD_Library/PONE_800mGrid.i3.gz"


In [3]:
def batch_id_from_i3(path):
    m = re.search(r"batch_(\d+)\.i3\.gz$", os.path.basename(path))
    return int(m.group(1)) if m else None

def batch_ids_in_outdir(outdir):
    # outdir içinde "batch_1234" geçen her şeyi tara
    candidates = glob.glob(os.path.join(outdir, "**", "*"), recursive=True)
    ids = set()
    for p in candidates:
        m = re.search(r"batch_(\d+)", os.path.basename(p))
        if m:
            ids.add(int(m.group(1)))
    return ids

In [4]:
all_files = sorted(glob.glob(INPUT_GLOB))
done_ids  = batch_ids_in_outdir(OUTDIR)

In [5]:
todo = []
for f in all_files:
    bid = batch_id_from_i3(f)
    if bid is None:
        continue
    if bid not in done_ids:
        todo.append(f)

In [6]:
todo = todo[3:] # '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1052.i3.gz' is problematic
# '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1060.i3.gz'

In [7]:
print("Total i3:", len(all_files))
print("Done batches:", len(done_ids))
print("Todo i3:", len(todo))
print("First 5 todo:", todo[:5])

Total i3: 4996
Done batches: 91
Todo i3: 4902
First 5 todo: ['/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1086.i3.gz', '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1087.i3.gz', '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1088.i3.gz', '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1089.i3.gz', '/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_109.i3.gz']


In [8]:
from graphnet.data.dataconverter import DataConverter


In [9]:
from graphnet.data.dataconverter import DataConverter
from graphnet.data.writers import ParquetWriter
from graphnet.data.extractors.icecube.utilities.i3_filters import NullSplitI3Filter

In [10]:
reader = PONE_Reader(
    gcd_rescue=GCD_RESCUE,
    i3_filters=NullSplitI3Filter(),
)

[1;34mgraphnet[0m [MainProcess] [32mINFO    [0m 2026-01-11 15:49:49 - NullSplitI3Filter.__init__ - Writing log to [1mlogs/graphnet_20260111-154949.log[0m


In [11]:
reader = PONE_Reader(
    gcd_rescue=GCD_RESCUE,
    i3_filters=NullSplitI3Filter(),
)

In [12]:
extractors = [
    I3FeatureExtractorPONE(pulsemap="EventPulseSeries", name="features", exclude=['pmt_area', 'rde', 'width', 'event_time', 'is_bright_dom', 'is_saturated_dom', 'is_errata_dom', 'is_bad_dom', 'hlc','awtd','string', 'pmt_number', 'dom_number', 'dom_type']),
    I3TruthExtractorPONE(mctree="I3MCTree_postprop", name="truth", exclude=['L7_oscNext_bool', 'L6_oscNext_bool',
                                                               'L5_oscNext_bool', 'L4_oscNext_bool',
                                                               'L3_oscNext_bool',
                                                               'OnlineL2Filter_17','MuonFilter_13',
                                                               'CascadeFilter_13','DeepCoreFilter_13', 
                                                                'dbang_decay_length', 'track_length', 'stopped_muon', 'energy_track', 'energy_cascade', 'inelasticity', 'is_starting'] ),

]


In [13]:
writer = ParquetWriter(truth_table="truth", index_column="event_no")

In [14]:
converter = DataConverter(
    file_reader=reader,
    save_method=writer,  
    extractors=extractors,
    outdir=OUTDIR,
    num_workers=1,  #4?
    index_column="event_no",
)

In [None]:
converter(input_dir=todo)

Assuming list of files.
[1;34mgraphnet[0m [MainProcess] [32mINFO    [0m 2026-01-11 15:49:49 - DataConverter.__call__ - Processing 4902 file(s) in main thread(not multiprocessing)[0m


  1%|[32m▎                               [0m| 57/4902 [08:16<12:02:18,  8.94s/ file(s)][0m

In [None]:
print("DONE:", OUTDIR)

In [None]:
merged oncesi yeni columnlari ekle. angle ve loc of pmt

In [None]:
MERGED_DIR = os.path.join(OUTDIR, "merged")


In [None]:
writer.merge_files(
    files=[],                
    output_dir=MERGED_DIR,   # <-- burasi OUTDIR/merged olacak
    events_per_batch=200000, # istersen 50000 yap
    num_workers=1,
)