In [None]:
import sys, os, glob, re

repo_root = os.path.abspath("..")
sys.path.append(repo_root)

from MyClasses.reader import PONE_Reader
from MyClasses.feature_extractor import I3FeatureExtractorPONE
from MyClasses.truth_extractor import I3TruthExtractorPONE

In [None]:
INPUT_GLOB = "/project/def-nahee/kbas/POM_Response_GZ/*.i3.gz"
OUTDIR     = "/project/def-nahee/kbas/POM_Response_Parquet"
GCD_RESCUE = "/project/6008051/pone_simulation/GCD_Library/PONE_800mGrid.i3.gz"


In [None]:
def batch_id_from_i3(path):
    """Extract the integer batch number from an i3 file path.

    Only the final path component is inspected, and it must end in
    ``batch_<digits>.i3.gz``.

    Args:
        path: Path (or bare file name) of an ``.i3.gz`` file.

    Returns:
        The batch number as an ``int``, or ``None`` when the file name does
        not end in ``batch_<digits>.i3.gz``.
    """
    filename = os.path.basename(path)
    match = re.search(r"batch_(\d+)\.i3\.gz$", filename)
    if match is None:
        return None
    return int(match.group(1))

def batch_ids_in_outdir(outdir):
    """Collect every batch number that appears in a name under ``outdir``.

    The directory is globbed recursively, and any file *or* directory whose
    own name (not its parent path) contains ``batch_<digits>`` contributes
    that number.

    Args:
        outdir: Root directory to scan.

    Returns:
        A ``set`` of ``int`` batch numbers; empty when nothing matches.
    """
    batch_pattern = re.compile(r"batch_(\d+)")
    # Scan everything under outdir whose name mentions "batch_1234".
    entries = glob.glob(os.path.join(outdir, "**", "*"), recursive=True)
    hits = (batch_pattern.search(os.path.basename(entry)) for entry in entries)
    return {int(hit.group(1)) for hit in hits if hit}

In [None]:
# Every input file matching INPUT_GLOB, in lexicographic order.
all_files = glob.glob(INPUT_GLOB)
all_files.sort()

# Batch numbers that already appear in some file/directory name under OUTDIR.
done_ids = batch_ids_in_outdir(OUTDIR)

In [None]:
# Inputs still to convert: files whose name yields a batch number that does
# not yet appear under OUTDIR. Files without a parsable batch number are
# left out.
todo = [
    i3_path
    for i3_path, batch_id in ((p, batch_id_from_i3(p)) for p in all_files)
    if batch_id is not None and batch_id not in done_ids
]

In [None]:
# Sanity report of the discovery step before starting the conversion.
summary_rows = (
    ("Total i3:", len(all_files)),
    ("Done batches:", len(done_ids)),
    ("Todo i3:", len(todo)),
    ("First 5 todo:", todo[:5]),
)
for label, value in summary_rows:
    print(label, value)

In [None]:
from graphnet.data.dataconverter import DataConverter


In [None]:
from graphnet.data.dataconverter import DataConverter
from graphnet.data.writers import ParquetWriter
from graphnet.data.extractors.icecube.utilities.i3_filters import NullSplitI3Filter

In [None]:
# I3 filter instance handed to the reader.
null_split_filter = NullSplitI3Filter()

# Reader for the input i3 files.
# NOTE(review): `gcd_rescue` is presumably a fallback GCD file used when an
# input does not provide its own — confirm against PONE_Reader.
reader = PONE_Reader(
    i3_filters=null_split_filter,
    gcd_rescue=GCD_RESCUE,
)

In [None]:
# NOTE: this cell used to repeat, with identical arguments, the
# `reader = PONE_Reader(...)` construction from the cell directly above.
# The duplicate was removed so the reader is built exactly once; `reader`
# is defined by the preceding cell.

In [None]:
# Column names passed as `exclude` to the feature extractor.
feature_exclude = [
    "pmt_area",
    "rde",
    "width",
    "event_time",
    "is_bright_dom",
    "is_saturated_dom",
    "is_errata_dom",
    "is_bad_dom",
    "hlc",
    "awtd",
    "dom_type",
]

# Column names passed as `exclude` to the truth extractor.
truth_exclude = [
    "L7_oscNext_bool",
    "L6_oscNext_bool",
    "L5_oscNext_bool",
    "L4_oscNext_bool",
    "L3_oscNext_bool",
    "OnlineL2Filter_17",
    "MuonFilter_13",
    "CascadeFilter_13",
    "DeepCoreFilter_13",
    "dbang_decay_length",
    "track_length",
    "stopped_muon",
    "energy_track",
    "energy_cascade",
    "inelasticity",
    "is_starting",
]

# One extractor per output table: "features" is read from the
# "EventPulseSeries" pulse map, "truth" from the "I3MCTree_postprop" MC tree.
extractors = [
    I3FeatureExtractorPONE(
        pulsemap="EventPulseSeries",
        name="features",
        exclude=feature_exclude,
    ),
    I3TruthExtractorPONE(
        mctree="I3MCTree_postprop",
        name="truth",
        exclude=truth_exclude,
    ),
]


# TODO: include PMT locations and a per-pulse truth flag marking whether the pulse is noise.

In [None]:
# Parquet save backend: "truth" names the truth table and "event_no" is the
# index column.
writer = ParquetWriter(
    index_column="event_no",
    truth_table="truth",
)

In [None]:
# Wire reader, extractors and writer into a single conversion pipeline that
# writes under OUTDIR.
converter = DataConverter(
    # pipeline components
    file_reader=reader,
    extractors=extractors,
    save_method=writer,
    # output layout
    outdir=OUTDIR,
    index_column="event_no",
    # execution
    num_workers=1,  # TODO: try 4?
)

In [None]:
import traceback

# Convert each pending input on its own so that one bad input does not abort
# the whole run. Failures are recorded together with their full traceback
# (instead of being reduced to a one-line message and lost) so they can be
# inspected afterwards, e.g. `print(failed_batches[0][1])`.
# NOTE(review): every entry of `todo` is a file path matched by INPUT_GLOB,
# yet it is passed as `input_dir` — confirm PONE_Reader accepts a file path.
failed_batches = []  # (input path, formatted traceback) for each failure

for b in todo:  # e.g. list of batch folders / files
    try:
        converter(input_dir=b)
    except Exception as e:
        # Best-effort: remember the failure and move on to the next input.
        failed_batches.append((b, traceback.format_exc()))
        # Include the exception type; str(e) alone is often uninformative
        # (a KeyError, for instance, prints only the missing key).
        print(f"[SKIP] Batch failed: {b}\n  -> {type(e).__name__}: {e}")

print(
    f"Converted {len(todo) - len(failed_batches)} of {len(todo)} inputs; "
    f"{len(failed_batches)} failed."
)


In [None]:
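# Alternative to the per-file loop above (kept for reference): hand the whole
# list to the converter in a single call.
# converter(input_dir=todo)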

In [None]:
#

In [None]:
# Printed unconditionally once the cells above have run; it does not by
# itself indicate that every input converted successfully.
print(f"DONE: {OUTDIR}")

In [None]:
# TODO: before merging, add the new columns (PMT angle and PMT location).

In [None]:
# Destination for the merged output: a "merged" subdirectory of OUTDIR.
MERGED_DIR = os.path.join(OUTDIR, "merged")


In [None]:
# Merge the converted parquet output into batches written to MERGED_DIR.
# NOTE(review): `files` is passed empty; presumably merge_files discovers the
# converted files itself from the output location — confirm against the
# installed graphnet version.
writer.merge_files(
    files=[],                
    output_dir=MERGED_DIR,   # <-- this should be OUTDIR/merged
    events_per_batch=200000, # use 50000 if you prefer
    num_workers=1,
)