In [1]:
import sys, os

repo_root = os.path.abspath("../..")
sys.path.append(repo_root)

from MyClasses import PONE_Reader, I3FeatureExtractorPONE, I3TruthExtractorPONE



  from .autonotebook import tqdm as notebook_tqdm




In [2]:
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union, OrderedDict 
from graphnet.utilities.imports import has_icecube_package

if has_icecube_package() or TYPE_CHECKING:
    from icecube import icetray, dataclasses, dataio  # pyright: reportMissingImports=false

from graphnet.data.readers import (
    GraphNeTFileReader,
    I3Reader
)


from graphnet.data.extractors.icecube.utilities.i3_filters import (
    I3Filter,
    NullSplitI3Filter,
)

from graphnet.data.extractors.icecube import I3Extractor
from graphnet.data.dataclasses import I3FileSet
from graphnet.utilities.filesys import find_i3_files


In [3]:
if has_icecube_package():
    from icecube import icetray, dataio  # pyright: reportMissingImports=false


In [4]:
from graphnet.data.extractors.icecube.utilities.frames import frame_is_montecarlo, frame_is_noise

In [5]:
DATA_FOLDER_PATH = "/project/def-nahee/kbas/POM_Response_GZ"
GCD_PATH = "/project/6008051/pone_simulation/GCD_Library/PONE_800mGrid.i3.gz"

In [6]:
my_reader = PONE_Reader(gcd_rescue=GCD_PATH)

[1;34mgraphnet[0m [MainProcess] [32mINFO    [0m 2025-12-22 13:08:05 - NullSplitI3Filter.__init__ - Writing log to [1mlogs/graphnet_20251222-130805.log[0m


In [7]:
my_reader._accepted_extractors

[graphnet.data.extractors.icecube.i3extractor.I3Extractor]

In [8]:
my_reader._accepted_file_extensions

['.bz2', '.zst', '.gz']

In [9]:
my_reader._gcd_rescue

'/project/6008051/pone_simulation/GCD_Library/PONE_800mGrid.i3.gz'

In [10]:
my_reader._i3filters

[<graphnet.data.extractors.icecube.utilities.i3_filters.NullSplitI3Filter at 0x14dfd7e886e0>]

In [11]:
my_reader._extractors # this will be available after using the method "set_extractors"

AttributeError: 'PONE_Reader' object has no attribute '_extractors'

In [12]:
my_reader.accepted_file_extensions

['.bz2', '.zst', '.gz']

In [13]:
my_reader.accepted_extractors

[graphnet.data.extractors.icecube.i3extractor.I3Extractor]

In [14]:
my_reader.extractor_names # this will be available after using the method "set_extractors"

AttributeError: 'PONE_Reader' object has no attribute '_extractors'

In [15]:
extractors_wishlist = list((I3FeatureExtractorPONE(pulsemap="EventPulseSeries"), I3TruthExtractorPONE(mctree = 'I3MCTree_postprop')))
# extractors that I want to use in my reader

In [16]:
extractors_wishlist

[<MyClasses.feature_extractor.I3FeatureExtractorPONE at 0x14dfd7d51430>,
 <MyClasses.truth_extractor.I3TruthExtractorPONE at 0x14dfd7d535c0>]

In [17]:
my_reader.set_extractors(extractors_wishlist)

In [18]:
my_reader.extracor_names

['feature', 'truth']

In [19]:
my_reader._extractors

[<MyClasses.feature_extractor.I3FeatureExtractorPONE at 0x14dfd7d51430>,
 <MyClasses.truth_extractor.I3TruthExtractorPONE at 0x14dfd7d535c0>]

In [20]:
my_file_set = my_reader.find_files(path=DATA_FOLDER_PATH)

Assuming list of directories.


In [21]:
my_file_set[0]

I3FileSet(i3_file='/project/def-nahee/kbas/POM_Response_GZ/pom_response_batch_1718.i3.gz', gcd_file='/project/6008051/pone_simulation/GCD_Library/PONE_800mGrid.i3.gz')

In [22]:
my_reader.validate_files(my_file_set)

In [23]:
data = my_reader(my_file_set[0])

In [24]:
type(data)

list

In [25]:
len(data)
## there are 41 events inside the list, so this list has length 41
# check if really there is 41 events.

41

In [26]:
data[1]

OrderedDict([('feature',
              {'charge': [1.6013669967651367,
                1.2102833986282349,
                1.0727909803390503,
                1.7622349262237549,
                0.6926360130310059,
                1.0034797191619873,
                0.7345649600028992,
                0.6463330388069153,
                0.801712155342102,
                1.198893427848816,
                0.7883833646774292,
                1.1845169067382812,
                0.9603919386863708,
                0.9769912958145142,
                1.1960269212722778,
                0.5922125577926636,
                0.3390648663043976,
                0.6318301558494568,
                0.7753310799598694,
                1.4305673837661743,
                0.7918666005134583,
                0.705599308013916,
                0.9211395978927612,
                1.2947815656661987,
                1.2376039028167725,
                0.6728255748748779,
                0.70370495319366

In [27]:
data[1]['feature']

{'charge': [1.6013669967651367,
  1.2102833986282349,
  1.0727909803390503,
  1.7622349262237549,
  0.6926360130310059,
  1.0034797191619873,
  0.7345649600028992,
  0.6463330388069153,
  0.801712155342102,
  1.198893427848816,
  0.7883833646774292,
  1.1845169067382812,
  0.9603919386863708,
  0.9769912958145142,
  1.1960269212722778,
  0.5922125577926636,
  0.3390648663043976,
  0.6318301558494568,
  0.7753310799598694,
  1.4305673837661743,
  0.7918666005134583,
  0.705599308013916,
  0.9211395978927612,
  1.2947815656661987,
  1.2376039028167725,
  0.6728255748748779,
  0.7037049531936646,
  0.8631435632705688,
  1.1633893251419067,
  0.6639195680618286,
  1.1375095844268799,
  0.8611122965812683,
  1.2470426559448242,
  0.9867833256721497,
  1.1771883964538574,
  1.0718475580215454,
  1.1980832815170288,
  0.5326935052871704,
  1.0681331157684326,
  0.9747642874717712,
  0.9370624423027039,
  0.901410698890686,
  1.3349082469940186,
  0.8017259240150452,
  0.34896355867385864,
  1

In [28]:
data[1]['feature'].keys()

dict_keys(['charge', 'dom_time', 'width', 'dom_x', 'dom_y', 'dom_z', 'pmt_area', 'rde', 'is_bright_dom', 'is_bad_dom', 'is_saturated_dom', 'is_errata_dom', 'event_time', 'hlc', 'awtd', 'string', 'pmt_number', 'dom_number', 'dom_type'])

In [29]:
# is there a sanity check that checks for the length of the list for these keys?
# edit the feature extractor and the truth extractor

In [30]:
data[1]['feature']['charge']

[1.6013669967651367,
 1.2102833986282349,
 1.0727909803390503,
 1.7622349262237549,
 0.6926360130310059,
 1.0034797191619873,
 0.7345649600028992,
 0.6463330388069153,
 0.801712155342102,
 1.198893427848816,
 0.7883833646774292,
 1.1845169067382812,
 0.9603919386863708,
 0.9769912958145142,
 1.1960269212722778,
 0.5922125577926636,
 0.3390648663043976,
 0.6318301558494568,
 0.7753310799598694,
 1.4305673837661743,
 0.7918666005134583,
 0.705599308013916,
 0.9211395978927612,
 1.2947815656661987,
 1.2376039028167725,
 0.6728255748748779,
 0.7037049531936646,
 0.8631435632705688,
 1.1633893251419067,
 0.6639195680618286,
 1.1375095844268799,
 0.8611122965812683,
 1.2470426559448242,
 0.9867833256721497,
 1.1771883964538574,
 1.0718475580215454,
 1.1980832815170288,
 0.5326935052871704,
 1.0681331157684326,
 0.9747642874717712,
 0.9370624423027039,
 0.901410698890686,
 1.3349082469940186,
 0.8017259240150452,
 0.34896355867385864,
 1.4258298873901367,
 1.588365912437439,
 1.17406988143920

In [31]:
data[1]['feature']['width']

[nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan

In [32]:
data[1]['truth']

{'energy': 2106.3824388042412,
 'position_x': -433.89881101521024,
 'position_y': 493.7906947118704,
 'position_z': -177.69477779410101,
 'azimuth': 4.335359354673131,
 'zenith': 2.1008453598297985,
 'pid': 14,
 'event_time': 0,
 'sim_type': 'LeptonInjector',
 'interaction_type': 1,
 'elasticity': 0.47387602258873174,
 'RunID': 1718,
 'SubrunID': 4294967295,
 'EventID': 4,
 'SubEventID': 0,
 'dbang_decay_length': -1,
 'track_length': -1,
 'stopped_muon': -1,
 'energy_track': 998.2697849261949,
 'energy_cascade': 1108.1126538780463,
 'inelasticity': 0.5260738190103331,
 'DeepCoreFilter_13': -1,
 'CascadeFilter_13': -1,
 'MuonFilter_13': -1,
 'OnlineL2Filter_17': -1,
 'L3_oscNext_bool': -1,
 'L4_oscNext_bool': -1,
 'L5_oscNext_bool': -1,
 'L6_oscNext_bool': -1,
 'L7_oscNext_bool': -1,
 'is_starting': True}

In [33]:
## fix the TruthExtractor and use "exclude" method

In [35]:
my_reader.extractor_names == my_reader.extracor_names

True

In [1]:
converter = DataConverter(
    file_reader=reader,
    save_method=writer,
    extractors=extractors,
    outdir=OUTDIR,
    num_workers=1,
    index_column="event_no",
)

NameError: name 'DataConverter' is not defined