In [1]:
import pandas as pd
import sys, os
import numpy as np

# Put this directory at the front of the module search path.
# Presumably this is where the `lib` package imported in the next cell lives — confirm.
# NOTE(review): absolute, machine-specific path.
sys.path.insert(0, "/home/storage/hans/jax_reco_new")

In [2]:
from lib.simdata_i3 import I3SimHandler

In [3]:
# Load one event through I3SimHandler: its metadata record and its pulse table.

event_index = 0
bp = '/home/storage2/hans/i3files/21217'
sim_handler = I3SimHandler(
    os.path.join(bp, 'meta_ds_21217_from_35000_to_53530.ftr'),
    os.path.join(bp, 'pulses_ds_21217_from_35000_to_53530.ftr'),
    '/home/storage/hans/jax_reco/data/icecube/detector_geometry.csv',
)

meta, pulses = sim_handler.get_event_data(event_index)
# The stored muon energy is divided by 1e3 so it can be shown in TeV.
print(f"muon energy: {meta['muon_energy_at_detector']/1.e3:.1f} TeV")

muon energy: 2.1 TeV


In [119]:
import itertools

def padding(row, n_pulses):
    """Return ``row`` as a new list, right-padded with 0.0 up to ``n_pulses`` entries.

    A row that already holds ``n_pulses`` or more entries is copied unchanged;
    it is never truncated.
    """
    n_missing = n_pulses - len(row)
    # A non-positive count multiplies out to an empty list, i.e. no padding.
    return list(row) + [0.0] * n_missing
    
def get_per_dom_summary_extended(meta: pd.DataFrame,
                                 pulses: pd.DataFrame,
                                 geo: pd.DataFrame,
                                 n_pulses: int) -> np.ndarray:
    """Build a per-sensor feature matrix from the earliest pulses of one event.

    For every sensor that appears in ``pulses``, the ``n_pulses`` earliest
    pulses are kept. Sensors with fewer pulses are right-padded with 0.0.

    Parameters
    ----------
    meta : unused by this function; kept so the call signature stays stable.
    pulses : must provide the columns ``sensor_id``, ``time`` and ``charge``.
    geo : must provide the columns ``x``, ``y`` and ``z``. Rows are looked up
        positionally (``iloc``), so this assumes ``sensor_id`` equals the row
        position in ``geo`` — TODO confirm against the geometry file.
    n_pulses : maximum number of pulses kept per sensor.

    Returns
    -------
    np.ndarray of shape ``(n_hit_sensors, 3 + 2 * n_pulses)`` with the column
    layout ``[x, y, z, time_0..time_{n-1}, charge_0..charge_{n-1}]``. Rows are
    ordered by ascending ``sensor_id``.
    """
    # Earliest n_pulses pulses per sensor, ordered by (sensor_id, time).
    pulses_sorted = (
        pulses[["sensor_id", "time", "charge"]]
        .sort_values(["sensor_id", "time"])
        .groupby("sensor_id")
        .head(n_pulses)
    )

    # `sensors` holds the distinct (ascending) sensor IDs; `rows` maps every
    # kept pulse to the output row of its sensor.
    sensors, rows = np.unique(pulses_sorted["sensor_id"].to_numpy(),
                              return_inverse=True)
    # Position of every kept pulse within its sensor (0 = earliest); this is
    # the output column inside the time / charge sub-blocks.
    cols = pulses_sorted.groupby("sensor_id").cumcount().to_numpy()

    # Start from zeros so sensors with fewer than n_pulses pulses are padded.
    shape = (len(sensors), n_pulses)
    padded_time = np.zeros(shape, dtype=float)
    padded_charge = np.zeros(shape, dtype=float)
    padded_time[rows, cols] = pulses_sorted["time"].to_numpy()
    padded_charge[rows, cols] = pulses_sorted["charge"].to_numpy()

    # Use the geometry that was passed in (not a notebook global), and cast
    # the IDs to int because the pulse table may store sensor_id as float.
    dom_locations = geo.iloc[sensors.astype(int)][["x", "y", "z"]].to_numpy()

    return np.concatenate([dom_locations, padded_time, padded_charge], axis=1)
    

In [122]:
# Per-sensor feature matrix for the loaded event, keeping up to 4 pulses per sensor.
data = get_per_dom_summary_extended(meta, pulses, sim_handler.geo, 4)

In [123]:
# Expected shape: (n_hit_sensors, 3 + 2 * n_pulses), i.e. 11 columns for n_pulses = 4.
print(data.shape)

(29, 11)


In [39]:
# Show the raw pulse table returned by get_event_data for this event.
print(pulses)

        event_id  sensor_id     time  charge  is_HLC
0   2.121703e+09     1436.0  11191.0   0.975     1.0
1   2.121703e+09     1438.0   9955.0   0.875     1.0
2   2.121703e+09     1438.0  11212.0   0.975     1.0
3   2.121703e+09     1439.0   9887.0   1.025     1.0
4   2.121703e+09     1439.0  10634.0   0.875     1.0
5   2.121703e+09     1974.0  10276.0   1.025     0.0
6   2.121703e+09     2036.0  10153.0   0.875     1.0
7   2.121703e+09     2037.0  10744.0   1.125     1.0
8   2.121703e+09     2039.0  10150.0   0.725     1.0
9   2.121703e+09     2569.0  10908.0   0.425     1.0
10  2.121703e+09     2569.0  11079.0   1.275     1.0
11  2.121703e+09     2570.0  10760.0   0.825     1.0
12  2.121703e+09     2624.0  11195.0   1.025     0.0
13  2.121703e+09     2628.0  10741.0   1.275     1.0
14  2.121703e+09     2628.0  11386.0   0.875     1.0
15  2.121703e+09     2629.0  10592.0   1.825     1.0
16  2.121703e+09     2629.0  10596.0   1.525     1.0
17  2.121703e+09     2629.0  10604.0   0.675  

In [110]:
# Step-by-step version of the summary: keep at most n_pulses pulses per sensor.
n_pulses = 2
# Order by sensor and then by time, so head() keeps each sensor's earliest pulses.
pulses_sorted = (
    pulses
    .sort_values(["sensor_id", "time"])
    .groupby("sensor_id")
    .head(n_pulses)
)

In [111]:
# Distinct sensor IDs in order of first appearance; pulses_sorted is ordered by
# sensor_id, so they come out ascending.
sensors = pulses_sorted['sensor_id'].unique()

In [112]:
# Look up the x/y/z position of every hit sensor.
# NOTE(review): iloc is positional, so this assumes sensor_id equals the row
# position in the geometry table — confirm. The pulse table prints sensor_id
# as float, so the indexer here is a float array.
dom_locations = sim_handler.geo.iloc[sensors][["x", "y", "z"]].to_numpy()

In [113]:
# Collapse to one row per sensor, gathering the kept times and charges into lists.
df = pulses_sorted[['sensor_id', 'time', 'charge']].groupby('sensor_id').agg(list).reset_index()

In [115]:
import itertools

def padding(row, n_pulses):
    """Return ``row`` as a new list, right-padded with 0.0 up to ``n_pulses`` entries.

    A row that already holds ``n_pulses`` or more entries is copied unchanged;
    it is never truncated.
    """
    n_missing = n_pulses - len(row)
    # A non-positive count multiplies out to an empty list, i.e. no padding.
    return list(row) + [0.0] * n_missing
    

In [99]:
# Pad each sensor's time list to n_pulses entries and stack the rows into a
# (n_sensors, n_pulses) float array.
padded_time = np.array(
    [padding(times, n_pulses) for times in df['time']],
    dtype=float,
).reshape((len(sensors), n_pulses))


In [100]:
# Pad each sensor's charge list to n_pulses entries and stack the rows into a
# (n_sensors, n_pulses) float array.
padded_charge = np.array(
    [padding(charges, n_pulses) for charges in df['charge']],
    dtype=float,
).reshape((len(sensors), n_pulses))

In [101]:
# NOTE(review): `new_data` is not assigned in any cell visible here. This cell
# relies on leftover kernel state and will raise NameError after
# Restart & Run All unless it is defined elsewhere.
new_data

array([[-245.65, -190.49, -451.07],
       [-245.65, -190.49, -484.99],
       [-245.65, -190.49, -502.01],
       [-324.39,  -93.43, -418.99],
       [-200.55,  -74.03, -429.74],
       [-200.55,  -74.03, -446.76],
       [-200.55,  -74.03, -480.8 ],
       [-279.53,   23.17, -331.41],
       [-279.53,   23.17, -348.43],
       [-156.23,   43.37, -248.08],
       [-156.23,   43.37, -316.16],
       [-156.23,   43.37, -333.18],
       [-156.23,   43.37, -350.2 ],
       [-156.23,   43.37, -367.22],
       [-156.23,   43.37, -384.24],
       [ -32.96,   62.44, -304.16],
       [-111.51,  159.98, -231.36],
       [  11.87,  179.19, -178.49],
       [  11.87,  179.19, -195.51],
       [  54.26,  292.97,  -54.15],
       [  54.26,  292.97, -105.21],
       [  54.26,  292.97, -122.23],
       [  54.26,  292.97, -139.25],
       [ 174.47,  315.54,   19.14],
       [ -21.97,  393.24,   40.25],
       [ 101.04,  412.79,   27.83],
       [ 101.04,  412.79,   -6.21],
       [ 101.04,  412.79,  -

In [102]:
# Inspect the padded charge matrix (one row per sensor, one column per pulse slot).
# NOTE(review): the saved output below has 4 columns, while the visible
# `n_pulses = 2` cell would give 2 — the output appears to come from an earlier run.
padded_charge

array([[0.97500002, 0.        , 0.        , 0.        ],
       [0.875     , 0.97500002, 0.        , 0.        ],
       [1.02499998, 0.875     , 0.        , 0.        ],
       [1.02499998, 0.        , 0.        , 0.        ],
       [0.875     , 0.        , 0.        , 0.        ],
       [1.125     , 0.        , 0.        , 0.        ],
       [0.72500002, 0.        , 0.        , 0.        ],
       [0.42500001, 1.27499998, 0.        , 0.        ],
       [0.82499999, 0.        , 0.        , 0.        ],
       [1.02499998, 0.        , 0.        , 0.        ],
       [1.27499998, 0.875     , 0.        , 0.        ],
       [1.82500005, 1.52499998, 0.67500001, 0.        ],
       [1.52499998, 3.07500005, 1.625     , 1.92499995],
       [1.07500005, 2.2750001 , 0.92500001, 0.32499999],
       [0.97500002, 0.47499999, 0.        , 0.        ],
       [0.52499998, 0.        , 0.        , 0.        ],
       [1.27499998, 0.        , 0.        , 0.        ],
       [0.77499998, 0.        ,

In [103]:
# Column layout: [x, y, z | padded times | padded charges], one row per sensor.
data_arr = np.concatenate([dom_locations, padded_time, padded_charge], axis=1)

In [104]:
# Sanity check: expected shape is (n_sensors, 3 + 2 * n_pulses).
data_arr.shape

(29, 11)

In [114]:
# Per-sensor charge lists before padding (list lengths vary between sensors).
df['charge']

0                         [0.9750000238418579]
1                  [0.875, 0.9750000238418579]
2                   [1.024999976158142, 0.875]
3                          [1.024999976158142]
4                                      [0.875]
5                                      [1.125]
6                         [0.7250000238418579]
7     [0.42500001192092896, 1.274999976158142]
8                          [0.824999988079071]
9                          [1.024999976158142]
10                  [1.274999976158142, 0.875]
11     [1.8250000476837158, 1.524999976158142]
12      [1.524999976158142, 3.075000047683716]
13    [1.0750000476837158, 2.2750000953674316]
14    [0.9750000238418579, 0.4749999940395355]
15                        [0.5249999761581421]
16                         [1.274999976158142]
17                        [0.7749999761581421]
18                        [1.3250000476837158]
19                                     [0.375]
20     [1.9249999523162842, 0.824999988079071]
21           

In [116]:
 # Flat view of the padded times before reshaping: explode() repeats each
 # sensor's index label once per padded entry.
 df['time'].apply(lambda row: padding(row, n_pulses)).explode()

0     11191.0
0         0.0
1      9955.0
1     11212.0
2      9887.0
2     10634.0
3     10276.0
3         0.0
4     10153.0
4         0.0
5     10744.0
5         0.0
6     10150.0
6         0.0
7     10908.0
7     11079.0
8     10760.0
8         0.0
9     11195.0
9         0.0
10    10741.0
10    11386.0
11    10592.0
11    10596.0
12    10535.0
12    10555.0
13    10529.0
13    10530.0
14    10826.0
14    11017.0
15    11231.0
15        0.0
16    11605.0
16        0.0
17    11605.0
17        0.0
18    11528.0
18        0.0
19    12409.0
19        0.0
20    11896.0
20    11903.0
21    11952.0
21        0.0
22    12015.0
22        0.0
23    13220.0
23        0.0
24    13935.0
24        0.0
25    12888.0
25        0.0
26    12555.0
26    13291.0
27    13247.0
27        0.0
28    13107.0
28        0.0
Name: time, dtype: object

In [4]:
# Fetch the summary for event 0 through the I3SimHandler method rather than the
# notebook-local function.
# NOTE(review): presumably equivalent to get_per_dom_summary_extended — confirm.
event_data = sim_handler.get_per_dom_summary_extended_from_index(0, n_pulses=2)

In [5]:
# With n_pulses=2 the saved output is (29, 7), i.e. 3 + 2 * 2 columns.
print(event_data.shape)

(29, 7)
