In [40]:
import numpy as np
import os

from pathlib import Path

# Make the `src` package importable: when the current directory has no `src`
# folder, step up to the parent directory. The check keeps this cell
# idempotent - re-running it does not climb one directory higher each time.
if not (Path.cwd() / "src").is_dir():
    os.chdir(Path.cwd().parent)

from src.dataset import SPDEventsDataset
from src.normalization import ConstraintsNormalizer, TrackParamsNormalizer

In [41]:
# Seed NumPy's global RNG before building the shuffled dataset.
# NOTE(review): presumably this makes the shuffled event order reproducible -
# confirm that SPDEventsDataset draws from NumPy's global RNG.
np.random.seed(42)

# Dataset with both normalizers attached: one for hits, one for track parameters.
dataset = SPDEventsDataset(
    shuffle=True,
    hits_normalizer=ConstraintsNormalizer(),
    track_params_normalizer=TrackParamsNormalizer(),
)

In [42]:
# Fetch the first item of the dataset; the cells below inspect its fields.
sample = dataset[0]

In [43]:
# Display the whole sample dict to see which keys and arrays it holds.
sample

{'hits': array([[ 0.59280544,  0.23586112,  0.81287491],
        [-0.27955427,  0.459404  , -0.98133676],
        [-0.2749876 , -0.64170251, -0.96161545],
        ...,
        [-0.7404729 ,  0.50909341,  0.17553872],
        [ 0.70824555, -0.11907849,  0.21250588],
        [ 0.25619556,  0.2771329 , -0.2644813 ]]),
 'hit_labels': array([-1, -1, -1, -1, -1,  4, -1,  2,  2,  3, -1, -1, -1, -1,  3, -1, -1,
         0, -1, -1, -1, -1,  1,  2, -1,  0, -1,  4, -1, -1, -1,  3,  1,  0,
        -1, -1, -1,  4,  2, -1,  3, -1, -1, -1, -1, -1, -1, -1, -1, -1,  2,
        -1, -1,  0, -1,  0, -1,  1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  0,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  3, -1,  3,  2,
        -1, -1,  3, -1, -1,  0, -1, -1, -1, -1,  0, -1,  1, -1,  2,  1, -1,
        -1,  3,  4,  1, -1, -1, -1,  0, -1, -1,  4, -1, -1, -1, -1,  0, -1,
        -1,  1,  0, -1, -1,  2, -1, -1,  3, -1, -1,  0, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1,  3, -1, -1,  0,  2, -1, -1, -1, -

In [44]:
# Check the shape of the hits array of this sample.
sample["hits"].shape

(665, 3)

In [45]:
# Check the shape of the hit label array; it is expected to hold one label
# per row of sample["hits"].
sample["hit_labels"].shape

(665,)

In [46]:
# Show the label array that accompanies the track parameters.
# NOTE(review): -1 entries look like padding slots - confirm in the dataset code.
sample["param_labels"]

array([ 0,  1,  3,  2,  4, -1, -1, -1, -1, -1], dtype=int32)

In [47]:
# Show the track parameter array; rounding to 3 decimals only keeps the
# printed output compact.
np.round(sample["params"], 3)

array([[0.503, 0.499, 0.529, 0.156, 0.156, 0.845, 0.   , 1.   ],
       [0.503, 0.499, 0.529, 0.648, 0.001, 0.595, 1.   , 0.   ],
       [0.503, 0.499, 0.529, 0.215, 0.377, 0.874, 0.   , 1.   ],
       [0.503, 0.499, 0.529, 0.954, 0.915, 0.584, 1.   , 0.   ],
       [0.503, 0.499, 0.529, 0.974, 0.986, 0.37 , 1.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ],
       [0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ]],
      dtype=float32)

In [48]:
# Map the normalized track parameters back to their original scale, one row
# at a time. Only the first five rows are used: for this sample those are the
# rows whose entries in sample["param_labels"] are non-negative; the remaining
# rows are all zeros (presumably padding - confirm in the dataset code).
orig_params = np.apply_along_axis(
    dataset.track_params_normalizer.denormalize,
    axis=1,
    arr=sample["params"][:5],
)
# Rounded to 2 decimals for display only.
np.round(orig_params, 2)

array([[  4.97,  -1.38, 139.2 , 240.42,   0.98,   2.65,   1.  ],
       [  4.97,  -1.38, 139.2 , 682.92,   0.  ,   1.87,  -1.  ],
       [  4.97,  -1.38, 139.2 , 293.13,   2.37,   2.74,   1.  ],
       [  4.97,  -1.38, 139.2 , 958.54,   5.75,   1.83,  -1.  ],
       [  4.97,  -1.38, 139.2 , 976.96,   6.2 ,   1.16,  -1.  ]],
      dtype=float32)

In [49]:
# Remove the track-parameter normalizer from the existing dataset object and
# fetch item 0 again to view its parameters without normalization.
# NOTE: this mutates `dataset` in place, so every cell executed after this one
# sees a dataset without a track-parameter normalizer, and `sample` no longer
# refers to the normalized sample inspected above.
dataset.track_params_normalizer = None
sample = dataset[0]
# Rounded to 2 decimals for display only.
np.round(sample["params"], 2)

array([[  4.97,  -1.38, 139.2 , 240.42,   0.98,   2.65,   1.  ],
       [  4.97,  -1.38, 139.2 , 682.92,   0.  ,   1.87,  -1.  ],
       [  4.97,  -1.38, 139.2 , 293.13,   2.37,   2.74,   1.  ],
       [  4.97,  -1.38, 139.2 , 958.54,   5.75,   1.83,  -1.  ],
       [  4.97,  -1.38, 139.2 , 976.96,   6.2 ,   1.16,  -1.  ],
       [  0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ],
       [  0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ],
       [  0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ],
       [  0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ],
       [  0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ,   0.  ]],
      dtype=float32)

## Test dataloader

In [50]:
from torch.utils.data import DataLoader
from src.dataset import collate_fn

#### Without normalized parameters

In [51]:
# Wrap the dataset in a loader that groups four events per batch using the
# project's collate function, then pull the first batch.
train_loader = DataLoader(dataset, collate_fn=collate_fn, batch_size=4)
batch = next(iter(train_loader))
# One "<key>: <shape>" line per entry of the batch.
[f"{name}: {tensor.shape}" for name, tensor in batch.items()]

['inputs: torch.Size([4, 2841, 3])',
 'mask: torch.Size([4, 2841])',
 'targets: torch.Size([4, 10, 7])']

In [52]:
# Event to inspect.
event_idx = 3
# Number of distinct hit labels in that event, minus one.
# NOTE(review): the "- 1" presumably discounts the -1 label - confirm.
len(np.unique(dataset[event_idx]["hit_labels"])) - 1

6

In [53]:
# Distinct hit labels and their counts among the hits from position 512 onward
# of the event selected by `event_idx` (defined in the previous cell), instead
# of repeating the index as a literal.
np.unique(dataset[event_idx]["hit_labels"][512:], return_counts=True)

(array([-1,  0,  1,  2,  3,  4,  5]),
 array([290,  15,  13,  15,   8,  14,  10]))

#### With normalized parameters

In [59]:
# Build a fresh dataset with both normalizers attached again.
dataset = SPDEventsDataset(
    shuffle=True,
    hits_normalizer=ConstraintsNormalizer(),
    track_params_normalizer=TrackParamsNormalizer(),
)

# Batches of 16 events this time; take the first one.
train_loader = DataLoader(dataset, collate_fn=collate_fn, batch_size=16)
batch = next(iter(train_loader))

# Per batch entry: its shape followed by the (min, mean, max) of its values.
[
    f"{name}: {tensor.shape}, {tensor.min(), tensor.mean(), tensor.max()}"
    for name, tensor in batch.items()
]

['inputs: torch.Size([16, 2335, 3]), (tensor(-0.9999), tensor(-0.0005), tensor(0.9998))',
 'mask: torch.Size([16, 2335]), (tensor(0.), tensor(0.4012), tensor(1.))',
 'targets: torch.Size([16, 10, 8]), (tensor(0., dtype=torch.float64), tensor(0.2418, dtype=torch.float64), tensor(1., dtype=torch.float64))']