In [None]:
from pathlib import Path
import sys

# Project root, taken as the parent of the current working directory.
# This assumes the kernel's working directory is the notebooks/ folder that
# sits directly under the repository root — adjust if the notebook is run
# from somewhere else.
ROOT_PATH = Path().resolve().parent

# Make the project's `src` package importable. The membership check keeps the
# cell idempotent: re-running it does not pile up duplicate sys.path entries.
if str(ROOT_PATH) not in sys.path:
    sys.path.append(str(ROOT_PATH))

from src.data.dataset import OrionAEFrameDataset
from src.data.transforms import (
    BaseTransform,
    FilterPipeline,
    NormPipeline,
    PreprocessingPipeline,
    HighPassFilter,
    MinMaxNorm,
    ZScoreNorm,
)

In [None]:
# High-pass filter settings.
# NOTE(review): cutoff and fs are presumably in Hz (fs = 5 MHz) — confirm
# against the HighPassFilter implementation.
HIGHPASS_CUTOFF = 100
SAMPLING_RATE = 5e6
HIGHPASS_ORDER = 5

# Z-score statistics, one entry per signal channel (three channels).
# NOTE(review): presumably measured on the training data — confirm provenance
# before reusing them with a different recording.
CHANNEL_MEAN = [0.51982021, 1.78221031, 0.32854723]
CHANNEL_STD = [4.81500911, 6.92255416, 8.82353942]

# Dataset locations are resolved from ROOT_PATH (the repository root) instead
# of a user-specific absolute path, so the notebook runs on any checkout.
DATA_PATH = ROOT_PATH / "data" / "raw" / "segmented_ms_30_0_o_0_00_c_A_B_C_D_20251213_092549"
CONFIG_PATH = ROOT_PATH / "configs" / "dataset" / "example_1.yaml"

# Preprocessing applied to every frame: the filters listed first, then the
# normalisation steps (order as implemented by PreprocessingPipeline).
preprocess_pipeline = PreprocessingPipeline(
    filters=[
        HighPassFilter(
            cutoff=HIGHPASS_CUTOFF,
            fs=SAMPLING_RATE,
            order=HIGHPASS_ORDER,
        )
    ],
    norms=[ZScoreNorm(mean=CHANNEL_MEAN, std=CHANNEL_STD)],
)
# Original (misspelled) name kept as an alias so any later cell that still
# refers to it keeps working.
prepprocess_pipeline = preprocess_pipeline

# Training split of the dataset; paths are passed as plain strings, matching
# the argument type used previously.
train_set = OrionAEFrameDataset(
    data_path=str(DATA_PATH),
    config_path=str(CONFIG_PATH),
    type="train",
    preprocessing_pipeline=preprocess_pipeline,
)

In [9]:
# Sanity check: fetch one sample by index and display it. As the recorded
# output shows, a sample is a dict with 'raw', 'preprocessed', 'features'
# and 'label' entries; both arrays have shape (3, 150000).
train_set[4]

{'raw': array([[ 10.925627 ,  12.024293 ,  13.122959 , ...,  -6.591998 ,
          -6.591998 ,  -8.7893305],
        [  7.56859  ,   9.7659235,   6.530961 , ...,  -9.82696  ,
         -12.024293 , -12.024293 ],
        [ -8.728293 ,  -8.728293 ,  -8.728293 , ...,  -9.82696  ,
          -9.82696  ,  -7.629627 ]], shape=(3, 150000), dtype=float32),
 'preprocessed': array([[ 2.16065745,  2.38786382,  2.61497761, ..., -1.63937568,
         -1.63874516, -2.0943726 ],
        [ 0.83565126,  1.15255877,  0.68477266, ..., -2.02347659,
         -2.3401187 , -2.33927987],
        [-1.02623966, -1.02583748, -1.02543538, ..., -1.4355139 ,
         -1.43501698, -1.18554006]], shape=(3, 150000)),
 'features': {},
 'label': 0}