In [2]:
from pathlib import Path
import sys
import os

def find_project_root(start, markers=('README.md', 'src')):
    """Return the nearest directory at or above ``start`` that holds every marker.

    Args:
        start: Directory the upward search begins from (checked first).
        markers: Entry names that must all exist directly inside the
            project root.

    Returns:
        The first matching ``Path`` when walking from ``start`` towards the
        filesystem root, or ``None`` when no directory on that chain matches.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        if all((candidate / marker).exists() for marker in markers):
            return candidate
    return None


# Locate the project root from the working directory so the notebook runs
# no matter which sub-directory of the repository it is launched from.
current_dir = Path.cwd()
project_root = find_project_root(current_dir)

if project_root is None:
    # Without this guard the search would silently end at the filesystem root
    # and put it on sys.path; fall back to the working directory and say so.
    print(
        f"Warning: no directory containing both 'README.md' and 'src' was found "
        f"above {current_dir}; using the current directory as project root.",
        file=sys.stderr,
    )
    project_root = current_dir

ROOT_PATH = project_root

# Make project-local packages importable (idempotent across cell re-runs).
if str(ROOT_PATH) not in sys.path:
    sys.path.insert(0, str(ROOT_PATH))

# Import necessary modules
import yaml
import numpy as np
import matplotlib.pyplot as plt


from src.data.dataset import OrionAEFrameDataset

In [4]:
# Build input locations from the detected project root rather than a
# machine-specific absolute path, so the notebook runs on any checkout.
# NOTE(review): assumes configs/ and data/ live directly under ROOT_PATH,
# matching the layout of the previously hardcoded paths — confirm.
CONFIG_PATH = ROOT_PATH / "configs" / "dataset" / "example_1.yaml"
DATA_PATH = (
    ROOT_PATH / "data" / "raw"
    / "segmented_cycles_positive_c1_l42373_c_A_B_C_D_20260107_205752"
)

with open(CONFIG_PATH, "r") as f:
    dataset_config = yaml.safe_load(f)['dataset']

# Override the channel selection from the YAML file for this analysis.
dataset_config['channels'] = ['A', 'B', 'C']

dataset = OrionAEFrameDataset(
    data_path=str(DATA_PATH),  # passed as str, as in the original call
    config=dataset_config,
    type="all",
)

In [15]:
def get_series_events(dataset, series_name):
    """
    Gather the frame events of every file that belongs to one series.

    Args:
        dataset: OrionAEFrameDataset instance; its ``file_series``,
            ``file_paths`` and ``_load_events`` members are used.
        series_name: Name of the series to select (e.g., 'B', 'C', 'D')

    Returns:
        numpy array of shape (3, number_of_frames) laid out as:
            - row 0: positive_peak events
            - row 1: mid_zero events
            - row 2: negative_peak events
        When nothing is loaded for the series, an empty int32 array of
        shape (3, 0) is returned.
    """
    # Positions of the files whose series label matches the request
    matching = [
        position
        for position, label in enumerate(dataset.file_series)
        if label == series_name
    ]

    # Load lazily, file by file, and keep only files that yielded events
    loaded = (dataset._load_events(dataset.file_paths[position]) for position in matching)
    per_file_events = [chunk for chunk in loaded if chunk is not None]

    if not per_file_events:
        return np.empty((3, 0), dtype=np.int32)

    # Each chunk is (num_frames, 3); stack along frames, then flip to
    # (event_type, total_frames)
    return np.vstack(per_file_events).T


def get_multiple_series_events(dataset, series_names):
    """
    Collect the events array of each requested series.

    Args:
        dataset: OrionAEFrameDataset instance
        series_names: Iterable of series names (e.g., ['B', 'C', 'D'])

    Returns:
        Dictionary keyed by series name whose values are the arrays
        produced by ``get_series_events`` for that series.
    """
    events_by_series = {}
    for name in series_names:
        events_by_series[name] = get_series_events(dataset, name)
    return events_by_series

In [16]:
# Series whose frame events are gathered for the statistics in the cells below.
series_to_load = ['B', 'C', 'D', 'E', 'F']
events_dict = get_multiple_series_events(dataset, series_to_load)

In [None]:
# Row 0 of a series' events array holds its positive_peak events.
positive_peak_b = events_dict['B'][0]
positive_peak_b.mean(), positive_peak_b.std()

(np.float64(10426.847850328337), np.float64(205.87225372653361))

In [None]:
# Row 1 of a series' events array holds its mid_zero events.
mid_zero_b = events_dict['B'][1]
mid_zero_b.mean(), mid_zero_b.std()

(np.float64(21150.97782183125), np.float64(375.4151243000588))