Author: Mario Roca

In [1]:
import numpy as np

## Explore npy structures (recursive)

In [2]:
def explore_structure(data, level=0, max_items=3, max_values=4):
    """Recursively print a summary of a nested dict / list / ndarray structure.

    Args:
        data: Object to describe. Dicts, lists and NumPy arrays are walked;
            any other object is printed as ``<type>: <value>``.
        level: Current nesting depth. Each level indents the output by two
            spaces; callers normally leave this at 0.
        max_items: Number of leading list items to descend into (default 3).
            Pass ``None`` to visit every item. Should be a non-negative int
            or ``None``.
        max_values: Number of leading values of the flattened array to
            preview (default 4). Pass ``None`` to show the whole array.

    Returns:
        None. The summary is written to stdout via ``print``.
    """
    indent = "  " * level
    if isinstance(data, dict):
        print(f"{indent}Dict with keys: {list(data.keys())}")
        # Every key is visited; only lists and array previews are truncated.
        for key, value in data.items():
            print(f"{indent}Exploring key '{key}':")
            explore_structure(value, level + 1, max_items, max_values)
    elif isinstance(data, list):
        # The reported length is the full length, even when fewer items are explored.
        print(f"{indent}List of {len(data)} items")
        for i, item in enumerate(data[:max_items]):  # slicing with None keeps all items
            print(f"{indent}Exploring list item {i}:")
            explore_structure(item, level + 1, max_items, max_values)
    elif isinstance(data, np.ndarray):
        print(f"{indent}Array with shape {data.shape} and dtype {data.dtype}")
        # The label says "first element" but the preview is the first
        # `max_values` entries of the flattened array; the text is kept
        # unchanged so existing output stays identical.
        print(f"{indent}Content (first element): {data.ravel()[:max_values]}")
    else:
        print(f"{indent}{type(data)}: {data}")

In [None]:
# The EEG data (Data/eeg_signals.npy) is a nested dictionary with the following hierarchy:
# 
# 1. The outermost dictionary has keys representing patient IDs or identifiers (e.g., '2107_1', '2126_1').
# 2. Each patient ID maps to a list containing a single dictionary.
# 3. This inner dictionary has keys representing different statuses or conditions (e.g., '10', '20', '16'); some keys carry a numeric suffix (e.g., '10_1', '20_2').
# 4. Each status key maps to a list containing a single NumPy array.
# 5. These NumPy arrays have shapes indicating the number of samples and channels (e.g., (1249, 4) means 1249 samples and 4 channels).
# 6. The arrays contain EEG signal data with dtype float64.
# 
# Example structure:
# {
#     '2107_1': [
#         {
#             '10': [array with shape (1249, 4)],
#             '20': [array with shape (389, 4)],
#             '16': [array with shape (6366, 4)],
#             ...
#         }
#     ],
#     '2126_1': [
#         {
#             '10': [array with shape (1260, 4)],
#             '20': [array with shape (559, 4)],
#             '15': [array with shape (6161, 4)],
#             ...
#         }
#     ],
#     ...
# }
# 
# Usage:
# - The outermost dictionary allows access to data for each patient.
# - For each patient, the list contains a dictionary with different statuses.
# - Each status key maps to a list containing a NumPy array of EEG signal data.
# - The arrays can be accessed and processed for feature extraction, analysis, or modeling.

In [4]:
# Read the EEG recordings from disk and unwrap the stored object with .item().
# NOTE: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code -- only load files from a trusted source.
eeg_signals = np.load(file="Data/eeg_signals.npy", allow_pickle=True).item()
# Print a recursive summary of what was loaded
explore_structure(data=eeg_signals)

Dict with keys: ['2107_1', '2126_1', '2104_2', '2123_1', '2124_2', '2113_1', '2122_1', '2110_1', '2121_1', '2104_1', '2114_1', '2131_2', '2120_1', '2112_1', '2115_1', '2117_1', '2119_1', '2130_1', '2131_1', '2109_1', '2129_1', '2116_1', '2134_1', '2133_1', '2111_1', '2108_1', '2132_1', '2127_1', '2106_1', '2124_1', '2125_1', '2136_1']
Exploring key '2107_1':
  List of 1 items
  Exploring list item 0:
    Dict with keys: ['10', '20', '16', '26', '10_1', '20_2', '15', '25', '10_3', '20_4', '14', '24', '10_5', '20_6', '13', '23', '10_7', '20_8', '12', '22', '10_9', '20_10', '11']
    Exploring key '10':
      List of 1 items
      Exploring list item 0:
        Array with shape (1249, 4) and dtype float64
        Content (first element): [862.96583166 832.80086522 901.64134202 853.19858563]
    Exploring key '20':
      List of 1 items
      Exploring list item 0:
        Array with shape (389, 4) and dtype float64
        Content (first element): [835.43051946 825.63288986 860.68306938 8

In [None]:
# The ECG data (Data/ecg_signals.npy) is a nested dictionary with the following hierarchy:
# 
# 1. The outermost dictionary has keys representing patient IDs or identifiers (e.g., '2107_1', '2126_1').
# 2. Each patient ID maps to a list containing a single dictionary.
# 3. This inner dictionary has keys representing different statuses or conditions (e.g., '10', '20', '16'); some keys carry a numeric suffix (e.g., '10_1', '20_2').
# 4. Each status key maps to a list containing a single NumPy array.
# 5. These NumPy arrays have shapes indicating the number of samples and channels (e.g., (31311, 1) means 31311 samples and 1 channel).
# 6. The arrays contain ECG signal data with dtype uint16.
# 
# Example structure:
# {
#     '2107_1': [
#         {
#             '10': [array with shape (31311, 1)],
#             '20': [array with shape (9702, 1)],
#             '16': [array with shape (159264, 1)],
#             ...
#         }
#     ],
#     '2126_1': [
#         {
#             '10': [array with shape (31500, 1)],
#             '20': [array with shape (13986, 1)],
#             '15': [array with shape (153972, 1)],
#             ...
#         }
#     ],
#     ...
# }
# 
# Usage:
# - The outermost dictionary allows access to data for each patient.
# - For each patient, the list contains a dictionary with different statuses.
# - Each status key maps to a list containing a NumPy array of ECG signal data.
# - The arrays can be accessed and processed for feature extraction, analysis, or modeling.

In [5]:
# Read the ECG recordings from disk and unwrap the stored object with .item().
# NOTE: allow_pickle=True unpickles the file contents, which can execute
# arbitrary code -- only load files from a trusted source.
ecg_signals = np.load(file="Data/ecg_signals.npy", allow_pickle=True).item()
# Print a recursive summary of what was loaded
explore_structure(data=ecg_signals)

Dict with keys: ['2107_1', '2126_1', '2123_1', '2124_2', '2113_1', '2122_1', '2110_1', '2121_1', '2104_1', '2114_1', '2131_2', '2120_1', '2112_1', '2115_1', '2117_1', '2119_1', '2130_1', '2131_1', '2109_1', '2129_1', '2116_1', '2134_1', '2133_1', '2105_1', '2111_1', '2108_1', '2132_1', '2127_1', '2106_1', '2124_1', '2125_1', '2136_1']
Exploring key '2107_1':
  List of 1 items
  Exploring list item 0:
    Dict with keys: ['10', '20', '16', '26', '10_1', '20_2', '15', '25', '10_3', '20_4', '14', '24', '10_5', '20_6', '13', '23', '10_7', '20_8', '12', '22', '10_9', '20_10', '11']
    Exploring key '10':
      List of 1 items
      Exploring list item 0:
        Array with shape (31311, 1) and dtype uint16
        Content (first element): [492 492 492 492]
    Exploring key '20':
      List of 1 items
      Exploring list item 0:
        Array with shape (9702, 1) and dtype uint16
        Content (first element): [493 492 492 493]
    Exploring key '16':
      List of 1 items
      Explori

In [10]:
# Direct lookup of one ECG recording: patient '2107_1' -> first list entry
# -> status key '10' -> first array. Left as the cell's last expression so
# the notebook displays the array.
ecg_signals['2107_1'][0]['10'][0]

array([[492],
       [492],
       [492],
       ...,
       [494],
       [494],
       [493]], dtype=uint16)