In [3]:
import numpy as np
from scipy.io import loadmat

In [4]:
# for systolic blood pressure only
def build_dataset_SBPLabel(Path, FieldName="Subsets"):
    """Load signals, systolic-BP labels and demographics from a MATLAB file.

    Parameters
    ----------
    Path : str or path-like
        Location of the ``.mat`` file; passed straight to ``scipy.io.loadmat``.
    FieldName : str, default "Subsets"
        Name of the top-level MATLAB struct that carries the fields
        ``Signals``, ``SBP``, ``Age``, ``Gender``, ``Height`` and ``Weight``.

    Returns
    -------
    Signals : np.ndarray
        The ``Signals`` field with one leading entry per record
        (described by the original author as ``(N, signal_length)``).
    SBPLabels : np.ndarray
        1-D array holding the ``SBP`` field, one value per record.
    Demographics : np.ndarray
        One row per record with columns ``[Age, Gender, Height, Weight]``.
        Gender is 1.0 where the stored value equals ``'M'`` and 0.0 for
        every other value.

    Raises
    ------
    KeyError
        If ``FieldName`` is not a variable in the file.
    AttributeError
        If the struct lacks one of the expected fields.
    """
    data = loadmat(Path, squeeze_me=True, struct_as_record=False)
    subset = data[FieldName]

    # extracting fields
    Signals = np.asarray(subset.Signals)
    SBPLabels = subset.SBP
    Age = subset.Age
    Height = subset.Height
    Weight = subset.Weight

    # squeeze_me=True collapses a single-record file: the label comes back as
    # a bare scalar and Signals loses its record axis. Put the axis back so
    # callers always get per-record arrays; multi-record files are untouched.
    if np.ndim(SBPLabels) == 0:
        Signals = Signals[np.newaxis, ...]
    SBPLabels = np.atleast_1d(SBPLabels)

    # convert Gender into numeric: male=1, female=0
    # (atleast_1d so a lone squeezed 'M'/'F' string is treated as one record)
    Gender = np.array(
        [1.0 if g == 'M' else 0.0 for g in np.atleast_1d(subset.Gender)]
    )

    # combine demographic info; column_stack promotes scalars to one-row columns
    Demographics = np.column_stack([Age, Gender, Height, Weight])

    return Signals, SBPLabels, Demographics


In [5]:
# for diastolic blood pressure only
def build_dataset_DBPLabel(Path, FieldName="Subsets"):
    """Load signals, diastolic-BP labels and demographics from a MATLAB file.

    Parameters
    ----------
    Path : str or path-like
        Location of the ``.mat`` file; passed straight to ``scipy.io.loadmat``.
    FieldName : str, default "Subsets"
        Name of the top-level MATLAB struct that carries the fields
        ``Signals``, ``DBP``, ``Age``, ``Gender``, ``Height`` and ``Weight``.

    Returns
    -------
    Signals : np.ndarray
        The ``Signals`` field with one leading entry per record
        (described by the original author as ``(N, signal_length)``).
    DBPLabels : np.ndarray
        1-D array holding the ``DBP`` field, one value per record.
    Demographics : np.ndarray
        One row per record with columns ``[Age, Gender, Height, Weight]``.
        Gender is 1.0 where the stored value equals ``'M'`` and 0.0 for
        every other value.

    Raises
    ------
    KeyError
        If ``FieldName`` is not a variable in the file.
    AttributeError
        If the struct lacks one of the expected fields.
    """
    data = loadmat(Path, squeeze_me=True, struct_as_record=False)
    subset = data[FieldName]

    # extracting fields
    Signals = np.asarray(subset.Signals)
    DBPLabels = subset.DBP
    Age = subset.Age
    Height = subset.Height
    Weight = subset.Weight

    # squeeze_me=True collapses a single-record file: the label comes back as
    # a bare scalar and Signals loses its record axis. Put the axis back so
    # callers always get per-record arrays; multi-record files are untouched.
    if np.ndim(DBPLabels) == 0:
        Signals = Signals[np.newaxis, ...]
    DBPLabels = np.atleast_1d(DBPLabels)

    # convert Gender into numeric: male=1, female=0
    # (atleast_1d so a lone squeezed 'M'/'F' string is treated as one record)
    Gender = np.array(
        [1.0 if g == 'M' else 0.0 for g in np.atleast_1d(subset.Gender)]
    )

    # combine demographic info; column_stack promotes scalars to one-row columns
    Demographics = np.column_stack([Age, Gender, Height, Weight])

    return Signals, DBPLabels, Demographics

In [None]:
# Load and validate the datasets
#
# Picks the first .mat file (alphabetically) in the processed-data directory,
# builds the SBP and DBP datasets from it, prints a short summary of each and
# caches the resulting arrays as .npy files next to the source file.
import os
import pandas as pd  # not used in this cell; kept in case other cells rely on it

# Check if processed data directory exists and has MATLAB files
# (path is resolved against the current working directory)
processed_dir = '../data/processed'

# isdir rather than exists: a plain file at this path would otherwise pass the
# check and make os.listdir raise.
if not os.path.isdir(processed_dir) or not os.listdir(processed_dir):
    print("❌ No processed data found. Please:")
    print("1. Run the data loader script to download PulseDB dataset")
    print("2. Place processed .mat files in data/processed/")
else:
    # os.listdir returns names in arbitrary order; sort so that "the first
    # file" is the same on every run and every machine.
    mat_files = sorted(f for f in os.listdir(processed_dir) if f.endswith('.mat'))
    print(f"📁 Found {len(mat_files)} MATLAB files: {mat_files}")

    if mat_files:
        # Process the first file as example
        file_path = os.path.join(processed_dir, mat_files[0])
        print(f"\n🔄 Processing: {file_path}")

        try:
            # Load SBP dataset
            signals_sbp, sbp_labels, demographics_sbp = build_dataset_SBPLabel(file_path)
            print("✅ SBP Dataset loaded:")
            print(f"   - Signals shape: {signals_sbp.shape}")
            print(f"   - SBP labels shape: {sbp_labels.shape}")
            print(f"   - Demographics shape: {demographics_sbp.shape}")
            print(f"   - SBP range: {sbp_labels.min():.1f} - {sbp_labels.max():.1f} mmHg")

            # Load DBP dataset
            signals_dbp, dbp_labels, demographics_dbp = build_dataset_DBPLabel(file_path)
            print("✅ DBP Dataset loaded:")
            print(f"   - Signals shape: {signals_dbp.shape}")
            print(f"   - DBP labels shape: {dbp_labels.shape}")
            print(f"   - Demographics shape: {demographics_dbp.shape}")
            print(f"   - DBP range: {dbp_labels.min():.1f} - {dbp_labels.max():.1f} mmHg")

            # Save processed data as numpy arrays for faster loading
            np.save(os.path.join(processed_dir, 'signals_sbp.npy'), signals_sbp)
            np.save(os.path.join(processed_dir, 'sbp_labels.npy'), sbp_labels)
            np.save(os.path.join(processed_dir, 'demographics_sbp.npy'), demographics_sbp)

            np.save(os.path.join(processed_dir, 'signals_dbp.npy'), signals_dbp)
            np.save(os.path.join(processed_dir, 'dbp_labels.npy'), dbp_labels)
            np.save(os.path.join(processed_dir, 'demographics_dbp.npy'), demographics_dbp)

            print("\n💾 Processed data saved as .npy files for faster loading")

        except Exception as e:
            # Deliberately broad: this cell is a best-effort validation step
            # and should report the problem instead of aborting the notebook.
            # The exception type is included because some messages (e.g. a
            # KeyError for a missing struct name) are unreadable without it.
            print(f"❌ Error processing data: {type(e).__name__}: {e}")
            print("Please check your MATLAB file structure and field names")
    else:
        print("❌ No .mat files found in processed directory")