In [1]:
import h5py
import numpy as np
from scipy.signal import welch
from tqdm import tqdm


def compute_psd(data_chunk, fs=500, nperseg=256):
    """
    Compute Welch PSD features for a chunk of EEG data.

    Parameters:
    - data_chunk: numpy array of shape (num_samples, num_channels, signal_length)
    - fs: Sampling frequency
    - nperseg: Length of each segment for Welch's method

    Returns:
    - psd_features: float32 numpy array of shape (num_samples, num_channels, num_freq_bins),
      where num_freq_bins is the number of PSD values `welch` returns for one signal.
      An empty chunk yields an empty array with the same bin count.
    """
    num_samples, num_channels, signal_length = data_chunk.shape

    if data_chunk.size == 0:
        # Nothing to transform. Run Welch on an all-zero signal of the same
        # length and dtype purely to learn how many bins it returns, so the
        # (empty) result still has a consistent last dimension.
        placeholder = np.zeros(signal_length, dtype=data_chunk.dtype)
        _, Pxx_probe = welch(placeholder, fs=fs, nperseg=nperseg)
        return np.zeros((num_samples, num_channels, len(Pxx_probe)), dtype=np.float32)

    # welch works along the last axis, so one call handles every channel of a
    # sample. The sample loop is kept so that the intermediate segment arrays
    # stay the size of a single sample rather than the whole chunk.
    _, Pxx_first = welch(data_chunk[0], fs=fs, nperseg=nperseg, axis=-1)

    # The first result also tells us the number of frequency bins.
    psd_features = np.empty(
        (num_samples, num_channels, Pxx_first.shape[-1]), dtype=np.float32
    )
    psd_features[0] = Pxx_first

    for i in range(1, num_samples):
        _, Pxx = welch(data_chunk[i], fs=fs, nperseg=nperseg, axis=-1)
        psd_features[i] = Pxx

    return psd_features


def process_and_save_psd(input_h5_file, output_h5_file, fs=500, nperseg=256, chunk_size=1000):
    """
    Process the EEG data to compute PSD features and save them to a new HDF5 file.

    The output file receives copies of the input's 'labels' and 'sessions'
    datasets plus a float32 'data' dataset of shape
    (num_samples, num_channels, num_freq_bins). Any existing file at
    output_h5_file is overwritten.

    Parameters:
    - input_h5_file: Path to the input HDF5 file containing the raw EEG data
    - output_h5_file: Path to the output HDF5 file to save the PSD features
    - fs: Sampling frequency
    - nperseg: Length of each segment for Welch's method
    - chunk_size: Number of samples to process at once (must be >= 1)

    Raises:
    - ValueError: if chunk_size is smaller than 1
    """
    # Check before touching any file: a negative step makes range() empty, which
    # would otherwise leave an all-zero 'data' dataset and still report success.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Open the original HDF5 file in read mode
    with h5py.File(input_h5_file, 'r') as h5_orig:
        data = h5_orig['data']  # Shape: (num_samples, num_channels, signal_length)
        labels = h5_orig['labels'][:]
        sessions = h5_orig['sessions'][:]
        num_samples, num_channels, _ = data.shape

        # Determine the number of frequency bins from a single PSD computation
        _, Pxx_sample = welch(data[0, 0], fs=fs, nperseg=nperseg)
        num_freq_bins = len(Pxx_sample)

        # Create a new HDF5 file to store PSD features
        with h5py.File(output_h5_file, 'w') as h5_new:
            # Copy 'labels' and 'sessions' datasets
            h5_new.create_dataset('labels', data=labels, dtype=labels.dtype)
            h5_new.create_dataset('sessions', data=sessions, dtype=sessions.dtype)

            # Create 'data' dataset for PSD features with appropriate shape
            psd_shape = (num_samples, num_channels, num_freq_bins)
            psd_dataset = h5_new.create_dataset('data', shape=psd_shape, dtype=np.float32, chunks=True)

            # Process data in chunks so only chunk_size samples are read into memory at a time
            for start in tqdm(range(0, num_samples, chunk_size), desc='Processing PSD'):
                end = min(start + chunk_size, num_samples)
                data_chunk = data[start:end]  # Shape: (<=chunk_size, num_channels, signal_length)
                psd_chunk = compute_psd(data_chunk, fs=fs, nperseg=nperseg)  # Shape: (<=chunk_size, num_channels, num_freq_bins)
                # Save the computed PSD to the new HDF5 file
                psd_dataset[start:end] = psd_chunk

    print(f"PSD computation and saving completed successfully to '{output_h5_file}'.")


In [2]:
if __name__ == "__main__":
    # Training split: raw EEG file in, full-spectrum Welch PSD file out.
    input_h5_file = '../Data/train_raw.h5'
    output_h5_file = '../Data/train_psd.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|█████████████████████████| 401/401 [43:37<00:00,  6.53s/it]


PSD computation and saving completed successfully to '../Data/train_psd.h5'.


In [3]:
if __name__ == "__main__":
    # Training split: raw EEG file in, full-spectrum Welch PSD file out.
    # NOTE(review): uses the same input and output paths as the preceding cell,
    # so running both recomputes and overwrites '../Data/train_psd.h5'.
    input_h5_file = '../Data/train_raw.h5'
    output_h5_file = '../Data/train_psd.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|█████████████████████████| 401/401 [43:38<00:00,  6.53s/it]

PSD computation and saving completed successfully to '../Data/train_psd.h5'.





In [4]:
if __name__ == "__main__":
    # Test split: raw EEG file in, full-spectrum Welch PSD file out.
    input_h5_file = '../Data/test_raw.h5'
    output_h5_file = '../Data/test_psd.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 86/86 [09:16<00:00,  6.47s/it]

PSD computation and saving completed successfully to '../Data/test_psd.h5'.





In [5]:
if __name__ == "__main__":
    # Validation split: raw EEG file in, full-spectrum Welch PSD file out.
    input_h5_file = '../Data/valid_raw.h5'
    output_h5_file = '../Data/valid_psd.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 32/32 [03:23<00:00,  6.36s/it]

PSD computation and saving completed successfully to '../Data/valid_psd.h5'.





In [6]:
if __name__ == "__main__":
    # 'neg' file: raw EEG file in, full-spectrum Welch PSD file out.
    input_h5_file = '../Data/neg_raw.h5'
    output_h5_file = '../Data/neg_psd.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 85/85 [09:06<00:00,  6.43s/it]

PSD computation and saving completed successfully to '../Data/neg_psd.h5'.





In [7]:
import h5py
import numpy as np
from scipy.signal import welch
from tqdm import tqdm

# EEG frequency bands in Hz. compute_psd treats each entry as the half-open
# interval [low, high), so adjacent bands share an edge: every frequency bin
# between 0.5 Hz and 50 Hz then belongs to exactly one band. (Alpha previously
# stopped at 12 Hz while beta started at 13 Hz, leaving [12, 13) Hz unassigned.)
bands = {
    'delta': (0.5, 4),
    'theta': (4, 8),
    'alpha': (8, 13),
    'beta': (13, 30),
    'gamma': (30, 50)
}

def compute_psd(data_chunk, fs=500, nperseg=256, band_ranges=None):
    """
    Compute PSD features for a chunk of EEG data and extract band-specific PSD.

    For every sample and channel the Welch PSD is computed and the PSD values
    whose frequency lies in [low, high) are summed for each band.

    Note: this definition replaces the earlier full-spectrum `compute_psd`
    defined in this notebook once its cell has been run.

    Parameters:
    - data_chunk: numpy array of shape (num_samples, num_channels, signal_length)
    - fs: Sampling frequency
    - nperseg: Length of each segment for Welch's method
    - band_ranges: optional mapping of band name -> (low, high) in Hz; when
      omitted, the module-level `bands` table is used. Band order in the
      output follows the mapping's iteration order.

    Returns:
    - psd_band_features: float32 numpy array of shape (num_samples, num_channels, num_bands)
    """
    if band_ranges is None:
        band_ranges = bands

    num_samples, num_channels, _ = data_chunk.shape
    band_limits = list(band_ranges.values())

    # Pre-allocate PSD features for bands
    psd_band_features = np.zeros(
        (num_samples, num_channels, len(band_limits)), dtype=np.float32
    )

    # With no data or no bands there is nothing to accumulate.
    if data_chunk.size == 0 or not band_limits:
        return psd_band_features

    band_masks = None
    for i in range(num_samples):
        # welch works along the last axis, so one call covers all channels.
        freqs, Pxx = welch(data_chunk[i], fs=fs, nperseg=nperseg, axis=-1)

        if band_masks is None:
            # The frequency grid depends only on fs / nperseg / signal length,
            # which are the same for every sample, so build the masks once.
            band_masks = [(freqs >= low) & (freqs < high) for low, high in band_limits]

        # Band power = sum of the PSD values falling inside each band
        for k, mask in enumerate(band_masks):
            psd_band_features[i, :, k] = Pxx[:, mask].sum(axis=-1)

    return psd_band_features

def process_and_save_psd(input_h5_file, output_h5_file, fs=500, nperseg=256, chunk_size=1000):
    """
    Process the EEG data to compute band-specific PSD features and save them to a new HDF5 file.

    The output file receives copies of the input's 'labels' and 'sessions'
    datasets plus a float32 'data' dataset of shape
    (num_samples, num_channels, num_bands), where num_bands is the number of
    entries in the module-level `bands` table. Any existing file at
    output_h5_file is overwritten.

    Parameters:
    - input_h5_file: Path to the input HDF5 file containing the raw EEG data
    - output_h5_file: Path to the output HDF5 file to save the PSD features
    - fs: Sampling frequency
    - nperseg: Length of each segment for Welch's method
    - chunk_size: Number of samples to process at once (must be >= 1)

    Raises:
    - ValueError: if chunk_size is smaller than 1
    """
    # Check before touching any file: a negative step makes range() empty, which
    # would otherwise leave an all-zero 'data' dataset and still report success.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # Open the original HDF5 file in read mode
    with h5py.File(input_h5_file, 'r') as h5_orig:
        data = h5_orig['data']  # Shape: (num_samples, num_channels, signal_length)
        labels = h5_orig['labels'][:]
        sessions = h5_orig['sessions'][:]
        num_samples, num_channels, _ = data.shape

        # Determine the number of bands
        num_bands = len(bands)

        # Create a new HDF5 file to store PSD features
        with h5py.File(output_h5_file, 'w') as h5_new:
            # Copy 'labels' and 'sessions' datasets
            h5_new.create_dataset('labels', data=labels, dtype=labels.dtype)
            h5_new.create_dataset('sessions', data=sessions, dtype=sessions.dtype)

            # Create 'data' dataset for PSD features with appropriate shape
            psd_shape = (num_samples, num_channels, num_bands)
            psd_dataset = h5_new.create_dataset('data', shape=psd_shape, dtype=np.float32, chunks=True)

            # Process data in chunks so only chunk_size samples are read into memory at a time
            for start in tqdm(range(0, num_samples, chunk_size), desc='Processing PSD'):
                end = min(start + chunk_size, num_samples)
                data_chunk = data[start:end]  # Shape: (<=chunk_size, num_channels, signal_length)
                psd_chunk = compute_psd(data_chunk, fs=fs, nperseg=nperseg)  # Shape: (<=chunk_size, num_channels, num_bands)
                # Save the computed PSD to the new HDF5 file
                psd_dataset[start:end] = psd_chunk

    print(f"Band-specific PSD computation and saving completed successfully to '{output_h5_file}'.")


In [8]:
if __name__ == "__main__":
    # 'neg' file: raw EEG file in, band-power PSD file out.
    input_h5_file = '../Data/neg_raw.h5'
    output_h5_file = '../Data/neg_psd_bins.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 85/85 [11:28<00:00,  8.10s/it]

Band-specific PSD computation and saving completed successfully to '../Data/neg_psd_bins.h5'.





In [9]:
if __name__ == "__main__":
    # Training split: raw EEG file in, band-power PSD file out.
    input_h5_file = '../Data/train_raw.h5'
    output_h5_file = '../Data/train_psd_bins.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|█████████████████████████| 401/401 [54:34<00:00,  8.16s/it]

Band-specific PSD computation and saving completed successfully to '../Data/train_psd_bins.h5'.





In [10]:
if __name__ == "__main__":
    # Test split: raw EEG file in, band-power PSD file out.
    input_h5_file = '../Data/test_raw.h5'
    output_h5_file = '../Data/test_psd_bins.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 86/86 [11:40<00:00,  8.15s/it]

Band-specific PSD computation and saving completed successfully to '../Data/test_psd_bins.h5'.





In [11]:
if __name__ == "__main__":
    # Validation split: raw EEG file in, band-power PSD file out.
    input_h5_file = '../Data/valid_raw.h5'
    output_h5_file = '../Data/valid_psd_bins.h5'

    process_and_save_psd(
        input_h5_file,
        output_h5_file,
        fs=500,
        nperseg=256,
        chunk_size=1000,
    )


Processing PSD: 100%|███████████████████████████| 32/32 [04:16<00:00,  8.03s/it]

Band-specific PSD computation and saving completed successfully to '../Data/valid_psd_bins.h5'.



