In [114]:
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

# signals
import mne
from scipy.signal import welch, get_window
from scipy.signal.windows import hamming

In [115]:
base_path_data = '../Data'
hup_atlas = sio.loadmat(os.path.join(base_path_data, 'HUP_atlas.mat'))
mni_atlas = sio.loadmat(os.path.join(base_path_data, 'MNI_atlas.mat'))

In [116]:
# print(mni_atlas.keys())

from pprint import pprint
pprint(mni_atlas)

{'AgeAtTimeOfStudy': array([[34],
       [18],
       [46],
       [16],
       [27],
       [32],
       [40],
       [44],
       [28],
       [15],
       [29],
       [38],
       [23],
       [34],
       [33],
       [19],
       [51],
       [41],
       [36],
       [35],
       [45],
       [37],
       [19],
       [26],
       [37],
       [17],
       [24],
       [30],
       [35],
       [32],
       [14],
       [29],
       [33],
       [41],
       [39],
       [15],
       [23],
       [19],
       [29],
       [47],
       [32],
       [21],
       [24],
       [24],
       [51],
       [57],
       [23],
       [52],
       [48],
       [35],
       [47],
       [38],
       [43],
       [38],
       [36],
       [36],
       [39],
       [42],
       [38],
       [20],
       [30],
       [57],
       [23],
       [33],
       [27],
       [38],
       [45],
       [36],
       [40],
       [29],
       [36],
       [38],
       [26],
       [42],
       [38],
    

In [117]:
hup_df = pd.DataFrame(hup_atlas['mni_coords'], columns=['x', 'y', 'z'])
mni_df = pd.DataFrame(mni_atlas['ChannelPosition'], columns=['x', 'y', 'z'])

hup_ts = pd.DataFrame(hup_atlas['wake_clip']) # (12000, 3431) @ x-axis is time steps, y-axis is electrodes
mni_ts = pd.DataFrame(mni_atlas['Data_W']) # (13600, 1765)  @ x-axis is time steps, y-axis is electrodes

hup_patients = pd.DataFrame(hup_atlas['patient_no'])
mni_patients = pd.DataFrame(mni_atlas['Patient'])

hup_patient_ids = np.unique(hup_atlas['patient_no'])
mni_patient_ids = np.unique(mni_atlas['Patient']) 

mni_samp_freq = int(mni_atlas['SamplingFrequency'].flatten()[~np.isnan(mni_atlas['SamplingFrequency'].flatten())][0])
hup_samp_freq = int(hup_atlas['SamplingFrequency'].flatten()[~np.isnan(hup_atlas['SamplingFrequency'].flatten())][0])

hup_patient_total_el_counts = len(hup_atlas['patient_no'])
mni_patient_total_el_counts = len(mni_atlas['Patient'])

hup_patient_numbers = hup_atlas['patient_no'].flatten()
hup_el_to_pat_map_dict = {}
for idx, patient_num in enumerate(hup_patient_numbers):
    hup_el_to_pat_map_dict[idx] = patient_num
hup_idx_map_arr = np.array([patient_num for patient_num in hup_patient_numbers]) # arr equivalent

mni_patient_numbers = mni_atlas['Patient'].flatten()
mni_el_to_pat_map_dict = {}
for idx, patient_num in enumerate(mni_patient_numbers):
    mni_el_to_pat_map_dict[idx] = patient_num
mni_idx_map_arr = np.array([patient_num for patient_num in mni_patient_numbers])

In [118]:
def get_norm_psd(iEEGnormal, data_timeS, sampling_frequency=200):
    """
    Function to compute normalized power spectral densities for different EEG frequency bands.
    
    Args:
    iEEGnormal (DataFrame): A DataFrame to append results to.
    data_timeS (array): Time domain EEG data for a single electrode (1D array)
    sampling_frequency (int): Sampling frequency of the EEG data.
    
    Returns:
    DataFrame: Updated DataFrame with new EEG features.
    """
    
    Fs = sampling_frequency
    window = Fs * 2
    NFFT = window
    
    # Compute PSD
    f, data_psd = welch(data_timeS, fs=Fs, window=hamming(window), 
                       nfft=NFFT, scaling='density', noverlap=window//2)
    
    # filter out noise frequency 57.5Hz to 62.5Hz
    noise_mask = (f >= 57.5) & (f <= 62.5)
    f = f[~noise_mask]
    # Handle 1D data_psd
    data_psd = data_psd[~noise_mask]
    
    def bandpower(psd, freqs, freq_range):
        """Calculate power in the given frequency range."""
        idx = np.logical_and(freqs >= freq_range[0], freqs <= freq_range[1])
        return np.trapz(psd[idx], freqs[idx])
    
    # Define frequency bands
    bands = {'delta': (1, 4), 'theta': (4, 8), 'alpha': (8, 13), 
             'beta': (13, 30), 'gamma': (30, 80), 'broad': (1, 80)}
    
    # Calculate band powers (using 1D data_psd)
    band_powers = {band: bandpower(data_psd, f, freq_range) 
                  for band, freq_range in bands.items()}
    
    # Compute log transform
    log_band_powers = {f'{band}log': np.log10(power + 1) 
                      for band, power in band_powers.items()}
    
    # Calculate total power
    total_band_power = np.sum([value for value in log_band_powers.values()])
    
    # Calculate relative powers
    relative_band_powers = {f'{band}Rel': log_band_powers[f'{band}log'] / total_band_power 
                          for band in bands}
    
    # Create DataFrame row
    data_to_append = pd.DataFrame([relative_band_powers])
    data_to_append['broadlog'] = log_band_powers['broadlog']
    
    # Append to existing DataFrame
    iEEGnormal = pd.concat([iEEGnormal, data_to_append], ignore_index=True)
    
    return iEEGnormal

For HUP data:
Processed from patient 1 to patient 60
Last processed electrode is 3430
Each electrode's time series has shape (12000,)


For MNI data:
Processed from patient 1 to patient 110
Last processed electrode is 1764
Each electrode's time series has shape (13600,)

In [119]:
hup_iEEGnormal = pd.DataFrame()
mni_iEEGnormal = pd.DataFrame()

# for each patient, for each electrode, compute PSD
for patient in hup_patient_ids:
    # print(f"Processing HUP patient {patient}")
    patient_el_ids = np.where(hup_idx_map_arr == patient)[0]
    # print(f"Found {len(patient_el_ids)} electrodes")
    
    for idx in patient_el_ids:
        # print(f"Processing electrode {idx}")
        hup_electrode_data = hup_ts.iloc[:, idx].values
        # print(f"Electrode data shape: {hup_electrode_data.shape}")
        hup_iEEGnormal = get_norm_psd(hup_iEEGnormal, hup_electrode_data)
        # print(f"Processed electrode {idx}, DataFrame now has {len(hup_iEEGnormal)} rows")

# print("\nFinished HUP processing, starting MNI\n")

# For each electrode, we get one scalar value per frequency band for the entire duration of the signal
for patient in mni_patient_ids:
    # print(f"Processing MNI patient {patient}")
    patient_el_ids = np.where(mni_idx_map_arr == patient)[0]
    # print(f"Found {len(patient_el_ids)} electrodes")
    
    for idx in patient_el_ids:
        # print(f"Processing electrode {idx}")
        mni_electrode_data = mni_ts.iloc[:, idx].values
        # print(f"Electrode data shape: {mni_electrode_data.shape}")
        mni_iEEGnormal = get_norm_psd(mni_iEEGnormal, mni_electrode_data)
        # print(f"Processed electrode {idx}, DataFrame now has {len(mni_iEEGnormal)} rows")

In [120]:
# Check dimensions of both DataFrames
print("HUP DataFrame shape:", hup_iEEGnormal.shape)
print("MNI DataFrame shape:", mni_iEEGnormal.shape)

# Check the number of unique patients that were processed
print("\nNumber of patients processed:")
print("HUP patients:", len(hup_patient_ids))
print("MNI patients:", len(mni_patient_ids))

# Verify total electrode counts
print("\nTotal electrode counts:")
print("HUP electrodes in original data:", hup_ts.shape[1])  # From (12000, 3431)
print("HUP electrodes processed:", len(hup_iEEGnormal))
print("MNI electrodes in original data:", mni_ts.shape[1])  # From (13600, 1765)
print("MNI electrodes processed:", len(mni_iEEGnormal))

# Check what columns we got (spectral features)
print("\nSpectral features computed:")
print(hup_iEEGnormal.columns.tolist())

# Let's look at the first few rows of data to verify values are reasonable
print("\nFirst few rows of HUP data:")
print(hup_iEEGnormal.head())

HUP DataFrame shape: (3431, 7)
MNI DataFrame shape: (1765, 7)

Number of patients processed:
HUP patients: 60
MNI patients: 106

Total electrode counts:
HUP electrodes in original data: 3431
HUP electrodes processed: 3431
MNI electrodes in original data: 1765
MNI electrodes processed: 1765

Spectral features computed:
['deltaRel', 'thetaRel', 'alphaRel', 'betaRel', 'gammaRel', 'broadRel', 'broadlog']

First few rows of HUP data:
   deltaRel  thetaRel  alphaRel   betaRel  gammaRel  broadRel  broadlog
0  0.177924  0.147311  0.135588  0.173211  0.139114  0.226852  2.281969
1  0.210691  0.231001  0.098005  0.129331  0.048090  0.282882  1.720748
2  0.248122  0.184206  0.116251  0.126453  0.059346  0.265622  2.333526
3  0.214907  0.162279  0.114287  0.135578  0.073299  0.299651  1.216269
4  0.187794  0.182233  0.136686  0.152295  0.069230  0.271761  1.517257


Misc

In [121]:
import scipy.io
import pandas as pd

mat_file_path = os.path.join(base_path_data, 'metaData.mat')
mat_file = sio.loadmat(mat_file_path)

# Print all the keys in the loaded .mat file
print(mat_file.keys())

# Print the type and possibly the shape of the data under 'None'
print(type(mat_file['None']))
try:
    print(mat_file['None'].shape)
except AttributeError:
    pass  # In case it's not a numpy array

# If it's an ndarray and seems to hold structured data, inspect further:
if isinstance(mat_file['None'], np.ndarray):
    print(mat_file['None'].dtype.names)  # This will print the field names if it's a structured array


dict_keys(['__header__', '__version__', '__globals__', 'None', '__function_workspace__'])
<class 'scipy.io.matlab._mio5_params.MatlabOpaque'>
(1,)
('s0', 's1', 's2', 'arr')


In [122]:
mni_chan_pos = pd.DataFrame(mni_atlas['ChannelPosition'])
print(mni_chan_pos.head())
print(mni_chan_pos.shape)

      0     1     2
0 -56.0 -37.0  -2.0
1 -60.0 -37.0  -4.0
2 -63.0 -37.0  -2.0
3 -68.0 -38.0  -2.0
4  -1.0  42.0 -14.0
(1765, 3)


In [123]:
# After reading roiAAL.csv
with open(os.path.join(base_path_data, 'roiAAL.csv'), 'r') as file:
    lines = file.readlines()
    print(f"Line 30 in roiAAL.csv: {lines[29]}")

Line 30 in roiAAL.csv: 29,Insula_L,Subcortical (Insular Cortex),1,3001,-36,7,3

