In [22]:
import scipy.io as sio
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import pickle
import joblib 
import dill 

import mne
from scipy.signal import welch, get_window
from scipy.signal.windows import hamming

from scipy.signal import butter, lfilter, sosfilt, filtfilt, sosfreqz, iirnotch
from scipy import fftpack
from scipy.stats import entropy

In [2]:
# Directory (relative to the notebook's working directory) that the atlas .mat
# files and region CSVs are read from via os.path.join in the cells below.
base_path_data = '../Data'

In [11]:
### TIME SERIES DATA

def _first_valid_int(values):
    """Return the first non-NaN element of ``values`` (flattened) as an ``int``."""
    flat = values.flatten()
    return int(flat[~np.isnan(flat)][0])


# HUP atlas. Keys noted in the original notebook:
# dict_keys(['__header__', '__version__', '__globals__', 'SamplingFrequency', 'depth_elecs', 'mni_coords', 'patient_no', 'resected_ch', 'soz_ch', 'spike_24h', 'wake_clip'])
hup_atlas = sio.loadmat(os.path.join(base_path_data, 'HUP_atlas.mat'))

# MNI atlas. Keys noted in the original notebook:
# dict_keys(['__header__', '__version__', '__globals__', 'AgeAtTimeOfStudy', 'ChannelName', 'ChannelPosition', 'ChannelRegion', 'ChannelType',
# 'Data_N2', 'Data_N3', 'Data_R', 'Data_W', 'FacesLeft', 'FacesRight', 'Gender', 'Hemisphere', 'NodesLeft', 'NodesLeftInflated', 'NodesRegionLeft',
# 'NodesRegionRight', 'NodesRight', 'NodesRightInflated', 'Patient', 'RegionName', 'SamplingFrequency'])
mni_atlas = sio.loadmat(os.path.join(base_path_data, 'MNI_atlas.mat'))

# Wake recordings as DataFrames: one row per time step, one column per electrode.
# Shapes noted in the original notebook: HUP (12000, 3431), MNI (13600, 1765).
hup_ts = pd.DataFrame(hup_atlas['wake_clip'])
mni_ts = pd.DataFrame(mni_atlas['Data_W'])

# Per-electrode patient labels, kept in three forms used further down:
# a DataFrame, a flat array, and the sorted set of unique patient ids.
hup_patients = pd.DataFrame(hup_atlas['patient_no'])
mni_patients = pd.DataFrame(mni_atlas['Patient'])

hup_patient_numbers = hup_atlas['patient_no'].flatten()
mni_patient_numbers = mni_atlas['Patient'].flatten()

hup_patient_ids = np.unique(hup_atlas['patient_no'])
mni_patient_ids = np.unique(mni_atlas['Patient'])

# Electrode counts (length of the first axis of the patient-label arrays).
hup_patient_total_el_counts = hup_atlas['patient_no'].shape[0]
mni_patient_total_el_counts = mni_atlas['Patient'].shape[0]

# Sampling frequency: first non-NaN entry stored in each atlas.
mni_samp_freq = _first_valid_int(mni_atlas['SamplingFrequency'])
hup_samp_freq = _first_valid_int(hup_atlas['SamplingFrequency'])

# Electrode index -> patient number, as a dict and as an equivalent array.
hup_el_to_pat_map_dict = dict(enumerate(hup_patient_numbers))
hup_idx_map_arr = np.array(list(hup_patient_numbers))

mni_el_to_pat_map_dict = dict(enumerate(mni_patient_numbers))
mni_idx_map_arr = np.array(list(mni_patient_numbers))

## REGION MAPS

# Column listing noted in the original notebook next to these reads
# (which table(s) it describes is not clear from the notes -- verify per file):
# Index(['x', 'y', 'z', 'roiNum', 'snum', 'abvr', 'lobe', 'isSideLeft'], dtype='object')
dk_atlas_df = pd.read_csv(os.path.join(base_path_data, 'desikanKilliany.csv'))
hup_df = pd.read_csv(os.path.join(base_path_data, 'hup_df.csv'))
mni_df = pd.read_csv(os.path.join(base_path_data, 'mni_df.csv'))


In [43]:
def get_norm_psd(iEEGnormal, data_timeS, sampling_frequency=200, noise_band=(57.5, 62.5)):
    """
    Compute relative log band powers for one electrode and append them as a row.

    Steps:
      1. Welch PSD with 2-second Hamming segments and 50% overlap.
      2. Drop the frequency bins inside ``noise_band`` (inclusive on both ends).
      3. Integrate the PSD over each band with the trapezoidal rule. The noise
         bins are removed rather than zeroed, so for a band that spans the gap
         the trapezoid connects the two neighbouring bins linearly.
      4. Transform each band power with ``log10(power + 1)``.
      5. Divide each log power by the sum of all six log powers. The sum
         includes the 'broad' band, so the six returned values add up to 1.

    Args:
        iEEGnormal (DataFrame): Frame to append the new row to (not modified in place).
        data_timeS (array): Time-domain data for a single electrode (1D array).
        sampling_frequency (int | float): Sampling frequency of ``data_timeS`` in Hz.
        noise_band (tuple | None): ``(low, high)`` frequency range in Hz to exclude
            before integrating; pass ``None`` to keep every bin. Defaults to
            ``(57.5, 62.5)``, i.e. the original hard-coded behaviour.

    Returns:
        DataFrame: ``iEEGnormal`` with one extra row holding the columns
        'deltaRel', 'thetaRel', 'alphaRel', 'betaRel', 'gammaRel', 'broadRel'.
        If every log band power is zero the row is NaN (0 / 0).
    """
    # 2-second segments; rounded so a float sampling rate still yields an int length.
    seg_len = int(round(2 * sampling_frequency))

    freqs, psd = welch(data_timeS, fs=sampling_frequency, window=hamming(seg_len),
                       nfft=seg_len, scaling='density', noverlap=seg_len // 2)

    if noise_band is not None:
        noise_low, noise_high = noise_band
        keep = ~((freqs >= noise_low) & (freqs <= noise_high))
        freqs = freqs[keep]
        psd = psd[keep]

    # np.trapezoid only exists in NumPy >= 2.0; older releases call it np.trapz.
    integrate = getattr(np, 'trapezoid', None)
    if integrate is None:
        integrate = np.trapz

    def bandpower(band_psd, band_freqs, freq_range):
        """Integrate the PSD over the closed interval ``freq_range``."""
        in_band = np.logical_and(band_freqs >= freq_range[0], band_freqs <= freq_range[1])
        return integrate(band_psd[in_band], band_freqs[in_band])

    bands = {'delta': (1, 4), 'theta': (4, 8), 'alpha': (8, 13),
             'beta': (13, 30), 'gamma': (30, 80), 'broad': (1, 80)}

    log_band_powers = {f'{band}log': np.log10(bandpower(psd, freqs, freq_range) + 1)
                       for band, freq_range in bands.items()}

    # Normaliser: sum of the log powers of all six bands ('broad' included).
    total_band_power = np.sum(list(log_band_powers.values()))

    relative_band_powers = {f'{band}Rel': log_band_powers[f'{band}log'] / total_band_power
                            for band in bands}

    data_to_append = pd.DataFrame([relative_band_powers])

    return pd.concat([iEEGnormal, data_to_append], ignore_index=True)

In [44]:
# not actually used
def compute_shannon_entropy(signal):
    """
    Log-scaled Shannon entropy of a signal's amplitude histogram.

    The amplitude distribution is estimated with ``np.histogram`` using
    automatic bin selection, the bin densities are rescaled to sum to one,
    empty bins are dropped, and the entropy ``H`` is computed in bits (log
    base 2). The value returned is ``log10(H + 1)``.

    Parameters:
    signal: 1D array of values

    Returns:
    float: log10(H + 1), where H is the Shannon entropy in bits
    """
    density, _edges = np.histogram(signal, bins='auto', density=True)

    # Rescale so the per-bin values form a probability mass function.
    bin_probs = density / density.sum()

    # log2(0) is undefined, so only occupied bins contribute.
    occupied = bin_probs[bin_probs > 0]

    shannon_bits = -np.sum(occupied * np.log2(occupied))

    return np.log10(shannon_bits + 1)

In [45]:
def get_norm_entropy_full_ts(ieeg_normal, data_time_s, sampling_frequency=200, window_size_mins=1, stride_mins=0.5):
    """
    Sliding-window Shannon entropy of a recording, appended as one feature row.

    The recording is cut into windows of ``window_size_mins`` minutes that
    advance by ``stride_mins`` minutes. Each window is filtered on its own
    (3rd-order Butterworth low-pass at 80 Hz, 3rd-order Butterworth high-pass
    at 1 Hz, 60 Hz notch with Q=30, all applied with ``filtfilt``), and the
    Shannon entropy (in bits) of its amplitude histogram is computed.

    Args:
        ieeg_normal (DataFrame): Frame to append the new row to (not modified in place).
        data_time_s (array): 1D signal, or 2D array with time along axis 0.
            For 2D input only the first column is used for the entropy.
        sampling_frequency (int | float): Sampling frequency in Hz.
        window_size_mins (int | float): Window length in minutes.
        stride_mins (int | float): Step between window starts in minutes.

    Returns:
        DataFrame: ``ieeg_normal`` with one extra row holding
        'entropy_mean' (``log10(mean window entropy + 1)``) and
        'entropy_std' (standard deviation of the raw window entropies).
        If the recording is shorter than one window, both values are NaN.
    """
    data_time_s = np.asarray(data_time_s)
    if data_time_s.ndim == 1:
        data_time_s = data_time_s.reshape(-1, 1)

    # Window geometry in samples (may be floats when the minute values are fractional).
    samples_per_window = sampling_frequency * 60 * window_size_mins
    stride_samples = sampling_frequency * 60 * stride_mins
    n_windows = int((len(data_time_s) - samples_per_window) // stride_samples + 1)

    entropies = []
    if n_windows > 0:
        # The filter coefficients do not depend on the window, so design them once.
        nyquist = sampling_frequency / 2
        filter_chain = (
            butter(3, 80 / nyquist, btype='low'),
            butter(3, 1 / nyquist, btype='high'),
            iirnotch(60, 30, sampling_frequency),
        )

        for i in range(n_windows):
            start_idx = int(i * stride_samples)
            end_idx = int(start_idx + samples_per_window)

            filtered = data_time_s[start_idx:end_idx, :].astype(float)
            for b, a in filter_chain:
                filtered = filtfilt(b, a, filtered, axis=0)

            # Entropy is taken from the first column only.
            signal = filtered[:, 0]

            # Histogram -> probability mass function over occupied bins.
            hist, _ = np.histogram(signal, bins='auto', density=True)
            probabilities = hist / hist.sum()
            probabilities = probabilities[probabilities > 0]

            entropies.append(-np.sum(probabilities * np.log2(probabilities)))

    if entropies:
        mean_entropy_log = np.log10(np.mean(entropies) + 1)
        std_entropy = np.std(entropies)
    else:
        # No complete window fits: emit NaN explicitly instead of relying on
        # np.mean([]) / np.std([]), which yield the same NaN plus RuntimeWarnings.
        mean_entropy_log = np.nan
        std_entropy = np.nan

    data_to_append = pd.DataFrame({
        'entropy_mean': [mean_entropy_log],
        'entropy_std': [std_entropy]
    })

    return pd.concat([ieeg_normal, data_to_append], ignore_index=True)

In [46]:
def get_norm_entropy_1min_seg(ieeg_normal, data_time_s, sampling_frequency=200):
    """
    Entropy of the first minute of each channel, appended as feature rows.

    The first ``sampling_frequency * 60`` samples are filtered with a
    3rd-order Butterworth low-pass at 80 Hz, a 3rd-order Butterworth high-pass
    at 1 Hz and a 60 Hz notch (Q=30), each applied with ``filtfilt``. For every
    channel the amplitude histogram (automatic binning) is normalised and
    passed to ``scipy.stats.entropy`` with its default arguments; the stored
    value is ``log10(entropy + 1)``.

    Args:
        ieeg_normal (DataFrame): Frame to append to (not modified in place).
        data_time_s (array): 1D signal, or 2D array with time along axis 0
            and one column per channel.
        sampling_frequency (int): Sampling frequency in Hz.

    Returns:
        DataFrame: ``ieeg_normal`` followed by one row per channel with a
        single 'entropy' column.
    """
    samples = data_time_s.reshape(-1, 1) if data_time_s.ndim == 1 else data_time_s
    first_minute = samples[:sampling_frequency * 60, :]

    nyquist = sampling_frequency / 2
    filter_chain = (
        butter(3, 80 / nyquist, btype='low'),      # low-pass at 80 Hz
        butter(3, 1 / nyquist, btype='high'),      # high-pass at 1 Hz
        iirnotch(60, 30, sampling_frequency),      # notch at 60 Hz
    )

    cleaned = first_minute.astype(float)
    for numerator, denominator in filter_chain:
        cleaned = filtfilt(numerator, denominator, cleaned, axis=0)

    def _channel_entropy(channel):
        """Entropy of one channel's normalised amplitude histogram."""
        density, _edges = np.histogram(channel, bins='auto', density=True)
        return entropy(density / density.sum())

    # Iterating over the transpose walks the channels (columns) one by one.
    raw_entropy = np.array([_channel_entropy(column) for column in cleaned.T], dtype=float)

    # Entropy is non-negative, so the +1 shift keeps the log transform at or above zero.
    new_rows = pd.DataFrame({'entropy': np.log10(raw_entropy + 1)})

    return pd.concat([ieeg_normal, new_rows], ignore_index=True)

In [47]:
def _extract_cohort_features(ts_df, patient_ids, electrode_to_patient, sampling_frequency):
    """
    Compute PSD and entropy features for every electrode of one cohort.

    Electrodes are visited patient by patient (in the order of ``patient_ids``)
    and, within a patient, in increasing column index of ``ts_df``.

    Args:
        ts_df (DataFrame): Time series, one column per electrode.
        patient_ids (array): Patient ids to process.
        electrode_to_patient (array): Patient id of each electrode column.
        sampling_frequency (int): Sampling frequency passed to the feature functions.

    Returns:
        tuple(DataFrame, DataFrame, DataFrame): PSD features, 1-minute entropy
        features and full-time-series entropy features, one row per electrode,
        all three in the same electrode order.
    """
    psd_rows, entropy_1min_rows, entropy_fullts_rows = [], [], []

    for patient in patient_ids:
        patient_el_ids = np.where(electrode_to_patient == patient)[0]

        for idx in patient_el_ids:
            electrode_data = ts_df.iloc[:, idx].values
            # Each call appends to a fresh empty frame; the single-row results
            # are stacked once below instead of growing a frame per electrode.
            psd_rows.append(get_norm_psd(
                pd.DataFrame(), electrode_data, sampling_frequency=sampling_frequency))
            entropy_1min_rows.append(get_norm_entropy_1min_seg(
                pd.DataFrame(), electrode_data, sampling_frequency=sampling_frequency))
            entropy_fullts_rows.append(get_norm_entropy_full_ts(
                pd.DataFrame(), electrode_data, sampling_frequency=sampling_frequency))

    def _stack(rows):
        """Concatenate row frames; an empty list gives an empty frame."""
        return pd.concat(rows, ignore_index=True) if rows else pd.DataFrame()

    return _stack(psd_rows), _stack(entropy_1min_rows), _stack(entropy_fullts_rows)


# Process HUP data (separate frame per feature type).
# The original loop accumulated into `hup_entropy_full_features` but assigned the
# result to `hup_entropy_fullts_features`, so only the last electrode's
# full-series entropy was kept; all three frames now have one row per electrode.
hup_psd_features, hup_entropy_1min_features, hup_entropy_fullts_features = _extract_cohort_features(
    hup_ts, hup_patient_ids, hup_idx_map_arr, hup_samp_freq)

# Combine all HUP features
hup_features = pd.concat([
    hup_psd_features,
    hup_entropy_1min_features[['entropy']].rename(columns={'entropy': 'entropy_1min'}),
    hup_entropy_fullts_features[['entropy_mean', 'entropy_std']]
], axis=1)

# Process MNI data
mni_psd_features, mni_entropy_1min_features, mni_entropy_fullts_features = _extract_cohort_features(
    mni_ts, mni_patient_ids, mni_idx_map_arr, mni_samp_freq)

# Combine all MNI features
mni_features = pd.concat([
    mni_psd_features,
    mni_entropy_1min_features[['entropy']].rename(columns={'entropy': 'entropy_1min'}),
    mni_entropy_fullts_features[['entropy_mean', 'entropy_std']]
], axis=1)

In [13]:
# NOTE(review): disabled sanity check that counts the electrodes of the first HUP
# patient. Every line is commented out, so the `22` shown under this cell is
# presumably a saved output from an earlier run -- confirm or clear it.

# # Get the ID of the first patient in the HUP dataset
# first_patient_id = hup_patient_ids[0]

# # Create a boolean mask for electrodes belonging to the first patient
# first_patient_mask = (hup_patients[0] == first_patient_id).values.flatten()

# # Filter the time-series data to include only electrodes for the first patient
# first_patient_electrodes = hup_ts.loc[:, first_patient_mask]

# # Count the number of electrodes for the first patient
# num_electrodes_first_patient = first_patient_electrodes.shape[1]

# print(num_electrodes_first_patient)

22
