In [1]:
%%capture
import os
import pandas as pd
import numpy as np
import mne
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.signal import welch

In [2]:
%%capture
# Define a function to extract labels from filenames
def extract_label_from_filename(filename):
    # Assuming filenames are in the format: subject_task.bdf
    task = filename.split('_')[1].split('.')[0]
    return task

In [5]:
%%capture
#Convert to DataFrame
def convert_bdf_to_dataframe(bdf_filename):
    raw_data = mne.io.read_raw_bdf(bdf_filename, preload=True)
    eeg_data_raw = raw_data.get_data()
    channel_names = raw_data.ch_names
    time_index = raw_data.times
    eeg_data = pd.DataFrame(data=eeg_data_raw.T, columns=channel_names, index=time_index)
    col_names = ['O1', 'O2', 'F3', 'F4', 'C3' ,'C4' , 'Fp1', 'Fp2']
    eeg_data = eeg_data[col_names]
    segment_size = 1024
    num_segments = len(eeg_data) // segment_size
    reduced_df = pd.DataFrame(columns=eeg_data.columns)
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = (i + 1) * segment_size
        segment_data = eeg_data.iloc[start_idx:end_idx]    
        mean_values = segment_data.mean()
        sampling_frequency = 1024  
        psd_data = {}  # Dictionary to store PSD results for each electrode
        reduced_df = reduced_df.append(mean_values, ignore_index=True)

    return reduced_df, raw_data

In [30]:
req_cols = ['F3', 'F4', 'C3' ,'C4' , 'Fp1', 'Fp2', 'O1', 'O2']
main_df_cols = []

for col in req_cols:
    if col == 'O1' or col == 'O2':
        main_df_cols.append(f'{col} gamma mean')
        main_df_cols.append(f'{col} gamma psd_mean')
    else:
        main_df_cols.append(f'{col} alpha mean')
        main_df_cols.append(f'{col} alpha psd_mean')
        main_df_cols.append(f'{col} beta mean')
        main_df_cols.append(f'{col} beta psd_mean')
main_df_cols.append('task')
        
main_df = pd.DataFrame(columns=main_df_cols)

In [31]:
main_df

Unnamed: 0,F3 alpha mean,F3 alpha psd_mean,F3 beta mean,F3 beta psd_mean,F4 alpha mean,F4 alpha psd_mean,F4 beta mean,F4 beta psd_mean,C3 alpha mean,C3 alpha psd_mean,...,Fp1 beta psd_mean,Fp2 alpha mean,Fp2 alpha psd_mean,Fp2 beta mean,Fp2 beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task


In [32]:
def extract_mean_and_psd_mean(reduced_decomp_df, label):
    values = []
    req_channels = reduced_decomp_df.columns
    for channel in req_channels:
        if channel not in req_channels:
            continue
        mean_val = reduced_decomp_df[channel].mean()
        _, psd = welch(reduced_decomp_df[channel], fs=256)
        values.append(mean_val)
        values.append(psd.mean())
    values.append(label)
    main_df.loc[main_df.shape[0]] = values

In [29]:
fname = 'sub-001_task-med1breath_eeg.bdf'
fname = fname.split('.')[0].split('_')[1].split('-')[1]
fname

'med1breath'

In [33]:
%%capture
# Load and preprocess the data
data_dir = './Dataset/'  # Replace with the path to your .bdf data directory

# Initialize empty lists to store data and labels
data = []
labels = []
raw_eegdata = []

# Loop through all files in the directory
for filename in os.listdir(data_dir):
    if filename.endswith('.bdf'):
        # Convert the .bdf file to a DataFrame
        eeg_data, eeg_raw = convert_bdf_to_dataframe(os.path.join(data_dir, filename))
        
        # Extract labels from filenames
        label = extract_label_from_filename(filename)
#         # Append data and labels
        data.append(eeg_data)
        raw_eegdata.append(eeg_raw)
        labels.extend([label] * len(eeg_data))
        channel_name_1 = ['O1', 'O2']
        channel_name_2 = ['F3', 'F4','C3' ,'C4' , 'Fp1', 'Fp2']  # Add more channel names as needed

#         # Create dictionaries to store the filtered data
        eeg_dataframe = pd.DataFrame()


        # Loop through each channel and filter data
        for channel_name in channel_name_2:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Alpha (8-13 Hz)
            alpha_filtered = eeg_channel.filter(l_freq=8, h_freq=13)
            alpha_decomp = alpha_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} alpha'] = alpha_decomp
    
            # Filter for Beta (13-30 Hz)
            beta_filtered = eeg_channel.filter(l_freq=13, h_freq=30)
            beta_decomp = beta_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} beta'] = beta_decomp
    
        for channel_name in channel_name_1:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Gamma (30-40 Hz)
            gamma_filtered = eeg_channel.filter(l_freq=30, h_freq=40)
            gamma_decomp = gamma_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} gamma'] = gamma_decomp

        # Grouping data for each second
        segment_size = 1024
        num_segments = len(eeg_dataframe) // segment_size
        reduced_decomp_df = pd.DataFrame(columns=eeg_dataframe.columns)
        for i in range(num_segments):
            start_idx = i * segment_size
            end_idx = (i + 1) * segment_size
            segment_data = eeg_dataframe.iloc[start_idx:end_idx]    
            mean_values = segment_data.mean()
            sampling_frequency = 1024  
            reduced_decomp_df = reduced_decomp_df.append(mean_values, ignore_index=True)
        
        extract_mean_and_psd_mean(reduced_decomp_df, label)

In [34]:
main_df

Unnamed: 0,F3 alpha mean,F3 alpha psd_mean,F3 beta mean,F3 beta psd_mean,F4 alpha mean,F4 alpha psd_mean,F4 beta mean,F4 beta psd_mean,C3 alpha mean,C3 alpha psd_mean,...,Fp1 beta psd_mean,Fp2 alpha mean,Fp2 alpha psd_mean,Fp2 beta mean,Fp2 beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task
0,-5.429434e-10,5.039194e-16,2.686645e-11,1.004214e-17,-4.91853e-10,5.249395e-16,7.623419e-11,1.0793290000000001e-17,-1.215472e-10,2.861505e-16,...,5.157847e-21,-5.699121e-10,5.658479e-16,-1.175518e-11,1.3618710000000001e-17,1.173535e-11,7.829112999999999e-19,2.2285e-11,3.4847779999999996e-19,task-med1breath
1,-5.133369e-10,1.327326e-16,-5.110144e-11,9.481876e-18,-5.010362e-10,1.529163e-16,9.624435e-13,1.12626e-17,-2.186645e-10,6.625611e-17,...,4.588916e-21,-6.115117e-10,1.751926e-16,-1.165412e-11,1.2082740000000002e-17,5.97598e-11,4.0539259999999996e-19,6.038006e-11,3.8238639999999995e-19,task-med2
2,-9.313443e-10,7.420854e-16,3.236768e-11,1.1272550000000001e-17,-9.369289e-10,7.383739e-16,-3.36917e-11,1.1326470000000001e-17,-6.612406e-10,4.649914e-16,...,5.269756e-21,-8.771896e-10,7.956243e-16,-1.373691e-11,1.2886150000000001e-17,3.610567e-11,4.553949999999999e-19,8.135557e-12,9.183567e-19,task-think1
3,4.173475e-10,5.626652e-16,-4.783866e-11,7.97825e-18,5.458868e-10,5.861192e-16,-7.540035e-12,9.177947e-18,3.614911e-12,3.045094e-16,...,5.615696e-21,5.818433e-10,6.212969e-16,-6.190433e-11,1.018916e-17,7.245244e-11,5.911956e-19,1.110158e-10,7.396388e-19,task-think2
4,-5.602067e-10,1.114053e-15,-2.489931e-10,1.8905220000000003e-17,-7.772884e-10,1.145573e-15,-1.328345e-10,2.0522560000000003e-17,-7.103407e-11,6.345948e-16,...,5.729356e-21,-4.25669e-10,1.10351e-15,-1.091693e-10,1.720036e-17,-5.787061e-11,1.017307e-18,3.224554e-11,2.436137e-18,task-med1breath
5,6.044236e-10,5.79236e-16,2.572398e-11,1.543672e-17,6.589854e-10,5.866879e-16,-1.366284e-12,1.7672650000000003e-17,4.493653e-10,3.518649e-16,...,5.890833e-21,5.87584e-10,5.652112e-16,-3.898808e-11,1.605431e-17,7.124439e-12,1.100158e-18,-8.627181e-11,2.4568680000000002e-18,task-med2
6,4.415423e-10,8.984739000000001e-17,1.97124e-10,1.542145e-17,2.520625e-10,1.095181e-16,2.021261e-10,1.643429e-17,8.446758e-11,6.022223e-17,...,5.024164e-21,3.920046e-10,9.883028000000001e-17,2.550571e-10,1.756384e-17,7.07756e-12,1.196177e-18,3.513366e-11,1.571127e-18,task-think2
