In [1]:
%%capture
import os
import pandas as pd
import numpy as np
import mne
from mne.preprocessing import ICA
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.signal import welch

In [2]:
%%capture
# Define a function to extract labels from filenames
def extract_label_from_filename(filename):
    # Assuming filenames are in the format: subject_task.bdf
    task = filename.split('_')[1].split('.')[0].split('-')[1]
    return task

In [3]:
%%capture
#Convert to DataFrame
def convert_bdf_to_dataframe(bdf_filename):
    
    # Loading Data
    raw_data = mne.io.read_raw_bdf(bdf_filename, preload=True)
    ## raw_data._data = raw_data._data ** 2
    
    # ICA
    ## n_components = 15
    ## ica = ICA(n_components=n_components, random_state=97, max_iter=800)
    ## ica.fit(raw_data)
    
    # Exclude components
    ## components_to_exclude = [7, 9]
    ## raw_cleaned = ica.apply(raw_data.copy(), exclude=components_to_exclude)
    
    # convert to dataframe
    eeg_data_raw = raw_cleaned.get_data()
    channel_names = raw_cleaned.ch_names
    time_index = raw_cleaned.times
        
    eeg_data = pd.DataFrame(data=eeg_data_raw.T, columns=channel_names, index=time_index)
    col_names = ['O1', 'O2', 'F3', 'F4', 'C3' ,'C4' , 'Fp1', 'Fp2']
    eeg_data = eeg_data[col_names]
    
    # Group by each second
    segment_size = 1024
    num_segments = len(eeg_data) // segment_size
    reduced_df = pd.DataFrame(columns=eeg_data.columns)
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = (i + 1) * segment_size
        segment_data = eeg_data.iloc[start_idx:end_idx]    
        mean_values = segment_data.mean()
        reduced_df = reduced_df.append(mean_values, ignore_index=True)

    return reduced_df, raw_data

In [4]:
req_cols = ['F3', 'F4', 'C3' ,'C4' , 'Fp1', 'Fp2', 'O1', 'O2']
main_df_cols = []

for col in req_cols:
    if col == 'O1' or col == 'O2':
        main_df_cols.append(f'{col} gamma mean')
        main_df_cols.append(f'{col} gamma psd_mean')
    else:
        main_df_cols.append(f'{col} alpha mean')
        main_df_cols.append(f'{col} alpha psd_mean')
        main_df_cols.append(f'{col} beta mean')
        main_df_cols.append(f'{col} beta psd_mean')
main_df_cols.append('task')
        
main_df = pd.DataFrame(columns=main_df_cols)

In [5]:
main_df

Unnamed: 0,F3 alpha mean,F3 alpha psd_mean,F3 beta mean,F3 beta psd_mean,F4 alpha mean,F4 alpha psd_mean,F4 beta mean,F4 beta psd_mean,C3 alpha mean,C3 alpha psd_mean,...,Fp1 beta psd_mean,Fp2 alpha mean,Fp2 alpha psd_mean,Fp2 beta mean,Fp2 beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task


In [6]:
def extract_mean_and_psd_mean(reduced_decomp_df, label):
    values = []
    req_channels = reduced_decomp_df.columns
    for channel in req_channels:
        mean_val = reduced_decomp_df[channel].mean()
        _, psd = welch(reduced_decomp_df[channel], fs=256)
        values.append(mean_val)
        values.append(psd.mean())
    values.append(label)
    main_df.loc[main_df.shape[0]] = values

In [7]:
data_dir1 = '../Rishikest_MIT_Dataset/'# Replace with the path to your .bdf data directory (/Dataset - 3/)
data_dir2 = './Dataset-2'

In [8]:
%%capture

# Initialize empty lists to store data and labels
data = []
labels = []
raw_eegdata = []

# Loop through all files in the directory
for filename in os.listdir(data_dir1):
    if filename.endswith('.bdf'):
        # Convert the .bdf file to a DataFrame
        eeg_data, eeg_raw = convert_bdf_to_dataframe(os.path.join(data_dir1, filename))
        
        # Extract labels from filenames
        label = extract_label_from_filename(filename)
#         # Append data and labels
        raw_eegdata.append(eeg_raw)
    
        channel_name_1 = ['O1', 'O2']
        channel_name_2 = ['F3', 'F4','C3' ,'C4' , 'Fp1', 'Fp2']  # Add more channel names as needed

#         # Create dictionaries to store the filtered data
        eeg_dataframe = pd.DataFrame()


        # Loop through each channel and filter data
        for channel_name in channel_name_2:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Alpha (8-13 Hz)
            alpha_filtered = eeg_channel.filter(l_freq=8, h_freq=13)
            alpha_decomp = alpha_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} alpha'] = alpha_decomp
    
            # Filter for Beta (13-30 Hz)
            beta_filtered = eeg_channel.filter(l_freq=13, h_freq=30)
            beta_decomp = beta_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} beta'] = beta_decomp
    
        for channel_name in channel_name_1:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Gamma (30-40 Hz)
            gamma_filtered = eeg_channel.filter(l_freq=30, h_freq=40)
            gamma_decomp = gamma_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} gamma'] = gamma_decomp

        # Grouping data for each second
        segment_size = 1024
        num_segments = len(eeg_dataframe) // segment_size
        reduced_decomp_df = pd.DataFrame(columns=eeg_dataframe.columns)
        for i in range(num_segments):
            start_idx = i * segment_size
            end_idx = (i + 1) * segment_size
            segment_data = eeg_dataframe.iloc[start_idx:end_idx]    
            mean_values = segment_data.mean()
            sampling_frequency = 1024  
            reduced_decomp_df = reduced_decomp_df.append(mean_values, ignore_index=True)
        
        extract_mean_and_psd_mean(reduced_decomp_df, label)

In [9]:
main_df.to_csv('main_df3.csv')

In [None]:
%%capture

# Initialize empty lists to store data and labels
data = []
labels = []
raw_eegdata = []

for filename in os.listdir(data_dir2):
    if filename.endswith('.bdf'):
        # Convert the .bdf file to a DataFrame
        eeg_data, eeg_raw = convert_bdf_to_dataframe(os.path.join(data_dir2, filename))
        
        # Extract labels from filenames
        label = extract_label_from_filename(filename)
#         # Append data and labels
        raw_eegdata.append(eeg_raw)
    
        channel_name_1 = ['O1', 'O2']
        channel_name_2 = ['F3', 'F4','C3' ,'C4' , 'Fp1', 'Fp2']  # Add more channel names as needed

#         # Create dictionaries to store the filtered data
        eeg_dataframe = pd.DataFrame()


        # Loop through each channel and filter data
        for channel_name in channel_name_2:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Alpha (8-13 Hz)
            alpha_filtered = eeg_channel.filter(l_freq=8, h_freq=13)
            alpha_decomp = alpha_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} alpha'] = alpha_decomp
    
            # Filter for Beta (13-30 Hz)
            beta_filtered = eeg_channel.filter(l_freq=13, h_freq=30)
            beta_decomp = beta_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} beta'] = beta_decomp
    
        for channel_name in channel_name_1:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Gamma (30-40 Hz)
            gamma_filtered = eeg_channel.filter(l_freq=30, h_freq=40)
            gamma_decomp = gamma_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} gamma'] = gamma_decomp

        # Grouping data for each second
        segment_size = 1024
        num_segments = len(eeg_dataframe) // segment_size
        reduced_decomp_df = pd.DataFrame(columns=eeg_dataframe.columns)
        for i in range(num_segments):
            start_idx = i * segment_size
            end_idx = (i + 1) * segment_size
            segment_data = eeg_dataframe.iloc[start_idx:end_idx]    
            mean_values = segment_data.mean()
            sampling_frequency = 1024  
            reduced_decomp_df = reduced_decomp_df.append(mean_values, ignore_index=True)
        
        extract_mean_and_psd_mean(reduced_decomp_df, label)

In [None]:
main_df.to_csv('main_df1.csv')

In [None]:
df1 = pd.read_csv('main_df3.csv')

# Load the second CSV file into another DataFrame
df2 = pd.read_csv('merged_dataset.csv')

# Append df2 to df1
merged_df = pd.concat([df1, df2], ignore_index=True)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_dataset_final.csv', index=False)

In [11]:
main_df = pd.read_csv('merged_data.csv')

In [12]:
main_df.drop('Unnamed: 0', axis = 1, inplace = True)

In [13]:
main_df

Unnamed: 0,F3 alpha mean,F3 alpha psd_mean,F3 beta mean,F3 beta psd_mean,F4 alpha mean,F4 alpha psd_mean,F4 beta mean,F4 beta psd_mean,C3 alpha mean,C3 alpha psd_mean,...,Fp1 beta psd_mean,Fp2 alpha mean,Fp2 alpha psd_mean,Fp2 beta mean,Fp2 beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task
0,-3.513743e-13,3.7261330000000002e-22,-2.682559e-14,3.288808e-23,-9.515559e-14,5.56255e-21,-5.775861e-13,7.106336e-22,-6.227103e-13,6.441535e-21,...,1.301683e-21,2.104581e-12,3.4917049999999995e-19,-8.972747e-12,3.815544e-20,1.462627e-12,8.390667000000001e-23,-1.834673e-13,1.2594519999999999e-24,med1breath
1,2.641169e-12,4.587082e-21,-9.25007e-13,5.641363000000001e-22,7.079107e-12,3.216913e-20,-5.932923e-14,3.037872e-21,1.046693e-12,7.630595e-22,...,1.351102e-21,1.886877e-11,4.980549e-19,-1.780069e-12,4.427011e-20,-4.275238e-13,1.377218e-22,-2.358534e-13,1.5240090000000003e-23,med2
2,-1.110026e-12,9.76972e-21,9.964152e-14,4.792419e-22,-4.087955e-12,8.125469e-20,5.467051e-13,4.18445e-21,-1.788294e-12,1.903302e-20,...,1.4846e-21,-9.369546e-12,2.9360299999999995e-19,1.199184e-12,1.653221e-20,1.045572e-14,6.581794e-27,-9.197855e-14,7.589472e-24,think1
3,-1.985656e-12,2.873449e-21,2.032677e-13,2.9739240000000004e-22,-6.230472e-12,3.9105419999999996e-20,1.097228e-12,3.8670609999999996e-21,-2.167441e-12,8.275744e-21,...,1.339161e-21,-1.426302e-11,1.3627509999999998e-19,1.125618e-12,1.4550479999999998e-20,9.151191e-16,4.195413e-27,-1.023871e-13,3.604797e-24,think2
4,6.52876e-12,3.5276699999999996e-20,-1.353519e-12,1.106202e-21,9.481486e-12,4.706429e-20,-1.541731e-12,1.674903e-21,4.119992e-13,1.01855e-22,...,1.603727e-21,1.986007e-11,2.7209549999999996e-19,-3.152343e-12,8.206781e-21,-7.601483e-14,9.007884000000001e-25,1.798727e-14,1.490968e-24,med1breath
5,-1.37928e-12,4.423076e-20,1.223617e-12,1.180685e-21,-3.906943e-12,8.441384999999999e-20,1.021773e-12,2.495726e-21,2.212569e-13,7.981192000000001e-22,...,1.377544e-21,-7.881025e-12,3.2644079999999996e-19,2.327577e-12,8.140498e-21,-3.391651e-14,1.063922e-24,-8.547079e-14,2.1961020000000003e-23,med2
6,-4.587835e-12,4.330082e-20,1.72718e-12,3.1955739999999998e-21,-3.870291e-12,4.189717e-20,1.403395e-12,3.225566e-21,-5.293694e-13,1.09189e-21,...,1.432964e-21,-1.996213e-11,7.977402999999999e-19,1.147115e-11,4.9530879999999996e-20,1.190654e-13,3.5545550000000005e-23,2.47498e-13,2.2257270000000003e-23,think1
7,1.329518e-12,3.092606e-20,-5.44105e-13,3.797635e-21,1.053348e-12,2.256651e-20,-1.16017e-13,2.483586e-21,3.385722e-13,7.190826e-23,...,1.303909e-21,1.977747e-12,4.945291e-19,-1.645501e-12,4.143383e-20,-2.415048e-13,5.554593e-24,-4.687246e-14,1.324148e-24,think2
8,-1.464838e-12,3.998122e-20,7.614474e-14,9.044639e-21,-2.269418e-12,3.0749249999999997e-20,-4.057946e-13,6.314228e-21,-2.429279e-12,3.773124e-21,...,1.277685e-21,1.529181e-12,1.355524e-20,9.406878e-13,2.428333e-21,-6.834998e-14,3.473931e-24,1.19815e-13,6.617865000000001e-23,med1breath
9,-6.682706e-13,4.35318e-20,1.798419e-12,6.372825e-21,-2.334771e-12,2.913618e-20,-3.185245e-12,3.965142e-21,-8.88018e-13,1.680661e-21,...,1.423573e-21,5.059446e-13,1.1226949999999999e-20,-1.420028e-12,1.473201e-21,-1.842385e-14,1.510362e-24,4.958239e-13,2.337877e-23,med2


In [14]:
X = main_df.drop('task', axis = 1)
y = main_df['task']

In [15]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score


min_max_scaler = MinMaxScaler()
X_min = min_max_scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_labels = label_encoder.classes_

X_train, X_test, y_train, y_test = train_test_split(X_min, y_encoded, test_size=0.1, random_state=42)


In [16]:
from sklearn.linear_model import LogisticRegression
# Create a Random Forest classifier (you can try other classifiers as well)
clf = LogisticRegression(random_state=11)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.0

In [17]:
from sklearn.ensemble import AdaBoostClassifier

min_max_scaler = MinMaxScaler()
X = min_max_scaler.fit_transform(X)
model = AdaBoostClassifier()

model.fit(X_train,y_train)
preds = model.predict(X_test)
accuracy = accuracy_score(y_test, preds)
print(accuracy)

0.0


In [18]:
from xgboost import XGBClassifier

# Standardize the features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
model = XGBClassifier()
model.fit(X_train, y_train)
preds = model.predict(X_test)
accuracy_score(y_test, preds)

  from pandas import MultiIndex, Int64Index




0.0

In [19]:
import numpy as np
from sklearn.svm import SVC

# Create an SVM classifier
clf = SVC(kernel='linear', C=1.0)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the classifier's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')


Accuracy: 0.00


In [20]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Evaluate the classifier on the test data (optional)
accuracy = clf.score(X_test, y_test)
print(f'Accuracy: {accuracy:.2f}')

# Get feature importances
feature_importances = clf.feature_importances_

# Print the importance of each feature
for feature, importance in zip(X.columns, feature_importances):
    print(f'{feature}: {importance:.4f}')

Accuracy: 0.50


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

In [None]:
import tensorflow as tf
from tensorflow import keras

# Define a neural network model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(len(class_labels), activation='softmax')  # Output layer with softmax activation
])
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

# Compile the model with categorical cross-entropy loss
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # Use categorical cross-entropy
              metrics=['accuracy'])

# Convert target labels to one-hot encoding
y_train_one_hot = keras.utils.to_categorical(y_train, len(class_labels))
y_test_one_hot = keras.utils.to_categorical(y_test, len(class_labels))

# Train the model
model.fit(X_train, y_train_one_hot, epochs=100, batch_size=64, validation_split=0.2, callbacks=[callback])

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
print(f'Test accuracy: {test_acc:.4f}')

# Make predictions
predictions = model.predict(X_test)

# Convert predicted labels back to string labels
predicted_labels = [class_labels[np.argmax(pred)] for pred in predictions]
predicted_labels_encoded = label_encoder.transform(predicted_labels)


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Assuming you have your data loaded as X_train, y_train, X_test, y_test
num_unique_values = X_train.shape[0]
input_sequence_length = 28
embedding_dim = 1
# Define the CNN model
model = keras.Sequential([
    layers.Embedding(input_dim=num_unique_values, output_dim=embedding_dim, input_length=input_sequence_length),
    layers.Conv1D(128, 5, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 5, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Make predictions
predictions = model.predict(X_test)


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming y_test contains true labels and predicted_labels contains predicted labels
# These should be NumPy arrays or Python lists.

# Calculate the confusion matrix
conf_matrix = confusion_matrix(y_test, predicted_labels_encoded)

# Display the confusion matrix as a heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Replace these with your actual predicted and true labels
predicted_labels = y.unique()
true_labels = y.unique()

# Get the unique class names from the labels
class_names = np.unique(true_labels)

# Compute the confusion matrix
confusion = confusion_matrix(true_labels, predicted_labels, labels=class_names)

# Create a heatmap to visualize the confusion matrix
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)
sns.heatmap(confusion, annot=True, fmt='d', cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# You can also print a classification report with precision, recall, and F1-score
print(classification_report(true_labels, predicted_labels, target_names=class_names))
