In [1]:
%%capture
import os
import pandas as pd
import numpy as np
import mne
from mne.preprocessing import ICA
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.signal import welch

In [2]:
%%capture
# Define a function to extract labels from filenames
def extract_label_from_filename(filename):
    # Assuming filenames are in the format: subject_task.bdf
    task = filename.split('_')[1].split('.')[0].split('-')[1]
    return task

In [3]:
%%capture
#Convert to DataFrame
def convert_bdf_to_dataframe(bdf_filename):
    
    # Loading Data
    raw_data = mne.io.read_raw_bdf(bdf_filename, preload=True)
    ## raw_data._data = raw_data._data ** 2
    
    # ICA
    n_components = 15
    ica = ICA(n_components=n_components, random_state=97, max_iter=800)
    ica.fit(raw_data)
    
    # Exclude components
    components_to_exclude = [7, 9]
    raw_cleaned = ica.apply(raw_data.copy(), exclude=components_to_exclude)
    
    # convert to dataframe
    eeg_data_raw = raw_cleaned.get_data()
    channel_names = raw_cleaned.ch_names
    time_index = raw_cleaned.times
        
    eeg_data = pd.DataFrame(data=eeg_data_raw.T, columns=channel_names, index=time_index)
    col_names = ["AF3", "AF4", "P7", "P8", "FC5", "FC6", "T7", "T8", "Fp1", "Fp2", "Fpz", "O1", "O2"]
    eeg_data = eeg_data[col_names]
    
    # Group by each second
    segment_size = 1024
    num_segments = len(eeg_data) // segment_size
    reduced_df = pd.DataFrame(columns=eeg_data.columns)
    for i in range(num_segments):
        start_idx = i * segment_size
        end_idx = (i + 1) * segment_size
        segment_data = eeg_data.iloc[start_idx:end_idx]    
        mean_values = segment_data.mean()
        reduced_df = reduced_df.append(mean_values, ignore_index=True)

    return reduced_df, raw_data

In [4]:
req_cols = ["AF3", "AF4", "P7", "P8", "FC5", "FC6", "T7", "T8", "Fp1", "Fp2", "Fpz", "O1", "O2"]
main_df_cols = []

for col in req_cols:
    if col == 'O1' or col == 'O2':
        main_df_cols.append(f'{col} gamma mean')
        main_df_cols.append(f'{col} gamma psd_mean')
    else:
        main_df_cols.append(f'{col} alpha mean')
        main_df_cols.append(f'{col} alpha psd_mean')
        main_df_cols.append(f'{col} beta mean')
        main_df_cols.append(f'{col} beta psd_mean')
main_df_cols.append('task')
        
main_df = pd.DataFrame(columns=main_df_cols)

In [5]:
main_df

Unnamed: 0,AF3 alpha mean,AF3 alpha psd_mean,AF3 beta mean,AF3 beta psd_mean,AF4 alpha mean,AF4 alpha psd_mean,AF4 beta mean,AF4 beta psd_mean,P7 alpha mean,P7 alpha psd_mean,...,Fp2 beta psd_mean,Fpz alpha mean,Fpz alpha psd_mean,Fpz beta mean,Fpz beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task


In [6]:
def extract_mean_and_psd_mean(reduced_decomp_df, label):
    values = []
    req_channels = reduced_decomp_df.columns
    for channel in req_channels:
        mean_val = reduced_decomp_df[channel].mean()
        _, psd = welch(reduced_decomp_df[channel], fs=256)
        values.append(mean_val)
        values.append(psd.mean())
    values.append(label)
    main_df.loc[main_df.shape[0]] = values

In [7]:
data_dir1 = 'C:\Dataset_meditation\Dataset-1'# Replace with the path to your .bdf data directory (/Dataset - 3/)
data_dir2 = 'C:\Dataset_meditation\Dataset-2'

In [8]:
%%capture

# Initialize empty lists to store data and labels
data = []
labels = []
raw_eegdata = []

# Loop through all files in the directory
for filename in os.listdir(data_dir1):
    if filename.endswith('.bdf'):
        # Convert the .bdf file to a DataFrame
        eeg_data, eeg_raw = convert_bdf_to_dataframe(os.path.join(data_dir1, filename))
        
        # Extract labels from filenames
        label = extract_label_from_filename(filename)
#         # Append data and labels
        raw_eegdata.append(eeg_raw)
    
        channel_name_1 = ['O1', 'O2']
        channel_name_2 = ["AF3", "AF4", "P7", "P8", "FC5", "FC6", "T7", "T8", "Fp1", "Fp2", "Fpz"]  # Add more channel names as needed

#         # Create dictionaries to store the filtered data
        eeg_dataframe = pd.DataFrame()


        # Loop through each channel and filter data
        for channel_name in channel_name_2:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Alpha (8-13 Hz)
            alpha_filtered = eeg_channel.filter(l_freq=8, h_freq=13)
            alpha_decomp = alpha_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} alpha'] = alpha_decomp
    
            # Filter for Beta (13-30 Hz)
            beta_filtered = eeg_channel.filter(l_freq=13, h_freq=30)
            beta_decomp = beta_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} beta'] = beta_decomp
    
        for channel_name in channel_name_1:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Gamma (30-40 Hz)
            gamma_filtered = eeg_channel.filter(l_freq=30, h_freq=40)
            gamma_decomp = gamma_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} gamma'] = gamma_decomp

        # Grouping data for each second
        segment_size = 1024
        num_segments = len(eeg_dataframe) // segment_size
        reduced_decomp_df = pd.DataFrame(columns=eeg_dataframe.columns)
        for i in range(num_segments):
            start_idx = i * segment_size
            end_idx = (i + 1) * segment_size
            segment_data = eeg_dataframe.iloc[start_idx:end_idx]    
            mean_values = segment_data.mean()
            sampling_frequency = 1024  
            reduced_decomp_df = reduced_decomp_df.append(mean_values, ignore_index=True)
        
        extract_mean_and_psd_mean(reduced_decomp_df, label)

In [9]:
main_df.to_csv('main_df3.csv')

In [10]:
%%capture

# Initialize empty lists to store data and labels
data = []
labels = []
raw_eegdata = []

for filename in os.listdir(data_dir2):
    if filename.endswith('.bdf'):
        # Convert the .bdf file to a DataFrame
        eeg_data, eeg_raw = convert_bdf_to_dataframe(os.path.join(data_dir2, filename))
        
        # Extract labels from filenames
        label = extract_label_from_filename(filename)
#         # Append data and labels
        raw_eegdata.append(eeg_raw)
    
        channel_name_1 = ['O1', 'O2']
        channel_name_2 = ["AF3", "AF4", "P7", "P8", "FC5", "FC6", "T7", "T8", "Fp1", "Fp2", "Fpz"]  # Add more channel names as needed

#         # Create dictionaries to store the filtered data
        eeg_dataframe = pd.DataFrame()


        # Loop through each channel and filter data
        for channel_name in channel_name_2:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Alpha (8-13 Hz)
            alpha_filtered = eeg_channel.filter(l_freq=8, h_freq=13)
            alpha_decomp = alpha_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} alpha'] = alpha_decomp
    
            # Filter for Beta (13-30 Hz)
            beta_filtered = eeg_channel.filter(l_freq=13, h_freq=30)
            beta_decomp = beta_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} beta'] = beta_decomp
    
        for channel_name in channel_name_1:
            eeg_channel = eeg_raw.copy().pick_channels([channel_name])
    
            # Filter for Gamma (30-40 Hz)
            gamma_filtered = eeg_channel.filter(l_freq=30, h_freq=40)
            gamma_decomp = gamma_filtered.get_data()[0]
            eeg_dataframe[f'{channel_name} gamma'] = gamma_decomp

        # Grouping data for each second
        segment_size = 1024
        num_segments = len(eeg_dataframe) // segment_size
        reduced_decomp_df = pd.DataFrame(columns=eeg_dataframe.columns)
        for i in range(num_segments):
            start_idx = i * segment_size
            end_idx = (i + 1) * segment_size
            segment_data = eeg_dataframe.iloc[start_idx:end_idx]    
            mean_values = segment_data.mean()
            sampling_frequency = 1024  
            reduced_decomp_df = reduced_decomp_df.append(mean_values, ignore_index=True)
        
        extract_mean_and_psd_mean(reduced_decomp_df, label)

In [11]:
main_df.to_csv('main_df1.csv')

In [12]:
df1 = pd.read_csv('main_df3.csv')

# Load the second CSV file into another DataFrame
df2 = pd.read_csv('main_df1.csv')

# Append df2 to df1
merged_df = pd.concat([df1, df2], ignore_index=True)

# Save the merged DataFrame to a new CSV file
merged_df.to_csv('merged_dataset_final_base.csv', index=False)

In [13]:
main_df = pd.read_csv('merged_dataset_final_base.csv')

In [14]:
main_df.drop('Unnamed: 0', axis = 1, inplace = True)

In [15]:
main_df

Unnamed: 0,AF3 alpha mean,AF3 alpha psd_mean,AF3 beta mean,AF3 beta psd_mean,AF4 alpha mean,AF4 alpha psd_mean,AF4 beta mean,AF4 beta psd_mean,P7 alpha mean,P7 alpha psd_mean,...,Fp2 beta psd_mean,Fpz alpha mean,Fpz alpha psd_mean,Fpz beta mean,Fpz beta psd_mean,O1 gamma mean,O1 gamma psd_mean,O2 gamma mean,O2 gamma psd_mean,task
0,-5.402378e-10,5.356978e-16,3.673941e-11,1.156484e-17,-5.923319e-10,5.714380e-16,2.194584e-11,1.374483e-17,4.221641e-10,1.148155e-16,...,1.361871e-17,-6.629276e-10,5.598126e-16,-1.856752e-11,1.266935e-17,1.173535e-11,7.829113e-19,2.228500e-11,3.484778e-19,med1breath
1,-4.836307e-10,1.463762e-16,-6.057227e-12,9.529278e-18,-5.647640e-10,1.789775e-16,4.804241e-11,1.248500e-17,9.497007e-11,4.112825e-17,...,1.208274e-17,-4.997515e-10,1.624686e-16,4.063321e-11,1.084634e-17,5.975980e-11,4.053926e-19,6.038006e-11,3.823864e-19,med2
2,-9.673828e-10,7.897841e-16,1.744998e-11,1.176160e-17,-1.022646e-09,8.437659e-16,-2.244134e-12,1.417950e-17,-1.833288e-10,1.492559e-16,...,1.288615e-17,-9.218771e-10,8.113731e-16,8.156524e-12,1.237399e-17,3.610567e-11,4.553950e-19,8.135557e-12,9.183567e-19,think1
3,5.053254e-10,5.912828e-16,-6.295857e-11,8.884157e-18,5.618380e-10,6.208325e-16,-6.218825e-11,1.151538e-17,2.379154e-10,1.015717e-16,...,1.018916e-17,6.104279e-10,6.253911e-16,-9.659438e-11,9.820985e-18,7.245244e-11,5.911956e-19,1.110158e-10,7.396388e-19,think2
4,-4.873027e-10,1.139989e-15,-1.993154e-10,1.929155e-17,-4.406758e-10,1.123961e-15,-9.637966e-11,1.963047e-17,4.936242e-10,5.143707e-16,...,1.720036e-17,-4.561655e-10,1.121919e-15,-1.240375e-10,1.678800e-17,-5.787061e-11,1.017307e-18,3.224554e-11,2.436137e-18,med1breath
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
583,-1.098158e-10,8.828169e-17,-1.552929e-10,9.390739e-18,-5.022041e-11,9.661515e-17,-1.501402e-10,1.003571e-17,1.001916e-10,6.353042e-17,...,1.066868e-17,-2.964541e-11,1.033703e-16,-1.440504e-10,1.077706e-17,4.599238e-11,7.205702e-17,1.189598e-12,1.767439e-18,think2
584,-6.446508e-10,1.673368e-15,2.044505e-10,1.212702e-17,-5.760835e-10,1.803364e-15,1.918649e-10,1.436054e-17,-6.191673e-11,2.056511e-16,...,1.304408e-17,-5.207523e-10,1.795228e-15,1.832008e-10,1.214871e-17,-3.354180e-11,5.831825e-19,-3.003426e-11,6.146522e-19,med1breath
585,5.076985e-11,2.968612e-16,-3.102505e-10,2.533674e-17,1.703507e-10,3.277631e-16,-3.748135e-10,4.062819e-17,-2.202837e-10,8.033518e-17,...,3.754514e-17,5.139101e-11,3.255343e-16,-3.669139e-10,3.408049e-17,-4.131331e-11,4.771140e-19,-3.894485e-11,4.450681e-19,med2
586,-5.923139e-10,6.431642e-16,-1.658459e-10,1.260369e-17,-5.512093e-10,7.196533e-16,-1.568048e-10,1.561231e-17,-7.559385e-10,1.400364e-16,...,1.304629e-17,-5.783068e-10,6.812808e-16,-1.725153e-10,1.257172e-17,-1.816352e-10,5.996827e-19,-2.015333e-10,5.794415e-19,think1


In [16]:
X = main_df.drop('task', axis = 1)
y = main_df['task']

In [17]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score


min_max_scaler = MinMaxScaler()
X_min = min_max_scaler.fit_transform(X)

label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
class_labels = label_encoder.classes_

X_train, X_test, y_train, y_test = train_test_split(X_min, y_encoded, test_size=0.1, random_state=42)

In [18]:
from sklearn.linear_model import LogisticRegression
# Create a Random Forest classifier (you can try other classifiers as well)
clf = LogisticRegression(random_state=11)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

accuracy_score(y_test, y_pred)

0.288135593220339

In [19]:
from sklearn.ensemble import AdaBoostClassifier

min_max_scaler = MinMaxScaler()
X = min_max_scaler.fit_transform(X)
model = AdaBoostClassifier()

model.fit(X_train,y_train)
preds = model.predict(X_test)
accuracy = accuracy_score(y_test, preds)
print(accuracy)

0.3898305084745763


In [20]:
pip install xgboost

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [21]:
from xgboost import XGBClassifier

# Standardize the features (optional but recommended)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
model = XGBClassifier()
model.fit(X_train, y_train)
preds = model.predict(X_test)
accuracy_score(y_test, preds)

0.711864406779661

In [22]:
import numpy as np
from sklearn.svm import SVC

# Create an SVM classifier
clf = SVC(kernel='linear', C=1.0)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Make predictions on the test data
y_pred = clf.predict(X_test)

# Evaluate the classifier's accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.37


In [23]:
from sklearn.tree import DecisionTreeClassifier

clf = DecisionTreeClassifier(random_state=42)

# Train the classifier on the training data
clf.fit(X_train, y_train)

# Evaluate the classifier on the test data (optional)
accuracy = clf.score(X_test, y_test)
print(f'Accuracy: {accuracy:.2f}')

# Get feature importances
feature_importances = clf.feature_importances_

# Print the importance of each feature
for feature, importance in zip(X.columns, feature_importances):
    print(f'{feature}: {importance:.4f}')

Accuracy: 0.71


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

In [None]:
import tensorflow as tf
from tensorflow import keras

# Define a neural network model
model = keras.Sequential([
    keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(len(class_labels), activation='softmax')  # Output layer with softmax activation
])
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

# Compile the model with categorical cross-entropy loss
model.compile(optimizer='adam',
              loss='categorical_crossentropy',  # Use categorical cross-entropy
              metrics=['accuracy'])

# Convert target labels to one-hot encoding
y_train_one_hot = keras.utils.to_categorical(y_train, len(class_labels))
y_test_one_hot = keras.utils.to_categorical(y_test, len(class_labels))

# Train the model
model.fit(X_train, y_train_one_hot, epochs=100, batch_size=64, validation_split=0.2, callbacks=[callback])

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test_one_hot)
print(f'Test accuracy: {test_acc:.4f}')

# Make predictions
predictions = model.predict(X_test)

# Convert predicted labels back to string labels
predicted_labels = [class_labels[np.argmax(pred)] for pred in predictions]
predicted_labels_encoded = label_encoder.transform(predicted_labels)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Assuming you have your data loaded as X_train, y_train, X_test, y_test
num_unique_values = X_train.shape[0]
input_sequence_length = 28
embedding_dim = 1
# Define the CNN model
model = keras.Sequential([
    layers.Embedding(input_dim=num_unique_values, output_dim=embedding_dim, input_length=input_sequence_length),
    layers.Conv1D(128, 5, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Conv1D(64, 5, activation='relu'),
    layers.MaxPooling1D(2),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.2)

# Evaluate the model on the test data
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Make predictions
predictions = model.predict(X_test)

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Assuming y_test contains true labels and predicted_labels contains predicted labels
# These should be NumPy arrays or Python lists.

# Calculate the confusion matrix
conf_matrix = confusion_matrix(y_test, predicted_labels_encoded)

# Display the confusion matrix as a heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Replace these with your actual predicted and true labels
predicted_labels = y.unique()
true_labels = y.unique()

# Get the unique class names from the labels
class_names = np.unique(true_labels)

# Compute the confusion matrix
confusion = confusion_matrix(true_labels, predicted_labels, labels=class_names)

# Create a heatmap to visualize the confusion matrix
plt.figure(figsize=(8, 6))
sns.set(font_scale=1.2)
sns.heatmap(confusion, annot=True, fmt='d', cmap="Blues", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# You can also print a classification report with precision, recall, and F1-score
print(classification_report(true_labels, predicted_labels, target_names=class_names))