In [1]:
%pip install mne numpy scipy scikit-learn tensorflow keras colorlog shap

Collecting mne
  Downloading mne-1.7.1-py3-none-any.whl.metadata (13 kB)
Collecting colorlog
  Downloading colorlog-6.8.2-py3-none-any.whl.metadata (10 kB)
Collecting shap
  Downloading shap-0.46.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)
Collecting pooch>=1.5 (from mne)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting slicer==0.0.8 (from shap)
  Downloading slicer-0.0.8-py3-none-any.whl.metadata (4.0 kB)
Downloading mne-1.7.1-py3-none-any.whl (7.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m62.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Downloading shap-0.46.0-cp311-cp311-manylinux_2_12_x86_64.manylinux2010_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (540 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m540.2/540.2 kB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownlo

In [2]:
import utils
import mne
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
import numpy as np
import os
import shap
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

logger = utils.get_logger()

2024-08-18 19:29:50.864111: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-18 19:29:50.893151: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-18 19:29:50.901926: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-18 19:29:50.925325: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Neural networks
This notebook explores the usability of neural network architectures on the cleaned rsEEG data. This doesn't make use of the extracted features, which means that there is a larger chance of finding patterns in the raw data and might result in better generalizability to other patients.

In [3]:
labels_df = utils.get_metadata_df("dataset-cleaned", "Randomisatielijst.csv")
# patient_ids = ["02", "04", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18"]
patient_ids = ["08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18"]
train_patients, test_patients = train_test_split(patient_ids, test_size=0.2, random_state=42)

In [4]:
filtered_df = labels_df[
    (labels_df['eeg_type'] == 'rsEEG')
    & (labels_df['procedure'] == 'itbs')
    & (labels_df['patient_id'].isin(train_patients))]
epoch_files = filtered_df['filename'].tolist()
epochs_list = [mne.read_epochs(os.path.join("dataset-cleaned", file)) for file in epoch_files]
all_epochs = mne.concatenate_epochs(epochs_list)

# Training data & labels
X_1D = all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
y_1D = np.array([0 if timing == 'pre' else 1 for timing in filtered_df['pre_post']])

# Fix length of data with labels
if len(X_1D) != len(y_1D):
    y_1D = np.repeat(y_1D, len(X_1D) // len(y_1D) + 1)[:len(X_1D)]
if len(X_1D) != len(y_1D):
    raise ValueError(f"Data cardinality is ambiguous: x sizes: {len(X_1D)}, y sizes: {len(y_1D)}")

Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_14_S2_rsEEG_post-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
274 matching events found
No baseline correction applied
0 projection items activated
Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_11_S1_rsEEG_pre-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
255 matching events found
No baseline correction applied
0 projection items activated
Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_18_S3_rsEEG_pre-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
60 matching events found
No baseline correction applied
0 projection items activated
Reading

  all_epochs = mne.concatenate_epochs(epochs_list)


Not setting metadata
4073 matching events found
Applying baseline correction (mode: mean)


  X_1D = all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)


In [5]:
model_1D = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_1D.shape[1], X_1D.shape[2])),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.25),
    
    Conv1D(128, 3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.25),
    
    Conv1D(256, 3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(2),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])

model_1D.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_1D.fit(X_1D, y_1D, epochs=30, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-08-18 19:30:22.765253: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 10532 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:01:00.0, compute capability: 6.1
2024-08-18 19:30:22.766914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:2021] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10532 MB memory:  -> device: 1, name: NVIDIA GeForce GTX 1080 Ti, pci bus id: 0000:03:00.0, compute capability: 6.1


Epoch 1/30


I0000 00:00:1724009433.202322     691 service.cc:146] XLA service 0x7f89f80064d0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1724009433.202416     691 service.cc:154]   StreamExecutor device (0): NVIDIA GeForce GTX 1080 Ti, Compute Capability 6.1
I0000 00:00:1724009433.202424     691 service.cc:154]   StreamExecutor device (1): NVIDIA GeForce GTX 1080 Ti, Compute Capability 6.1
2024-08-18 19:30:33.307916: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-18 19:30:33.840070: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:531] Loaded cuDNN version 8907


[1m 11/128[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1s[0m 17ms/step - accuracy: 0.4587 - loss: 0.7735

I0000 00:00:1724009440.208303     691 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 60ms/step - accuracy: 0.4765 - loss: 0.8127
Epoch 2/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5056 - loss: 0.7047
Epoch 3/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5219 - loss: 0.6916
Epoch 4/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.5256 - loss: 0.6939
Epoch 5/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5319 - loss: 0.6920
Epoch 6/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5666 - loss: 0.6847
Epoch 7/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.5951 - loss: 0.6681
Epoch 8/30
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 11ms/step - accuracy: 0.5934 - loss: 0.6676
Epoch 9/30
[1m128/128[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x7f8cb4aef450>

In [10]:
# Currently, SHAP results in ResourceExhaustedError
USE_SHAP = False

if USE_SHAP:
    explainer = shap.DeepExplainer(model_1D, X_1D)
    shap_values_list = []

total_accuracy = []

for new_patient_id in test_patients:
    logger.info(f"Testing on patient {new_patient_id}")
    
    new_filtered_df = labels_df[
        (labels_df['procedure'] == 'itbs') 
        & (labels_df['eeg_type'] == 'rsEEG') 
        & (labels_df['patient_id'] == new_patient_id)
    ]
    new_epoch_files = new_filtered_df['filename'].tolist()
    new_epochs_list = [mne.read_epochs(os.path.join("dataset-cleaned", file)) for file in new_epoch_files]
    new_all_epochs = mne.concatenate_epochs(new_epochs_list)
    
    X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
    y_test = np.array([0 if timing == 'pre' else 1 for timing in new_filtered_df['pre_post']])
    
    if len(X_test) != len(y_test):
        y_test = np.repeat(y_test, len(X_test) // len(y_test) + 1)[:len(X_test)]
    
    if len(X_test) != len(y_test):
        raise ValueError(f"Data cardinality is ambiguous: x sizes: {len(X_test)}, y sizes: {len(y_test)}")
    
    y_pred = model_1D.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int).flatten()
    
    accuracy = accuracy_score(y_test, y_pred)
    logger.info(f'Accuracy for patient {new_patient_id}: {accuracy}')
    total_accuracy.append(accuracy)
    
    if USE_SHAP:
        shap_values = explainer.shap_values(X_test)
        shap_values_list.append(shap_values)

average_accuracy = np.mean(total_accuracy)
logger.info(f'Total accuracy (averaged): {average_accuracy}')

if USE_SHAP:
    combined_shap_values = np.concatenate(shap_values_list, axis=0)
    shap.summary_plot(combined_shap_values, X_test)

[32m[2024-08-18 19:47:06,311] - INFO - Testing on patient 13[0m


Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_13_S2_rsEEG_pre-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
298 matching events found
No baseline correction applied
0 projection items activated
Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_13_S2_rsEEG_post-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
346 matching events found
No baseline correction applied
0 projection items activated


  new_all_epochs = mne.concatenate_epochs(new_epochs_list)


Not setting metadata
644 matching events found
Applying baseline correction (mode: mean)


  X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)


[1m21/21[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step


[32m[2024-08-18 19:47:09,950] - INFO - Accuracy for patient 13: 0.5170807453416149[0m
[32m[2024-08-18 19:47:09,951] - INFO - Testing on patient 08[0m


Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_08_S2_rsEEG_pre-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
240 matching events found
No baseline correction applied
0 projection items activated
Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_08_S2_rsEEG_post-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
317 matching events found
No baseline correction applied
0 projection items activated


  new_all_epochs = mne.concatenate_epochs(new_epochs_list)


Not setting metadata
557 matching events found
Applying baseline correction (mode: mean)


  X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step 


[32m[2024-08-18 19:47:12,916] - INFO - Accuracy for patient 08: 0.49730700179533216[0m
[32m[2024-08-18 19:47:12,917] - INFO - Testing on patient 17[0m


Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_17_S1_rsEEG_pre-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
300 matching events found
No baseline correction applied
0 projection items activated
Reading /project_ghent/Tomas_research/tms-research/dataset-cleaned/TMS-EEG-H_17_S1_rsEEG_post-epo.fif ...
    Found the data of interest:
        t =       0.00 ...    2000.00 ms
        0 CTF compensation matrices available
Not setting metadata
258 matching events found
No baseline correction applied
0 projection items activated


  new_all_epochs = mne.concatenate_epochs(new_epochs_list)


Not setting metadata
558 matching events found
Applying baseline correction (mode: mean)


  X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)


[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step


[32m[2024-08-18 19:47:15,988] - INFO - Accuracy for patient 17: 0.4283154121863799[0m
[32m[2024-08-18 19:47:15,989] - INFO - Total accuracy (averaged): 0.4809010531077757[0m


## 2D Conv

In [11]:
X_2D = X_1D
y_2D = y_1D
X_CNN = X_2D.reshape(X_2D.shape[0], X_2D.shape[1], X_2D.shape[2], 1)

model_CNN = Sequential([
    Conv2D(64, (2, 2), activation='relu', input_shape=(X_CNN.shape[1], X_CNN.shape[2], X_CNN.shape[3])),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Conv2D(128, (2, 2), activation='relu'),
    BatchNormalization(),
    MaxPooling2D((2, 2)),
    Dropout(0.25),
    
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')
])
model_CNN.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model_CNN.fit(X_CNN, y_2D, epochs=10, batch_size=32)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-08-18 19:47:41.589068: W external/local_tsl/tsl/framework/bfc_allocator.cc:482] Allocator (GPU_0_bfc) ran out of memory trying to allocate 7.32GiB (rounded to 7860387840)requested by op StatelessRandomUniformV2
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-08-18 19:47:41.589285: I external/local_tsl/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2024-08-18 19:47:41.589325: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 1670, Chunks in use: 1670. 417.5KiB allocated for chunks. 417.5KiB in use in bin. 10.5KiB client-requested in use in bin.
2024-08-18 19:47:41.589348: I external/local_tsl/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	Total Chunks: 22, Chunks in use: 22. 11.0KiB allocated f

ResourceExhaustedError: {{function_node __wrapped__StatelessRandomUniformV2_device_/job:localhost/replica:0/task:0/device:GPU:0}} OOM when allocating tensor with shape[15352320,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:StatelessRandomUniformV2] name: 

In [None]:
explainer = shap.DeepExplainer(model, X_1D)
shap_values_list = []
total_accuracy = []

for new_patient_id in test_patients:
    logger.info(f"Testing on patient {new_patient_id}")
    new_filtered_df = labels_df[(labels_df['procedure'] == 'itbs') & (labels_df['eeg_type'] == 'rsEEG') & (labels_df['patient_id'] == new_patient_id)]
    new_epoch_files = new_filtered_df['filename'].tolist()
    new_epochs_list = [mne.read_epochs(os.path.join("dataset-cleaned", file)) for file in new_epoch_files]
    new_all_epochs = mne.concatenate_epochs(new_epochs_list)
    
    X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
    y_test = np.array([0 if timing == 'pre' else 1 for timing in new_filtered_df['pre_post']])
    
    # Ensure y has the same number of samples as X
    if len(X_test) != len(y_test):
        y_test = np.repeat(y_test, len(X_test) // len(y_test) + 1)[:len(X_test)]
    
    # Check if the lengths match
    if len(X_test) != len(y_test):
        raise ValueError(f"Data cardinality is ambiguous: x sizes: {len(X_test)}, y sizes: {len(y_test)}")
    
    y_pred = model.predict(X_test)
    y_pred = (y_pred > 0.5).astype(int).flatten()
    
    accuracy = accuracy_score(y_test, y_pred)
    logger.info(f'Accuracy for patient {new_patient_id}: {accuracy}')
    total_accuracy.append(accuracy)

    shap_values = explainer.shap_values(X_test)
    shap_values_list.append(shap_values)

average_accuracy = np.mean(total_accuracy)
logger.info(f'Total accuracy (averaged): {average_accuracy}')

combined_shap_values = np.concatenate(shap_values_list, axis=0)
shap.summary_plot(combined_shap_values, X_test)

## XAI

In [7]:
import shap
import numpy as np

explainer = shap.DeepExplainer(model_1D, X_1D[:100])
shap_values = explainer.shap_values(X_test[:10])
shap.summary_plot(shap_values, X_test)


total_accuracy = []
for new_patient_id in test_patients:
    logger.info(f"Testing on patient {new_patient_id}")
    new_filtered_df = labels_df[(labels_df['procedure'] == 'itbs') & (labels_df['eeg_type'] == 'rsEEG') & (labels_df['patient_id'] == new_patient_id)]
    new_epoch_files = new_filtered_df['filename'].tolist()
    new_epochs_list = [mne.read_epochs(os.path.join("dataset-cleaned", file)) for file in new_epoch_files]
    new_all_epochs = mne.concatenate_epochs(new_epochs_list)
    
    X_test = new_all_epochs.get_data()  # Shape: (n_epochs, n_channels, n_times)
    y_test = np.array([0 if timing == 'pre' else 1 for timing in new_filtered_df['pre_post']])
    
    # Ensure y has the same number of samples as X
    if len(X_test) != len(y_test):
        y_test = np.repeat(y_test, len(X_test) // len(y_test) + 1)[:len(X_test)]
    
    # Check if the lengths match
    if len(X_test) != len(y_test):
        raise ValueError(f"Data cardinality is ambiguous: x sizes: {len(X_test)}, y sizes: {len(y_test)}")

W0000 00:00:1723752777.443875     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.477465     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.478860     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.480710     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.482085     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.483750     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.485554     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.488094     584 gpu_timer.cc:114] Skipping the delay kernel, measurement accuracy will be reduced
W0000 00:00:1723752777.489782     584 gp

NameError: name 'X_test' is not defined