In [None]:
%pip install mne-icalabel

In [1]:
# Imports
import os
from ipywidgets import *
import numpy as np
import mne
from mne.preprocessing import ICA
from mne_icalabel import label_components
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d

import utils

# Specify graph rendering method
# %matplotlib widget
plt.switch_backend("TkAgg")

In [2]:
DATASET_PATH = "./dataset"
FILENAME_TEMPLATE = "TMS-EEG-H_02_S1b_{}_{}.vhdr"

# Load the "pre" spTEP recording fully into memory.
spTEP_pre_raw = mne.io.read_raw_brainvision(
    os.path.join(DATASET_PATH, FILENAME_TEMPLATE.format("spTEP", "pre")), preload=True
)
sampling_rate = spTEP_pre_raw.info["sfreq"]
events, event_dict = mne.events_from_annotations(spTEP_pre_raw)
# Look the TMS marker up by its annotation description instead of relying on
# the magic event id 1, consistent with the plotting/epoching helpers below.
tms_event_id = event_dict["Stimulus/S  1"]
# First column of each event row is the sample index of the event.
tms_indices = [event[0] for event in events if event[2] == tms_event_id]

# Load the "pre" rsEEG recording fully into memory.
rsEEG_pre_raw = mne.io.read_raw_brainvision(
    os.path.join(DATASET_PATH, FILENAME_TEMPLATE.format("rsEEG", "pre")), preload=True
)

Extracting parameters from ./dataset/TMS-EEG-H_02_S1b_spTEP_pre.vhdr...
Setting channel info structure...
Reading 0 ... 2696199  =      0.000 ...   539.240 secs...
Used Annotations descriptions: ['New Segment/', 'Stimulus/S  1']
Extracting parameters from ./dataset/TMS-EEG-H_02_S1b_rsEEG_pre.vhdr...
Setting channel info structure...
Reading 0 ... 3984899  =      0.000 ...   796.980 secs...


In [3]:
# Plotting utilities
def plot_single_response(eeg_data, channel="Pz", tmin=-0.005, tmax=0.01):
    """Plot a single stimulus-locked epoch of one channel.

    Epochs are cut around every "Stimulus/S  1" annotation of ``eeg_data``
    (no baseline correction) and handed to the MNE epochs plot.

    Parameters
    ----------
    eeg_data : object accepted by ``mne.events_from_annotations`` / ``mne.Epochs``
        Recording that carries the stimulus annotations.
    channel : str
        Channel to epoch and to display.
    tmin, tmax : float
        Epoch window passed to ``mne.Epochs``.
    """
    annotation_events, annotation_ids = mne.events_from_annotations(eeg_data)
    epoch_options = dict(
        event_id=annotation_ids["Stimulus/S  1"],
        tmin=tmin,
        tmax=tmax,
        baseline=None,
        preload=True,
        picks=channel,
    )
    pulse_epochs = mne.Epochs(eeg_data, annotation_events, **epoch_options)

    # Show one epoch at a time with a fixed EEG scaling.
    pulse_epochs.plot(picks=channel, n_epochs=1, show=True, scalings={"eeg": 50e-4})


def plot_average_epoch(epochs, start=-0.05, end=0.25):
    """Plot the across-epoch mean of every channel inside ``[start, end]``.

    The time axis is not read from ``epochs``: it is generated as an evenly
    spaced grid from -1 to 1 with one point per sample, so the plot is only
    correctly aligned for epochs that span -1..1.

    Parameters
    ----------
    epochs : object exposing ``get_data()``
        ``get_data()`` is indexed as (epoch, channel, sample) here.
    start, end : float
        Inclusive limits of the displayed window on the generated time axis.
    """
    epoch_array = epochs.get_data()
    time_axis = np.linspace(-1, 1, epoch_array.shape[2])
    in_window = (time_axis >= start) & (time_axis <= end)
    window_times = time_axis[in_window]

    # One trace per channel, averaged over the epoch axis.
    channel_means = epoch_array.mean(axis=0)
    for channel_number, channel_mean in enumerate(channel_means, start=1):
        plt.plot(
            window_times, channel_mean[in_window], label=f"Channel {channel_number}"
        )

    plt.xlabel("Time points")
    plt.ylabel("Mean response")
    plt.show()


def plot_response(eeg):
    """Draw the two standard diagnostic plots for a recording.

    Both plots are delegated to the project-local ``utils`` module (not to the
    ``plot_single_response`` defined in this notebook).
    """
    # Wide window: check the full averaged response.
    utils.plot_average_response(eeg, tmin=-0.05, tmax=0.25)

    # Narrow window on Pz: check for the TMS pulse.
    pulse_window = {"tmin": -0.05, "tmax": 0.05}
    utils.plot_single_response(eeg, channel="Pz", **pulse_window)

# Cleaning - spTEP

The paper of Bertazzoli et al. (2021) compares 4 pipelines: ARTIST, TMSEEG, TESA and SOUND-SSP-SIR, all of which work decently well in varying degrees. There are common steps, but TESA will be the one that will be most closely followed. The current steps are as follows:

1. Remove EOG
2. Remove TMS pulse
3. Downsample
4. **ICA - 1**
5. Bandpass - Notch filters
6. **ICA - 2**
7. Rereference

Currently, there is no demeaning or bad channel rejection present as in TESA. Demeaning is done before the TMS-pulse interpolation, and baseline correction should be done as last step after rereferencing.


In [None]:
plot_response(spTEP_pre_raw)

![](img/raw-average.png)
![](img/single-pulse-raw.JPG)

The figure above already shows 2 major artifacts that cleaning will have the biggest impact on: TMS-pulse interpolation and demeaning/baseline correction. The major TMS-pulse artifact falls closely within the range of 2ms before until 5ms after the pulse.


In [None]:
spTEP_copy = spTEP_pre_raw.copy()

## EOG removal


In [7]:
def remove_EOG(eeg_data):
    """Drop the "HEOG" and "VEOG" channels from ``eeg_data``.

    The work is delegated to ``eeg_data.drop_channels``; nothing is returned.
    """
    eog_channel_names = ["HEOG", "VEOG"]
    eeg_data.drop_channels(eog_channel_names)

In [None]:
remove_EOG(spTEP_copy)

## TMS pulse removal


In [None]:
def calculate_range_indices(tms_index, start, end, sampling_rate):
    """Convert a time window around a TMS pulse into sample indices.

    Parameters
    ----------
    tms_index : int
        Sample index of the TMS pulse.
    start : float
        Positive duration in seconds covered before the pulse.
    end : float
        Positive duration in seconds covered after the pulse.
    sampling_rate : float
        Sampling rate in Hz.

    Returns
    -------
    (start_index, end_index) : tuple of int
        First and last sample index of the window; ``start_index`` is clamped
        to 0 so it never becomes negative.
    """
    # Round instead of truncating: seconds * Hz is a float product and can land
    # just below the intended integer (e.g. 0.29 * 100 == 28.999999999999996).
    samples_before = int(round(start * sampling_rate))
    samples_after = int(round(end * sampling_rate))

    start_index = max(0, tms_index - samples_before)
    end_index = tms_index + samples_after

    return start_index, end_index

In [None]:
def interpolate_TMS_pulse(eeg_data_raw, tms_indices, start, end, sampling_rate):
    """Replace the samples around each TMS pulse by a cubic interpolation.

    For every pulse the window returned by ``calculate_range_indices`` is
    overwritten, on all channels at once, with a cubic curve through the two
    samples directly before and the two samples directly after the window.
    The result is written back to ``eeg_data_raw._data``; nothing is returned.

    Parameters
    ----------
    eeg_data_raw : object exposing ``get_data()`` and a ``_data`` attribute
        ``get_data()`` is indexed as (channel, sample) here.
    tms_indices : iterable of int
        Sample indices of the TMS pulses.
    start, end : float
        Positive durations in seconds to replace before / after each pulse.
    sampling_rate : float
        Sampling rate in Hz.
    """
    import warnings

    eeg_data = eeg_data_raw.get_data()
    n_samples = eeg_data.shape[1]

    for tms_index in tms_indices:
        start_index, end_index = calculate_range_indices(
            tms_index, start, end, sampling_rate
        )

        # Two anchor samples are needed on each side of the window. Without
        # this check a pulse near the start would wrap around to negative
        # indices (silently using samples from the end of the recording) and a
        # pulse near the end would raise an IndexError.
        if start_index < 2 or end_index + 2 >= n_samples:
            warnings.warn(
                f"TMS pulse at sample {tms_index} is too close to the edge of "
                "the recording to interpolate; skipping it."
            )
            continue

        anchor_samples = np.array(
            [start_index - 2, start_index - 1, end_index + 1, end_index + 2]
        )
        window_samples = np.arange(start_index, end_index + 1)

        # One interpolator over the sample axis handles all channels at once.
        interp_func = interp1d(
            anchor_samples, eeg_data[:, anchor_samples], kind="cubic", axis=1
        )
        eeg_data[:, start_index : end_index + 1] = interp_func(window_samples)

    eeg_data_raw._data = eeg_data

In [None]:
interpolate_TMS_pulse(
    spTEP_copy, tms_indices, 0.005, 0.01, sampling_rate
)  # 5 ms before, 10 ms after the pulse

In [None]:
plot_response(spTEP_copy)

![](img/interpolation-5-10/interpolated-average.png)
![](img/interpolation-5-10/single-pulse-interpolated.JPG)


This successfully removed the TMS pulse artifact by using cubic interpolation based on the 2 values before and the 2 values after the range that is to be interpolated.


## Downsampling

The original data was captured with a sampling frequency of 5000 Hz. It is downsampled to 1000 Hz: following the Nyquist theorem, the highest frequency that can then be accurately represented is 500 Hz, which should be more than enough for further analysis, as the gamma band is often referred to as 30-100 Hz.


In [12]:
def downsample(eeg_data, sample_rate=1000):
    """Resample ``eeg_data`` to ``sample_rate`` Hz.

    The work is delegated to ``eeg_data.resample`` with ``npad="auto"``;
    nothing is returned.
    """
    resample_options = {"npad": "auto"}
    eeg_data.resample(sample_rate, **resample_options)

In [None]:
downsample(spTEP_copy)

In [None]:
plot_response(spTEP_copy)

![](img/interpolation-5-10/single-downsampled.JPG)


## Epoching


In [None]:
def epoching(eeg_data):
    """Cut ``eeg_data`` into epochs from -1 to 1 around each TMS stimulus.

    Events are taken from the "Stimulus/S  1" annotations; no baseline
    correction is applied and the epochs are preloaded.

    Returns
    -------
    mne.Epochs
        The epochs built from ``eeg_data``.
    """
    annotation_events, annotation_ids = mne.events_from_annotations(eeg_data)
    epoch_options = dict(
        event_id=annotation_ids["Stimulus/S  1"],
        tmin=-1,
        tmax=1,
        baseline=None,
        preload=True,
    )
    return mne.Epochs(eeg_data, annotation_events, **epoch_options)

In [None]:
epochs = epoching(spTEP_copy)

In [None]:
plot_average_epoch(epochs)

![](img/interpolation-5-10/average_epoch.png)


## Demeaning/detrending

Demeaning is achieved by subtracting, from every sample of an electrode, the average value of that electrode, which brings the mean of each electrode to 0.

> TODO: check if other way of demeaning on complete electrode is possible to move value near 0 or better yet on 0


In [None]:
def demean(eeg_data):
    """Subtract the mean from the data passed to ``apply_function``.

    The subtraction is applied through ``eeg_data.apply_function``; nothing is
    returned.
    """

    def _subtract_mean(samples):
        return samples - samples.mean()

    eeg_data.apply_function(_subtract_mean)

In [None]:
demean(spTEP_copy)

In [None]:
data = spTEP_copy.get_data()
mean_values = np.mean(data, axis=1)
total_mean = np.mean(mean_values)
print(f"Total mean: {total_mean}")

plot_response(spTEP_copy)

In [None]:
def demean_epochs(epochs):
    """Return a copy of ``epochs`` in which every epoch/channel has zero mean.

    The mean is taken over the sample axis (axis 2 of ``epochs.get_data()``),
    separately for every epoch and channel, and subtracted from that trace.

    Parameters
    ----------
    epochs : mne.Epochs
        Epochs to demean; not modified.

    Returns
    -------
    mne.EpochsArray
        New epochs holding the demeaned data with the same info, events,
        event ids and start time as the input.
    """
    data = epochs.get_data()
    demeaned_data = data - np.mean(data, axis=2, keepdims=True)
    demeaned_epochs = mne.EpochsArray(
        demeaned_data,
        epochs.info,
        events=epochs.events,
        event_id=epochs.event_id,
        # Without tmin the new epochs would start at 0 s and lose the
        # alignment of the stimulus with t = 0.
        tmin=epochs.tmin,
    )
    return demeaned_epochs

In [None]:
epochs = demean_epochs(epochs)

In [None]:
plot_average_epoch(epochs)

Below are two graphs with demeaning applied to the full electrode recording and to the individual epochs, respectively. Clearly, there has to be some error in demeaning the full electrode, as those averages aren't close enough to 0. However, when printing the mean values of the electrodes, they are indeed close to 0 (e.g. 1e-20). For now, as the demeaned epochs are the desired result, the epochs will be used for further cleaning.

![](img/interpolation-5-10/average-demean.png)
![](img/interpolation-5-10/average_epoch_demeaned.png)


## ICA - 1

The first ICA filter is mainly to remove the primary large artifacts such as muscle and electrical charge. If demeaning were applied now, a graph as below is the result.

This is implemented by first fitting ICA to the signal, and then applying the threshold formula used by the TESA software to each component to either keep or remove each ICA component.


In [None]:
def ICA_1(epoch_data, T=3.5, b1=0.011, b2=0.030, n_components=20):
    """Fit an ICA and remove components dominated by early post-pulse activity.

    For every component the epoch-averaged source is computed. A component is
    excluded when its mean absolute amplitude inside the window ``[b1, b2]``
    after the epoch midpoint exceeds ``T`` times its mean absolute amplitude
    over the whole epoch. The cleaned signal is written back into
    ``epoch_data`` by ``ica.apply``; nothing is returned.

    Parameters
    ----------
    epoch_data : mne.Epochs
        Epochs to fit on and to clean in place. The stimulus is assumed to sit
        at the middle sample of each epoch (symmetric epochs).
    T : float
        Ratio threshold above which a component is removed.
    b1, b2 : float
        Start and end of the test window, in seconds after the epoch midpoint
        (they are multiplied by the sampling frequency).
    n_components : int
        Number of ICA components to fit.
    """
    ica = ICA(n_components=n_components, random_state=97)
    ica.fit(epoch_data)

    # Credits to Arne Callaert for the following thresholding logic
    sources = ica.get_sources(epoch_data)
    averaged_sources = sources.get_data().mean(axis=0)
    sfreq = sources.info["sfreq"]

    # The window is the same for every component, so compute it once.
    base = len(sources.times) / 2
    b1_index = int(base + (b1 * sfreq))
    b2_index = int(base + (b2 * sfreq))

    components_to_remove = []
    for i, component in enumerate(averaged_sources):
        window_amplitude = np.mean(np.abs(component[b1_index:b2_index]))
        overall_amplitude = np.mean(np.abs(component))
        if overall_amplitude == 0:
            # An all-zero component cannot exceed the threshold; skip the 0/0.
            continue
        ratio = window_amplitude / overall_amplitude
        if ratio > T:
            print("FOUND:", ratio)
            components_to_remove.append(i)

    ica.exclude = components_to_remove

    # Called for its effect on epoch_data; the return value is not used.
    ica.apply(epoch_data)

In [None]:
ICA_1(epochs)

In [None]:
plot_average_epoch(epochs)

![](img/interpolation-5-10/average_epoch_ica_1.png)
The graph above shows the result after applying the ICA filter. However, the values look muddier than before. The goal was to filter out the initial peak values (residue from the TMS pulse), but these are still present.


## Bandpass - Notch


In [None]:
def bandpass_notch(epoch_data, low_freq=1, high_freq=100, notch_freqs=[50]):
    """Band-pass and notch filter epochs.

    The band-pass is applied with ``epoch_data.filter`` on the object that is
    passed in. The notch filter is then applied to the data array and wrapped
    in a new ``EpochsArray``.

    Parameters
    ----------
    epoch_data : mne.Epochs
        Epochs to filter.
    low_freq, high_freq : float
        Band-pass edges passed to ``epoch_data.filter``.
    notch_freqs : list of float
        Frequencies passed to ``mne.filter.notch_filter``. The default list is
        never mutated here.

    Returns
    -------
    mne.EpochsArray
        Filtered epochs with the info, events, event ids and start time of
        ``epoch_data``.
    """
    # Bandpass
    epoch_data.filter(low_freq, high_freq)

    # Notch (only directly available on raw object, not on epochs)
    data = epoch_data.get_data()
    # Use the metadata of the argument, not of the notebook-global `epochs`.
    notch_filtered = mne.filter.notch_filter(
        data, epoch_data.info["sfreq"], notch_freqs
    )
    filtered_epochs = mne.epochs.EpochsArray(
        notch_filtered,
        epoch_data.info,
        events=epoch_data.events,
        event_id=epoch_data.event_id,
        tmin=epoch_data.tmin,
    )

    return filtered_epochs

In [None]:
epochs = bandpass_notch(epochs)

In [None]:
plot_average_epoch(epochs)

![](img/interpolation-5-10/average_epoch_filter.png)


## Rereference


In [None]:
def rereference(epochs):
    """Re-reference ``epochs`` to the average reference.

    The instance method is used so the reference is applied to ``epochs``
    itself. The module-level ``mne.set_eeg_reference`` works on a copy by
    default and returns it, so discarding its result left the input unchanged.
    Nothing is returned.
    """
    epochs.set_eeg_reference(ref_channels="average")

In [None]:
rereference(epochs)

In [None]:
plot_average_epoch(epochs)

![](img/interpolation-5-10/average_epoch_rereference.png)


## ICA - 2

Li et al., (2022). MNE-ICALabel: Automatically annotating ICA components with ICLabel in Python. Journal of Open Source Software, 7(76), 4484, https://doi.org/10.21105/joss.04484


In [None]:
def ICA_2(epoch_data):
    """Fit a 20-component ICA and remove components not labelled as brain/other.

    Components are labelled with ``label_components(..., method="iclabel")``.
    Every component whose label is neither "brain" nor "other" is excluded
    when the ICA is applied to ``epoch_data``. The labels and the excluded
    indices are printed; nothing is returned.
    """
    ica = mne.preprocessing.ICA(n_components=20, random_state=42)
    ica.fit(epoch_data)

    labels = label_components(epoch_data, ica, method="iclabel")["labels"]
    print(labels)

    kept_labels = ["brain", "other"]
    exclude_idx = []
    for idx, label in enumerate(labels):
        if label in kept_labels:
            continue
        exclude_idx.append(idx)
    print(f"Excluding these {len(exclude_idx)} ICA components: {exclude_idx}")

    ica.apply(epoch_data, exclude=exclude_idx)

In [None]:
epochs_copy = epochs.copy()

In [None]:
ICA_2(epochs_copy)

In [None]:
plot_average_epoch(epochs_copy)

![ica 2 result](img/interpolation-5-10/average_epoch_ica_2.png)


## Baseline correction


In [None]:
def baseline(epoch_data):
    """Apply baseline correction to ``epoch_data`` with the interval ``(None, None)``.

    The work is delegated to ``epoch_data.apply_baseline``; nothing is
    returned.
    """
    whole_interval = (None, None)
    epoch_data.apply_baseline(whole_interval)

In [None]:
baseline(epochs_copy)

In [None]:
plot_average_epoch(epochs_copy)

![](img/interpolation-5-10/average_epoch_baseline.png)


In [None]:
spTEP_pre_copy = spTEP_pre_raw.copy()
remove_EOG(spTEP_pre_copy)

spTEP_pre_copy.compute_psd().plot_topomap()

In [None]:
spTEP_pre_raw.plot_sensors(show_names=True)

In [None]:
def plot_full_average_epoch(epochs, electrodes=None, start=-0.05, end=0.25):
    """Plot the mean over all epochs and channels with a shaded error band.

    The time axis is not read from ``epochs``: it is generated as an evenly
    spaced grid from -1 to 1 with one point per sample, so the plot is only
    correctly aligned for epochs that span -1..1.

    Parameters
    ----------
    epochs : mne.Epochs
        Epochs to plot; a copy is used, the argument is not modified.
    electrodes : list of str or None
        Channel names to restrict the average to; ``None`` keeps all channels.
        Unknown names are expected to raise in ``pick`` rather than being
        ignored.
    start, end : float
        Inclusive limits of the displayed window on the generated time axis.
    """
    epochs = epochs.copy()
    if electrodes is not None:
        # `pick` replaces the legacy `pick_channels`, matching the channel
        # selection used by plot_raw_time_domain.
        epochs.pick(electrodes)
    data = epochs.get_data()

    # Collapse the epoch and channel axes; the band is the pooled standard
    # deviation divided by sqrt(number of epochs).
    mean_responses = np.mean(data, axis=(0, 1))
    sem_responses = np.std(data, axis=(0, 1)) / np.sqrt(data.shape[0])

    time_points = np.linspace(-1, 1, data.shape[2])
    selected_indices = np.where((time_points >= start) & (time_points <= end))
    selected_data = mean_responses[selected_indices]
    selected_sem = sem_responses[selected_indices]
    selected_time_points = time_points[selected_indices]

    plt.plot(selected_time_points, selected_data, label="Average of all electrodes")
    plt.fill_between(
        selected_time_points,
        selected_data - selected_sem,
        selected_data + selected_sem,
        color="b",
        alpha=0.2,
    )
    plt.xlabel("Time points")
    plt.ylabel("Mean response")
    plt.legend()
    plt.show()

## Final result

In these final plots, the total average is plotted with the error range.


In [None]:
# F, FC
# 3, 5, 7

plot_full_average_epoch(epochs_copy, end=0.3)
plot_full_average_epoch(epochs_copy, ['F3', 'FC5', 'FC1'], end=0.3)

![](img/interpolation-5-10/average_epoch_all_electrodes.png)
![](img/interpolation-5-10/final-local.png)

In [None]:
# Save the epochs to a file
epochs_copy.save("processed_epochs-epo.fif")

## TODO

Current biggest things to find out:

- is there a way to further improve the filtering that ICA 1 is supposed to achieve? (Filtering out the residue of the TMS pulse)
- how can time ranges be plotted on the scalp topography, like in the comparative paper?

# Cleaning - rsEEG

Cleaning regular EEG data has similar steps to spTEP data, as it uses the same capturing techniques, but doesn't have to deal with TMS related artifacts and doesn't have the event data from TMS pulses. The most important filtering method used here is ICA filtering using the `mne_icalabel` library, paired with bandpass filtering, notch filtering and rereferencing.


In [46]:
def calculate_avg_std(eeg_data_raw):
    """Return the standard deviation along axis 1, averaged over the rows.

    ``eeg_data_raw.get_data()`` supplies the 2-D array: the standard deviation
    is taken per row (axis 1) and the mean of those values is returned.
    """
    per_row_std = np.std(eeg_data_raw.get_data(), axis=1)
    return np.mean(per_row_std)

In [41]:
def plot_raw_time_domain(eeg_data_raw, start=0, duration=1, channels=None):
    """Plot a time slice of a recording, one line per channel.

    Parameters
    ----------
    eeg_data_raw : mne raw object
        Recording to plot; a copy is used, the argument is not modified.
    start : float
        Start of the slice in seconds.
    duration : float
        Length of the slice in seconds.
    channels : list of str or None
        Channels to keep; ``None`` plots all channels.
    """
    eeg_copy = eeg_data_raw.copy()
    if channels is not None:
        eeg_copy.pick(channels)

    # Convert the window from seconds to sample indices.
    sfreq = eeg_copy.info['sfreq']
    first_sample = int(start * sfreq)
    last_sample = int((start + duration) * sfreq)
    data, times = eeg_copy[:, first_sample:last_sample]

    plt.figure()
    channel_names = eeg_copy.info['ch_names']
    for row in range(data.shape[0]):
        plt.plot(times, data[row, :], label=channel_names[row])

    plt.ylabel('Voltage (V)')
    plt.legend()
    plt.show()

In [5]:
rsEEG_copy = rsEEG_pre_raw.copy()

In [55]:
print("{:.6e}".format(calculate_avg_std(rsEEG_pre_raw)))
plot_raw_time_domain(rsEEG_pre_raw)

4.140313e-04


![raw rsEEG](img/rsEEG/rsEEG_raw.png)

## Remove EOG


## Downsampling


## Demean


In [13]:
def demean_brainvis(eeg_data_raw):
    """Subtract the mean from the data passed to ``apply_function``.

    The subtraction is applied through ``eeg_data_raw.apply_function``;
    nothing is returned.
    """

    def _remove_mean(samples):
        return samples - np.mean(samples)

    eeg_data_raw.apply_function(_remove_mean)

## Bandpass & notch


In [16]:
def rsEEG_filters(eeg_data, l_freq=1, h_freq=100, notch_freqs=[50]):
    """Band-pass and then notch filter ``eeg_data``.

    Both steps are delegated to the object's own ``filter`` and
    ``notch_filter`` methods, in that order; nothing is returned. The default
    ``notch_freqs`` list is never mutated here.
    """
    passband = (l_freq, h_freq)
    eeg_data.filter(*passband)
    eeg_data.notch_filter(notch_freqs)

## ICA


In [19]:
def rsEEG_ICA(eeg_data):
    """Fit a 20-component ICA and remove components not labelled as brain/other.

    Components are labelled with ``label_components(..., method="iclabel")``.
    Every component whose label is neither "brain" nor "other" is excluded
    when the ICA is applied to ``eeg_data``. The excluded indices are printed;
    nothing is returned.
    """
    ica = ICA(n_components=20, random_state=97)
    ica.fit(eeg_data)
    ic_labels = label_components(eeg_data, ica, method="iclabel")

    labels = ic_labels["labels"]
    exclude_idx = [
        idx for idx, label in enumerate(labels) if label not in ["brain", "other"]
    ]

    # The closing parenthesis was missing here, which made the cell a
    # SyntaxError and left rsEEG_ICA undefined.
    print(f"Excluding ICA components: {exclude_idx}")

    ica.apply(eeg_data, exclude=exclude_idx)

## Rereference


In [22]:
def rsEEG_rereference(eeg_data):
    """Re-reference ``eeg_data`` to the average reference.

    The work is delegated to ``eeg_data.set_eeg_reference``; nothing is
    returned.
    """
    reference = "average"
    eeg_data.set_eeg_reference(ref_channels=reference)

## Final

DISCOVER-EEG:
1. Line noise removal
2. Bad channel rejection
3. Rereference
4. ICA
5. Bad channel interpolation
6. Bad time segments removal

Own pipeline without TMS filtering:
1. Remove EOG
2. Downsample
3. Demeaning
4. Bandpass & notch
5. ICA
6. Rereference
7. Baseline correction

In [None]:
def rsEEG_cleaning(eeg_data):
    """Run the rsEEG cleaning pipeline on ``eeg_data``.

    Steps, in order: remove EOG channels, downsample, demean, band-pass and
    notch filter, ICA-based component removal, average re-reference. Each step
    is delegated to the helper of the same purpose defined in this notebook;
    nothing is returned.
    """
    remove_EOG(eeg_data)
    downsample(eeg_data)
    demean_brainvis(eeg_data)
    rsEEG_filters(eeg_data)
    rsEEG_ICA(eeg_data)
    # Use the rsEEG helper, which re-references through the instance method,
    # instead of the spTEP `rereference` helper.
    rsEEG_rereference(eeg_data)

![cleaned rsEEG](img/rsEEG/rsEEG_cleaned.png)

In [57]:
# NOTE(review): no visible cell calls rsEEG_cleaning(rsEEG_copy) before this
# save — confirm the cleaning ran, otherwise the unprocessed copy of the raw
# recording is what gets written to "rsEEG_cleaned.fif".
rsEEG_copy.save(os.path.join(".", "filtered", "rsEEG_cleaned.fif"))

Writing /home/tomasgalle/UGent/thesis/tms-research/filtered/rsEEG_cleaned.fif


  rsEEG_copy.save(os.path.join(".", "filtered", "rsEEG_cleaned.fif"))


Closing /home/tomasgalle/UGent/thesis/tms-research/filtered/rsEEG_cleaned.fif
[done]
