# EEG - Flow

## 3. ICAs (1 AND 2 IN PARALLEL) + multiple runs ==> 4 ICAs at the same time

Last edit: 21.04.2023 15:45
@anguyen

In [None]:
import os
from copy import deepcopy
from joblib import Parallel, delayed

import numpy as np
import pandas as pd

from mne import pick_types, read_annotations
from mne.io import read_info, read_raw_fif
from mne.preprocessing import ICA
from mne.viz import set_browser_backend
from mne.viz.ica import _prepare_data_ica_properties
from mne_icalabel import label_components

from eeg_flow.config import load_config
from eeg_flow.utils.bids import get_fname, get_folder
from eeg_flow.utils.concurrency import lock_files

%matplotlib qt
set_browser_backend('qt')

_, DERIVATIVES_FOLDER_ROOT, _ = load_config()

The parameters of the file to process are defined below. Locks are created to prevent someone else from running the same task and from writing the same derivatives.

In [None]:
PARTICIPANT = 1        # int
GROUP       = 1         # int [1, 2, 3, 4, 5, 6, 7, 8]
TASK        = "oddball" # str [oddball, UT]
RUN         = [1, 2]    # list of int, runs to process, e.g. [1, 2]

# Flat lists with two entries per run, ordered as
# [ICA1_run1, ICA2_run1, ICA1_run2, ICA2_run2, ...]; the fitting cell indexes
# filtered_sessions with a single integer.
filtered_sessions = []
ica_id = []
# Locks of every run are accumulated here. Rebinding a single ``locks``
# variable on each iteration would drop (and thereby release) the locks of
# the previous runs before their derivatives are written.
locks = []

for run_nb in RUN:
    FNAME_STEM = get_fname(PARTICIPANT, GROUP, TASK, run_nb)
    DERIVATIVES_SUBFOLDER = get_folder(
        DERIVATIVES_FOLDER_ROOT, PARTICIPANT, GROUP, TASK, run_nb
    )

    # create locks on the derivatives this step writes for the current run
    derivatives = (
        DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_1st-ica.fif"),
        DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_2nd-ica.fif"),
        DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_iclabel.xlsx"),
    )
    locks.extend(lock_files(*derivatives))

    # load previous steps
    ## load raw recording
    raw = read_raw_fif(
        DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step1_raw.fif"),
        preload=True,
    )
    ## load following annots
    info = read_info(DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step2_info.fif"))
    # NOTE(review): the annotation file name hard-codes "oddball" although
    # TASK may also be "UT" -- confirm against the files written by step 2.
    annot = read_annotations(
        DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step2_oddball_with_bads_annot.fif")
    )

    # merge info and annots into current raw
    raw.info["bads"] = info["bads"]
    raw.set_annotations(annot)

    ## Prep ICA1 for mastoids
    # Work on a copy so that the unfiltered recording stays available for ICA2.
    raw_ica_fit1 = raw.copy()
    # Band-pass 1-40 Hz
    raw_ica_fit1.filter(
        l_freq=1.0,
        h_freq=40.0,
        picks="eeg",
        method="fir",
        phase="zero-double",
        fir_window="hamming",
        fir_design="firwin",
        pad="edge",
    )

    ## Prep ICA2 for EEG
    #%% Clean the other channels
    # The first step is to prepare the raw object for an ICA, and for suggestions
    # from ICLabel. The steps are very similar to the previous ones.
    # The mastoids are removed in place; raw_ica_fit1 is unaffected because it
    # was copied beforehand.
    raw.drop_channels(["M1", "M2"])

    # filter
    raw_ica_fit2 = raw.copy()
    raw_ica_fit2.filter(
        l_freq=1.0,
        h_freq=100.0,  # Note the higher frequency
        picks=["eeg"],
        method="fir",
        phase="zero-double",
        fir_window="hamming",
        fir_design="firwin",
        pad="edge",
    )

    # change the reference to a common average reference (CAR): the montage is
    # cleared, CPz is added back as a reference channel, the montage is set
    # again and the average reference is applied directly (no projector).
    raw_ica_fit2.set_montage(None)
    raw_ica_fit2.add_reference_channels(ref_channels="CPz")
    raw_ica_fit2.set_montage("standard_1020")
    raw_ica_fit2.set_eeg_reference("average", projection=False)

    # extend (not append) to keep both lists flat, see above
    filtered_sessions.extend([raw_ica_fit1, raw_ica_fit2])
    ica_id.extend([f"{FNAME_STEM}_{run_nb}_ICA1", f"{FNAME_STEM}_{run_nb}_ICA2"])

    # The data stay alive through filtered_sessions; only the per-run names
    # are removed.
    del raw_ica_fit1
    del raw_ica_fit2

In [None]:
%%time
#%%% Fit an ICA in parallel :)

ica = ICA(
    n_components=None,
    method="picard",
    max_iter="auto",
    fit_params=dict(ortho=False, extended=True),
    random_state=888,
)
# filtered_sessions = [raw_ica_fit1, raw_ica_fit2] #this is currently [ICA1_run1, ICA2_run1, ICA1_run2, ICA2_run2]
session_picks = [pick_types(filtered_sessions[i].info, eeg=True, exclude="bads") for i in range(4)] #### change back to 2? or count len of filtered session

def fit_ica_on_data(ica, i):
    ica = ica.fit(filtered_sessions[i],session_picks[i])
    return ica, i

fitted_icas = Parallel(n_jobs=4)(delayed(fit_ica_on_data)(deepcopy(ica), i) for i in range(4)) #### change back to 2? or count len of filtered session

In [None]:
# fitted_icas holds one (ica, index) tuple per recording, in the same order as
# filtered_sessions: [ICA1_run1, ICA2_run1, ICA1_run2, ICA2_run2].
# The names raw_ica_fit1 / raw_ica_fit2 were already deleted at the end of the
# loading loop, so there is nothing left to delete here.
ica1 = fitted_icas[0][0]
ica2 = fitted_icas[1][0]
# The following cells need the recording on which ica2 was fitted; fetch it
# through the index returned together with the fitted ICA.
raw_ica_fit2 = filtered_sessions[fitted_icas[1][1]]

# TODO: complete here -- only the first two fits are handled below.
# NOTE(review): FNAME_STEM and DERIVATIVES_SUBFOLDER still hold the values of
# the last run in RUN, while ica1 / ica2 come from the first entries of
# fitted_icas -- confirm the derivatives are saved under the intended run.

## 2.3 ICA2 ICLabel


In [None]:
%%time
#%% Label components
# Let's start by getting suggestion from the ICLabel model
component_dict = label_components(raw_ica_fit2, ica2, method="iclabel")
print(component_dict)

In [None]:
# Store the ICLabel suggestions next to the other derivatives: one row per
# entry of the ICLabel output, with its probability and its label.
fname_icalabel = DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_iclabel.xlsx")
data_icalabel = dict(
    y_pred=component_dict["y_pred_proba"],
    labels=component_dict["labels"],
)
df_icalabel = pd.DataFrame.from_dict(data_icalabel)
df_icalabel.to_excel(fname_icalabel)

In [None]:
# Start the exclusion list with every component that ICLabel tagged as an
# eye blink or a heart beat.
labels = component_dict["labels"]
exclude = []
for k, name in enumerate(labels):
    if name in ("eye blink", "heart beat"):
        exclude.append(k)

In [None]:
# Remove the other non-brain components that occur often.
# NOTE: _prepare_data_ica_properties is a private MNE helper; only its fourth
# return value is used here.
_, _, _, data = _prepare_data_ica_properties(
    raw_ica_fit2, ica2, reject_by_annotation=True, reject="auto"
)

ica_data = np.swapaxes(data, 0, 1)
var = np.var(ica_data, axis=2)  # (n_components, n_epochs)
# one value per component: variance of the norm-scaled per-epoch variances
var = np.var(var.T / np.linalg.norm(var, axis=1), axis=0)

# A straight line fitted through these values, as a function of the component
# index, provides one threshold per component.
component_idx = range(ica2.n_components_)
z = np.polyfit(component_idx, var, 1)
threshold = [z[0] * x + z[1] for x in component_idx]

# Components labelled as brain are kept, eye blinks and heart beats are
# already in the list; any other component is added when its value is not at
# or above the threshold.
already_handled = ("brain", "eye blink", "heart beat")
for k, label in enumerate(labels):
    if label not in already_handled and not threshold[k] <= var[k]:
        exclude.append(k)
ica2.exclude = exclude

## 2.4 Save derivatives

Both ICA decompositions (ICA1 and ICA2) can now be saved.

In [None]:
# Destination of both decompositions, next to the other derivatives.
FNAME_ICA1 = DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_1st-ica.fif")
FNAME_ICA2 = DERIVATIVES_SUBFOLDER / (FNAME_STEM + "_step3_2nd-ica.fif")

# overwrite=False: never replace a derivative that already exists on disk.
for fitted, fname in ((ica1, FNAME_ICA1), (ica2, FNAME_ICA2)):
    fitted.save(fname, overwrite=False)

Regardless of the success of the task, the locks must be released.
If this step is forgotten, someone might have to remove the corresponding `.lock` file manually.

In [None]:
# Explicitly release every lock that is still held, then drop the container.
for held_lock in locks:
    held_lock.release()
del locks  # delete would release anyway