# EEG - Flow

## 3. ICAs

Last edit: 14.04.2023 00:28


In [1]:
# from itertools import chain

from mne import read_annotations, pick_types
from mne.io import read_raw_fif, write_info, read_info
from mne.preprocessing import compute_bridged_electrodes, interpolate_bridged_electrodes, ICA
from mne.viz import set_browser_backend
from mne_icalabel import label_components
from pyprep import NoisyChannels

from eeg_flow.config import load_config
from eeg_flow.utils.annotations import merge_bad_annotations
from eeg_flow.utils.bids import get_fname, get_folder
from eeg_flow.utils.concurrency import lock_files
from eeg_flow.viz import plot_bridged_electrodes

import pandas as pd

%matplotlib qt
set_browser_backend('qt')

_, derivatives_folder, experimenter = load_config()

2023-04-14 00:20:49,389 - numexpr.utils - INFO - Note: NumExpr detected 16 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
2023-04-14 00:20:49,393 - numexpr.utils - INFO - NumExpr defaulting to 8 threads.


Using qt as 2D backend.


The parameters of the file to process are defined below. Locks are created to prevent someone else from running the same task and from writing the same derivatives.

In [2]:
participant = 13  # int
group       = 3  # int
task        = "UT"  # str
run         = 1  # int

derivatives_folder_preprocessed_p = get_folder(derivatives_folder / "preprocessed", participant, group)
derivatives_folder_plots_p = get_folder(derivatives_folder / "plots", participant, group)
fname_stem = get_fname(participant, group, task, run)

#condition: do we have 2 sets of annotations?

# create locks
derivatives = (
    derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_first-ica.fif"),
    derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_second-ica.fif"),
    derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_icalabel.xlsx"),
    
)
locks = lock_files(*derivatives)

# load previous steps
## load raw recording
raw = read_raw_fif(derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step1_raw.fif"), preload=True)
## load following annots
info = read_info(derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step2_info.fif"))
annot = read_annotations(derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step2_oddball_with_bads_annot.fif"))

# merge info and annots into current raw
raw.info["bads"] = info["bads"]
raw.annotations.__add__(annot)


Opening raw data file L:\EEG_Flow_data\derivatives\preprocessed\sub-P13-G3\sub-P13-G3_task-UT_run-1\sub-P13-G3_task-UT_run-1_step1_raw.fif...
    Reading extended channel information
    Range : 36839 ... 1071237 =     35.976 ...  1046.130 secs
Ready.
Reading 0 ... 1034398  =      0.000 ...  1010.154 secs...
    Reading extended channel information


<Annotations | 1936 segments: novel (192), standard (1546), target (198)>

## 2.1 ICA1 For mastoids


In [None]:
raw_ica_fit1 = raw.copy()
# Filter to final BP (1, 40) Hz  ### or load the raw that was already fit at a previous stage
raw_ica_fit1.filter(
    l_freq=1.0,
    h_freq=40.0,
    picks="eeg",
    method="fir",
    phase="zero-double",
    fir_window="hamming",
    fir_design="firwin",
    pad="edge",
)

In [None]:
%%time 
#%%% Fit an ICA
ica1 = ICA(
    n_components=10, 
    method="picard",
    max_iter="auto",
    fit_params=dict(ortho=False, extended=True),
    random_state = 888,
)
picks = pick_types(raw_ica_fit1.info, eeg=True, exclude="bads")
ica1.fit(raw_ica_fit1, picks=picks)
# notify

## 2.2 ICA2 For mastoids


In [None]:
#%% Clean the other channels
# The first step is to prepare the raw object for an ICA, and for suggestions
# from ICLabel. The steps are very similar to the previous ones.
raw.drop_channels(["M1", "M2"])

In [None]:
# filter
raw_ica_fit2 = raw.copy()
raw_ica_fit2.filter(
    l_freq=1.0,
    h_freq=100.0,  # Note the higher frequency
    picks=["eeg"],
    method="fir",
    phase="zero-double",
    fir_window="hamming",
    fir_design="firwin",
    pad="edge",
)

In [None]:
# change the reference to a common average reference (CAR)
raw_ica_fit2.set_montage(None)
raw_ica_fit2.add_reference_channels(ref_channels="CPz")
raw_ica_fit2.set_montage("standard_1020")
raw_ica_fit2.set_eeg_reference("average", projection=False)
# Note that the CAR is excluding the bad channels.


In [None]:
%%time 

# fit an ICA
ica2 = ICA(
    n_components=10,  # can be set to None
    method="picard",
    max_iter="auto",
    fit_params=dict(ortho=False, extended=True),
    random_state = 888,
)
picks = pick_types(raw_ica_fit2.info, eeg=True, exclude="bads")
ica2.fit(raw_ica_fit2, picks=picks)
#notify

## 2.3 ICA2 icalabel


In [None]:
%%time
#%% Label components
# Let's start by getting suggestion from the ICLabel model
component_dict = label_components(raw_ica_fit2, ica2, method="iclabel")
print(component_dict)

In [None]:
type(component_dict)

In [None]:
data_icalabel = {'y_pred': component_dict['y_pred_proba'], 
                 'labels': component_dict["labels"]}
df_icalabel = pd.DataFrame.from_dict(data_icalabel)

In [None]:
#or to_csv
fname_icalabel = derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_icalabel.xlsx")

df_icalabel.to_excel(fname_icalabel)

In [None]:
# save_path = os.path.join(output_prep_path, "ICALabel.txt")
# file_ica = open(save_path,"w")
# file_ica.write("i \t y_pred \t labels\n")

# for i in range(len(component_dict["y_pred_proba"])):
#     file_ica.write(str(i) + "\t" + str(component_dict["y_pred_proba"][i]) + "\t" + str(component_dict["labels"][i]) + "\n")
    
# file_ica.close() #to change file access modes

In [None]:
# let's remove eye-blink and heart beat
labels = component_dict["labels"]
exclude = [
    k for k, name in enumerate(labels) if name in ("eye blink", "heart beat")
]

In [None]:
# let's remove other non-brain components that occur often
_, _, _, data = _prepare_data_ica_properties(
    raw_ica_fit,
    ica,
    reject_by_annotation=True,
    reject="auto",
)

ica_data = np.swapaxes(data, 0, 1)
var = np.var(ica_data, axis=2)  # (n_components, n_epochs)
var = np.var(var.T / np.linalg.norm(var, axis=1), axis=0)
# linear fit to determine the variance thresholds
z = np.polyfit(range(0, ica.n_components_, 1), var, 1)
threshold = [z[0] * x + z[1] for x in range(0, ica.n_components_, 1)]
# add non-brain ICs below-threshold to exclude
for k, label in enumerate(labels):
    if label in ("brain", "eye blink", "heart beat"):
        continue
    if threshold[k] <= var[k]:
        continue
    exclude.append(k)
ica.exclude = exclude

## 2.4 Save derivatives

The updated annotations can now be saved alongside the selected bad channels.

In [None]:
fname_ica1 = derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_ica1.fif")
fname_ica2 = derivatives_folder_preprocessed_p / fname_stem / (fname_stem + "_step4_ica2.fif")

ica1.save(fname_ica1, overwrite = False)
ica2.save(fname_ica2, overwrite = False)

Regardless of the success of the task, the locks must be released.
If this step is forgotten, someone might have to remove the corresponding `.lock` file manually.

In [None]:
for lock in locks:
    lock.release()
del locks  # delete would release anyway