[ENH] Add a bad channel detection method using LOF algorithm (#66)

Co-authored-by: Nicolas Barascud <10333715+nbara@users.noreply.github.com>
nbara · May 9, 2023 · 30de2e7 · 30de2e7
1 parent 5e95ee5
commit 30de2e7
Show file tree

Hide file tree

Showing 9 changed files with 171 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -3,7 +3,7 @@
 [![codecov](https://codecov.io/gh/nbara/python-meegkit/branch/master/graph/badge.svg)](https://codecov.io/gh/nbara/python-meegkit)
 [![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/nbara/python-meegkit/master)
 [![DOI](https://zenodo.org/badge/117451752.svg)](https://zenodo.org/badge/latestdoi/117451752)
-[![twitter](https://img.shields.io/twitter/follow/lebababa?label=Twitter&style=flat&logo=Twitter)](https://twitter.com/intent/follow?screen_name=lebababa)
+[![twitter](https://img.shields.io/twitter/follow/lebababa?style=flat&logo=Twitter)](https://twitter.com/intent/follow?screen_name=lebababa)
 
 # MEEGkit
 
@@ -54,7 +54,7 @@ Other available options are `[docs]` (which installs dependencies required to bu
 
 ## References
 
-### 1. CCA, STAR, SNS, DSS, ZapLine, and robust detrending
+### 1. CCA, STAR, SNS, DSS, ZapLine, and Robust Detrending
 
 This is mostly a translation of Matlab code from the [NoiseTools toolbox](http://audition.ens.fr/adc/NoiseTools/) by Alain de Cheveigné. It builds on an initial python implementation by [Pedro Alcocer](https://github.com/pealco).
 
@@ -83,10 +83,9 @@ If you use this code, you should cite the relevant methods from the original art
     Journal of Neuroscience Methods, 168(1), 195–202. https://doi.org/10.1016/j.jneumeth.2007.09.012
 [10] de Cheveigné, A., & Simon, J. Z. (2007). Denoising based on time-shift PCA.
      Journal of Neuroscience Methods, 165(2), 297–305. https://doi.org/10.1016/j.jneumeth.2007.06.003
-
 ```
 
-### 2. Artifact subspace reconstruction (ASR)
+### 2. Artifact Subspace Reconstruction (ASR)
 
 The base code is inspired from the original [EEGLAB inplementation](https://github.com/sccn/clean_rawdata) [1], while the riemannian variant [2] was adapted from the [rASR toolbox](https://github.com/s4rify/rASRMatlab) by Sarah Blum.
 
@@ -101,7 +100,7 @@ If you use this code, you should cite the relevant methods from the original art
     13, 141.
 ```
 
-### 3. Rhythmic entrainment source separation (RESS)
+### 3. Rhythmic Entrainment Source Separation (RESS)
 
 The code is based on [Matlab code from Mike X. Cohen](https://mikexcohen.com/data/) [1]
 
@@ -130,3 +129,15 @@ If you use this, you should cite the following articles:
     "High-speed spelling with a noninvasive brain-computer interface",
     Proc. Int. Natl. Acad. Sci. U. S. A, 112(44): E6058-6067, 2015.
 ```
+
+### 5. Local Outlier Factor (LOF)
+
+If you use this, you should cite the following article:
+
+```sql
+[1] Breunig M, Kriegel HP, Ng RT, Sander J. 2000. LOF: identifying density-based 
+    local outliers. SIGMOD Rec. 29, 2, 93-104. https://doi.org/10.1145/335191.335388
+[2] Kumaravel VP, Buiatti M, Parise E, Farella E. 2022. Adaptable and Robust 
+    EEG Bad Channel Detection Using Local Outlier Factor (LOF). Sensors (Basel). 
+    2022 Sep 27;22(19):7314. https://doi.org/10.3390/s22197314.
+```
diff --git a/doc/index.rst b/doc/index.rst
@@ -37,6 +37,7 @@ Here is a list of the methods and techniques available in ``meegkit``:
    ~meegkit.cca
    ~meegkit.dss
    ~meegkit.detrend
+   ~meegkit.lof
    ~meegkit.ress
    ~meegkit.sns
    ~meegkit.star

diff --git a/doc/modules/meegkit.lof.rst b/doc/modules/meegkit.lof.rst
@@ -0,0 +1,23 @@
+meegkit.lof
+===========
+
+.. automodule:: meegkit.lof
+
+   .. rubric:: Functions
+
+   .. autosummary::
+
+      LOF
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/meegkit/__init__.py b/meegkit/__init__.py
@@ -1,7 +1,7 @@
 """M/EEG denoising utilities in python."""
 __version__ = '0.1.3'
 
-from . import asr, cca, detrend, dss, sns, star, ress, trca, tspca, utils
+from . import asr, cca, detrend, dss, lof, sns, star, ress, trca, tspca, utils
 
-__all__ = ['asr', 'cca', 'detrend', 'dss', 'ress', 'sns', 'star', 'trca',
+__all__ = ['asr', 'cca', 'detrend', 'dss', 'lof', 'ress', 'sns', 'star', 'trca',
            'tspca', 'utils']
diff --git a/meegkit/lof.py b/meegkit/lof.py
@@ -0,0 +1,85 @@
+"""Local Outlier Factor (LOF)."""
+# Authors: Velu Prabhakar Kumaravel <vkumaravel@fbk.eu>
+# License: BSD-3-Clause
+
+import logging
+from sklearn.neighbors import LocalOutlierFactor
+
+
+class LOF():
+    """Local Outlier Factor.
+
+    Local Outlier Factor (LOF) is an automatic, density-based outlier detection
+    algorithm based on [1]_ and [2]_.
+
+    Parameters
+    ----------
+    n_neighbours : int
+        Number of neighbours defining the local neighbourhood.
+    metric: str in {'euclidean', 'nan_euclidean', 'cosine',
+                    'cityblock', 'manhattan'}
+        Metric to use for distance computation. Default is “euclidean”
+    threshold : float
+        Threshold to define outliers. Theoretical threshold ranges anywhere
+        between 1.0 and any integer. Default: 1.5
+
+    Notes
+    -----
+    It is recommended to perform a CV (e.g., 10-fold) on training set to
+    calibrate this parameter for the given M/EEG dataset.
+
+    See [2]_ for details.
+
+    References
+    ----------
+    .. [1] Breunig M, Kriegel HP, Ng RT, Sander J.
+        2000. LOF: identifying density-based local outliers.
+        SIGMOD Rec. 29, 2, 93-104. https://doi.org/10.1145/335191.335388
+    .. [2] Kumaravel VP, Buiatti M, Parise E, Farella E.
+        2022. Adaptable and Robust EEG Bad Channel Detection Using
+        Local Outlier Factor (LOF). Sensors (Basel). 2022 Sep 27;22(19):7314.
+        doi: 10.3390/s22197314. PMID: 36236413; PMCID: PMC9571252.
+
+    """
+
+    def __init__(self, n_neighbors=20,  metric='euclidean',
+                 threshold=1.5, **kwargs):
+
+        self.n_neighbors = n_neighbors
+        self.metric = metric
+        self.threshold = threshold
+
+    def predict(self, X):
+        """Detect bad channels using Local Outlier Factor algorithm.
+
+        Parameters
+        ----------
+        X : array, shape=(n_channels, n_samples)
+            The data X should have been high-pass filtered.
+
+        Returns
+        -------
+        bad_channel_indices : Detected bad channel indices.
+
+        """
+        if X.ndim == 3:  # in case the input data is epoched
+            logging.warning('Expected input data with shape '
+                            '(n_channels, n_samples)')
+            return []
+
+        if self.n_neighbors >= X.shape[0]:
+            logging.warning('Number of neighbours cannot be greater than the '
+                            'number of channels')
+            return []
+
+        if self.threshold < 1.0:
+            logging.warning('Invalid threshold. Try a positive integer >= 1.0')
+            return []
+
+        clf = LocalOutlierFactor(self.n_neighbors)
+        logging.debug('[LOF] Predicting bad channels')
+        clf.fit_predict(X)
+        lof_scores = clf.negative_outlier_factor_
+        bad_channel_indices = -lof_scores >= self.threshold
+
+        return bad_channel_indices
diff --git a/tests/data/lofdata.mat b/tests/data/lofdata.mat
diff --git a/tests/test_asr.py b/tests/test_asr.py
@@ -9,8 +9,6 @@
 from meegkit.utils.matrix import sliding_window
 from scipy import signal
 
-np.random.seed(9)
-
 # Data files
 THIS_FOLDER = os.path.dirname(os.path.abspath(__file__))
 # file = os.path.join(THIS_FOLDER, 'data', 'eeg_raw.fif')
@@ -175,6 +173,7 @@ def test_asr_functions(show=False, method='riemann'):
 @pytest.mark.parametrize(argnames='reref', argvalues=(False, True))
 def test_asr_class(method, reref, show=False):
     """Test ASR class (simulate online use)."""
+    np.random.default_rng(9)
     raw = np.load(os.path.join(THIS_FOLDER, 'data', 'eeg_raw.npy'))
     sfreq = 250
     # Train on a clean portion of data

diff --git a/tests/test_lof.py b/tests/test_lof.py
@@ -0,0 +1,37 @@
+"""LOF test."""
+import os
+
+import numpy as np
+import pytest
+import scipy.io as sio
+
+from meegkit.lof import LOF
+
+np.random.seed(9)
+
+# Data files
+THIS_FOLDER = os.path.dirname(os.path.abspath(__file__)) # data folder of MEEGKIT
+
+
+@pytest.mark.parametrize(argnames='n_neighbors', argvalues=(8, 20, 40, 2048))
+def test_lof(n_neighbors, show=False):
+    mat = sio.loadmat(os.path.join(THIS_FOLDER, 'data', 'lofdata.mat'))
+    X = mat['X']
+    lof = LOF(n_neighbors)
+    bad_channel_indices = lof.predict(X)
+    print(bad_channel_indices)
+
+@pytest.mark.parametrize(argnames='metric',
+                         argvalues=('euclidean', 'nan_euclidean',
+                                    'cosine', 'cityblock', 'manhattan'))
+def test_lof2(metric, show=False):
+    mat = sio.loadmat(os.path.join(THIS_FOLDER, 'data', 'lofdata.mat'))
+    X = mat['X']
+    lof = LOF(20, metric)
+    bad_channel_indices = lof.predict(X)
+    print(bad_channel_indices)
+
+if __name__ == "__main__":
+    pytest.main([__file__])
+    #test_lof(20, True)
+    #test_lof(metric='euclidean')
diff --git a/tests/test_tspca.py b/tests/test_tspca.py
@@ -51,7 +51,8 @@ def test_tspca_sns_dss():  # TODO
     y_tspca_sns_dss = fold(
         np.dot(unfold(y_tspca_sns), todss), y_tspca_sns.shape[0])
 
-    return y_tspca, y_tspca_sns, y_tspca_sns_dss
+    # TODO do something with it
+    assert y_tspca_sns_dss.shape == noisy_data.shape
 
 
 def test_tsr(show=True):
@@ -107,6 +108,7 @@ def test_tsr(show=True):
         plt.show()
 
 if __name__ == '__main__':
-    # import pytest
-    # pytest.main([__file__])
-    test_tsr()
+    import pytest
+    pytest.main([__file__])
+    # test_tspca_sns_dss()
+    # test_tsr()