sappelhoff · sappelhoff · May 7, 2021 · May 6, 2021 · May 6, 2021 · May 6, 2021
diff --git a/docs/whats_new.rst b/docs/whats_new.rst
@@ -53,6 +53,7 @@ Bug
 - Changed "bad channel by deviation" and "bad channel by correlation" detection code in :class:`NoisyChannels` to compute IQR and quantiles in the same manner as MATLAB, thus producing identical results to MATLAB PREP, by `Austin Hurst`_ (:gh:`57`)
 - Fixed a bug where EEG data was getting reshaped into RANSAC windows incorrectly (channel samples were not sequential), which was causing considerable variability and noise in RANSAC results, by `Austin Hurst`_ (:gh:`67`)
 - Fixed RANSAC to avoid making unnecessary signal predictions for known-bad channels, matching MATLAB behaviour and reducing RAM requirements, by `Austin Hurst`_ (:gh:`72`)
+- Fixed a bug in :meth:`NoisyChannels.find_bad_by_correlation` that prevented it from being able to handle channels with dropouts (intermittent flat regions), by `Austin Hurst`_ (:gh:`81`).
 
 API
 ~~~

diff --git a/pyprep/find_noisy_channels.py b/pyprep/find_noisy_channels.py
@@ -366,23 +366,25 @@ def find_bad_by_correlation(
         for k in range(0, w_correlation):
             eeg_portion = np.transpose(np.squeeze(EEG_new_win[:, :, k]))
             data_portion = np.transpose(np.squeeze(data_win[:, :, k]))
-            window_correlation = np.corrcoef(np.transpose(eeg_portion))
+            with np.errstate(invalid='ignore'):  # suppress divide-by-zero warnings
+                window_correlation = np.corrcoef(np.transpose(eeg_portion))
             abs_corr = np.abs(
                 np.subtract(window_correlation, np.diag(np.diag(window_correlation)))
             )
             channel_correlation[k, :] = _mat_quantile(abs_corr, 0.98, axis=0)
-            noiselevels[k, :] = np.divide(
-                robust.mad(np.subtract(data_portion, eeg_portion), c=1),
-                robust.mad(eeg_portion, c=1),
-            )
+            with np.errstate(invalid='ignore'):  # suppress divide-by-zero warnings
+                noiselevels[k, :] = np.divide(
+                    robust.mad(np.subtract(data_portion, eeg_portion), c=1),
+                    robust.mad(eeg_portion, c=1),
+                )
             channel_deviations[k, :] = 0.7413 * _mat_iqr(data_portion, axis=0)
         for i in range(0, w_correlation):
             for j in range(0, self.new_dimensions[0]):
                 drop[i, j] = np.int(
                     np.isnan(channel_correlation[i, j]) or np.isnan(noiselevels[i, j])
                 )
                 if drop[i, j] == 1:
-                    channel_deviations[i, j] = 0
+                    channel_correlation[i, j] = 0
                     noiselevels[i, j] = 0
         maximum_correlations[self.channels_interpolate, :] = np.transpose(
             channel_correlation

diff --git a/pyprep/utils.py b/pyprep/utils.py
@@ -5,7 +5,6 @@
 import mne
 import numpy as np
 import scipy.interpolate
-from scipy.stats import iqr
 from scipy.signal import firwin, lfilter, lfilter_zi
 from psutil import virtual_memory
 
@@ -74,10 +73,27 @@ def _mat_quantile(arr, q, axis=None):
     This function mimics MATLAB's logic to produce identical results.
 
     """
+    def _mat_quantile_1d(arr, q):
+        arr = arr[~np.isnan(arr)]
+        n = len(arr)
+        if n == 0:
+            return np.NaN
+        elif n == 1:
+            return arr[0]
+        else:
+            q_adj = ((q - 0.5) * n / (n - 1)) + 0.5
+            return np.quantile(arr, np.clip(q_adj, 0, 1))
+
+    # Make sure inputs are both Numpy arrays
+    arr = np.asarray(arr)
     q = np.asarray(q, dtype=np.float64)
-    n = len(arr)
-    q_adj = ((q - 0.5) * n / (n - 1)) + 0.5
-    return np.quantile(arr, np.clip(q_adj, 0, 1), axis=axis)
+
+    if axis is not None and len(arr.shape) > 1:
+        # If an axis is specified, calculate quantiles along it
+        return np.apply_along_axis(_mat_quantile_1d, axis, arr, q)
+    else:
+        # Otherwise, calculate the quantile for the full sample
+        return _mat_quantile_1d(arr, q)
 
 
 def _mat_iqr(arr, axis=None):
@@ -103,10 +119,7 @@ def _mat_iqr(arr, axis=None):
     See notes for :func:`utils._mat_quantile`.
 
     """
-    iqr_q = np.asarray([25, 75], dtype=np.float64)
-    n = len(arr)
-    iqr_adj = ((iqr_q - 50) * n / (n - 1)) + 50
-    return iqr(arr, rng=np.clip(iqr_adj, 0, 100), axis=axis)
+    return _mat_quantile(arr, 0.75, axis) - _mat_quantile(arr, 0.25, axis)
 
 
 def _eeglab_create_highpass(cutoff, srate):

diff --git a/tests/test_find_noisy_channels.py b/tests/test_find_noisy_channels.py
@@ -85,6 +85,23 @@ def test_findnoisychannels(raw, montage):
     nd = NoisyChannels(raw_tmp, random_state=rng)
     nd.find_bad_by_correlation()
     assert rand_chn_lab in nd.bad_by_correlation
+    bad_by_correlation_orig = nd.bad_by_correlation  # save for dropout tests
+
+    # Test for channels with signal dropouts (reuse data from correlation tests)
+    dropout_idx = rand_chn_idx - 1 if rand_chn_idx > 0 else 1
+    # Make 2nd and 4th quarters of the dropout channel completely flat
+    raw_tmp._data[dropout_idx, :int(n/4)] = 0
+    raw_tmp._data[dropout_idx, int(3*n/4):] = 0
+    # Run correlation and dropout detection on data
+    nd = NoisyChannels(raw_tmp, random_state=rng)
+    nd.find_bad_by_correlation()  # also does dropout detection
+    # Test if dropout channel detected correctly
+    assert raw_tmp.ch_names[dropout_idx] in nd.bad_by_dropout
+    # Test if correlations still detected correctly
+    bad_orig_plus_dropout = bad_by_correlation_orig + nd.bad_by_dropout
+    same_bads = set(nd.bad_by_correlation) == set(bad_by_correlation_orig)
+    same_plus_dropout = set(nd.bad_by_correlation) == set(bad_orig_plus_dropout)
+    assert same_bads or same_plus_dropout
 
     # Test for high freq noise detection
     raw_tmp = raw.copy()

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -33,6 +33,11 @@ def test_mat_quantile_iqr():
        quantile(tst, 0.98);
        iqr(tst);
 
+       % Add NaNs to input and re-test
+       tst(1, :) = nan;
+       quantile(tst, 0.98);
+       iqr(tst);
+
     """
     # Generate test data
     np.random.seed(435656)
@@ -50,6 +55,22 @@ def test_mat_quantile_iqr():
     iqr_actual = _mat_iqr(tst, axis=0)
     assert all(np.isclose(iqr_expected, iqr_actual, atol=0.001))
 
+    # Add NaNs to test data
+    tst_nan = tst.copy()
+    tst_nan[0, :] = np.NaN
+
+    # Create arrays containing MATLAB results for NaN test case
+    quantile_expected = np.asarray([0.9712, 0.9880, 0.9807])
+    iqr_expected = np.asarray([0.4764, 0.5188, 0.5044])
+
+    # Test quantile equivalence with MATLAB for array with NaN
+    quantile_actual = _mat_quantile(tst_nan, 0.98, axis=0)
+    assert all(np.isclose(quantile_expected, quantile_actual, atol=0.001))
+
+    # Test IQR equivalence with MATLAB for array with NaN
+    iqr_actual = _mat_iqr(tst_nan, axis=0)
+    assert all(np.isclose(iqr_expected, iqr_actual, atol=0.001))
+
 
 def test_get_random_subset():
     """Test the function for getting random channel subsets."""