Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MATLAB-style implementations of the quantile and IQR functions #57

Merged
merged 5 commits into from Apr 10, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/whats_new.rst
Expand Up @@ -42,6 +42,7 @@ Bug
~~~
- Fixed RANSAC to give consistent results with a fixed seed across different chunk sizes, by `Austin Hurst`_ and `Yorguin Mantilla`_ (:gh:`43`)
- Fixed "bad channel by flat" threshold in :meth:`NoisyChannels.find_bad_by_nan_flat` to be consistent with MATLAB PREP, by `Austin Hurst`_ (:gh:`60`)
- Changed "bad channel by deviation" and "bad channel by correlation" detection code in :class:`NoisyChannels` to produce identical results to MATLAB PREP, by `Austin Hurst`_ (:gh:`57`)
sappelhoff marked this conversation as resolved.
Show resolved Hide resolved

API
~~~
Expand Down
11 changes: 5 additions & 6 deletions pyprep/find_noisy_channels.py
Expand Up @@ -3,12 +3,11 @@
import numpy as np
from mne.utils import check_random_state
from scipy import signal
from scipy.stats import iqr
from statsmodels import robust

from pyprep.ransac import find_bad_by_ransac
from pyprep.removeTrend import removeTrend
from pyprep.utils import filter_design
from pyprep.utils import filter_design, _mat_quantile, _mat_iqr


class NoisyChannels:
Expand Down Expand Up @@ -195,8 +194,8 @@ def find_bad_by_deviation(self, deviation_threshold=5.0):
deviation_channel_mask = [False] * (self.new_dimensions[0])
channel_deviation = np.zeros(self.new_dimensions[0])
for i in range(0, self.new_dimensions[0]):
channel_deviation[i] = 0.7413 * iqr(self.EEGData[i, :])
channel_deviationSD = 0.7413 * iqr(channel_deviation)
channel_deviation[i] = 0.7413 * _mat_iqr(self.EEGData[i, :])
channel_deviationSD = 0.7413 * _mat_iqr(channel_deviation)
channel_deviationMedian = np.nanmedian(channel_deviation)
robust_channel_deviation = np.divide(
np.subtract(channel_deviation, channel_deviationMedian), channel_deviationSD
Expand Down Expand Up @@ -325,12 +324,12 @@ def find_bad_by_correlation(
abs_corr = np.abs(
np.subtract(window_correlation, np.diag(np.diag(window_correlation)))
)
channel_correlation[k, :] = np.quantile(abs_corr, 0.98, axis=0)
channel_correlation[k, :] = _mat_quantile(abs_corr, 0.98, axis=0)
noiselevels[k, :] = np.divide(
robust.mad(np.subtract(data_portion, eeg_portion), c=1),
robust.mad(eeg_portion, c=1),
)
channel_deviations[k, :] = 0.7413 * iqr(data_portion, axis=0)
channel_deviations[k, :] = 0.7413 * _mat_iqr(data_portion, axis=0)
for i in range(0, w_correlation):
for j in range(0, self.new_dimensions[0]):
drop[i, j] = np.int(
Expand Down
66 changes: 66 additions & 0 deletions pyprep/utils.py
Expand Up @@ -5,6 +5,7 @@
import mne
import numpy as np
import scipy.interpolate
from scipy.stats import iqr
from psutil import virtual_memory


Expand All @@ -20,6 +21,71 @@ def _intersect(list1, list2):
return list(set(list1).intersection(set(list2)))


def _mat_quantile(arr, q, axis=None):
"""Calculate the numeric value at quantile (q) for a given distribution.
sappelhoff marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
arr : np.ndarray
Input array containing samples from the distribution to summarize.
q : float
The quantile to calculate for the input data. Must be between 0 and 1,
inclusive.
axis : {int, tuple of int, None}, optional
Axis along which quantile values should be calculated. Defaults to
calculating the value at the given quantile for the entire array.

Returns
-------
quantile : scalar or np.ndarray
If no axis is specified, returns the value at quantile (q) for the full
input array as a single numeric value. Otherwise, returns an
``np.ndarray`` containing the values at quantile (q) for each row along
the specified axis.

Notes
-----
MATLAB calculates quantiles using different logic than Numpy: Numpy treats
the provided values as a whole population, whereas MATLAB treats them as a
sample from a population of unknown size and adjusts quantiles accordingly.
This function mimics MATLAB's logic to produce identical results.

"""
q = np.asarray(q, dtype=np.float64)
n = len(arr)
q_adj = ((q - 0.5) * n / (n - 1)) + 0.5
return np.quantile(arr, np.clip(q_adj, 0, 1), axis=axis)


def _mat_iqr(arr, axis=None):
"""Calculate the inter-quartile range (IQR) for a given distribution.

Parameters
----------
arr : np.ndarray
Input array containing samples from the distribution to summarize.
axis : {int, tuple of int, None}, optional
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where did you get the idea to document the different options in curly brackets?

I usually do: axis : int | tuple of int | None, and then write what it defaults to in the description string below (not writing "optional" explicitly)

which doesn't mean that that's better (hence my question).

Copy link
Collaborator Author

@a-hurst a-hurst Apr 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, I just copy/pasted that bit directly from the docstring of numpy.quantile since that arguments getting passed unmodified to that function anyway. No rationale whatsoever on my part, just figured I'd go with whatever the Numpy maintainers thought was the correct format (I'm new to Numpy docstrings, I've previously only used Google-style ones for Python).

If you'd like I can change it to your preferred format for the sake of consistency across PyPREP!

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah thanks, I read the numpydoc styleguide again, and it seems like the curly brackets is advertised. I think I picked up the | style from MNE-Python. Let's stick with the curly braces here and in general slowly convert to that style whenever we touch (git diff) a line that still uses the | style in the remaining codebase.

Axis along which IQRs should be calculated. Defaults to calculating the
IQR for the entire array.

Returns
-------
iqr : scalar or np.ndarray
If no axis is specified, returns the IQR for the full input array as a
single numeric value. Otherwise, returns an ``np.ndarray`` containing
the IQRs for each row along the specified axis.

Notes
-----
See notes for :func:`utils._mat_quantile`.

"""
iqr_q = np.asarray([25, 75], dtype=np.float64)
n = len(arr)
iqr_adj = ((iqr_q - 50) * n / (n - 1)) + 50
return iqr(arr, rng=np.clip(iqr_adj, 0, 100), axis=axis)


def filter_design(N_order, amp, freq):
"""Create FIR low-pass filter for EEG data using frequency sampling method.

Expand Down
37 changes: 37 additions & 0 deletions tests/test_utils.py
@@ -0,0 +1,37 @@
"""Test various helper functions."""
import numpy as np

from pyprep.utils import _mat_quantile, _mat_iqr


def test_mat_quantile_iqr():
"""Test MATLAB-compatible quantile and IQR functions.

MATLAB code used to generate the comparison results:

.. code-block:: matlab

% Generate test data
rng(435656);
tst = rand(100, 3);

% Calculate IQR and 0.98 quantile for test data
quantile(tst, 0.98);
iqr(tst);

"""
# Generate test data
np.random.seed(435656)
sappelhoff marked this conversation as resolved.
Show resolved Hide resolved
tst = np.transpose(np.random.rand(3, 100))

# Create arrays containing MATLAB results
quantile_expected = np.asarray([0.9710, 0.9876, 0.9802])
iqr_expected = np.asarray([0.4776, 0.5144, 0.4851])

# Test quantile equivalence with MATLAB
quantile_actual = _mat_quantile(tst, 0.98, axis=0)
assert all(np.isclose(quantile_expected, quantile_actual, atol=0.001))

# Test IQR equivalence with MATLAB
iqr_actual = _mat_iqr(tst, axis=0)
assert all(np.isclose(iqr_expected, iqr_actual, atol=0.001))