Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improving feature names #42

Merged
merged 6 commits into from
Sep 28, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/plot_mne_sample_features.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
# Prepare for the classification task:

pipe = Pipeline([('scaler', StandardScaler()),
('lr', LogisticRegression(random_state=42))])
('lr', LogisticRegression(random_state=42, solver='lbfgs'))])
y = labels

###############################################################################
Expand Down
2 changes: 1 addition & 1 deletion examples/plot_mne_sample_gridsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
selected_funcs=['app_entropy',
'mean'])),
('scaler', StandardScaler()),
('clf', LogisticRegression(random_state=42))])
('clf', LogisticRegression(random_state=42, solver='lbfgs'))])
skf = StratifiedKFold(n_splits=3, random_state=42)
y = labels

Expand Down
2 changes: 1 addition & 1 deletion examples/plot_user_defined_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def compute_medfilt(arr):
pipe = Pipeline([('fe', FeatureExtractor(sfreq=raw.info['sfreq'],
selected_funcs=selected_funcs)),
('scaler', StandardScaler()),
('clf', LogisticRegression(random_state=42))])
('clf', LogisticRegression(random_state=42, solver='lbfgs'))])
skf = StratifiedKFold(n_splits=3, random_state=42)
y = labels

Expand Down
36 changes: 25 additions & 11 deletions mne_features/feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from .bivariate import get_bivariate_funcs
from .univariate import get_univariate_funcs
from .utils import _get_python_func


class FeatureFunctionTransformer(FunctionTransformer):
Expand Down Expand Up @@ -69,10 +70,32 @@ def transform(self, X, y='deprecated'):
self.output_shape_ = X_out.shape[0]
return X_out

def fit(self, X, y=None):
"""Fit the FeatureFunctionTransformer (does not extract features).

Parameters
----------
X : ndarray, shape (n_channels, n_times)

y : ignored

Returns
-------
self
"""
self._check_input(X)
_feature_func = _get_python_func(self.func)
if hasattr(_feature_func, 'get_feature_names'):
_params = self.get_params()
self.feature_names_ = _feature_func.get_feature_names(X, **_params)
return self

def get_feature_names(self):
"""Mapping of the feature indices to feature names."""
if not hasattr(self, 'output_shape_'):
raise ValueError('Call `transform` or `fit_transform` first.')
raise ValueError('Call `fit_transform` first.')
elif hasattr(self, 'feature_names_'):
return self.feature_names_
else:
return np.arange(self.output_shape_).astype(str)

Expand All @@ -85,16 +108,7 @@ def get_params(self, deep=True):
If True, the method will get the parameters of the transformer.
(See :class:`~sklearn.preprocessing.FunctionTransformer`).
"""
_params = super(FeatureFunctionTransformer, self).get_params(deep=deep)
if hasattr(_params['func'], 'func'):
# If `_params['func'] is of type `functools.partial`
func_to_inspect = _params['func'].func
elif hasattr(_params['func'], 'py_func'):
# If `_params['func'] is a jitted Python function
func_to_inspect = _params['func'].py_func
else:
# If `_params['func'] is an actual Python function
func_to_inspect = _params['func']
func_to_inspect = _get_python_func(self.func)
# Get code object from the function
if hasattr(func_to_inspect, 'func_code'):
func_code = func_to_inspect.func_code
Expand Down
2 changes: 1 addition & 1 deletion mne_features/tests/test_feature_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def test_gridsearch_feature_extractor():
('clf', CheckingClassifier(
check_X=lambda arr: arr.shape[1:] == (X.shape[1],)))])
params_grid = {'FE__higuchi_fd__kmax': [5, 10]}
gs = GridSearchCV(estimator=pipe, param_grid=params_grid)
gs = GridSearchCV(estimator=pipe, param_grid=params_grid, cv=3)
gs.fit(X, y)
assert_equal(hasattr(gs, 'cv_results_'), True)

Expand Down
66 changes: 66 additions & 0 deletions mne_features/tests/test_univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import numpy as np
from numpy.testing import assert_equal, assert_almost_equal, assert_raises

from mne_features.feature_extraction import extract_features
from mne_features.univariate import (_slope_lstsq, _accumulate_std,
_accumulate_min, _accumulate_max,
compute_mean, compute_variance,
Expand Down Expand Up @@ -178,6 +179,49 @@ def test_pow_freq_bands():
ratios='only'), expected_ratios)


def test_feature_names_pow_freq_bands():
_data = data[:, :2, :] # keep only 2 channels for the sake of simplicity
selected_funcs = ['pow_freq_bands']
fb1 = np.array([[4., 8.], [30., 70.]])
fb2 = {'theta': [4, 8], 'low-gamma': np.array([30, 70])}
_fb = [fb1, fb2]
ratios_col_names1 = ['ch0_band0/band1', 'ch0_band1/band0',
'ch1_band0/band1', 'ch1_band1/band0']
ratios_col_names2 = ['ch0_theta/low-gamma', 'ch0_low-gamma/theta',
'ch1_theta/low-gamma', 'ch1_low-gamma/theta']
_ratios_names = [ratios_col_names1, ratios_col_names2]
pow_col_names1 = ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1']
pow_col_names2 = ['ch0_theta', 'ch0_low-gamma',
'ch1_theta', 'ch1_low-gamma']
_pow_names = [pow_col_names1, pow_col_names2]

for fb, ratios_names, pow_names in zip(_fb, _ratios_names, _pow_names):
# With `ratios = 'only'`:
df_only = extract_features(
_data, sfreq, selected_funcs,
funcs_params={'pow_freq_bands__ratios': 'only',
'pow_freq_bands__freq_bands': fb},
return_as_df=True)
assert_equal(df_only.columns.get_level_values(1).values, ratios_names)

# With `ratios = 'all'`:
df_all = extract_features(
_data, sfreq, selected_funcs,
funcs_params={'pow_freq_bands__ratios': 'all',
'pow_freq_bands__freq_bands': fb},
return_as_df=True)
assert_equal(df_all.columns.get_level_values(1).values,
pow_names + ratios_names)

# With `ratios = None`:
df = extract_features(
_data, sfreq, selected_funcs,
funcs_params={'pow_freq_bands__ratios': None,
'pow_freq_bands__freq_bands': fb},
return_as_df=True)
assert_equal(df.columns.get_level_values(1).values, pow_names)


def test_hjorth_mobility_spect():
expected = 0.005 * (5 ** 2) + 0.00125 * (33 ** 2)
assert_almost_equal(compute_hjorth_mobility_spect(sfreq, data_sin),
Expand Down Expand Up @@ -255,6 +299,26 @@ def test_energy_freq_bands():
assert_equal(band_energy > 0.98 * tot_energy, True)


def test_feature_names_energy_freq_bands():
_data = data[:, :2, :] # keep only 2 channels for the sake of simplicity
selected_funcs = ['energy_freq_bands']
fb1 = np.array([[4., 8.], [30., 70.]])
fb2 = {'theta': [4, 8], 'low-gamma': np.array([30, 70])}
_fb = [fb1, fb2]
expected_names1 = ['ch0_band0', 'ch0_band1', 'ch1_band0', 'ch1_band1']
expected_names2 = ['ch0_theta', 'ch0_low-gamma',
'ch1_theta', 'ch1_low-gamma']
_expected_names = [expected_names1, expected_names2]

for fb, feat_names in zip(_fb, _expected_names):

df = extract_features(
_data, sfreq, selected_funcs,
funcs_params={'energy_freq_bands__freq_bands': fb},
return_as_df=True)
assert_equal(df.columns.get_level_values(1).values, feat_names)


def test_spect_slope():
"""Test for :func:`compute_powercurve_deviation`.

Expand Down Expand Up @@ -368,13 +432,15 @@ def test_shape_output_teager_kaiser_energy():
test_samp_entropy()
test_decorr_time()
test_pow_freq_bands()
test_feature_names_pow_freq_bands()
test_hjorth_mobility_spect()
test_hjorth_complexity_spect()
test_hjorth_mobility()
test_hjorth_complexity()
test_higuchi_fd()
test_katz_fd()
test_energy_freq_bands()
test_feature_names_energy_freq_bands()
test_spect_slope()
test_spect_entropy()
test_spect_edge_freq()
Expand Down
109 changes: 85 additions & 24 deletions mne_features/univariate.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,17 +566,21 @@ def compute_pow_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8., 13.,

data : ndarray, shape (n_channels, n_times)

freq_bands : ndarray, shape (n_freq_bands + 1,) or (n_freq_bands, 2)
Array defining the frequency bands. If ``freq_bands`` has shape
``(n_freq_bands + 1,)`` the entries of ``freq_bands`` define
``n_freq_bands`` **contiguous** frequency bands as follows: the i-th
frequency bands is defined as [freq_bands[i], freq_bands[i + 1]]
(0 <= i <= n_freq_bands - 1). If ``freq_bands`` has shape
``(n_freq_bands, 2)``, the rows of ``freq_bands`` define
``n_freq_bands`` **non-contiguous** frequency bands. By default,
``freq_bands`` is: ``np.array([0.5, 4., 8., 13., 30., 100.])``.
The entries of ``freq_bands`` should be between 0 and sfreq / 2 (the
Nyquist frequency) as the function uses the one-sided PSD.
freq_bands : ndarray or dict (default: np.array([.5, 4, 8, 13, 30, 100]))
The parameter ``freq_bands`` should be either a ndarray with shape
``(n_freq_bands + 1,)`` or ``(n_freq_bands, 2)`` or a dict. If ndarray
with shape ``(n_freq_bands + 1,)``, the entries define **contiguous**
frequency bands as follows: the i-th frequency band is defined as:
[freq_bands[i], freq_bands[i + 1]] (0 <= i <= n_freq_bands - 1). If
ndarray with shape ``(n_freq_bands, 2)``, the rows of ``freq_bands``
define **non-contiguous** frequency bands. If dict, the keys should be
strings (names of the frequency bands) and the values, the
corresponding bands (as ndarray with shape (2,) or list of length 2).
When ``freq_bands`` is of type dict, the keys are used to generate the
feature names (only used when features are extracted with
``return_as_df=True``). The values of ``freq_bands`` should be between
0 and sfreq / 2 (the Nyquist frequency) as the function uses the
one-sided PSD.

normalize : bool (default: True)
If True, the average power in each frequency band is normalized by
Expand Down Expand Up @@ -607,7 +611,11 @@ def compute_pow_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8., 13.,
Neuroscience Methods, 200(2), 257-271.
"""
n_channels = data.shape[0]
fb = _freq_bands_helper(sfreq, freq_bands)
if isinstance(freq_bands, dict):
_freq_bands = np.asarray([freq_bands[n] for n in freq_bands])
else:
_freq_bands = np.asarray(freq_bands)
fb = _freq_bands_helper(sfreq, _freq_bands)
n_freq_bands = fb.shape[0]
psd, freqs = power_spectrum(sfreq, data, return_db=False)
pow_freq_bands = np.empty((n_channels, n_freq_bands))
Expand All @@ -634,6 +642,33 @@ def compute_pow_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8., 13.,
return band_ratios.ravel()


def _compute_pow_freq_bands_feat_names(data, freq_bands, normalize, ratios):
"""Utility function to create feature names compatible with the output
of :func:`compute_pow_freq_bands`."""
n_channels = data.shape[0]
if isinstance(freq_bands, dict):
n_freq_bands = len(freq_bands)
_band_names = [str(n) for n in freq_bands]
else:
n_freq_bands = (freq_bands.shape[0] - 1 if freq_bands.ndim == 1
else freq_bands.shape[0])
_band_names = ['band' + str(j) for j in range(n_freq_bands)]
ratios_names = ['ch%s_%s/%s' % (ch, _band_names[i], _band_names[j])
for ch in range(n_channels) for _, i, j in
_idxiter(n_freq_bands, triu=False)]
pow_names = ['ch%s_%s' % (ch, _band_names[i]) for ch in
range(n_channels) for i in range(n_freq_bands)]
if ratios is None:
return pow_names
elif ratios == 'only':
return ratios_names
else:
return pow_names + ratios_names


compute_pow_freq_bands.get_feature_names = _compute_pow_freq_bands_feat_names


def compute_hjorth_mobility_spect(sfreq, data, normalize=False):
"""Hjorth mobility (per channel).

Expand Down Expand Up @@ -1135,17 +1170,21 @@ def compute_energy_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8.,

data : ndarray, shape (n_channels, n_times)

freq_bands : ndarray, shape (n_freq_bands + 1,) or (n_freq_bands, 2)
Array defining the frequency bands. If ``freq_bands`` has shape
``(n_freq_bands + 1,)`` the entries of ``freq_bands`` define
``n_freq_bands`` **contiguous** frequency bands as follows: the i-th
frequency bands is defined as [freq_bands[i], freq_bands[i + 1]]
(0 <= i <= n_freq_bands - 1). If ``freq_bands`` has shape
``(n_freq_bands, 2)``, the rows of ``freq_bands`` define
``n_freq_bands`` **non-contiguous** frequency bands. By default,
``freq_bands`` is: ``np.array([0.5, 4., 8., 13., 30., 100.])``.
The entries of ``freq_bands`` should be between 0 and sfreq / 2 (the
Nyquist frequency) as the function uses the one-sided PSD.
freq_bands : ndarray or dict (default: np.array([.5, 4, 8, 13, 30, 100]))
The parameter ``freq_bands`` should be either a ndarray with shape
``(n_freq_bands + 1,)`` or ``(n_freq_bands, 2)`` or a dict. If ndarray
with shape ``(n_freq_bands + 1,)``, the entries define **contiguous**
frequency bands as follows: the i-th frequency band is defined as:
[freq_bands[i], freq_bands[i + 1]] (0 <= i <= n_freq_bands - 1). If
ndarray with shape ``(n_freq_bands, 2)``, the rows of ``freq_bands``
define **non-contiguous** frequency bands. If dict, the keys should be
strings (names of the frequency bands) and the values, the
corresponding bands (as ndarray with shape (2,) or list of length 2).
When ``freq_bands`` is of type dict, the keys are used to generate the
feature names (only used when features are extracted with
``return_as_df=True``). The values of ``freq_bands`` should be between
0 and sfreq / 2 (the Nyquist frequency) as the function uses the
one-sided PSD.

deriv_filt : bool (default: False)
If True, a derivative filter is applied to the input data before
Expand All @@ -1165,7 +1204,11 @@ def compute_energy_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8.,
detection using intracranial EEG. Epilepsy & Behavior, 22, S29-S35.
"""
n_channels = data.shape[0]
fb = _freq_bands_helper(sfreq, freq_bands)
if isinstance(freq_bands, dict):
_freq_bands = np.asarray([freq_bands[n] for n in freq_bands])
else:
_freq_bands = np.asarray(freq_bands)
fb = _freq_bands_helper(sfreq, _freq_bands)
n_freq_bands = fb.shape[0]
band_energy = np.empty((n_channels, n_freq_bands))
if deriv_filt:
Expand All @@ -1178,6 +1221,24 @@ def compute_energy_freq_bands(sfreq, data, freq_bands=np.array([0.5, 4., 8.,
return band_energy.ravel()


def _compute_energy_fb_feat_names(data, freq_bands, deriv_filt):
"""Utility function to create feature names compatible with the output of
:func:`mne_features.univariate.compute_energy_freq_bands`."""
n_channels = data.shape[0]
if isinstance(freq_bands, dict):
n_freq_bands = len(freq_bands)
_band_names = [str(n) for n in freq_bands]
else:
n_freq_bands = (freq_bands.shape[0] - 1 if freq_bands.ndim == 1
else freq_bands.shape[0])
_band_names = ['band' + str(j) for j in range(n_freq_bands)]
return ['ch%s_%s' % (ch, _band_names[i]) for ch in range(n_channels) for
i in range(n_freq_bands)]


compute_energy_freq_bands.get_feature_names = _compute_energy_fb_feat_names


def compute_spect_edge_freq(sfreq, data, ref_freq=None, edge=None):
"""Spectal Edge Frequency (per channel).

Expand Down
23 changes: 23 additions & 0 deletions mne_features/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,29 @@ def _get_feature_funcs(sfreq, module_name):
return feature_funcs


def _get_python_func(func):
"""Get the Python function underlying partial or jitted functions.

Parameters
----------
func : function or instance of `functools.partial` or jitted function.
Transfomed feature function.

Returns
-------
function
"""
if hasattr(func, 'func'):
# If `func` is of type `functools.partial`
return func.func
elif hasattr(func, 'py_func'):
# If `func` is a jitted Python function
return func.py_func
else:
# If `func` is an actual Python function
return func


def _wavelet_coefs(data, wavelet_name='db4'):
"""Compute Discrete Wavelet Transform coefficients.

Expand Down