Skip to content

Commit

Permalink
Create preprocessing.py (#238)
Browse files Browse the repository at this point in the history
* Create preprocessing.py

* add comments to preprocessing.py

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* use ndscaler in plot_financial_data

* typo

* missing import

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* missing import

* pytest import not required

* api doc

* correct docstring

* Change NDRobustScaler -> NdRobustScaler

* Update pyriemann_qiskit/utils/preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

* Update pyriemann_qiskit/utils/preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

* fix: syntax error

* [pre-commit.ci] auto fixes from pre-commit.com hooks

* Update pyriemann_qiskit/utils/preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

* Update pyriemann_qiskit/utils/preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

* Update pyriemann_qiskit/utils/preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

* Update tests/test_preprocessing.py

Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Quentin Barthélemy <q.barthelemy@gmail.com>
  • Loading branch information
3 people authored Jan 24, 2024
1 parent 402bbd2 commit 6a27692
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 33 deletions.
10 changes: 10 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ Hyper-parameters generation
get_spsa
get_spsa_parameters

Preprocessing
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _preprocessing_api:
.. currentmodule:: pyriemann_qiskit.utils.preprocessing

.. autosummary::
:toctree: generated/

NdRobustScaler

Filtering
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _filtering_api:
Expand Down
38 changes: 6 additions & 32 deletions examples/other_datasets/plot_financial_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
Expand All @@ -44,6 +44,7 @@
from pyriemann.estimation import XdawnCovariances
from pyriemann.utils.viz import plot_waveforms
from pyriemann_qiskit.classification import QuanticSVM
from pyriemann_qiskit.utils.preprocessing import NdRobustScaler
from matplotlib import pyplot as plt
import warnings
import pandas as pd
Expand All @@ -68,7 +69,7 @@

def plot_ERP(X, title, n=10, ylim=None, add_digest=False):
epochs = ToEpochs(n=n).transform(X)
reduced_centered_epochs = NDRobustScaler().fit_transform(epochs)
reduced_centered_epochs = NdRobustScaler().fit_transform(epochs)
fig = plot_waveforms(reduced_centered_epochs, "mean+/-std")
fig.axes[0].set_title(f"{title} ({len(X)})")
if ylim is None:
Expand All @@ -85,7 +86,7 @@ def plot_ERP(X, title, n=10, ylim=None, add_digest=False):


def merge_2axes(fig1, fig2, file_name1="f1.png", file_name2="f2.png"):
# Modified from [5]
# Modified from [4]
fig1.savefig(file_name1)
fig2.savefig(file_name2)
plt.close(fig1)
Expand Down Expand Up @@ -174,9 +175,6 @@ def plot_ERPs(X, y, n=10, label0="Fraud", label1="Genuine"):
lambda x: 2023 - x.year
)

# features["PK_TSINSERCION"] = pd.to_datetime(features["PK_TSINSERCION"])
# features["PK_TSINSERCION"] = features["PK_TSINSERCION"].apply(lambda x: x.value)

# Let's encode our categorical variables (LabelEncoding):
# features["IP_TERMINAL"] = features["IP_TERMINAL"].astype("category").cat.codes
le = LabelEncoder()
Expand Down Expand Up @@ -216,28 +214,6 @@ def transform(self, X):
return all_epochs


# Apply one scaler by channel:
# See Stackoverflow link for more details [4]
class NDRobustScaler(TransformerMixin):
def __init__(self):
self._scalers = []

def fit(self, X, y=None, **kwargs):
_, n_channels, _ = X.shape
self._scalers = []
for i in range(n_channels):
scaler = RobustScaler()
scaler.fit(X[:, i, :])
self._scalers.append(scaler)
return self

def transform(self, X, **kwargs):
n_channels = len(self._scalers)
for i in range(n_channels):
X[:, i, :] = self._scalers[i].transform(X[:, i, :])
return X


def slim(x, keep_diagonal=True):
# Vectorize covariance matrices by removing redundant information.
length = len(x) // 2
Expand Down Expand Up @@ -285,7 +261,7 @@ def transform(self, X):
# then adds at the end a classical SVM
pipe = make_pipeline(
ToEpochs(n=10),
NDRobustScaler(),
NdRobustScaler(),
XdawnCovariances(nfilter=1),
OptionalWhitening(process=True, n_components=4),
SlimVector(keep_diagonal=True),
Expand Down Expand Up @@ -491,6 +467,4 @@ def predict(self, X):
# https://zenodo.org/records/7418458
# .. [3] https://pyriemann.readthedocs.io/en/latest/auto_examples/ERP/plot_ERP.html
#
# [4] https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix
#
# [5] https://stackoverflow.com/questions/16748577
# .. [4] https://stackoverflow.com/questions/16748577
3 changes: 2 additions & 1 deletion pyriemann_qiskit/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import hyper_params_factory, filtering
from . import hyper_params_factory, filtering, preprocessing
from .quantum_provider import get_provider, get_devices, get_simulator
from .math import cov_to_corr_matrix
from .docplex import (
Expand All @@ -23,6 +23,7 @@
__all__ = [
"hyper_params_factory",
"filtering",
"preprocessing",
"get_provider",
"get_devices",
"get_simulator",
Expand Down
71 changes: 71 additions & 0 deletions pyriemann_qiskit/utils/preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import numpy as np
from sklearn.base import TransformerMixin
from sklearn.preprocessing import RobustScaler


class NdRobustScaler(TransformerMixin):
    """Apply one robust scaler by feature.

    RobustScaler of scikit-learn [1]_ is adapted to 3d inputs [2]_:
    one independent RobustScaler is fitted on each slice ``X[:, i, :]``.

    References
    ----------
    .. [1] \
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
    .. [2] \
        https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix

    Notes
    -----
    .. versionadded:: 0.2.0
    """

    def __init__(self):
        # One fitted RobustScaler per feature, in feature order.
        # Empty until `fit` has been called.
        self._scalers = []

    def fit(self, X, _y=None, **kwargs):
        """Fit one robust scaler on each feature of the training data.

        Parameters
        ----------
        X : ndarray, shape (n_matrices, n_features, n_samples)
            Training matrices.
        _y : None
            Unused. Kept for scikit-learn compatibility.
        **kwargs : dict
            Unused. Accepted for scikit-learn compatibility.

        Returns
        -------
        self : NdRobustScaler instance
            The NdRobustScaler instance.
        """
        # Unpacking also enforces that X is 3-dimensional.
        _, n_features, _ = X.shape
        # Rebuild the list from scratch so that refitting never keeps
        # scalers from a previous call.
        self._scalers = [
            RobustScaler().fit(X[:, i, :]) for i in range(n_features)
        ]
        return self

    def transform(self, X, **kwargs):
        """Apply the previously fitted robust scalers (one scaler by feature).

        The input is left untouched: a new array is returned.

        Parameters
        ----------
        X : ndarray, shape (n_matrices, n_features, n_samples)
            Matrices to scale.
        **kwargs : dict
            Unused. Accepted for scikit-learn compatibility.

        Returns
        -------
        X_new : ndarray, shape (n_matrices, n_features, n_samples)
            Scaled matrices.

        Raises
        ------
        ValueError
            If the number of features of X differs from the number of
            fitted scalers (this includes calling `transform` before `fit`).
        """
        _, n_features, _ = X.shape
        if n_features != len(self._scalers):
            raise ValueError(
                "Input has not the same number of features as the fitted scaler"
            )
        if n_features == 0:
            # Nothing to scale, and np.stack rejects an empty sequence.
            return X.copy()
        # Stack the per-feature results along axis 1 instead of writing them
        # back into X: the caller's array is not modified, and the output
        # dtype is the one produced by RobustScaler (no silent truncation
        # when X holds integers).
        return np.stack(
            [
                scaler.transform(X[:, i, :])
                for i, scaler in enumerate(self._scalers)
            ],
            axis=1,
        )
28 changes: 28 additions & 0 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
from pyriemann_qiskit.utils.preprocessing import NdRobustScaler
from sklearn.preprocessing import RobustScaler


def test_ndrobustscaler(get_covmats):
    """Check that NdRobustScaler scales each feature like a RobustScaler."""
    n_matrices, n_features = 5, 3

    X = get_covmats(n_matrices, n_features)
    # Snapshot the input before scaling: the scaler may overwrite X in place,
    # in which case slices of X taken afterwards would already be scaled and
    # the comparison below would not test anything.
    X_before = np.copy(X)

    scaler = NdRobustScaler()
    transformed_X = scaler.fit_transform(X)

    assert transformed_X.shape == X_before.shape

    # Check that each feature is scaled using RobustScaler
    for i in range(n_features):
        feature_before_scaling = X_before[:, i, :]
        feature_after_scaling = transformed_X[:, i, :]

        # Use RobustScaler to manually scale the feature and compare
        manual_scaled_feature = RobustScaler().fit_transform(
            feature_before_scaling
        )

        np.testing.assert_allclose(
            feature_after_scaling, manual_scaled_feature, rtol=1e-5
        )

0 comments on commit 6a27692

Please sign in to comment.