Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create preprocessing.py #238

Merged
merged 22 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,16 @@ Hyper-parameters generation
get_spsa
get_spsa_parameters

Preprocessing
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _preprocessing_api:
.. currentmodule:: pyriemann_qiskit.utils.preprocessing

.. autosummary::
:toctree: generated/

NdRobustScaler

Filtering
~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. _filtering_api:
Expand Down
38 changes: 6 additions & 32 deletions examples/other_datasets/plot_financial_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, LabelEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
Expand All @@ -44,6 +44,7 @@
from pyriemann.estimation import XdawnCovariances
from pyriemann.utils.viz import plot_waveforms
from pyriemann_qiskit.classification import QuanticSVM
from pyriemann_qiskit.utils.preprocessing import NdRobustScaler
from matplotlib import pyplot as plt
import warnings
import pandas as pd
Expand All @@ -68,7 +69,7 @@

def plot_ERP(X, title, n=10, ylim=None, add_digest=False):
epochs = ToEpochs(n=n).transform(X)
reduced_centered_epochs = NDRobustScaler().fit_transform(epochs)
reduced_centered_epochs = NdRobustScaler().fit_transform(epochs)
fig = plot_waveforms(reduced_centered_epochs, "mean+/-std")
fig.axes[0].set_title(f"{title} ({len(X)})")
if ylim is None:
Expand All @@ -85,7 +86,7 @@ def plot_ERP(X, title, n=10, ylim=None, add_digest=False):


def merge_2axes(fig1, fig2, file_name1="f1.png", file_name2="f2.png"):
# Modified from [5]
# Modified from [4]
fig1.savefig(file_name1)
fig2.savefig(file_name2)
plt.close(fig1)
Expand Down Expand Up @@ -174,9 +175,6 @@ def plot_ERPs(X, y, n=10, label0="Fraud", label1="Genuine"):
lambda x: 2023 - x.year
)

# features["PK_TSINSERCION"] = pd.to_datetime(features["PK_TSINSERCION"])
# features["PK_TSINSERCION"] = features["PK_TSINSERCION"].apply(lambda x: x.value)

# Let's encode our categorical variables (LabelEncoding):
# features["IP_TERMINAL"] = features["IP_TERMINAL"].astype("category").cat.codes
le = LabelEncoder()
Expand Down Expand Up @@ -216,28 +214,6 @@ def transform(self, X):
return all_epochs


# Apply one scaler by channel:
# See Stackoverflow link for more details [4]
class NDRobustScaler(TransformerMixin):
    """Scale 3d data with one scikit-learn ``RobustScaler`` per channel.

    The input is indexed as ``X[:, i, :]``, i.e. the second axis is the
    channel axis; each channel slice is scaled by its own ``RobustScaler``.
    """

    def __init__(self):
        # One fitted RobustScaler per channel, filled by `fit`.
        self._scalers = []

    def fit(self, X, y=None, **kwargs):
        """Fit one ``RobustScaler`` on each channel slice ``X[:, i, :]``.

        Parameters
        ----------
        X : ndarray, 3 dimensions (channels on the second axis)
            Training data.
        y : None
            Unused.
        **kwargs
            Unused.

        Returns
        -------
        self : NDRobustScaler instance
            The fitted instance.
        """
        _, n_channels, _ = X.shape
        self._scalers = [
            RobustScaler().fit(X[:, i, :]) for i in range(n_channels)
        ]
        return self

    def transform(self, X, **kwargs):
        """Scale each channel of ``X`` with its fitted scaler.

        Parameters
        ----------
        X : ndarray, 3 dimensions (channels on the second axis)
            Data to scale.
        **kwargs
            Unused.

        Returns
        -------
        X_scaled : ndarray, same shape as ``X``
            Scaled copy of ``X``; the input array is left untouched.
        """
        # Scale into a copy so the caller's array is not overwritten.
        # Integer/boolean inputs are promoted to float first, otherwise the
        # scaled (fractional) values would be truncated on assignment.
        X_scaled = X.astype(float) if X.dtype.kind in "biu" else X.copy()
        for i, scaler in enumerate(self._scalers):
            X_scaled[:, i, :] = scaler.transform(X[:, i, :])
        return X_scaled


def slim(x, keep_diagonal=True):
# Vectorize covariance matrices by removing redundant information.
length = len(x) // 2
Expand Down Expand Up @@ -285,7 +261,7 @@ def transform(self, X):
# then adds at the end a classical SVM
pipe = make_pipeline(
ToEpochs(n=10),
NDRobustScaler(),
NdRobustScaler(),
XdawnCovariances(nfilter=1),
OptionalWhitening(process=True, n_components=4),
SlimVector(keep_diagonal=True),
Expand Down Expand Up @@ -491,6 +467,4 @@ def predict(self, X):
# https://zenodo.org/records/7418458
# .. [3] https://pyriemann.readthedocs.io/en/latest/auto_examples/ERP/plot_ERP.html
#
# [4] https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix
#
# [5] https://stackoverflow.com/questions/16748577
# .. [4] https://stackoverflow.com/questions/16748577
3 changes: 2 additions & 1 deletion pyriemann_qiskit/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import hyper_params_factory, filtering
from . import hyper_params_factory, filtering, preprocessing
from .quantum_provider import get_provider, get_devices, get_simulator
from .math import cov_to_corr_matrix
from .docplex import (
Expand All @@ -23,6 +23,7 @@
__all__ = [
"hyper_params_factory",
"filtering",
"preprocessing",
"get_provider",
"get_devices",
"get_simulator",
Expand Down
71 changes: 71 additions & 0 deletions pyriemann_qiskit/utils/preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
from sklearn.base import TransformerMixin
from sklearn.preprocessing import RobustScaler


class NdRobustScaler(TransformerMixin):
    """Apply one robust scaler by feature.

    RobustScaler of scikit-learn [1]_ is adapted to 3d inputs [2]_.

    References
    ----------
    .. [1] \
        https://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.RobustScaler.html
    .. [2] \
        https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix

    Notes
    -----
    .. versionadded:: 0.2.0
    """

    def __init__(self):
        # One fitted RobustScaler per feature, filled by `fit`.
        self._scalers = []

    def fit(self, X, _y=None, **kwargs):
        """Fit one robust scaler on each feature of the training data.

        Parameters
        ----------
        X : ndarray, shape (n_matrices, n_features, n_samples)
            Training matrices.
        _y : None
            Unused. Kept for scikit-learn compatibility.
        **kwargs
            Unused.

        Returns
        -------
        self : NdRobustScaler instance
            The NdRobustScaler instance.
        """
        _, n_features, _ = X.shape
        self._scalers = [
            RobustScaler().fit(X[:, i, :]) for i in range(n_features)
        ]
        return self

    def transform(self, X, **kwargs):
        """Apply the previously trained robust scalers (one scaler by feature).

        Parameters
        ----------
        X : ndarray, shape (n_matrices, n_features, n_samples)
            Matrices to scale.
        **kwargs
            Unused.

        Returns
        -------
        X_scaled : ndarray, shape (n_matrices, n_features, n_samples)
            Scaled copy of the matrices; the input array is left untouched.

        Raises
        ------
        ValueError
            If the number of features of ``X`` differs from the number of
            scalers fitted by `fit`.
        """
        _, n_features, _ = X.shape
        if n_features != len(self._scalers):
            raise ValueError(
                "Input has not the same number of features as the fitted scaler"
            )
        # Scale into a copy so the caller's array is not overwritten.
        # Integer/boolean inputs are promoted to float first, otherwise the
        # scaled (fractional) values would be truncated on assignment.
        X_scaled = X.astype(float) if X.dtype.kind in "biu" else X.copy()
        for i, scaler in enumerate(self._scalers):
            X_scaled[:, i, :] = scaler.transform(X[:, i, :])
        return X_scaled
28 changes: 28 additions & 0 deletions tests/test_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
from pyriemann_qiskit.utils.preprocessing import NdRobustScaler
from sklearn.preprocessing import RobustScaler


def test_NdRobustScaler(get_covmats):
    """Check NdRobustScaler against a per-feature scikit-learn RobustScaler."""
    n_matrices, n_features = 5, 3

    X = get_covmats(n_matrices, n_features)
    # Snapshot the raw input: the scaler may write into `X`, in which case
    # reading `X` afterwards would compare scaled data with itself.
    X_original = X.copy()

    scaler = NdRobustScaler()
    transformed_X = scaler.fit_transform(X)

    assert transformed_X.shape == X_original.shape

    # Check that each feature is scaled using RobustScaler
    for i in range(n_features):
        feature_before_scaling = X_original[:, i, :]
        feature_after_scaling = transformed_X[:, i, :]

        # Use RobustScaler to manually scale the feature and compare
        manual_scaler = RobustScaler()
        manual_scaler.fit(feature_before_scaling)
        manual_scaled_feature = manual_scaler.transform(feature_before_scaling)

        np.testing.assert_allclose(
            feature_after_scaling, manual_scaled_feature, rtol=1e-5
        )
Loading