Add class-distinctiveness function and its example code #215

Merged
merged 12 commits on Dec 13, 2022
106 changes: 106 additions & 0 deletions examples/simulated/plot_class_distinctiveness.py
@@ -0,0 +1,106 @@
"""
=====================================================================
Select best class-separated SPD matrix dataset on a manifold
=====================================================================
Select the SPD matrix dataset with the best separation
between two classes on the manifold.
This measure can be used as a metric for selecting the best frequency band
or time window among multiple choices.
"""
# Authors: Maria Sayu Yamamoto <maria-sayu.yamamoto@universite-paris-saclay.fr>
#
# License: BSD (3-clause)

import numpy as np
import matplotlib.pyplot as plt

from pyriemann.embedding import SpectralEmbedding
from pyriemann.datasets import make_gaussian_blobs
from pyriemann.preprocessing import class_distinctiveness


print(__doc__)

###############################################################################
# Set parameters for sampling from the Riemannian Gaussian distribution

n_matrices = 100 # how many SPD matrices to generate
n_dim = 2 # number of dimensions of the SPD matrices
random_state = 42 # ensure reproducibility
epsilons_array = [.6, .6, .7] # parameter for the distance between centers
sigmas_array = [.2, .7, .2] # dispersion of the Gaussian distribution

###############################################################################
# Generate the samples under three different class-separability conditions

# data0...small distance between class centroids
# and small dispersion within class
data0_X, data0_y = make_gaussian_blobs(n_matrices=n_matrices, n_dim=n_dim,
class_sep=epsilons_array[0],
class_disp=sigmas_array[0],
random_state=random_state, n_jobs=4)

# data1...small distance between class centroids
# and large dispersion within class
data1_X, data1_y = make_gaussian_blobs(n_matrices=n_matrices, n_dim=n_dim,
class_sep=epsilons_array[1],
class_disp=sigmas_array[1],
random_state=random_state, n_jobs=4)

# data2...large distance between class centroids
# and small dispersion within class
data2_X, data2_y = make_gaussian_blobs(n_matrices=n_matrices, n_dim=n_dim,
class_sep=epsilons_array[2],
class_disp=sigmas_array[2],
random_state=random_state, n_jobs=4)

datasets = [data0_X, data1_X, data2_X]
labels = [data0_y, data1_y, data2_y]

###############################################################################
# Apply classDis to each dataset

all_classDis = []
for data_X, data_y in zip(datasets, labels):
    classDis = class_distinctiveness(data_X, data_y, nume_denomi=False)
    all_classDis.append(classDis)

###############################################################################
# Select the dataset with the highest classDis value

max_classDis_ind = np.argmax(all_classDis)
print(f'Best class-separated dataset is dataset{max_classDis_ind} '
      f'(classDis = {all_classDis[max_classDis_ind]:.4f})')

###############################################################################
# Plot sample distribution of each dataset

fig, ax = plt.subplots(figsize=(13.5, 4.4), ncols=3, sharey=True)
plt.subplots_adjust(wspace=0.10)
steps = [0, 1, 2]
titles = [r'dataset0($\varepsilon = 0.60, \sigma = 0.20$)',
r'dataset1($\varepsilon = 0.60, \sigma = 0.70$)',
r'dataset2($\varepsilon = 0.70, \sigma = 0.20$)']
for axi, step, title in zip(ax, steps, titles):
X = datasets[step]
y = labels[step]

# Embed samples in 2D
emb = SpectralEmbedding(n_components=2, metric='riemann')
embedded_points = emb.fit_transform(X)

# Plot the embedded points on the plane
axi.scatter(
embedded_points[y == 0][:, 0],
embedded_points[y == 0][:, 1],
c='C0', s=50, alpha=0.50, label="class 0")
axi.scatter(
embedded_points[y == 1][:, 0],
embedded_points[y == 1][:, 1],
c='C1', s=50, alpha=0.50, label="class 1")
axi.set_title(title, fontsize=14)
axi.legend(loc="upper right")

ax[max_classDis_ind].set_title('best class-separated dataset\n'
+ titles[max_classDis_ind], fontsize=14)
plt.show()
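The example's docstring mentions using this measure to pick the best frequency band or time window. Below is a minimal sketch of that use-case; the `band_epochs` dict of band-filtered epochs is a hypothetical, randomly generated stand-in (not part of this PR), while `Covariances` and `class_distinctiveness` are the pyriemann APIs used above.

```python
import numpy as np

from pyriemann.estimation import Covariances
from pyriemann.preprocessing import class_distinctiveness

rng = np.random.default_rng(42)

# hypothetical stand-in for band-filtered EEG epochs:
# band name -> epochs of shape (n_epochs, n_channels, n_times), sharing labels y_epochs
band_epochs = {
    band: rng.standard_normal((40, 8, 256))
    for band in ("alpha", "beta", "gamma")
}
y_epochs = np.repeat([0, 1], 20)

scores = {}
for band, epochs in band_epochs.items():
    # estimate shrinkage-regularized covariance matrices for this band
    covs = Covariances(estimator="lwf").fit_transform(epochs)
    scores[band] = class_distinctiveness(covs, y_epochs)

best_band = max(scores, key=scores.get)
print(f"Most class-separable band: {best_band} "
      f"(classDis = {scores[best_band]:.4f})")
```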
64 changes: 63 additions & 1 deletion pyriemann/preprocessing.py
@@ -2,12 +2,14 @@

import numpy as np
from scipy.linalg import eigh
from itertools import combinations

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.extmath import stable_cumsum

from .utils.mean import mean_covariance
from .utils.mean import mean_covariance, mean_riemann
from .utils.base import sqrtm, invsqrtm
from .utils.distance import distance_riemann


class Whitening(BaseEstimator, TransformerMixin):
@@ -200,3 +202,63 @@ def inverse_transform(self, X):
"""
Xiw = self.inv_filters_.T @ X @ self.inv_filters_
return Xiw


def class_distinctiveness(X, y, nume_denomi=False):
"""Measure class separability between classes on a manifold.

Member:
Can you write the formula for the class distinctiveness value for k classes?

Member:
Actually, I'm not entirely comfortable with the current formula for class distinctiveness,
because the Fisher criterion is defined as the ratio of the between-class variance to the
within-class variance. IMO, all distances in the code should be raised to the power of 2.

We can resolve this difference by adding a parameter p, the exponent of the power:
p=2 measures class dispersions with respect to means, i.e. the Fréchet variances minimized
during mean estimation; p=1 measures class dispersions with respect to medians, i.e. mean
absolute deviations.

Contributor Author (@msyamamoto, Dec 7, 2022):
This is a quick update, but while writing the formula for the documentation I realized that
my implementation in the second commit did not correctly reflect the equation for
multi-class distinctiveness (Equation 7 in the original paper). I am sorry for that. I will
fix it in the third commit.

Contributor Author:
Actually, I am not sure I fully understood your suggestion of using the parameter p. That
would be a different equation from the one in the original paper, wouldn't it?
https://iopscience.iop.org/article/10.1088/1741-2552/aac577/meta?casa_token=bZ_Sz-fcfXIAAAAA:_lr-H99Z4_Y-GrbxMgL6ELEOvx03b8I4Xa7gedXTciHcUdvz5TzQhGCZ1WGcKqNOOpWWRmbVKXx5

Member:
In the article, a power of 2 was lost between Eq(4) and Eq(5), giving a mean absolute
deviation instead of a variance.

I suggest adding an argument $p$:
$$\mathrm{classDis}(A, B, p) = \frac{d \left(\bar{X}^{A}, \bar{X}^{B}\right)^p} {\frac{1}{2} \left( \sigma_{X^{A}}^p + \sigma_{X^{B}}^p \right)}$$
where
$$\sigma_{X^{K}}^p = \frac{1}{m} \sum_{i=1}^m d \left(X_i, \bar{X}^{K}\right)^p$$
When $p=1$, this gives the equation from the article.
When $p=2$, it truly generalizes the Fisher criterion, defined as the ratio of the
between-class variance to the within-class variance.

Contributor Author:
Thank you very much for the clear explanation! I got it!
Indeed, it is good for users to be able to choose between the two quantifications via the
parameter p. Thank you so much for the suggestion :-)
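As an editorial note on this thread: below is a minimal sketch of the two-class, p-parameterized variant proposed above, as I read the formula. The helper name `class_dis_p` is hypothetical and not part of this PR; it only reuses `mean_riemann` and `distance_riemann`, which the PR itself imports.

```python
import numpy as np

from pyriemann.utils.mean import mean_riemann
from pyriemann.utils.distance import distance_riemann


def class_dis_p(X, y, p=1):
    """Two-class distinctiveness with exponent p (p=1: article, p=2: Fisher-like)."""
    classes = np.unique(y)
    if len(classes) != 2:
        raise ValueError("This sketch handles exactly two classes.")

    # Riemannian mean of each class
    means = [mean_riemann(X[y == c]) for c in classes]

    # numerator: distance between the two class means, raised to the power p
    numerator = distance_riemann(means[0], means[1]) ** p

    # denominator: mean of the two within-class dispersions,
    # each distance raised to the power p before averaging
    sigmas = [
        np.mean([distance_riemann(x, m) ** p for x in X[y == c]])
        for c, m in zip(classes, means)
    ]
    denominator = 0.5 * sum(sigmas)

    return numerator / denominator
```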

Parameters
----------
X : ndarray, shape (n_matrices, n_channels, n_channels)
Set of SPD matrices.
y : ndarray, shape (n_matrices,)
Labels for each matrix.
nume_denomi : bool, default=False
    Whether to also return the numerator and denominator of class_dis.

Returns
-------
class_dis : float
    Class distinctiveness value between the classes.
numerator : float
    Numerator of class_dis. Returned only if ``nume_denomi`` is True.
denominator : float
    Denominator of class_dis. Returned only if ``nume_denomi`` is True.

References
----------
.. [1] `Defining and quantifying users’ mental imagery-based
BCI skills: a first step
<https://hal.archives-ouvertes.fr/hal-01846434/>`_
F. Lotte, and C. Jeunet. Journal of neural engineering,
15(4), 046030, 2018.
"""

    classes = np.unique(y)

    # numerator: sum of Riemannian distances between each pair of class means
    covmeans = [mean_riemann(X[y == c]) for c in classes]
    all_dis_between = [
        distance_riemann(covmeans[i], covmeans[j])
        for i, j in combinations(range(len(classes)), 2)
    ]
    numerator = np.sum(all_dis_between)

    # denominator: half the sum of the within-class dispersions
    all_sigma = []
    for covmean, c in zip(covmeans, classes):
        X_class = X[y == c]
        dis_within = [distance_riemann(x, covmean) for x in X_class]
        all_sigma.append(np.sum(dis_within) / len(X_class))
    denominator = 0.5 * np.sum(all_sigma)

    class_dis = numerator / denominator

    if not nume_denomi:
        return class_dis
    else:
        return class_dis, numerator, denominator
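For completeness, a minimal usage sketch of the function as added in this diff; the toy data come from pyriemann's `make_gaussian_blobs`, and the specific parameter values are only illustrative.

```python
from pyriemann.datasets import make_gaussian_blobs
from pyriemann.preprocessing import class_distinctiveness

# small two-class set of SPD matrices
X, y = make_gaussian_blobs(n_matrices=20, n_dim=2, class_sep=0.7,
                           class_disp=0.2, random_state=42)

# scalar separability value
class_dis = class_distinctiveness(X, y)

# also retrieve its numerator and denominator
class_dis, num, den = class_distinctiveness(X, y, nume_denomi=True)
print(f"classDis = {class_dis:.4f} (numerator = {num:.4f}, "
      f"denominator = {den:.4f})")
```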