Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster sampling 2D Riemannian Gaussian #198

Merged
merged 26 commits into from Aug 25, 2022
Merged
Show file tree
Hide file tree
Changes from 23 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whatsnew.rst
Expand Up @@ -19,6 +19,8 @@ v0.3.1.dev
- Enhance AJD: add ``init`` to :func:`pyriemann.utils.ajd.ajd_pham` and :func:`pyriemann.utils.ajd.rjd`,
add ``warm_restart`` to :class:`pyriemann.spatialfilters.AJDC`. :pr:`196` by :user:`qbarthelemy`

- Add parameter ``sampling_method`` to :func:`pyriemann.dataset.sample_gaussian_spd`, with ``rejection`` accelerating 2x2 matrices generation. :pr:`198` by :user:`Artim436`

v0.3 (July 2022)
----------------

Expand Down
2 changes: 1 addition & 1 deletion examples/simulated/plot_riemannian_gaussian.py
Expand Up @@ -23,7 +23,7 @@
###############################################################################
# Set parameters for sampling from the Riemannian Gaussian distribution
n_matrices = 100 # how many SPD matrices to generate
n_dim = 4 # number of dimensions of the SPD matrices
n_dim = 2 # number of dimensions of the SPD matrices
sigma = 1.0 # dispersion of the Gaussian distribution
epsilon = 4.0 # parameter for controlling the distance between centers
random_state = 42 # ensure reproducibility
Expand Down
154 changes: 146 additions & 8 deletions pyriemann/datasets/sampling.py 100644 → 100755
@@ -1,6 +1,7 @@
from functools import partial
import warnings
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.utils import check_random_state
from joblib import Parallel, delayed

Expand Down Expand Up @@ -42,6 +43,105 @@ def _pdf_r(r, sigma):
return np.exp(partial_1 + partial_2)


def _rejection_sampling_2D_gfunction_plus(sigma, r_sample):
"""Auxiliary function for the 2D rejection sampling algorithm.

It is used in the case where r is sampled with the function g+.

Parameters
----------
sigma : float
Dispersion of the Riemannian Gaussian distribution.
r_samples : ndarray, shape (1, n_dim)
Sample of the r parameters of the Riemannian Gaussian distribution.

Returns
-------
p : float
Probability of acceptation.
"""
mu_a = np.array([sigma**2/2, -sigma**2/2])
cov_matrix = (sigma**2)*np.eye(2)
m = np.pi*(sigma**2)*np.exp(sigma**2/4)
if r_sample[0] >= r_sample[1]:
num = np.exp(-1/(2*sigma**2) * np.sum(r_sample**2))\
* np.sinh((r_sample[0] - r_sample[1])/2) * 1/m
den = multivariate_normal.pdf(r_sample, mean=mu_a, cov=cov_matrix)
return num / den
return 0


def _rejection_sampling_2D_gfunction_minus(sigma, r_sample):
"""Auxiliary function for the 2D rejection sampling algorithm.

It is used in the case where r is sampled with the function g-.

Parameters
----------
sigma : float
Dispersion of the Riemannian Gaussian distribution.
r_samples : ndarray, shape (1, n_dim)
Sample of the r parameters of the Riemannian Gaussian distribution.

Returns
-------
p : float
Probability of acceptation.
"""
mu_b = np.array([-sigma**2/2, sigma**2/2])
cov_matrix = (sigma**2)*np.eye(2)
m = np.pi*(sigma**2)*np.exp(sigma**2/4)
if r_sample[0] < r_sample[1]:
num = np.exp(-1/(2*sigma**2) * np.sum(r_sample**2))\
* np.sinh((r_sample[1] - r_sample[0])/2)
den = multivariate_normal.pdf(r_sample, mean=mu_b, cov=cov_matrix)*m
return num/den
return 0


def _rejection_sampling_2D(n_samples, sigma, random_state=None):
"""Rejection sampling algorithm for the 2D case.

Implementation of a rejection sampling algorithm. The implementation
follows the description given in page 528 of Christopher Bishop's book
"Pattern recognition and Machine Learning" (2006).

Parameters
----------
n_samples : int
Number of samples to get from the target distribution.
sigma : float
Dispersion of the Riemannian Gaussian distribution.
random_state : int, RandomState instance or None, default=None
Pass an int for reproducible output across multiple function calls.

Returns
-------
r_samples : ndarray, shape (n_samples, n_dim)
Samples of the r parameters of the Riemannian Gaussian distribution.
"""
mu_a = np.array([sigma**2/2, -sigma**2/2])
mu_b = np.array([-sigma**2/2, sigma**2/2])
cov_matrix = (sigma**2)*np.eye(2)
r_samples = []
cpt = 0
rs = check_random_state(random_state)
while cpt != n_samples:
agramfort marked this conversation as resolved.
Show resolved Hide resolved
if rs.binomial(1, 0.5, 1) == 1:
r_sample = multivariate_normal.rvs(mu_a, cov_matrix, 1, rs)
res = _rejection_sampling_2D_gfunction_plus(sigma, r_sample)
if rs.rand(1) < res:
r_samples.append(r_sample)
cpt += 1
else:
r_sample = multivariate_normal.rvs(mu_b, cov_matrix, 1, rs)
res = _rejection_sampling_2D_gfunction_minus(sigma, r_sample)
if rs.rand(1) < res:
r_samples.append(r_sample)
cpt += 1
return np.array(r_samples)


def _slice_one_sample(ptarget, x0, w, rs):
"""Slice sampling for one sample

Expand Down Expand Up @@ -167,13 +267,14 @@ def _slice_sampling(ptarget, n_samples, x0, n_burnin=20, thin=10,
return samples


def _sample_parameter_r(n_samples, n_dim, sigma, random_state=None, n_jobs=1):
def _sample_parameter_r(n_samples, n_dim, sigma,
random_state=None, n_jobs=1, sampling_method='auto'):
"""Sample the r parameters of a Riemannian Gaussian distribution.

Sample the logarithm of the eigenvalues of a SPD matrix following a
Riemannian Gaussian distribution.

See https://arxiv.org/pdf/1507.01760.pdf for the mathematical details.
See [1]_ for the mathematical details.

Parameters
----------
Expand All @@ -188,13 +289,34 @@ def _sample_parameter_r(n_samples, n_dim, sigma, random_state=None, n_jobs=1):
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default='auto'
Name of the sampling method used to sample samples_r. It can be
'auto', 'slice' or 'rejection'. If it is 'auto', the sampling_method
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.
.. versionadded:: 0.3.1
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
r_samples : ndarray, shape (n_samples, n_dim)
Samples of the r parameters of the Riemannian Gaussian distribution.
"""

References
----------
.. [1] S. Said, L. Bombrun, Y. Berthoumieu, and J. Manton, “Riemannian
Gaussian distributions on the space of symmetric positive definite
matrices”, IEEE Trans Inf Theory, vol. 63, pp. 2153–2170, 2017.
https://arxiv.org/pdf/1507.01760.pdf
"""
if sampling_method not in ['slice', 'rejection', 'auto']:
raise ValueError(f'Unknown sampling method {sampling_method},'
'try slice or rejection')
if n_dim == 2 and sampling_method != "slice":
return _rejection_sampling_2D(n_samples, sigma,
random_state=random_state)
if n_dim != 2 and sampling_method == "rejection":
raise ValueError(
f'n_dim={n_dim} is not yet supported with rejection sampling')
rs = check_random_state(random_state)
x0 = rs.randn(n_dim)
ptarget = partial(_pdf_r, sigma=sigma)
Expand Down Expand Up @@ -242,9 +364,8 @@ def _sample_parameter_U(n_samples, n_dim, random_state=None):
return u_samples


def _sample_gaussian_spd_centered(
n_matrices, n_dim, sigma, random_state=None, n_jobs=1
):
def _sample_gaussian_spd_centered(n_matrices, n_dim, sigma, random_state=None,
n_jobs=1, sampling_method='auto'):
"""Sample a Riemannian Gaussian distribution centered at the Identity.

Sample SPD matrices from a Riemannian Gaussian distribution centered at the
Expand All @@ -264,6 +385,13 @@ def _sample_gaussian_spd_centered(
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default='auto'
Name of the sampling method used to sample samples_r. It can be
'auto', 'slice' or 'rejection'. If it is 'auto', the sampling_method
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

.. versionadded:: 0.3.1
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand All @@ -286,7 +414,8 @@ def _sample_gaussian_spd_centered(
n_dim=n_dim,
sigma=sigma,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method)
samples_U = _sample_parameter_U(n_samples=n_matrices,
n_dim=n_dim,
random_state=random_state)
Expand All @@ -301,7 +430,8 @@ def _sample_gaussian_spd_centered(
return samples


def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None,
n_jobs=1, sampling_method='auto'):
"""Sample a Riemannian Gaussian distribution.

Sample SPD matrices from a Riemannian Gaussian distribution centered at
Expand All @@ -324,6 +454,13 @@ def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default='auto'
Name of the sampling method used to sample samples_r. It can be
sylvchev marked this conversation as resolved.
Show resolved Hide resolved
'auto', 'slice' or 'rejection'. If it is 'auto', the sampling_method
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

.. versionadded:: 0.3.1
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand All @@ -350,6 +487,7 @@ def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
sigma=sigma / np.sqrt(n_dim),
random_state=random_state,
n_jobs=n_jobs,
sampling_method=sampling_method
)

# apply the parallel transport to mean on each of the samples
Expand Down
18 changes: 15 additions & 3 deletions pyriemann/datasets/simulated.py
Expand Up @@ -79,7 +79,8 @@ def make_masks(n_masks, n_dim0, n_dim1_min, rs):

def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
return_centers=False, random_state=None, *,
mat_mean=.0, mat_std=1., n_jobs=1):
mat_mean=.0, mat_std=1., n_jobs=1,
sampling_method='auto'):
"""Generate SPD dataset with two classes sampled from Riemannian Gaussian.

Generate a dataset with SPD matrices drawn from two Riemannian Gaussian
Expand Down Expand Up @@ -108,6 +109,13 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default='auto'
Name of the sampling method used to sample samples_r. It can be
'auto', 'slice' or 'rejection'. If it is 'auto', the sampling_method
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

.. versionadded:: 0.3.1
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
Expand Down Expand Up @@ -140,7 +148,9 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
mean=C0,
sigma=class_disp,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method
)
y0 = np.zeros(n_matrices)

# generate dataset for class 1
Expand All @@ -151,7 +161,9 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
mean=C1,
sigma=class_disp,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method
)
y1 = np.ones(n_matrices)

X = np.concatenate([X0, X1])
Expand Down
39 changes: 30 additions & 9 deletions tests/test_sampling.py
Expand Up @@ -8,20 +8,41 @@


@pytest.mark.parametrize("n_jobs", [1, -1])
def test_sample_gaussian_spd(n_jobs):
"""Test Riemannian Gaussian sampling."""
n_matrices, n_dim, sigma = 10, 8, 1.
@pytest.mark.parametrize("sampling_method", ['auto', 'slice', 'rejection'])
def test_sample_gaussian_spd_dim2(n_jobs, sampling_method):
"""Test Riemannian Gaussian sampling for dim=2."""
n_matrices, n_dim, sigma = 5, 2, 1.
mean = np.eye(n_dim)
X = sample_gaussian_spd(
n_matrices, mean, sigma, random_state=42, n_jobs=n_jobs
)
X = sample_gaussian_spd(n_matrices, mean, sigma, random_state=42,
n_jobs=n_jobs, sampling_method=sampling_method)
assert X.shape == (n_matrices, n_dim, n_dim) # X shape mismatch
assert is_spd(X) # X is an array of SPD matrices


@pytest.mark.parametrize("n_dim", [3, 4])
@pytest.mark.parametrize("n_jobs", [1, -1])
@pytest.mark.parametrize("sampling_method", ['auto', 'slice'])
def test_sample_gaussian_spd_dimsup(n_dim, n_jobs, sampling_method):
"""Test Riemannian Gaussian sampling for dim>2."""
n_matrices, sigma = 5, 1.
mean = np.eye(n_dim)
X = sample_gaussian_spd(n_matrices, mean, sigma, random_state=42,
n_jobs=n_jobs, sampling_method=sampling_method)
assert X.shape == (n_matrices, n_dim, n_dim) # X shape mismatch
assert is_spd(X) # X is an array of SPD matrices


def test_sample_gaussian_spd_error():
with pytest.raises(ValueError): # unknown sampling method
sample_gaussian_spd(5, np.eye(2), 1., sampling_method='blabla')
with pytest.raises(ValueError): # dim=3 not yet supported with rejection
n_dim = 3
sample_gaussian_spd(5, np.eye(n_dim), 1., sampling_method='rejection')


def test_generate_random_spd_matrix():
"""Test generating random SPD matrix"""
n_dim = 16
n_dim = 4
X = generate_random_spd_matrix(n_dim, random_state=None)
assert X.shape == (n_dim, n_dim) # X shape mismatch
assert is_spd(X) # X is a SPD matrix
Expand All @@ -30,7 +51,7 @@ def test_generate_random_spd_matrix():
@pytest.mark.parametrize("n_jobs", [1, -1])
def test_sigma_gaussian_spd(n_jobs):
"""Test sigma parameter from Riemannian Gaussian sampling."""
n_matrices, n_dim, sig_1, sig_2 = 10, 8, 1., 2.
n_matrices, n_dim, sig_1, sig_2 = 5, 4, 1., 2.
mean = np.eye(n_dim)
X1 = sample_gaussian_spd(
n_matrices, mean, sig_1, random_state=42, n_jobs=n_jobs
Expand All @@ -44,7 +65,7 @@ def test_sigma_gaussian_spd(n_jobs):


def test_functions_error():
n_matrices, n_dim = 10, 16
n_matrices, n_dim = 3, 4
mean, sigma = np.eye(n_dim), 2.
with pytest.raises(ValueError): # mean is not a matrix
sample_gaussian_spd(n_matrices, np.ones(n_dim), sigma)
Expand Down