Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Faster sampling 2D Riemannian Gaussian #198

Merged
merged 26 commits into from Aug 25, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 7 additions & 4 deletions examples/simulated/plot_riemannian_gaussian.py
Expand Up @@ -23,7 +23,7 @@
###############################################################################
# Set parameters for sampling from the Riemannian Gaussian distribution
n_matrices = 100 # how many SPD matrices to generate
n_dim = 4 # number of dimensions of the SPD matrices
n_dim = 2 # number of dimensions of the SPD matrices
sigma = 1.0 # dispersion of the Gaussian distribution
epsilon = 4.0 # parameter for controlling the distance between centers
random_state = 42 # ensure reproducibility
Expand All @@ -34,15 +34,18 @@
samples_1 = sample_gaussian_spd(n_matrices=n_matrices,
mean=mean,
sigma=sigma,
random_state=random_state)
random_state=random_state,
sampling_method='rejection')
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would introduce a value called "auto" for the sampling_method method that would default to "rejection" if dim == 2 and slice otherwise so you don't have to expose these options in the tutorial and it will just become faster for users without any code change

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I understand your point. I have the impression that the default value "None" already fulfils the role of the auto variable you want to define.

agramfort marked this conversation as resolved.
Show resolved Hide resolved
samples_2 = sample_gaussian_spd(n_matrices=n_matrices,
mean=mean,
sigma=sigma/2,
random_state=random_state)
random_state=random_state,
sampling_method='rejection')
agramfort marked this conversation as resolved.
Show resolved Hide resolved
samples_3 = sample_gaussian_spd(n_matrices=n_matrices,
mean=epsilon*mean,
sigma=sigma,
random_state=random_state)
random_state=random_state,
sampling_method='rejection')
agramfort marked this conversation as resolved.
Show resolved Hide resolved

# Stack all of the samples into one data array for the embedding
samples = np.concatenate([samples_1, samples_2, samples_3])
Expand Down
131 changes: 124 additions & 7 deletions pyriemann/datasets/sampling.py 100644 → 100755
@@ -1,6 +1,7 @@
from functools import partial
import warnings
import numpy as np
from scipy.stats import multivariate_normal
from sklearn.utils import check_random_state
from joblib import Parallel, delayed

Expand Down Expand Up @@ -42,6 +43,96 @@ def _pdf_r(r, sigma):
return np.exp(partial_1 + partial_2)


def _rejection_sampling_2D_gfunction_plus(sigma, r_sample):
""" auxiliary function used for the 2D rejection sampling
algorithm in the case where r is sampled with
the function g+.
Parameters
----------
sigma : float
Dispersion of the Riemannian Gaussian distribution.
r_samples : ndarray, shape (1, n_dim)
Sample of the r parameters of the Riemannian Gaussian distribution.
Returns
-------
probability_of_acceptation : float
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
"""
MU_A = np.array([sigma**2/2, -sigma**2/2])
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
COV_MATRIX = (sigma**2)*np.eye(2)
M = np.pi*(sigma**2)*np.exp(sigma**2/4)
if r_sample[0] >= r_sample[1]:
num = np.exp(-1/(2*sigma**2) * np.sum(r_sample**2))\
* np.sinh((r_sample[0] - r_sample[1])/2) * 1/M
den = multivariate_normal.pdf(r_sample, mean=MU_A, cov=COV_MATRIX)
return num / den
return 0


def _rejection_sampling_2D_gfunction_minus(sigma, r_sample):
""" auxiliary function used for the 2D rejection sampling
algorithm in the case where r is sampled with
the function g-.
Parameters
----------
sigma : float
Dispersion of the Riemannian Gaussian distribution.
r_samples : ndarray, shape (n_samples, n_dim)
Samples of the r parameters of the Riemannian Gaussian distribution.
Returns
-------
probability_of_acceptation : float
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
"""
MU_B = np.array([-sigma**2/2, sigma**2/2])
COV_MATRIX = (sigma**2)*np.eye(2)
M = np.pi*(sigma**2)*np.exp(sigma**2/4)
if r_sample[0] < r_sample[1]:
num = np.exp(-1/(2*sigma**2) * np.sum(r_sample**2))\
* np.sinh((r_sample[1] - r_sample[0])/2)
den = multivariate_normal.pdf(r_sample, mean=MU_B, cov=COV_MATRIX)*M
return num/den
return 0


def _rejection_sampling_2D(n_samples, sigma, random_state=None):
""" Rejection sampling algorithm for the 2D case. The algorithm is
optimized for spatial complexity but not for time complexity.
Works very well for low sigma values

Parameters
----------
n_samples : int
Number of samples to get from the ptarget distribution.
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
sigma : float
Dispersion of the Riemannian Gaussian distribution.
random_state : int, RandomState instance or None, default=None
Pass an int for reproducible output across multiple function calls.
Returns
-------
r_samples : ndarray, shape (n_samples, n_dim)
Samples of the r parameters of the Riemannian Gaussian distribution.
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
"""
MU_A = np.array([sigma**2/2, -sigma**2/2])
MU_B = np.array([-sigma**2/2, sigma**2/2])
COV_MATRIX = (sigma**2)*np.eye(2)
RES = []
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
cpt = 0
rs = check_random_state(random_state)
while cpt != n_samples:
agramfort marked this conversation as resolved.
Show resolved Hide resolved
if rs.binomial(1, 0.5, 1) == 1:
r_sample = multivariate_normal.rvs(MU_A, COV_MATRIX, 1, rs)
res = _rejection_sampling_2D_gfunction_plus(sigma, r_sample)
if rs.rand(1) < res:
RES.append(r_sample)
cpt += 1
else:
r_sample = multivariate_normal.rvs(MU_B, COV_MATRIX, 1, rs)
res = _rejection_sampling_2D_gfunction_minus(sigma, r_sample)
if rs.rand(1) < res:
RES.append(r_sample)
cpt += 1
return np.array(RES)


def _slice_one_sample(ptarget, x0, w, rs):
"""Slice sampling for one sample

Expand Down Expand Up @@ -167,7 +258,8 @@ def _slice_sampling(ptarget, n_samples, x0, n_burnin=20, thin=10,
return samples


def _sample_parameter_r(n_samples, n_dim, sigma, random_state=None, n_jobs=1):
def _sample_parameter_r(n_samples, n_dim, sigma,
random_state=None, n_jobs=1, sampling_method=None):
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
"""Sample the r parameters of a Riemannian Gaussian distribution.

Sample the logarithm of the eigenvalues of a SPD matrix following a
Expand All @@ -188,13 +280,26 @@ def _sample_parameter_r(n_samples, n_dim, sigma, random_state=None, n_jobs=1):
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default=None
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
Name of the sampling method used to sample samples_r. It can be
'slice' or 'rejection'. If it is None, the sampling_method
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

Returns
-------
r_samples : ndarray, shape (n_samples, n_dim)
Samples of the r parameters of the Riemannian Gaussian distribution.
"""

if sampling_method not in ['slice', 'rejection', None]:
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(f'Unknown sampling method {sampling_method},\
try slice or rejection')
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
if n_dim == 2 and sampling_method != "slice":
return _rejection_sampling_2D(n_samples, sigma,
random_state=random_state)
if n_dim != 2 and sampling_method == "rejection":
raise ValueError(
f'n_dim={n_dim} is not yet supported with rejection sampling')
rs = check_random_state(random_state)
x0 = rs.randn(n_dim)
ptarget = partial(_pdf_r, sigma=sigma)
Expand Down Expand Up @@ -242,9 +347,8 @@ def _sample_parameter_U(n_samples, n_dim, random_state=None):
return u_samples


def _sample_gaussian_spd_centered(
n_matrices, n_dim, sigma, random_state=None, n_jobs=1
):
def _sample_gaussian_spd_centered(n_matrices, n_dim, sigma, random_state=None,
n_jobs=1, sampling_method=None):
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved
"""Sample a Riemannian Gaussian distribution centered at the Identity.

Sample SPD matrices from a Riemannian Gaussian distribution centered at the
Expand All @@ -264,6 +368,11 @@ def _sample_gaussian_spd_centered(
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default=None
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
Name of the sampling method used to sample samples_r. It can be
'slice' or 'rejection'. If it is None, the sampling_method
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

Returns
-------
Expand All @@ -286,7 +395,8 @@ def _sample_gaussian_spd_centered(
n_dim=n_dim,
sigma=sigma,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method)
samples_U = _sample_parameter_U(n_samples=n_matrices,
n_dim=n_dim,
random_state=random_state)
Expand All @@ -301,7 +411,8 @@ def _sample_gaussian_spd_centered(
return samples


def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None,
n_jobs=1, sampling_method=None):
qbarthelemy marked this conversation as resolved.
Show resolved Hide resolved
"""Sample a Riemannian Gaussian distribution.

Sample SPD matrices from a Riemannian Gaussian distribution centered at
Expand All @@ -324,6 +435,11 @@ def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default=None
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
Name of the sampling method used to sample samples_r. It can be
sylvchev marked this conversation as resolved.
Show resolved Hide resolved
'slice' or 'rejection'. If it is None, the sampling_method
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

Returns
-------
Expand All @@ -350,6 +466,7 @@ def sample_gaussian_spd(n_matrices, mean, sigma, random_state=None, n_jobs=1):
sigma=sigma / np.sqrt(n_dim),
random_state=random_state,
n_jobs=n_jobs,
sampling_method=sampling_method
)

# apply the parallel transport to mean on each of the samples
Expand Down
16 changes: 13 additions & 3 deletions pyriemann/datasets/simulated.py
Expand Up @@ -79,7 +79,8 @@ def make_masks(n_masks, n_dim0, n_dim1_min, rs):

def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
return_centers=False, random_state=None, *,
mat_mean=.0, mat_std=1., n_jobs=1):
mat_mean=.0, mat_std=1., n_jobs=1,
sampling_method=None):
Artim436 marked this conversation as resolved.
Show resolved Hide resolved
"""Generate SPD dataset with two classes sampled from Riemannian Gaussian.

Generate a dataset with SPD matrices drawn from two Riemannian Gaussian
Expand Down Expand Up @@ -108,6 +109,11 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
n_jobs : int, default=1
The number of jobs to use for the computation. This works by computing
each of the class centroid in parallel. If -1 all CPUs are used.
sampling_method : str, default=None
Name of the sampling method used to sample samples_r. It can be
'slice' or 'rejection'. If it is None, the sampling_method
will be equal to 'slice' for n_dim != 2 and equal to
'rejection' for n_dim = 2.

Returns
-------
Expand Down Expand Up @@ -140,7 +146,9 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
mean=C0,
sigma=class_disp,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method
)
y0 = np.zeros(n_matrices)

# generate dataset for class 1
Expand All @@ -151,7 +159,9 @@ def make_gaussian_blobs(n_matrices=100, n_dim=2, class_sep=1.0, class_disp=1.0,
mean=C1,
sigma=class_disp,
random_state=random_state,
n_jobs=n_jobs)
n_jobs=n_jobs,
sampling_method=sampling_method
)
y1 = np.ones(n_matrices)

X = np.concatenate([X0, X1])
Expand Down