Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use sparse array in ALE, ALESubtraction, SCALE, KDA, and MKDADensity #725

Merged
merged 54 commits into from Jul 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
21ff371
Use 4D array in ALE
JulioAPeraza Jun 27, 2022
0851642
Use 4D sparse array
JulioAPeraza Jun 28, 2022
fdef663
Add support to sparse array in CBMAEstimator._fit()
JulioAPeraza Jun 29, 2022
790f335
calculate histogram on sparse array
JulioAPeraza Jun 30, 2022
04ad43e
Merge branch 'neurostuff:main' into ale-sparse
JulioAPeraza Jul 1, 2022
c3efea7
Update nimare/meta/cbma/ale.py
JulioAPeraza Jul 1, 2022
9b398b2
Update nimare/meta/cbma/ale.py
JulioAPeraza Jul 1, 2022
2021117
Use sparse in KDA
JulioAPeraza Jul 5, 2022
5c0c7a7
Use sparse in MKDADensity
JulioAPeraza Jul 5, 2022
9dc0d11
Support sparse in generate.create_coordinate_dataset
JulioAPeraza Jul 5, 2022
8e43549
Add support for sparse when `null_method="reduced_montecarlo"`
JulioAPeraza Jul 5, 2022
ce0f3a4
Fix issue with tests
JulioAPeraza Jul 5, 2022
66c099f
get sample_sizes only if self.fwhm is None
JulioAPeraza Jul 5, 2022
16d5f05
build sparse for coordinates inside mask_data
JulioAPeraza Jul 7, 2022
6e0c982
pass image object to compute_ale_ma in generate
JulioAPeraza Jul 7, 2022
54b7277
Add support for np.ndarray back again
JulioAPeraza Jul 7, 2022
cb960b5
Mask coordinates before creating the sparse array
JulioAPeraza Jul 7, 2022
ca919f3
Fix sparse support in KDADensity
JulioAPeraza Jul 7, 2022
d4d9e85
modify unique_rows to handle mask when sum_overlap in KDA
JulioAPeraza Jul 8, 2022
8ce82ef
save both histogram_bins and histogram_means
JulioAPeraza Jul 8, 2022
48b62a7
fix style issues
JulioAPeraza Jul 8, 2022
35c29e6
consider `value` when creating `counts`
JulioAPeraza Jul 8, 2022
18e63c5
Support sparse in _run_fwe_permutation for MKDAChi2
JulioAPeraza Jul 8, 2022
c903cf1
keep unique_rows as general as possible
JulioAPeraza Jul 8, 2022
53df2c1
Update docstring of the modified functions
JulioAPeraza Jul 13, 2022
22ec4fd
Drop support of np.ndarray on some functions
JulioAPeraza Jul 13, 2022
de49037
Make return_type="sparse" default in _collect_ma_maps
JulioAPeraza Jul 13, 2022
be11662
Use return_type="array" in SCALE until sparse is supported
JulioAPeraza Jul 13, 2022
b2036bc
Use sparse in SCALE
JulioAPeraza Jul 13, 2022
46ce6ff
Use sparse in ALESubtraction
JulioAPeraza Jul 13, 2022
f864129
Fix typo
JulioAPeraza Jul 13, 2022
4c9c1b8
Small update: replace return_type="array" by "sparse"
JulioAPeraza Jul 14, 2022
bb98158
Set n_mask_voxels as a private parameter
JulioAPeraza Jul 14, 2022
8ec751f
Apply @tsalo and @jdkent review
JulioAPeraza Jul 14, 2022
a9060cf
Move the check of mask type to _preprocess_input
JulioAPeraza Jul 14, 2022
5c9bf97
Return sparse if pre-generated MA maps are found
JulioAPeraza Jul 15, 2022
6c384e9
Drop support for np.ndarray in ALE
JulioAPeraza Jul 15, 2022
c1a68b3
Remove unnecessary `gc.collect()` lines
JulioAPeraza Jul 15, 2022
5ed41d0
Update mkda.py
JulioAPeraza Jul 15, 2022
3966e61
Remove gc import
JulioAPeraza Jul 15, 2022
0669ae1
Drop support for np.ndarray in mkda
JulioAPeraza Jul 15, 2022
622a755
Move the check of the masker to _fit
JulioAPeraza Jul 15, 2022
6c877e6
Update mkda.py
JulioAPeraza Jul 15, 2022
1a2fa1c
Drop support for list in mkda
JulioAPeraza Jul 15, 2022
7765228
Drop support for list in ale
JulioAPeraza Jul 15, 2022
5a8617d
Update ale.py
JulioAPeraza Jul 15, 2022
65cc1fe
Use memmap in SCALE
JulioAPeraza Jul 18, 2022
c0235ca
Remove map reuse test in ALE for memory limit parameter
JulioAPeraza Jul 18, 2022
ab45def
Add versionchanged directives
JulioAPeraza Jul 18, 2022
c458d5b
Update utils.py
JulioAPeraza Jul 18, 2022
5f1a42f
Remove line from docstring when sparse is the only option
JulioAPeraza Jul 19, 2022
91bd39c
Add test for estimator with non-NiftiMasker
JulioAPeraza Jul 19, 2022
291feca
Add histogram_means description to docstring
JulioAPeraza Jul 19, 2022
010827e
Update nimare/meta/utils.py
JulioAPeraza Jul 19, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
11 changes: 7 additions & 4 deletions nimare/generate.py
Expand Up @@ -2,6 +2,7 @@
from itertools import zip_longest

import numpy as np
import sparse

from nimare.dataset import Dataset
from nimare.io import convert_neurovault_to_dataset
Expand Down Expand Up @@ -266,20 +267,22 @@ def _create_foci(foci, foci_percentage, fwhm, n_studies, n_noise_foci, rng, spac
# create a probability map for each peak
kernel = get_ale_kernel(template_img, fwhm)[1]
foci_prob_maps = {
tuple(peak): compute_ale_ma(template_data.shape, np.atleast_2d(peak), kernel)
tuple(peak): compute_ale_ma(template_img, np.atleast_2d(peak), kernel=kernel).reshape(
template_data.shape
)
for peak in ground_truth_foci_ijks
if peak.size
}

# get study specific instances of each foci
signal_studies = int(round(foci_percentage * n_studies))
signal_ijks = {
peak: np.argwhere(prob_map)[
peak: sparse.argwhere(prob_map)[
rng.choice(
np.argwhere(prob_map).shape[0],
sparse.argwhere(prob_map).shape[0],
size=signal_studies,
replace=True,
p=prob_map[np.nonzero(prob_map)] / sum(prob_map[np.nonzero(prob_map)]),
p=(prob_map[prob_map.nonzero()] / sum(prob_map[prob_map.nonzero()])).todense(),
)
]
for peak, prob_map in foci_prob_maps.items()
Expand Down
101 changes: 72 additions & 29 deletions nimare/meta/cbma/ale.py
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pandas as pd
import sparse
from joblib import Parallel, delayed
from tqdm.auto import tqdm

Expand Down Expand Up @@ -33,6 +34,10 @@
class ALE(CBMAEstimator):
"""Activation likelihood estimation.

.. versionchanged:: 0.0.12

- Use a 4D sparse array for modeled activation maps.

Parameters
----------
kernel_transformer : :obj:`~nimare.meta.kernel.KernelTransformer`, optional
Expand Down Expand Up @@ -151,32 +156,43 @@ def __init__(

def _compute_summarystat_est(self, ma_values):
    """Compute the ALE summary statistic (union of MA maps) across studies.

    Parameters
    ----------
    ma_values : :obj:`sparse.COO` or :obj:`numpy.ndarray`
        Modeled activation values, with studies along the first axis.

    Returns
    -------
    stat_values : :obj:`numpy.ndarray`
        1D array of ALE values restricted to in-mask voxels.
    """
    stat_values = 1.0 - np.prod(1.0 - ma_values, axis=0)

    # np.array type is used by _determine_histogram_bins to calculate max_poss_ale.
    # Use the public ``sparse.COO`` class instead of the private
    # ``sparse._coo.core.COO`` path, which can move between sparse releases.
    if isinstance(stat_values, sparse.COO):
        # NOTE: This may not work correctly with a non-NiftiMasker.
        mask_data = self.masker.mask_img.get_fdata().astype(bool)

        stat_values = stat_values.todense().reshape(-1)  # Indexing a .reshape(-1) is faster
        stat_values = stat_values[mask_data.reshape(-1)]

        # Stored for _compute_null_approximate, which needs the in-mask voxel
        # count to account for implicit zeros in the sparse MA maps.
        self.__n_mask_voxels = stat_values.shape[0]

    return stat_values

def _determine_histogram_bins(self, ma_maps):
"""Determine histogram bins for null distribution methods.

Parameters
----------
ma_maps
ma_maps : :obj:`sparse._coo.core.COO`
MA maps.

Notes
-----
This method adds one entry to the null_distributions_ dict attribute: "histogram_bins".
"""
if isinstance(ma_maps, list):
ma_values = self.masker.transform(ma_maps)
elif isinstance(ma_maps, np.ndarray):
ma_values = ma_maps
else:
if not isinstance(ma_maps, sparse._coo.core.COO):
raise ValueError(f"Unsupported data type '{type(ma_maps)}'")

# Determine bins for null distribution histogram
# Remember that numpy histogram bins are bin edges, not centers
# Assuming values of 0, .001, .002, etc., bins are -.0005-.0005, .0005-.0015, etc.
INV_STEP_SIZE = 100000
step_size = 1 / INV_STEP_SIZE
max_ma_values = np.max(ma_values, axis=1)
# Need to convert to dense because np.ceil is too slow with sparse
max_ma_values = ma_maps.max(axis=[1, 2, 3]).todense()

# round up based on resolution
max_ma_values = np.ceil(max_ma_values * INV_STEP_SIZE) / INV_STEP_SIZE
max_poss_ale = self._compute_summarystat(max_ma_values)
Expand All @@ -189,7 +205,7 @@ def _compute_null_approximate(self, ma_maps):

Parameters
----------
ma_maps : list of imgs or numpy.ndarray
ma_maps : :obj:`sparse._coo.core.COO`
MA maps.

Notes
Expand All @@ -199,27 +215,34 @@ def _compute_null_approximate(self, ma_maps):
- "histogram_bins"
- "histweights_corr-none_method-approximate"
"""
if isinstance(ma_maps, list):
ma_values = self.masker.transform(ma_maps)
elif isinstance(ma_maps, np.ndarray):
ma_values = ma_maps
else:
if not isinstance(ma_maps, sparse._coo.core.COO):
raise ValueError(f"Unsupported data type '{type(ma_maps)}'")

assert "histogram_bins" in self.null_distributions_.keys()

def just_histogram(*args, **kwargs):
"""Collect the first output (weights) from numpy histogram."""
return np.histogram(*args, **kwargs)[0].astype(float)

# Derive bin edges from histogram bin centers for numpy histogram function
bin_centers = self.null_distributions_["histogram_bins"]
step_size = bin_centers[1] - bin_centers[0]
inv_step_size = 1 / step_size
bin_edges = bin_centers - (step_size / 2)
bin_edges = np.append(bin_centers, bin_centers[-1] + step_size)

ma_hists = np.apply_along_axis(just_histogram, 1, ma_values, bins=bin_edges, density=False)
n_exp = ma_maps.shape[0]
n_bins = bin_centers.shape[0]
ma_hists = np.zeros((n_exp, n_bins))
data = ma_maps.data
coords = ma_maps.coords
for exp_idx in range(n_exp):
# The first column of coords is the fourth dimension of the dense array
study_ma_values = data[coords[0, :] == exp_idx]

n_nonzero_voxels = study_ma_values.shape[0]
n_zero_voxels = self.__n_mask_voxels - n_nonzero_voxels

ma_hists[exp_idx, :] = np.histogram(study_ma_values, bins=bin_edges, density=False)[
0
].astype(float)
ma_hists[exp_idx, 0] += n_zero_voxels

# Normalize MA histograms to get probabilities
ma_hists /= ma_hists.sum(1)[:, None]
Expand Down Expand Up @@ -258,6 +281,7 @@ class ALESubtraction(PairwiseCBMAEstimator):
- Use memmapped array for null distribution and remove ``memory_limit`` parameter.
- Support parallelization and add progress bar.
- Add ALE-difference (stat) and -log10(p) (logp) maps to results.
- Use a 4D sparse array for modeled activation maps.

.. versionchanged:: 0.0.8

Expand Down Expand Up @@ -352,16 +376,17 @@ def _fit(self, dataset1, dataset2):
coords_key="coordinates2",
)

n_grp1, n_voxels = ma_maps1.shape

# Get ALE values for the two groups and difference scores
grp1_ale_values = self._compute_summarystat_est(ma_maps1)
grp2_ale_values = self._compute_summarystat_est(ma_maps2)
diff_ale_values = grp1_ale_values - grp2_ale_values
del grp1_ale_values, grp2_ale_values

n_grp1 = ma_maps1.shape[0]
n_voxels = diff_ale_values.shape[0]

# Combine the MA maps into a single array to draw from for null distribution
ma_arr = np.vstack((ma_maps1, ma_maps2))
ma_arr = sparse.concatenate((ma_maps1, ma_maps2))

del ma_maps1, ma_maps2

Expand Down Expand Up @@ -418,6 +443,14 @@ def _fit(self, dataset1, dataset2):

def _compute_summarystat_est(self, ma_values):
    """Compute the ALE summary statistic (union of MA maps) across studies.

    Parameters
    ----------
    ma_values : :obj:`sparse.COO` or :obj:`numpy.ndarray`
        Modeled activation values, with studies along the first axis.

    Returns
    -------
    stat_values : :obj:`numpy.ndarray`
        1D array of ALE values restricted to in-mask voxels.
    """
    stat_values = 1.0 - np.prod(1.0 - ma_values, axis=0)

    # Use the public ``sparse.COO`` class instead of the private
    # ``sparse._coo.core.COO`` path, which can move between sparse releases.
    if isinstance(stat_values, sparse.COO):
        # NOTE: This may not work correctly with a non-NiftiMasker.
        mask_data = self.masker.mask_img.get_fdata().astype(bool)

        stat_values = stat_values.todense().reshape(-1)  # Indexing a .reshape(-1) is faster
        stat_values = stat_values[mask_data.reshape(-1)]

    return stat_values

def _run_permutation(self, i_iter, n_grp1, ma_arr, iter_diff_values):
Expand All @@ -440,8 +473,8 @@ def _run_permutation(self, i_iter, n_grp1, ma_arr, iter_diff_values):
gen = np.random.default_rng(seed=i_iter)
id_idx = np.arange(ma_arr.shape[0])
gen.shuffle(id_idx)
iter_grp1_ale_values = 1.0 - np.prod(1.0 - ma_arr[id_idx[:n_grp1], :], axis=0)
iter_grp2_ale_values = 1.0 - np.prod(1.0 - ma_arr[id_idx[n_grp1:], :], axis=0)
iter_grp1_ale_values = self._compute_summarystat_est(ma_arr[id_idx[:n_grp1], :])
iter_grp2_ale_values = self._compute_summarystat_est(ma_arr[id_idx[n_grp1:], :])
iter_diff_values[i_iter, :] = iter_grp1_ale_values - iter_grp2_ale_values

def _alediff_to_p_voxel(self, i_voxel, stat_value, voxel_null):
Expand Down Expand Up @@ -480,6 +513,7 @@ class SCALE(CBMAEstimator):

- Remove unused parameters ``voxel_thresh`` and ``memory_limit``.
- Use memmapped array for null distribution.
- Use a 4D sparse array for modeled activation maps.

.. versionchanged:: 0.0.10

Expand Down Expand Up @@ -584,14 +618,17 @@ def _fit(self, dataset):
)

# Determine bins for null distribution histogram
max_ma_values = np.max(ma_values, axis=1)
max_ma_values = ma_values.max(axis=[1, 2, 3]).todense()

max_poss_ale = self._compute_summarystat_est(max_ma_values)
self.null_distributions_["histogram_bins"] = np.round(
np.arange(0, max_poss_ale + 0.001, 0.0001), 4
)

stat_values = self._compute_summarystat_est(ma_values)

del ma_values

iter_df = self.inputs_["coordinates"].copy()
rand_idx = np.random.choice(self.xyz.shape[0], size=(iter_df.shape[0], self.n_iters))
rand_xyz = self.xyz[rand_idx, :]
Expand Down Expand Up @@ -627,24 +664,30 @@ def _fit(self, dataset):
return images

def _compute_summarystat_est(self, data):
"""Generate ALE-value array and null distribution from list of contrasts.
"""Generate ALE-value array and null distribution from a list of contrasts.

For ALEs on the original dataset, computes the null distribution.
For permutation ALEs and all SCALEs, just computes ALE values.
Returns masked array of ALE values and 1XnBins null distribution.
"""
if isinstance(data, pd.DataFrame):
ma_values = self.kernel_transformer.transform(
data, masker=self.masker, return_type="array"
data, masker=self.masker, return_type="sparse"
)
elif isinstance(data, list):
ma_values = self.masker.transform(data)
elif isinstance(data, np.ndarray):
elif isinstance(data, (np.ndarray, sparse._coo.core.COO)):
ma_values = data
else:
raise ValueError(f"Unsupported data type '{type(data)}'")

stat_values = 1.0 - np.prod(1.0 - ma_values, axis=0)

if isinstance(stat_values, sparse._coo.core.COO):
# NOTE: This may not work correctly with a non-NiftiMasker.
mask_data = self.masker.mask_img.get_fdata().astype(bool)

stat_values = stat_values.todense().reshape(-1) # Indexing a .reshape(-1) is faster
stat_values = stat_values[mask_data.reshape(-1)]

return stat_values

def _scale_to_p(self, stat_values, scale_values):
Expand Down