nimare/decode/discrete.py

"""Methods for decoding subsets of voxels or experiments into text."""
import numpy as np
import pandas as pd
from nilearn._utils import load_niimg
from pymare.stats import bonferroni, fdr
from scipy import special
from scipy.stats import binom

from nimare.decode.base import Decoder
from nimare.decode.utils import weight_priors
from nimare.meta.kernel import KernelTransformer, MKDAKernel
from nimare.stats import one_way, pearson, two_way
from nimare.transforms import p_to_z
from nimare.utils import _check_type, get_masker


def gclda_decode_roi(model, roi, topic_priors=None, prior_weight=1.0):
    r"""Perform image-to-text decoding for discrete inputs using method from Rubin et al. (2017).

    The method used in this function was originally described in :footcite:t:`rubin2017decoding`.

    Parameters
    ----------
    model : :obj:`~nimare.annotate.gclda.GCLDAModel`
        Model object needed for decoding.
    roi : :obj:`nibabel.nifti1.Nifti1Image` or :obj:`str`
        Binary image to decode into text. If string, path to a file with
        the binary image.
    topic_priors : :obj:`numpy.ndarray` of :obj:`float`, optional
        A 1d array of size (n_topics) with values for topic weighting.
        If None, no weighting is done. Default is None.
    prior_weight : :obj:`float`, optional
        The weight by which the prior will affect the decoding.
        Default is 1.

    Returns
    -------
    decoded_df : :obj:`pandas.DataFrame`
        A DataFrame with the word-tokens and their associated weights.
    topic_weights : :obj:`numpy.ndarray` of :obj:`float`
        The weights of the topics used in decoding.

    Notes
    -----
    ======================    ==============================================================
    Notation                  Meaning
    ======================    ==============================================================
    :math:`v`                 Voxel
    :math:`t`                 Topic
    :math:`w`                 Word type
    :math:`r`                 Region of interest (ROI)
    :math:`p(v|t)`            Probability of topic given voxel (``p_topic_g_voxel``)
    :math:`\\tau_{t}`          Topic weight vector (``topic_weights``)
    :math:`p(w|t)`            Probability of word type given topic (``p_word_g_topic``)
    ======================    ==============================================================

    1.  Compute :math:`p(v|t)`.

            - From :func:`gclda.model.Model.get_spatial_probs()`

    2.  Compute topic weight vector (:math:`\\tau_{t}`) by adding across voxels within ROI.

            - :math:`\\tau_{t} = \sum_{i} {p(t|v_{i})}`

    3.  Multiply :math:`\\tau_{t}` by :math:`p(w|t)`.

            - :math:`p(w|r) \propto \\tau_{t} \cdot p(w|t)`

    4.  The resulting vector (``word_weights``) reflects arbitrarily scaled term weights for the
        ROI.

    See Also
    --------
    :class:`~nimare.annotate.gclda.GCLDAModel`
    :func:`~nimare.decode.continuous.gclda_decode_map`
    :func:`~nimare.decode.encode.gclda_encode`

    References
    ----------
    .. footbibliography::
    """
    roi = load_niimg(roi)

    dset_aff = model.mask.affine
    if not np.array_equal(roi.affine, dset_aff):
        raise ValueError(
            "Input roi must have same affine as mask img:\n"
            f"{np.array2string(roi.affine)}\n{np.array2string(dset_aff)}"
        )

    # Load ROI file and get ROI voxels overlapping with brain mask
    mask_vec = model.mask.get_fdata().ravel().astype(bool)
    roi_vec = roi.get_fdata().astype(bool).ravel()
    roi_vec = roi_vec[mask_vec]
    roi_idx = np.where(roi_vec)[0]
    p_topic_g_roi = model.p_topic_g_voxel_[roi_idx, :]  # p(T|V) for voxels in ROI only
    topic_weights = np.sum(p_topic_g_roi, axis=0)  # Sum across words
    if topic_priors is not None:
        weighted_priors = weight_priors(topic_priors, prior_weight)
        topic_weights *= weighted_priors

    # Multiply topic_weights by topic-by-word matrix (p_word_g_topic).
    # n_word_tokens_per_topic = np.sum(model.n_word_tokens_word_by_topic, axis=0)
    # p_word_g_topic = model.n_word_tokens_word_by_topic / n_word_tokens_per_topic[None, :]
    # p_word_g_topic = np.nan_to_num(p_word_g_topic, 0)
    word_weights = np.dot(model.p_word_g_topic_, topic_weights)

    decoded_df = pd.DataFrame(index=model.vocabulary, columns=["Weight"], data=word_weights)
    decoded_df.index.name = "Term"
    return decoded_df, topic_weights


class BrainMapDecoder(Decoder):
    """Perform image-to-text decoding for discrete inputs according to the BrainMap method.

    This method was described in :footcite:t:`amft2015definition`.

    .. versionadded:: 0.0.3

    Parameters
    ----------
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the
        feature group prefix.
        If feature_group is *not* provided, then features *should* include the
        prefix.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold as assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    See Also
    --------
    :func:`~nimare.decode.discrete.brainmap_decode`: The associated function for this method.

    References
    ----------
    .. footbibliography::
    """

    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        feature_group=None,
        features=None,
        frequency_threshold=0.001,
        u=0.05,
        correction="fdr_bh",
    ):
        self.feature_group = feature_group
        self.features = features
        self.frequency_threshold = frequency_threshold
        self.u = u
        self.correction = correction

    def _fit(self, dataset):
        pass

    def transform(self, ids, ids2=None):
        """Apply the decoding method to a Dataset.

        Parameters
        ----------
        ids : :obj:`list`
            Subset of studies in coordinates/annotations dataframes indicating
            target for decoding. Examples include studies reporting at least one
            peak in an ROI, or studies selected from a clustering analysis.
        ids2 : :obj:`list` or None, optional
            Second subset of studies, representing "unselected" studies. If None,
            then all studies in coordinates/annotations dataframes **not** in
            ``ids`` will be used.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
            'zReverse', and 'probReverse'.
        """
        results = brainmap_decode(
            self.inputs_["coordinates"],
            self.inputs_["annotations"],
            ids=ids,
            ids2=ids2,
            features=self.features_,
            frequency_threshold=self.frequency_threshold,
            u=self.u,
            correction=self.correction,
        )

        return results


def brainmap_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    features=None,
    frequency_threshold=0.001,
    u=0.05,
    correction="fdr_bh",
):
    """Perform image-to-text decoding for discrete inputs according to the BrainMap method.

    This method was described in :footcite:t:`amft2015definition`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id' and
        must have a separate row for each reported peak coordinate for each
        study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and each
        row must correspond to a study. Other columns may correspond to
        individual labels.
        IDs from ``annotations`` must match those from ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        target for decoding. Examples include studies reporting at least one
        peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in coordinates/annotations dataframes **not** in
        ``ids`` will be used.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold as assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table with each label and the following values associated with each
        label: 'pForward', 'zForward', 'likelihoodForward', 'pReverse',
        'zReverse', and 'probReverse'.

    See Also
    --------
    :func:`~nimare.decode.discrete.BrainMapDecoder`: The associated class for this method.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(list(set(coordinates["id"].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    # Binarize with frequency threshold
    features_df = annotations.set_index("id", drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    # the number of times any term is used (e.g., if one experiment uses
    # two terms, that counts twice). Why though?
    n_exps_across_terms = np.sum(np.sum(features_df))

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    p_term = n_term / n_exps_across_terms

    n_foci_in_database = coordinates.shape[0]
    p_selected = n_selected / n_foci_in_database

    # I hope there's a way to do this without the for loop
    n_term_foci = np.zeros(len(features))
    n_noterm_foci = np.zeros(len(features))
    for i, term in enumerate(features):
        term_ids = features_df.loc[features_df[term] == 1].index.values
        noterm_ids = features_df.loc[features_df[term] == 0].index.values
        n_term_foci[i] = coordinates["id"].isin(term_ids).sum()
        n_noterm_foci[i] = coordinates["id"].isin(noterm_ids).sum()

    p_selected_g_term = n_selected_term / n_term_foci  # probForward
    l_selected_g_term = p_selected_g_term / p_selected  # likelihoodForward
    p_selected_g_noterm = n_selected_noterm / n_noterm_foci

    p_term_g_selected = p_selected_g_term * p_term / p_selected  # probReverse
    p_term_g_selected = p_term_g_selected / np.nansum(p_term_g_selected)  # Normalize

    # Significance testing
    # Forward inference significance is determined with a binomial distribution
    p_fi = 1 - binom.cdf(k=n_selected_term, n=n_term_foci, p=p_selected)
    sign_fi = np.sign(
        n_selected_term - np.mean(n_selected_term)
    ).ravel()  # pylint: disable=no-member

    # Two-way chi-square test for specificity of activation
    cells = np.array(
        [
            [n_selected_term, n_selected_noterm],  # pylint: disable=no-member
            [n_unselected_term, n_unselected_noterm],
        ]
    ).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()  # pylint: disable=no-member

    # Ignore rare features
    p_fi[n_selected_term < 5] = 1.0
    p_ri[n_selected_term < 5] = 1.0

    # Multiple comparisons correction across features. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Effect size
    arr = np.array(
        [
            p_corr_fi,
            z_corr_fi,
            l_selected_g_term,  # pylint: disable=no-member
            p_corr_ri,
            z_corr_ri,
            p_term_g_selected,
        ]
    ).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=[
            "pForward",
            "zForward",
            "likelihoodForward",
            "pReverse",
            "zReverse",
            "probReverse",
        ],
    )
    out_df.index.name = "Term"
    return out_df


class NeurosynthDecoder(Decoder):
    """Perform discrete functional decoding according to Neurosynth's meta-analytic method.

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    .. versionadded:: 0.0.3

    This does not employ correlations between unthresholded maps, which are the
    method of choice for decoding within Neurosynth and Neurovault.
    Metadata (i.e., feature labels) for studies within the selected sample
    (`ids`) are compared to the unselected studies remaining in the database
    (`dataset`).

    Parameters
    ----------
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the
        feature group prefix.
        If feature_group is *not* provided, then features *should* include the
        prefix.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold as assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    prior : :obj:`float`, optional
        Uniform prior probability of each label being active in a study in
        the absence of evidence (labels or selection) from the study.
        Default is 0.5 (50%).
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    See Also
    --------
    :func:`~nimare.decode.discrete.neurosynth_decode`: The associated function for this method.

    References
    ----------
    .. footbibliography::
    """

    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        feature_group=None,
        features=None,
        frequency_threshold=0.001,
        prior=0.5,
        u=0.05,
        correction="fdr_bh",
    ):
        self.feature_group = feature_group
        self.features = features
        self.frequency_threshold = frequency_threshold
        self.prior = prior
        self.u = u
        self.correction = correction

    def _fit(self, dataset):
        pass

    def transform(self, ids, ids2=None):
        """Apply the decoding method to a Dataset.

        Parameters
        ----------
        ids : :obj:`list`
            Subset of studies in coordinates/annotations dataframes indicating
            target for decoding. Examples include studies reporting at least one
            peak in an ROI, or studies selected from a clustering analysis.
        ids2 : :obj:`list` or None, optional
            Second subset of studies, representing "unselected" studies. If None,
            then all studies in Dataset **not** in
            ``ids`` will be used.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
            and 'probReverse'.
        """
        results = neurosynth_decode(
            self.inputs_["coordinates"],
            self.inputs_["annotations"],
            ids=ids,
            ids2=ids2,
            features=self.features_,
            frequency_threshold=self.frequency_threshold,
            prior=self.prior,
            u=self.u,
            correction=self.correction,
        )
        return results


def neurosynth_decode(
    coordinates,
    annotations,
    ids,
    ids2=None,
    feature_group=None,
    features=None,
    frequency_threshold=0.001,
    prior=0.5,
    u=0.05,
    correction="fdr_bh",
):
    """Perform discrete functional decoding according to Neurosynth's meta-analytic method.

    This does not employ correlations between unthresholded maps, which are the
    method of choice for decoding within Neurosynth and Neurovault.
    Metadata (i.e., feature labels) for studies within the selected sample
    (`ids`) are compared to the unselected studies remaining in the database
    (`dataset`).

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    Parameters
    ----------
    coordinates : :class:`pandas.DataFrame`
        DataFrame containing coordinates. Must include a column named 'id' and
        must have a separate row for each reported peak coordinate for each
        study (i.e., there are multiple rows per ID).
        IDs from ``coordinates`` must match those from ``annotations``.
    annotations : :class:`pandas.DataFrame`
        DataFrame containing labels. Must include a column named 'id' and each
        row must correspond to a study. Other columns may correspond to
        individual labels.
        IDs from ``annotations`` must match those from ``coordinates``.
    ids : :obj:`list`
        Subset of studies in coordinates/annotations dataframes indicating
        target for decoding. Examples include studies reporting at least one
        peak in an ROI, or studies selected from a clustering analysis.
    ids2 : :obj:`list` or None, optional
        Second subset of studies, representing "unselected" studies. If None,
        then all studies in coordinates/annotations dataframes **not** in
        ``ids`` will be used.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        Default is None, which uses all features available.
    frequency_threshold : :obj:`float`, optional
        Threshold to apply to dataset annotations. Values greater than or
        equal to the threshold as assigned as label+, while values below
        the threshold are considered label-. Default is 0.001.
    prior : :obj:`float`, optional
        Uniform prior probability of each label being active in a study in
        the absence of evidence (labels or selection) from the study.
        Default is 0.5 (50%).
    u : :obj:`float`, optional
        Alpha level for multiple comparisons correction. Default is 0.05.
    correction : {None, "bh", "by", "bonferroni"}, optional
        Multiple comparisons correction method to apply.
        Default is 'bh' (Benjamini-Hochberg FDR correction).

    Returns
    -------
    out_df : :class:`pandas.DataFrame`
        Table with each label and the following values associated with each
        label: 'pForward', 'zForward', 'probForward', 'pReverse', 'zReverse',
        and 'probReverse'.

    See Also
    --------
    :class:`~nimare.decode.discrete.NeurosynthDecoder`: The associated class for this method.
    :func:`~nimare.decode.continuous.CorrelationDecoder`: The correlation-based decoding
        method employed in Neurosynth and NeuroVault.

    References
    ----------
    .. footbibliography::
    """
    dataset_ids = sorted(list(set(coordinates["id"].values)))
    if ids2 is None:
        unselected = sorted(list(set(dataset_ids) - set(ids)))
    else:
        unselected = ids2[:]

    # Binarize with frequency threshold
    features_df = annotations.set_index("id", drop=True)
    features_df = features_df[features].ge(frequency_threshold)

    sel_array = features_df.loc[ids].values
    unsel_array = features_df.loc[unselected].values

    n_selected = len(ids)
    n_unselected = len(unselected)

    n_selected_term = np.sum(sel_array, axis=0)
    n_unselected_term = np.sum(unsel_array, axis=0)

    n_selected_noterm = n_selected - n_selected_term
    n_unselected_noterm = n_unselected - n_unselected_term

    n_term = n_selected_term + n_unselected_term
    n_noterm = n_selected_noterm + n_unselected_noterm

    p_term = n_term / (n_term + n_noterm)

    p_selected_g_term = n_selected_term / n_term
    p_selected_g_noterm = n_selected_noterm / n_noterm

    # Recompute conditions with empirically derived prior (or inputted one)
    if prior is None:
        # if this is used, p_term_g_selected_prior = p_selected (regardless of term)
        prior = p_term

    # Significance testing
    # One-way chi-square test for consistency of term frequency across terms
    chi2_fi = one_way(n_selected_term, n_term)
    p_fi = special.chdtrc(1, chi2_fi)
    sign_fi = np.sign(
        n_selected_term - np.mean(n_selected_term)
    ).ravel()  # pylint: disable=no-member

    # Two-way chi-square test for specificity of activation
    cells = np.array(
        [
            [n_selected_term, n_selected_noterm],  # pylint: disable=no-member
            [n_unselected_term, n_unselected_noterm],
        ]
    ).T
    chi2_ri = two_way(cells)
    p_ri = special.chdtrc(1, chi2_ri)
    sign_ri = np.sign(p_selected_g_term - p_selected_g_noterm).ravel()  # pylint: disable=no-member

    # Multiple comparisons correction across terms. Separately done for FI and RI.
    if correction in ("bh", "by"):
        p_corr_fi = fdr(p_fi, alpha=u, method=correction)
        p_corr_ri = fdr(p_ri, alpha=u, method=correction)
    elif correction == "bonferroni":
        p_corr_fi = bonferroni(p_fi)
        p_corr_ri = bonferroni(p_ri)
    else:
        p_corr_fi = p_fi
        p_corr_ri = p_ri

    # Compute z-values
    z_corr_fi = p_to_z(p_corr_fi, "two") * sign_fi
    z_corr_ri = p_to_z(p_corr_ri, "two") * sign_ri

    # Effect size
    # est. prob. of brain state described by term finding activation in ROI
    p_selected_g_term_g_prior = prior * p_selected_g_term + (1 - prior) * p_selected_g_noterm

    # est. prob. of activation in ROI reflecting brain state described by term
    p_term_g_selected_g_prior = p_selected_g_term * prior / p_selected_g_term_g_prior

    arr = np.array(
        [
            p_corr_fi,
            z_corr_fi,
            p_selected_g_term_g_prior,  # pylint: disable=no-member
            p_corr_ri,
            z_corr_ri,
            p_term_g_selected_g_prior,
        ]
    ).T

    out_df = pd.DataFrame(
        data=arr,
        index=features,
        columns=["pForward", "zForward", "probForward", "pReverse", "zReverse", "probReverse"],
    )
    out_df.index.name = "Term"
    return out_df


class ROIAssociationDecoder(Decoder):
    """Perform discrete functional decoding according to Neurosynth's ROI association method.

    Neurosynth was described in :footcite:t:`yarkoni2011large`.

    Parameters
    ----------
    masker : :class:`~nilearn.input_data.NiftiMasker`, img_like, or similar
        Masker for region of interest.
    kernel_transformer : :obj:`~nimare.meta.kernel.KernelTransformer`, optional
        Kernel with which to create modeled activation maps. Default is MKDAKernel.
    feature_group : :obj:`str`, optional
        Feature group name used to select labels from a specific source.
        Feature groups are stored as prefixes to feature name columns in
        Dataset.annotations, with the format ``[source]_[valuetype]__``.
        Input may or may not include the trailing underscore.
        Default is None, which uses all feature groups available.
    features : :obj:`list`, optional
        List of features in dataset annotations to use for decoding.
        If feature_group is provided, then features should not include the feature group prefix.
        If feature_group is *not* provided, then features *should* include the prefix.
        Default is None, which uses all features available.

    Notes
    -----
    The general approach in this method is:

    1. Define ROI.
    2. Generate MA maps for all studies in Dataset.
    3. Average MA values within ROI to get study-wise MA regressor.
    4. Correlate MA regressor with study-wise annotation values (e.g., tf-idf values).

    References
    ----------
    .. footbibliography::
    """

    _required_inputs = {
        "coordinates": ("coordinates", None),
        "annotations": ("annotations", None),
    }

    def __init__(
        self,
        masker,
        kernel_transformer=MKDAKernel,
        feature_group=None,
        features=None,
        **kwargs,
    ):
        self.masker = get_masker(masker)

        # Get kernel transformer
        kernel_args = {
            k.split("kernel__")[1]: v for k, v in kwargs.items() if k.startswith("kernel__")
        }
        kernel_transformer = _check_type(kernel_transformer, KernelTransformer, **kernel_args)
        self.kernel_transformer = kernel_transformer

        self.feature_group = feature_group
        self.features = features
        self.frequency_threshold = 0

    def _fit(self, dataset):
        roi_values = self.kernel_transformer.transform(
            self.inputs_["coordinates"],
            self.masker,
            return_type="array",
        )
        self.roi_values_ = roi_values.mean(axis=1)

    def transform(self):
        """Apply the decoding method to a Dataset.

        Returns
        -------
        results : :class:`pandas.DataFrame`
            Table with each label and the following values associated with each
            label: 'r'.
        """
        feature_values = self.inputs_["annotations"][self.features_].values
        corrs = pearson(self.roi_values_, feature_values.T)
        out_df = pd.DataFrame(index=self.features_, columns=["r"], data=corrs)
        out_df.index.name = "feature"
        return out_df