73 changes: 49 additions & 24 deletions pymare/core.py
@@ -6,7 +6,9 @@
import pandas as pd

from .estimators import (WeightedLeastSquares, DerSimonianLaird,
-                         LikelihoodBased, StanMetaRegression)
+                         VarianceBasedLikelihoodEstimator,
+                         SampleSizeBasedLikelihoodEstimator,
+                         StanMetaRegression)
from .stats import ensure_2d


@@ -15,32 +17,38 @@ class Dataset:

Args:
estimates (array-like): 1d array of study-level estimates with length K
-        variances (array-like): 1d array of study-level variances with length K
+        variances (array-like, optional): 1d array of study-level variances
+            with length K
predictors (array-like, optional): 1d or 2d array containing
study-level predictors (or covariates); has dimensions K x P
+        sample_sizes (array-like, optional): 1d array of study-level sample
+            sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
automatically added to the predictor matrix. If False, the
predictors matrix is passed as-is to estimators.
        kwargs (dict, optional): Keyword arguments to pass on to estimators
"""
-    def __init__(self, estimates, variances, predictors=None, names=None,
-                 add_intercept=True, **kwargs):
+    def __init__(self, estimates, variances=None, predictors=None,
+                 sample_sizes=None, names=None, add_intercept=True, **kwargs):
self.estimates = ensure_2d(estimates)
self.variances = ensure_2d(variances)
+        self.sample_sizes = ensure_2d(sample_sizes)
self.kwargs = kwargs
-        X, n = self._setup_predictors(predictors, names, add_intercept)
+        X, n = self._get_predictors(predictors, names, add_intercept)
self.predictors = X
self.names = n

def __getattr__(self, key):
# Provide convenient access to stored kwargs.
if key in self.kwargs:
return self.kwargs[key]
-        raise AttributeError
+        raise AttributeError("{} object has no attribute {}".format(
+            self.__class__.__name__, key))

-    def _setup_predictors(self, X, names, add_intercept):
+    def _get_predictors(self, X, names, add_intercept):
if X is None and not add_intercept:
raise ValueError("No fixed predictors found. If no X matrix is "
"provided, add_intercept must be True!")
Expand All @@ -67,17 +75,26 @@ def X(self):
"""Alias for the `predictors` attribute."""
return self.predictors

+    @property
+    def n(self):
+        """Alias for the `sample_sizes` attribute."""
+        return self.sample_sizes


-def meta_regression(estimates, variances, predictors=None, names=None,
-                    add_intercept=True, method='ML', ci_method='QP',
-                    alpha=0.05, **kwargs):
+def meta_regression(estimates, variances=None, predictors=None,
+                    sample_sizes=None, names=None, add_intercept=True,
+                    method='ML', ci_method='QP', alpha=0.05, **kwargs):
"""Fits the standard meta-regression/meta-analysis model to provided data.

Args:
-        estimates ([float]): 1d array of study-level estimates with length K
-        variances ([float]): 1d array of study-level variances with length K
-        predictors ([float], optional): 1d or 2d array containing study-level
-            predictors (or covariates); has dimensions K x P
+        estimates (array-like): 1d array of study-level estimates with length K
+        variances (array-like, optional): 1d array of study-level variances
+            with length K
+        predictors (array-like, optional): 1d or 2d array containing
+            study-level predictors (or covariates); has dimensions K x P. If
+            omitted, add_intercept must be True.
+        sample_sizes (array-like, optional): 1d array of study-level sample
+            sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
@@ -102,21 +119,29 @@ def meta_regression(estimates, variances=None, predictors=None,
depending on the specified method ('Stan' will return the latter; all
other methods return the former).
"""
-    dataset = Dataset(estimates, variances, predictors, names, add_intercept)
+    dataset = Dataset(estimates, variances, predictors, sample_sizes, names,
+                      add_intercept)

method = method.lower()

-    estimator_cls = {
-        'ml': partial(LikelihoodBased, method=method),
-        'reml': partial(LikelihoodBased, method=method),
-        'dl': DerSimonianLaird,
-        'wls': WeightedLeastSquares,
-        'fe': WeightedLeastSquares,
-        'stan': StanMetaRegression,
-    }[method]
+    if method in ['ml', 'reml']:
+        if variances is not None:
+            est_cls = partial(VarianceBasedLikelihoodEstimator, method=method)
+        elif sample_sizes is not None:
+            est_cls = partial(SampleSizeBasedLikelihoodEstimator, method=method)
+        else:
+            raise ValueError("If method is ML or REML, one of `variances` or "
+                             "`sample_sizes` must be passed!")
+    else:
+        est_cls = {
+            'dl': DerSimonianLaird,
+            'wls': WeightedLeastSquares,
+            'fe': WeightedLeastSquares,
+            'stan': StanMetaRegression,
+        }[method]

# Get estimates
-    est = estimator_cls(**kwargs)
+    est = est_cls(**kwargs)
results = est.fit(dataset)
if hasattr(results, 'compute_stats'):
results.compute_stats(ci_method=ci_method, alpha=alpha)
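To make the new dispatch concrete, here is a minimal usage sketch of the updated API. The study values are invented for illustration, and it assumes meta_regression is importable from the package root:

import numpy as np
from pymare import meta_regression

# Hypothetical study-level estimates for K = 4 studies.
estimates = np.array([0.2, 0.5, 0.3, 0.7])

# Variance-based path: method='ML' dispatches to
# VarianceBasedLikelihoodEstimator because variances are provided.
variances = np.array([0.05, 0.10, 0.08, 0.12])
res_v = meta_regression(estimates, variances=variances, method='ML')

# Sample-size-based path: with variances omitted, the same method
# dispatches to SampleSizeBasedLikelihoodEstimator.
sample_sizes = np.array([25, 40, 30, 50])
res_n = meta_regression(estimates, sample_sizes=sample_sizes, method='ML')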
7 changes: 5 additions & 2 deletions pymare/estimators/__init__.py
@@ -1,9 +1,12 @@
from .estimators import (WeightedLeastSquares, DerSimonianLaird,
-                         LikelihoodBased, StanMetaRegression)
+                         VarianceBasedLikelihoodEstimator,
+                         SampleSizeBasedLikelihoodEstimator,
+                         StanMetaRegression)

__all__ = [
'WeightedLeastSquares',
'DerSimonianLaird',
-    'LikelihoodBased',
+    'VarianceBasedLikelihoodEstimator',
+    'SampleSizeBasedLikelihoodEstimator',
'StanMetaRegression'
]
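For downstream code, the rename is a one-line import change. A sketch, assuming estimators remain re-exported from pymare.estimators as shown above:

# Before this PR:
# from pymare.estimators import LikelihoodBased

# After this PR, the likelihood machinery is split by input type:
from pymare.estimators import (VarianceBasedLikelihoodEstimator,
                               SampleSizeBasedLikelihoodEstimator)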
142 changes: 113 additions & 29 deletions pymare/estimators/estimators.py
@@ -1,6 +1,7 @@
"""Meta-regression estimator classes."""

from abc import ABCMeta, abstractmethod
+from inspect import getfullargspec

import numpy as np
from scipy.optimize import minimize
@@ -14,14 +15,43 @@ class BaseEstimator(metaclass=ABCMeta):
_result_cls = MetaRegressionResults

@abstractmethod
-    def _fit(self, y, v, X):
-        # Subclasses should retain this minimal signature in all cases; this
-        # ensures that the fit() wrapper in the base class can be ignored by
-        # users who want to avoid input/output overhead and call _fit directly.
+    def _fit(self):
+        # Subclasses must implement a _fit() method that directly takes arrays.
+        # The following named arguments are allowed, and will be automatically
+        # extracted from the Dataset instance:
+        # * y (estimates)
+        # * v (variances)
+        # * n (sample_sizes)
+        # * X (predictors)
pass

+    def accepts_dataset(self, dataset):
+        """ Returns whether the current class can fit the passed Dataset.
+
+        Args:
+            dataset (Dataset): A Dataset instance
+
+        Returns:
+            A boolean.
+        """
+        args = getfullargspec(self._fit)[0][1:]
+        for name in args:
+            if getattr(dataset, name) is None:
+                return False
+        return True

def fit(self, dataset):
-        results = self._fit(dataset.y, dataset.v, dataset.X)
+        kwargs = {}
+        spec = getfullargspec(self._fit)
+        n_kw = len(spec.defaults) if spec.defaults else 0
+        n_args = len(spec.args) - n_kw - 1
+        for i, name in enumerate(spec.args[1:]):
+            if i >= n_args:
+                kwargs[name] = getattr(dataset, name, spec.defaults[i - n_args])
+            else:
+                kwargs[name] = getattr(dataset, name)
+
+        results = self._fit(**kwargs)
return self._result_cls(results, dataset, self)


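The reworked fit() resolves each subclass's _fit() arguments by name against Dataset attributes. A self-contained sketch of that introspection pattern; FakeDataset and the module-level _fit below are hypothetical stand-ins, not pymare types:

from inspect import getfullargspec

class FakeDataset:
    """Hypothetical stand-in exposing the attributes _fit() asks for."""
    y = [1.0, 2.0]      # estimates
    v = [0.1, 0.2]      # variances
    n = None            # sample sizes (absent here)
    X = [[1.0], [1.0]]  # predictors

def _fit(self, y, v, X):
    """Example signature; only its argument names matter here."""
    return y, v, X

# Argument names after 'self' double as Dataset attribute names.
arg_names = getfullargspec(_fit).args[1:]

# accepts_dataset(): every named input must be non-None on the dataset.
can_fit = all(getattr(FakeDataset, name) is not None for name in arg_names)
print(arg_names, can_fit)  # ['y', 'v', 'X'] True

# fit(): pull the matching attributes and call _fit() with them.
kwargs = {name: getattr(FakeDataset, name) for name in arg_names}
print(_fit(None, **kwargs))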
@@ -73,20 +103,16 @@ def _fit(self, y, v, X):
return {'beta': beta_dl, 'tau2': tau_dl}


-class LikelihoodBased(BaseEstimator):
-    """ Likelihood-based meta-regression estimator.
+class VarianceBasedLikelihoodEstimator(BaseEstimator):
+    """ Likelihood-based estimator for estimates with known variances.

-    Iteratively estimates the between-subject variance tau^2 and fixed effects
+    Iteratively estimates the between-study variance tau^2 and fixed effect
betas using the specified likelihood-based estimator (ML or REML).

Args:
method (str, optional): The estimation method to use. Either 'ML' (for
maximum-likelihood) or 'REML' (restricted maximum-likelihood).
Defaults to 'ML'.
-        beta (array, optional): Initial beta values to use in optimization. If
-            None (default), the DerSimonian-Laird estimate is used.
-        tau2 (float, optional): Initial tau^2 value to use in optimization. If
-            None (default), the DerSimonian-Laird estimate is used.
kwargs (dict, optional): Keyword arguments to pass to the SciPy
minimizer.

@@ -96,26 +122,22 @@ class LikelihoodBased(BaseEstimator):
passed in as keyword arguments.
"""

-    def __init__(self, method='ml', beta=None, tau2=None, **kwargs):
-        self.method = method
-        self.beta = beta
-        self.tau2 = tau2
+    def __init__(self, method='ml', **kwargs):
+        nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
+        if nll_func is None:
+            raise ValueError("No log-likelihood function defined for method "
+                             "'{}'.".format(method))
+        self._nll_func = nll_func
self.kwargs = kwargs

def _fit(self, y, v, X):
-        # use D-L estimate for initial values if none provided
-        if self.tau2 is None or self.beta is None:
-            est_DL = DerSimonianLaird()._fit(y, v, X)
-            beta = est_DL['beta'] if self.beta is None else self.beta
-            tau2 = est_DL['tau2'] if self.tau2 is None else self.tau2
-
-        ll_func = getattr(self, '_{}_nll'.format(self.method.lower()))
-        if ll_func is None:
-            raise ValueError("No log-likelihood function defined for method "
-                             "'{}'.".format(self.method))
+        # use D-L estimate for initial values
+        est_DL = DerSimonianLaird()._fit(y, v, X)
+        beta = est_DL['beta']
+        tau2 = est_DL['tau2']

theta_init = np.r_[beta.ravel(), tau2]
-        res = minimize(ll_func, theta_init, (y, v, X), **self.kwargs).x
+        res = minimize(self._nll_func, theta_init, (y, v, X), **self.kwargs).x
beta, tau = res[:-1], float(res[-1])
tau = np.max([tau, 0])
return {'beta': beta, 'tau2': tau}
@@ -127,8 +149,7 @@ def _ml_nll(self, theta, y, v, X):
tau2 = 0
w = 1. / (v + tau2)
R = y - X.dot(beta)
-        ll = 0.5 * (np.log(w).sum() - (R * w * R).sum())
-        return -ll
+        return -0.5 * (np.log(w).sum() - (R * w * R).sum())

def _reml_nll(self, theta, y, v, X):
""" REML negative log-likelihood for meta-regression model. """
@@ -139,6 +160,69 @@ def _reml_nll(self, theta, y, v, X):
return ll_ + 0.5 * np.log(np.linalg.det(F))

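For reference, the objectives minimized above are the standard meta-regression negative log-likelihoods, up to additive constants. With weights w_i = 1/(v_i + tau^2) and W = diag(w_i), the code computes:

-\ell_{\mathrm{ML}}(\beta, \tau^2)
    = -\frac{1}{2}\Big[\sum_{i=1}^{K} \log w_i
        - \sum_{i=1}^{K} w_i\,(y_i - x_i^\top \beta)^2\Big],
\qquad
-\ell_{\mathrm{REML}} = -\ell_{\mathrm{ML}}
    + \frac{1}{2}\,\log\det\big(X^\top W X\big),

which matches _ml_nll and _reml_nll above (F in the code is X^T W X).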

+class SampleSizeBasedLikelihoodEstimator(BaseEstimator):
+    """ Likelihood-based estimator for estimates with known sample sizes but
+    unknown variances.
+
+    Iteratively estimates the between-study variance tau^2 and fixed effect
+    betas using the specified likelihood-based estimator (ML or REML).
+
+    Args:
+        method (str, optional): The estimation method to use. Either 'ML' (for
+            maximum-likelihood) or 'REML' (restricted maximum-likelihood).
+            Defaults to 'ML'.
+        kwargs (dict, optional): Keyword arguments to pass to the SciPy
+            minimizer.
+
+    Notes:
+        The ML and REML solutions are obtained via SciPy's scalar function
+        minimizer (scipy.optimize.minimize). Parameters to minimize() can be
+        passed in as keyword arguments.
+    """

+    def __init__(self, method='ml', **kwargs):
+        nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
+        if nll_func is None:
+            raise ValueError("No log-likelihood function defined for method "
+                             "'{}'.".format(method))
+        self._nll_func = nll_func
+        self.kwargs = kwargs

+    def _fit(self, y, n, X):
+        # set tau^2 to 0 and compute starting values
+        tau2 = 0.
+        beta = WeightedLeastSquares(tau2=tau2)._fit(y, n, X)['beta']
+        sigma = ((y - X.dot(beta))**2 * n).mean()
+        theta_init = np.r_[beta.ravel(), sigma, tau2]
+        res = minimize(self._nll_func, theta_init, (y, n, X), **self.kwargs).x
+        beta, sigma, tau = res[:-2], float(res[-2]), float(res[-1])
+        tau = np.max([tau, 0])
+        return {'beta': beta, 'sigma2': sigma, 'tau2': tau}

+    def _ml_nll(self, theta, y, n, X):
+        """ ML negative log-likelihood for meta-regression model. """
+        beta, sigma2, tau2 = theta[:-2, None], theta[-2], theta[-1]
+        if tau2 < 0:
+            tau2 = 0
+        if sigma2 < 0:
+            sigma2 = 0
+        w = 1 / (tau2 + sigma2 / n)
+        R = y - X.dot(beta)
+        return -0.5 * (np.log(w).sum() - (R * w * R).sum())

+    def _reml_nll(self, theta, y, n, X):
+        """ REML negative log-likelihood for meta-regression model. """
+        ll_ = self._ml_nll(theta, y, n, X)
+        sigma2, tau2 = theta[-2:]
+        w = 1 / (tau2 + sigma2 / n)
+        F = (X * w).T.dot(X)
+        return ll_ + 0.5 * np.log(np.linalg.det(F))

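The sample-size variant assumes a shared within-study variance sigma^2 scaled by each study's n_i, i.e. y_i ~ N(x_i^T beta, tau^2 + sigma^2 / n_i). The same objective is then minimized with sigma^2 as an extra free parameter:

w_i = \frac{1}{\tau^2 + \sigma^2 / n_i},
\qquad
-\ell_{\mathrm{ML}}(\beta, \sigma^2, \tau^2)
    = -\frac{1}{2}\Big[\sum_{i=1}^{K} \log w_i
        - \sum_{i=1}^{K} w_i\,(y_i - x_i^\top \beta)^2\Big].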

class StanMetaRegression(BaseEstimator):
"""Bayesian meta-regression estimator using Stan.

8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -17,6 +17,14 @@ def dataset(variables):
return Dataset(*variables, names=['my_covariate'])


+@pytest.fixture(scope='package')
+def dataset_n():
+    y = np.array([[-3., -0.5, 0., -5.01, 0.35, -2., -6., -4., -4.3, -0.1, -1.]]).T
+    n = np.array([[16, 16, 20.548, 32.597, 14., 11.118, 4.444, 12.414, 26.963,
+                   130.556, 126.76]]).T / 2
+    return Dataset(y, sample_sizes=n)


@pytest.fixture(scope='package')
def vars_with_intercept():
y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T
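A sketch of a test that could consume the new dataset_n fixture; the test name and assertions are illustrative, not part of this PR:

from pymare.estimators import SampleSizeBasedLikelihoodEstimator

def test_sample_size_based_ml(dataset_n):
    est = SampleSizeBasedLikelihoodEstimator(method='ML')
    # The fixture provides sample sizes but no variances, so only the
    # sample-size-based estimator should accept it.
    assert est.accepts_dataset(dataset_n)
    results = est.fit(dataset_n)
    assert results is not None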