73 changes: 49 additions & 24 deletions pymare/core.py
@@ -6,7 +6,9 @@
import pandas as pd

from .estimators import (WeightedLeastSquares, DerSimonianLaird,
-                         LikelihoodBased, StanMetaRegression)
+                         VarianceBasedLikelihoodEstimator,
+                         SampleSizeBasedLikelihoodEstimator,
+                         StanMetaRegression)
from .stats import ensure_2d


@@ -15,32 +17,38 @@ class Dataset:

Args:
estimates (array-like): 1d array of study-level estimates with length K
-        variances (array-like): 1d array of study-level variances with length K
+        variances (array-like, optional): 1d array of study-level variances
+            with length K
predictors (array-like, optional): 1d or 2d array containing
study-level predictors (or covariates); has dimensions K x P
+        sample_sizes (array-like, optional): 1d array of study-level sample
+            sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
automatically added to the predictor matrix. If False, the
predictors matrix is passed as-is to estimators.
        kwargs (dict, optional): Keyword arguments to pass on to estimators
"""
-    def __init__(self, estimates, variances, predictors=None, names=None,
-                 add_intercept=True, **kwargs):
+    def __init__(self, estimates, variances=None, predictors=None,
+                 sample_sizes=None, names=None, add_intercept=True, **kwargs):
self.estimates = ensure_2d(estimates)
self.variances = ensure_2d(variances)
+        self.sample_sizes = ensure_2d(sample_sizes)
self.kwargs = kwargs
-        X, n = self._setup_predictors(predictors, names, add_intercept)
+        X, n = self._get_predictors(predictors, names, add_intercept)
self.predictors = X
self.names = n

def __getattr__(self, key):
# Provide convenient access to stored kwargs.
if key in self.kwargs:
return self.kwargs[key]
-        raise AttributeError
+        raise AttributeError("{} object has no attribute {}".format(
+            self.__class__.__name__, key))

-    def _setup_predictors(self, X, names, add_intercept):
+    def _get_predictors(self, X, names, add_intercept):
if X is None and not add_intercept:
raise ValueError("No fixed predictors found. If no X matrix is "
"provided, add_intercept must be True!")
Expand All @@ -67,17 +75,26 @@ def X(self):
"""Alias for the `predictors` attribute."""
return self.predictors

+    @property
+    def n(self):
+        """Alias for the `sample_sizes` attribute."""
+        return self.sample_sizes


-def meta_regression(estimates, variances, predictors=None, names=None,
-                    add_intercept=True, method='ML', ci_method='QP',
-                    alpha=0.05, **kwargs):
+def meta_regression(estimates, variances=None, predictors=None,
+                    sample_sizes=None, names=None, add_intercept=True,
+                    method='ML', ci_method='QP', alpha=0.05, **kwargs):
"""Fits the standard meta-regression/meta-analysis model to provided data.

Args:
-        estimates ([float]): 1d array of study-level estimates with length K
-        variances ([float]): 1d array of study-level variances with length K
-        predictors ([float], optional): 1d or 2d array containing study-level
-            predictors (or covariates); has dimensions K x P
+        estimates (array-like): 1d array of study-level estimates with length K
+        variances (array-like, optional): 1d array of study-level variances
+            with length K
+        predictors (array-like, optional): 1d or 2d array containing
+            study-level predictors (or covariates); has dimensions K x P. If
+            omitted, add_intercept must be True.
+        sample_sizes (array-like, optional): 1d array of study-level sample
+            sizes (length K)
names ([str], optional): List of length P containing the names of the
predictors
add_intercept (bool, optional): If True, an intercept column is
@@ -102,21 +119,29 @@ def meta_regression(estimates, variances=None, predictors=None,
depending on the specified method ('Stan' will return the latter; all
other methods return the former).
"""
-    dataset = Dataset(estimates, variances, predictors, names, add_intercept)
+    dataset = Dataset(estimates, variances, predictors, sample_sizes, names,
+                      add_intercept)

method = method.lower()

-    estimator_cls = {
-        'ml': partial(LikelihoodBased, method=method),
-        'reml': partial(LikelihoodBased, method=method),
-        'dl': DerSimonianLaird,
-        'wls': WeightedLeastSquares,
-        'fe': WeightedLeastSquares,
-        'stan': StanMetaRegression,
-    }[method]
+    if method in ['ml', 'reml']:
+        if variances is not None:
+            est_cls = partial(VarianceBasedLikelihoodEstimator, method=method)
+        elif sample_sizes is not None:
+            est_cls = partial(SampleSizeBasedLikelihoodEstimator, method=method)
+        else:
+            raise ValueError("If method is ML or REML, one of `variances` or "
+                             "`sample_sizes` must be passed!")
+    else:
+        est_cls = {
+            'dl': DerSimonianLaird,
+            'wls': WeightedLeastSquares,
+            'fe': WeightedLeastSquares,
+            'stan': StanMetaRegression,
+        }[method]

# Get estimates
-    est = estimator_cls(**kwargs)
+    est = est_cls(**kwargs)
results = est.fit(dataset)
if hasattr(results, 'compute_stats'):
results.compute_stats(ci_method=ci_method, alpha=alpha)
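To make the new dispatch concrete, here is a minimal usage sketch of the updated API. The study values are invented for illustration, and it assumes meta_regression is importable from the package root:

import numpy as np
from pymare import meta_regression

# Hypothetical study-level estimates for K = 4 studies.
estimates = np.array([0.2, 0.5, 0.3, 0.7])

# Variance-based path: method='ML' dispatches to
# VarianceBasedLikelihoodEstimator because variances are provided.
variances = np.array([0.05, 0.10, 0.08, 0.12])
res_v = meta_regression(estimates, variances=variances, method='ML')

# Sample-size-based path: with variances omitted, the same method
# dispatches to SampleSizeBasedLikelihoodEstimator.
sample_sizes = np.array([25, 40, 30, 50])
res_n = meta_regression(estimates, sample_sizes=sample_sizes, method='ML')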
7 changes: 5 additions & 2 deletions pymare/estimators/__init__.py
@@ -1,9 +1,12 @@
from .estimators import (WeightedLeastSquares, DerSimonianLaird,
-                         LikelihoodBased, StanMetaRegression)
+                         VarianceBasedLikelihoodEstimator,
+                         SampleSizeBasedLikelihoodEstimator,
+                         StanMetaRegression)

__all__ = [
'WeightedLeastSquares',
'DerSimonianLaird',
-    'LikelihoodBased',
+    'VarianceBasedLikelihoodEstimator',
+    'SampleSizeBasedLikelihoodEstimator',
'StanMetaRegression'
]
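For downstream code, the rename is a one-line import change. A sketch, assuming estimators remain re-exported from pymare.estimators as shown above:

# Before this PR:
# from pymare.estimators import LikelihoodBased

# After this PR, the likelihood machinery is split by input type:
from pymare.estimators import (VarianceBasedLikelihoodEstimator,
                               SampleSizeBasedLikelihoodEstimator)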
142 changes: 113 additions & 29 deletions pymare/estimators/estimators.py
@@ -1,6 +1,7 @@
"""Meta-regression estimator classes."""

from abc import ABCMeta, abstractmethod
+from inspect import getfullargspec

import numpy as np
from scipy.optimize import minimize
@@ -14,14 +15,43 @@ class BaseEstimator(metaclass=ABCMeta):
_result_cls = MetaRegressionResults

@abstractmethod
-    def _fit(self, y, v, X):
-        # Subclasses should retain this minimal signature in all cases; this
-        # ensures that the fit() wrapper in the base class can be ignored by
-        # users who want to avoid input/output overhead and call _fit directly.
+    def _fit(self):
+        # Subclasses must implement a _fit() method that directly takes arrays.
+        # The following named arguments are allowed, and will be automatically
+        # extracted from the Dataset instance:
+        # * y (estimates)
+        # * v (variances)
+        # * n (sample_sizes)
+        # * X (predictors)
pass

+    def accepts_dataset(self, dataset):
+        """ Returns whether the current class can fit the passed Dataset.
+
+        Args:
+            dataset (Dataset): A Dataset instance
+
+        Returns:
+            A boolean.
+        """
+        args = getfullargspec(self._fit)[0][1:]
+        for name in args:
+            if getattr(dataset, name) is None:
+                return False
+        return True

def fit(self, dataset):
-        results = self._fit(dataset.y, dataset.v, dataset.X)
+        kwargs = {}
+        spec = getfullargspec(self._fit)
+        n_kw = len(spec.defaults) if spec.defaults else 0
+        n_args = len(spec.args) - n_kw - 1
+        for i, name in enumerate(spec.args[1:]):
+            if i >= n_args:
+                kwargs[name] = getattr(dataset, name, spec.defaults[i - n_args])
+            else:
+                kwargs[name] = getattr(dataset, name)
+
+        results = self._fit(**kwargs)
return self._result_cls(results, dataset, self)


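The reworked fit() resolves each subclass's _fit() arguments by name against Dataset attributes. A self-contained sketch of that introspection pattern; FakeDataset and the module-level _fit below are hypothetical stand-ins, not pymare types:

from inspect import getfullargspec

class FakeDataset:
    """Hypothetical stand-in exposing the attributes _fit() asks for."""
    y = [1.0, 2.0]      # estimates
    v = [0.1, 0.2]      # variances
    n = None            # sample sizes (absent here)
    X = [[1.0], [1.0]]  # predictors

def _fit(self, y, v, X):
    """Example signature; only its argument names matter here."""
    return y, v, X

# Argument names after 'self' double as Dataset attribute names.
arg_names = getfullargspec(_fit).args[1:]

# accepts_dataset(): every named input must be non-None on the dataset.
can_fit = all(getattr(FakeDataset, name) is not None for name in arg_names)
print(arg_names, can_fit)  # ['y', 'v', 'X'] True

# fit(): pull the matching attributes and call _fit() with them.
kwargs = {name: getattr(FakeDataset, name) for name in arg_names}
print(_fit(None, **kwargs))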
@@ -73,20 +103,16 @@ def _fit(self, y, v, X):
return {'beta': beta_dl, 'tau2': tau_dl}


-class LikelihoodBased(BaseEstimator):
-    """ Likelihood-based meta-regression estimator.
+class VarianceBasedLikelihoodEstimator(BaseEstimator):
+    """ Likelihood-based estimator for estimates with known variances.

-    Iteratively estimates the between-subject variance tau^2 and fixed effects
+    Iteratively estimates the between-study variance tau^2 and fixed effect
betas using the specified likelihood-based estimator (ML or REML).

Args:
method (str, optional): The estimation method to use. Either 'ML' (for
maximum-likelihood) or 'REML' (restricted maximum-likelihood).
Defaults to 'ML'.
-        beta (array, optional): Initial beta values to use in optimization. If
-            None (default), the DerSimonian-Laird estimate is used.
-        tau2 (float, optional): Initial tau^2 value to use in optimization. If
-            None (default), the DerSimonian-Laird estimate is used.
kwargs (dict, optional): Keyword arguments to pass to the SciPy
minimizer.

@@ -96,26 +122,22 @@ class LikelihoodBased(BaseEstimator):
passed in as keyword arguments.
"""

-    def __init__(self, method='ml', beta=None, tau2=None, **kwargs):
-        self.method = method
-        self.beta = beta
-        self.tau2 = tau2
+    def __init__(self, method='ml', **kwargs):
+        nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
+        if nll_func is None:
+            raise ValueError("No log-likelihood function defined for method "
+                             "'{}'.".format(method))
+        self._nll_func = nll_func
self.kwargs = kwargs

def _fit(self, y, v, X):
-        # use D-L estimate for initial values if none provided
-        if self.tau2 is None or self.beta is None:
-            est_DL = DerSimonianLaird()._fit(y, v, X)
-            beta = est_DL['beta'] if self.beta is None else self.beta
-            tau2 = est_DL['tau2'] if self.tau2 is None else self.tau2
-
-        ll_func = getattr(self, '_{}_nll'.format(self.method.lower()))
-        if ll_func is None:
-            raise ValueError("No log-likelihood function defined for method "
-                             "'{}'.".format(self.method))
+        # use D-L estimate for initial values
+        est_DL = DerSimonianLaird()._fit(y, v, X)
+        beta = est_DL['beta']
+        tau2 = est_DL['tau2']

theta_init = np.r_[beta.ravel(), tau2]
-        res = minimize(ll_func, theta_init, (y, v, X), **self.kwargs).x
+        res = minimize(self._nll_func, theta_init, (y, v, X), **self.kwargs).x
beta, tau = res[:-1], float(res[-1])
tau = np.max([tau, 0])
return {'beta': beta, 'tau2': tau}
@@ -127,8 +149,7 @@ def _ml_nll(self, theta, y, v, X):
tau2 = 0
w = 1. / (v + tau2)
R = y - X.dot(beta)
-        ll = 0.5 * (np.log(w).sum() - (R * w * R).sum())
-        return -ll
+        return -0.5 * (np.log(w).sum() - (R * w * R).sum())

def _reml_nll(self, theta, y, v, X):
""" REML negative log-likelihood for meta-regression model. """
@@ -139,6 +160,69 @@ def _reml_nll(self, theta, y, v, X):
return ll_ + 0.5 * np.log(np.linalg.det(F))

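For reference, the objectives minimized above are the standard meta-regression negative log-likelihoods, up to additive constants. With weights w_i = 1/(v_i + tau^2) and W = diag(w_i), the code computes:

-\ell_{\mathrm{ML}}(\beta, \tau^2)
    = -\frac{1}{2}\Big[\sum_{i=1}^{K} \log w_i
        - \sum_{i=1}^{K} w_i\,(y_i - x_i^\top \beta)^2\Big],
\qquad
-\ell_{\mathrm{REML}} = -\ell_{\mathrm{ML}}
    + \frac{1}{2}\,\log\det\big(X^\top W X\big),

which matches _ml_nll and _reml_nll above (F in the code is X^T W X).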

+class SampleSizeBasedLikelihoodEstimator(BaseEstimator):
+    """ Likelihood-based estimator for estimates with known sample sizes but
+    unknown variances.
+
+    Iteratively estimates the between-study variance tau^2 and fixed effect
+    betas using the specified likelihood-based estimator (ML or REML).
+
+    Args:
+        method (str, optional): The estimation method to use. Either 'ML' (for
+            maximum-likelihood) or 'REML' (restricted maximum-likelihood).
+            Defaults to 'ML'.
+        kwargs (dict, optional): Keyword arguments to pass to the SciPy
+            minimizer.
+
+    Notes:
+        The ML and REML solutions are obtained via SciPy's scalar function
+        minimizer (scipy.optimize.minimize). Parameters to minimize() can be
+        passed in as keyword arguments.
+    """

+    def __init__(self, method='ml', **kwargs):
+        nll_func = getattr(self, '_{}_nll'.format(method.lower()), None)
+        if nll_func is None:
+            raise ValueError("No log-likelihood function defined for method "
+                             "'{}'.".format(method))
+        self._nll_func = nll_func
+        self.kwargs = kwargs

+    def _fit(self, y, n, X):
+        # set tau^2 to 0 and compute starting values
+        tau2 = 0.
+        beta = WeightedLeastSquares(tau2=tau2)._fit(y, n, X)['beta']
+        sigma = ((y - X.dot(beta))**2 * n).mean()
+        theta_init = np.r_[beta.ravel(), sigma, tau2]
+        res = minimize(self._nll_func, theta_init, (y, n, X), **self.kwargs).x
+        beta, sigma, tau = res[:-2], float(res[-2]), float(res[-1])
+        tau = np.max([tau, 0])
+        return {'beta': beta, 'sigma2': sigma, 'tau2': tau}

+    def _ml_nll(self, theta, y, n, X):
+        """ ML negative log-likelihood for meta-regression model. """
+        beta, sigma2, tau2 = theta[:-2, None], theta[-2], theta[-1]
+        if tau2 < 0:
+            tau2 = 0
+        if sigma2 < 0:
+            sigma2 = 0
+        w = 1 / (tau2 + sigma2 / n)
+        R = y - X.dot(beta)
+        return -0.5 * (np.log(w).sum() - (R * w * R).sum())

+    def _reml_nll(self, theta, y, n, X):
+        """ REML negative log-likelihood for meta-regression model. """
+        ll_ = self._ml_nll(theta, y, n, X)
+        sigma2, tau2 = theta[-2:]
+        w = 1 / (tau2 + sigma2 / n)
+        F = (X * w).T.dot(X)
+        return ll_ + 0.5 * np.log(np.linalg.det(F))

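The sample-size variant assumes a shared within-study variance sigma^2 scaled by each study's n_i, i.e. y_i ~ N(x_i^T beta, tau^2 + sigma^2 / n_i). The same objective is then minimized with sigma^2 as an extra free parameter:

w_i = \frac{1}{\tau^2 + \sigma^2 / n_i},
\qquad
-\ell_{\mathrm{ML}}(\beta, \sigma^2, \tau^2)
    = -\frac{1}{2}\Big[\sum_{i=1}^{K} \log w_i
        - \sum_{i=1}^{K} w_i\,(y_i - x_i^\top \beta)^2\Big].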

class StanMetaRegression(BaseEstimator):
"""Bayesian meta-regression estimator using Stan.

8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -17,6 +17,14 @@ def dataset(variables):
return Dataset(*variables, names=['my_covariate'])


+@pytest.fixture(scope='package')
+def dataset_n():
+    y = np.array([[-3., -0.5, 0., -5.01, 0.35, -2., -6., -4., -4.3, -0.1, -1.]]).T
+    n = np.array([[16, 16, 20.548, 32.597, 14., 11.118, 4.444, 12.414, 26.963,
+                   130.556, 126.76]]).T / 2
+    return Dataset(y, sample_sizes=n)


@pytest.fixture(scope='package')
def vars_with_intercept():
y = np.array([[-1, 0.5, 0.5, 0.5, 1, 1, 2, 10]]).T
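A sketch of a test that could consume the new dataset_n fixture; the test name and assertions are illustrative, not part of this PR:

from pymare.estimators import SampleSizeBasedLikelihoodEstimator

def test_sample_size_based_ml(dataset_n):
    est = SampleSizeBasedLikelihoodEstimator(method='ML')
    # The fixture provides sample sizes but no variances, so only the
    # sample-size-based estimator should accept it.
    assert est.accepts_dataset(dataset_n)
    results = est.fit(dataset_n)
    assert results is not None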