Merge fa86746 into ee9f5c0

statsmodels · Jun 6, 2020 · 0b36b58 · 0b36b58
2 parents ee9f5c0 + fa86746
commit 0b36b58
Show file tree

Hide file tree

Showing 2 changed files with 82 additions and 38 deletions.
diff --git a/statsmodels/duration/hazard_regression.py b/statsmodels/duration/hazard_regression.py
@@ -51,6 +51,10 @@
         If 'lhr', returns log hazard ratios, if 'hr' returns
         hazard ratios, if 'surv' returns the survival function, if
         'cumhaz' returns the cumulative hazard function.
+    pred_only : bool
+        If True, returns only an array of predicted values.  Otherwise
+        returns a bunch containing the predicted values and standard
+        errors.
 
     Returns
     -------
@@ -120,26 +124,11 @@ def __init__(self, time, status, exog, strata=None, entry=None,
             entry = np.zeros(len(time))
 
         # Parameter validity checks.
-        n1, n2, n3, n4 = len(time), len(status), len(strata),\
-            len(entry)
-        nv = [n1, n2, n3, n4]
-        if max(nv) != min(nv):
-            raise ValueError("endog, status, strata, and " +
-                             "entry must all have the same length")
-        if min(time) < 0:
-            raise ValueError("endog must be non-negative")
-        if min(entry) < 0:
-            raise ValueError("entry time must be non-negative")
-
-        # In Stata, this is entry >= time, in R it is >.
-        if np.any(entry > time):
-            raise ValueError("entry times may not occur " +
-                             "after event or censoring times")
+        self._check(time, status, strata, entry)
 
         # Get the row indices for the cases in each stratum
         stu = np.unique(strata)
-        #sth = {x: [] for x in stu} # needs >=2.7
-        sth = dict([(x, []) for x in stu])
+        sth = {x: [] for x in stu}
         for i,k in enumerate(strata):
             sth[k].append(i)
         stratum_rows = [np.asarray(sth[k], dtype=np.int32) for k in stu]
@@ -189,20 +178,15 @@ def __init__(self, time, status, exog, strata=None, entry=None,
         # Number of informative subjects
         self.n_obs = sum([len(ix) for ix in stratum_rows])
 
-        # Split everything by stratum
-        self.time_s = []
-        self.exog_s = []
-        self.status_s = []
-        self.entry_s = []
-        for ix in stratum_rows:
-            self.time_s.append(time[ix])
-            self.exog_s.append(exog[ix,:])
-            self.status_s.append(status[ix])
-            self.entry_s.append(entry[ix])
-
         self.stratum_rows = stratum_rows
         self.stratum_names = stratum_names
 
+        # Split everything by stratum
+        self.time_s = self._split(time)
+        self.exog_s = self._split(exog)
+        self.status_s = self._split(status)
+        self.entry_s = self._split(entry)
+
         # Precalculate some indices needed to fit Cox models.
         # Distinct failure times within a stratum are always taken to
         # be sorted in ascending order.
@@ -260,6 +244,33 @@ def __init__(self, time, status, exog, strata=None, entry=None,
             self.risk_exit.append([np.asarray(x, dtype=np.int32)
                                    for x in risk_exit1])
 
+    def _split(self, x):
+        v = []
+        if x.ndim == 1:
+            for ix in self.stratum_rows:
+                v.append(x[ix])
+        else:
+            for ix in self.stratum_rows:
+                v.append(x[ix, :])
+        return v
+
+    def _check(self, time, status, strata, entry):
+        n1, n2, n3, n4 = len(time), len(status), len(strata),\
+            len(entry)
+        nv = [n1, n2, n3, n4]
+        if max(nv) != min(nv):
+            raise ValueError("endog, status, strata, and " +
+                             "entry must all have the same length")
+        if min(time) < 0:
+            raise ValueError("endog must be non-negative")
+        if min(entry) < 0:
+            raise ValueError("entry time must be non-negative")
+
+        # In Stata, this is entry >= time, in R it is >.
+        if np.any(entry > time):
+            raise ValueError("entry times may not occur " +
+                             "after event or censoring times")
+
 
 class PHReg(model.LikelihoodModel):
     """
@@ -1186,7 +1197,10 @@ def baseline_cumulative_hazard_function(self, params):
         'params_doc': _predict_params_doc,
         'cov_params_doc': _predict_cov_params_docstring})
     def predict(self, params, exog=None, cov_params=None, endog=None,
-                strata=None, offset=None, pred_type="lhr"):
+                strata=None, offset=None, pred_type="lhr", pred_only=False):
+
+        # This function breaks mediation, because it does not simply
+        # return the predicted values as an array.
 
         pred_type = pred_type.lower()
         if pred_type not in ["lhr", "hr", "surv", "cumhaz"]:
@@ -1222,12 +1236,16 @@ class bunch:
                 mat = np.dot(exog, cov_params)
                 va = (mat * exog).sum(1)
                 ret_val.standard_errors = np.sqrt(va)
+            if pred_only:
+                return ret_val.predicted_values
             return ret_val
 
         hr = np.exp(lhr)
 
         if pred_type == "hr":
             ret_val.predicted_values = hr
+            if pred_only:
+                return ret_val.predicted_values
             return ret_val
 
         # Makes sure endog is defined
@@ -1261,9 +1279,12 @@ class bunch:
         elif pred_type == "surv":
             ret_val.predicted_values = np.exp(-cumhaz)
 
+        if pred_only:
+            return ret_val.predicted_values
+
         return ret_val
 
-    def get_distribution(self, params):
+    def get_distribution(self, params, scale=1.0, exog=None):
         """
         Returns a scipy distribution object corresponding to the
         distribution of uncensored endog (duration) values for each
@@ -1273,6 +1294,10 @@ def get_distribution(self, params):
         ----------
         params : array_like
             The proportional hazards model parameters.
+        scale : float
+            Present for compatibility, not used.
+        exog : array-like
+            A design matrix, defaults to model.exog.
 
         Returns
         -------
@@ -1296,9 +1321,14 @@ def get_distribution(self, params):
         # stratum
         pk, xk = [], []
 
+        if exog is None:
+            exog_split = surv.exog_s
+        else:
+            exog_split = self.surv._split(exog)
+
         for stx in range(self.surv.nstrat):
 
-            exog_s = surv.exog_s[stx]
+            exog_s = exog_split[stx]
 
             linpred = np.dot(exog_s, params)
             if surv.offset_s is not None:
@@ -1672,12 +1702,17 @@ def __init__(self, xk, pk):
         self.pk = pk
         self.cpk = np.cumsum(self.pk, axis=1)
 
-    def rvs(self):
+    def rvs(self, n=None):
         """
         Returns a random sample from the discrete distribution.
 
         A vector is returned containing a single draw from each row of
         `xk`, using the probabilities of the corresponding row of `pk`
+
+        Parameters
+        ----------
+        n : not used
+            Present for signature compatibility
         """
 
         n = self.xk.shape[0]

diff --git a/statsmodels/stats/mediation.py b/statsmodels/stats/mediation.py
@@ -56,6 +56,9 @@ class Mediation(object):
         Keyword arguments to use when fitting the outcome model.
     mediator_fit_kwargs : dict-like
         Keyword arguments to use when fitting the mediator model.
+    outcome_predict_kwargs : dict-like
+        Keyword arguments to use when calling predict on the outcome
+        model.
 
     Returns a ``MediationResults`` object.
 
@@ -120,7 +123,8 @@ class Mediation(object):
     """
 
     def __init__(self, outcome_model, mediator_model, exposure, mediator=None,
-                 moderators=None, outcome_fit_kwargs=None, mediator_fit_kwargs=None):
+                 moderators=None, outcome_fit_kwargs=None, mediator_fit_kwargs=None,
+                 outcome_predict_kwargs=None):
 
         self.outcome_model = outcome_model
         self.mediator_model = mediator_model
@@ -133,9 +137,11 @@ def __init__(self, outcome_model, mediator_model, exposure, mediator=None,
             self.mediator = mediator
 
         self._outcome_fit_kwargs = (outcome_fit_kwargs if outcome_fit_kwargs
-                                    is not None else {})
+                is not None else {})
         self._mediator_fit_kwargs = (mediator_fit_kwargs if mediator_fit_kwargs
-                                     is not None else {})
+                is not None else {})
+        self._outcome_predict_kwargs = (outcome_predict_kwargs if
+                outcome_predict_kwargs is not None else {})
 
         # We will be changing these so need to copy.
         self._outcome_exog = outcome_model.exog.copy()
@@ -302,14 +308,17 @@ def fit(self, method="parametric", n_rep=1000):
             predicted_outcomes = [[None, None], [None, None]]
             for tm in 0, 1:
                 mex = self._get_mediator_exog(tm)
+                kwargs = {"exog": mex}
+                if hasattr(mediator_result, "scale"):
+                    kwargs["scale"] = mediator_result.scale
                 gen = self.mediator_model.get_distribution(mediation_params,
-                                                           mediator_result.scale,
-                                                           exog=mex)
+                                                           **kwargs)
                 potential_mediator = gen.rvs(mex.shape[0])
 
                 for te in 0, 1:
                     oex = self._get_outcome_exog(te, potential_mediator)
-                    po = self.outcome_model.predict(outcome_params, oex)
+                    po = self.outcome_model.predict(outcome_params, oex,
+                            **self._outcome_predict_kwargs)
                     predicted_outcomes[tm][te] = po
 
             for t in 0, 1: