Skip to content

Commit

Permalink
Merge fa86746 into ee9f5c0
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Jun 6, 2020
2 parents ee9f5c0 + fa86746 commit 0b36b58
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 38 deletions.
99 changes: 67 additions & 32 deletions statsmodels/duration/hazard_regression.py
Expand Up @@ -51,6 +51,10 @@
If 'lhr', returns log hazard ratios, if 'hr' returns
hazard ratios, if 'surv' returns the survival function, if
'cumhaz' returns the cumulative hazard function.
pred_only : bool
If True, returns only an array of predicted values. Otherwise
returns a bunch containing the predicted values and standard
errors.
Returns
-------
Expand Down Expand Up @@ -120,26 +124,11 @@ def __init__(self, time, status, exog, strata=None, entry=None,
entry = np.zeros(len(time))

# Parameter validity checks.
n1, n2, n3, n4 = len(time), len(status), len(strata),\
len(entry)
nv = [n1, n2, n3, n4]
if max(nv) != min(nv):
raise ValueError("endog, status, strata, and " +
"entry must all have the same length")
if min(time) < 0:
raise ValueError("endog must be non-negative")
if min(entry) < 0:
raise ValueError("entry time must be non-negative")

# In Stata, this is entry >= time, in R it is >.
if np.any(entry > time):
raise ValueError("entry times may not occur " +
"after event or censoring times")
self._check(time, status, strata, entry)

# Get the row indices for the cases in each stratum
stu = np.unique(strata)
#sth = {x: [] for x in stu} # needs >=2.7
sth = dict([(x, []) for x in stu])
sth = {x: [] for x in stu}
for i,k in enumerate(strata):
sth[k].append(i)
stratum_rows = [np.asarray(sth[k], dtype=np.int32) for k in stu]
Expand Down Expand Up @@ -189,20 +178,15 @@ def __init__(self, time, status, exog, strata=None, entry=None,
# Number of informative subjects
self.n_obs = sum([len(ix) for ix in stratum_rows])

# Split everything by stratum
self.time_s = []
self.exog_s = []
self.status_s = []
self.entry_s = []
for ix in stratum_rows:
self.time_s.append(time[ix])
self.exog_s.append(exog[ix,:])
self.status_s.append(status[ix])
self.entry_s.append(entry[ix])

self.stratum_rows = stratum_rows
self.stratum_names = stratum_names

# Split everything by stratum
self.time_s = self._split(time)
self.exog_s = self._split(exog)
self.status_s = self._split(status)
self.entry_s = self._split(entry)

# Precalculate some indices needed to fit Cox models.
# Distinct failure times within a stratum are always taken to
# be sorted in ascending order.
Expand Down Expand Up @@ -260,6 +244,33 @@ def __init__(self, time, status, exog, strata=None, entry=None,
self.risk_exit.append([np.asarray(x, dtype=np.int32)
for x in risk_exit1])

def _split(self, x):
v = []
if x.ndim == 1:
for ix in self.stratum_rows:
v.append(x[ix])
else:
for ix in self.stratum_rows:
v.append(x[ix, :])
return v

def _check(self, time, status, strata, entry):
n1, n2, n3, n4 = len(time), len(status), len(strata),\
len(entry)
nv = [n1, n2, n3, n4]
if max(nv) != min(nv):
raise ValueError("endog, status, strata, and " +
"entry must all have the same length")
if min(time) < 0:
raise ValueError("endog must be non-negative")
if min(entry) < 0:
raise ValueError("entry time must be non-negative")

# In Stata, this is entry >= time, in R it is >.
if np.any(entry > time):
raise ValueError("entry times may not occur " +
"after event or censoring times")


class PHReg(model.LikelihoodModel):
"""
Expand Down Expand Up @@ -1186,7 +1197,10 @@ def baseline_cumulative_hazard_function(self, params):
'params_doc': _predict_params_doc,
'cov_params_doc': _predict_cov_params_docstring})
def predict(self, params, exog=None, cov_params=None, endog=None,
strata=None, offset=None, pred_type="lhr"):
strata=None, offset=None, pred_type="lhr", pred_only=False):

# This function breaks mediation, because it does not simply
# return the predicted values as an array.

pred_type = pred_type.lower()
if pred_type not in ["lhr", "hr", "surv", "cumhaz"]:
Expand Down Expand Up @@ -1222,12 +1236,16 @@ class bunch:
mat = np.dot(exog, cov_params)
va = (mat * exog).sum(1)
ret_val.standard_errors = np.sqrt(va)
if pred_only:
return ret_val.predicted_values
return ret_val

hr = np.exp(lhr)

if pred_type == "hr":
ret_val.predicted_values = hr
if pred_only:
return ret_val.predicted_values
return ret_val

# Makes sure endog is defined
Expand Down Expand Up @@ -1261,9 +1279,12 @@ class bunch:
elif pred_type == "surv":
ret_val.predicted_values = np.exp(-cumhaz)

if pred_only:
return ret_val.predicted_values

return ret_val

def get_distribution(self, params):
def get_distribution(self, params, scale=1.0, exog=None):
"""
Returns a scipy distribution object corresponding to the
distribution of uncensored endog (duration) values for each
Expand All @@ -1273,6 +1294,10 @@ def get_distribution(self, params):
----------
params : array_like
The proportional hazards model parameters.
scale : float
Present for compatibility, not used.
exog : array-like
A design matrix, defaults to model.exog.
Returns
-------
Expand All @@ -1296,9 +1321,14 @@ def get_distribution(self, params):
# stratum
pk, xk = [], []

if exog is None:
exog_split = surv.exog_s
else:
exog_split = self.surv._split(exog)

for stx in range(self.surv.nstrat):

exog_s = surv.exog_s[stx]
exog_s = exog_split[stx]

linpred = np.dot(exog_s, params)
if surv.offset_s is not None:
Expand Down Expand Up @@ -1672,12 +1702,17 @@ def __init__(self, xk, pk):
self.pk = pk
self.cpk = np.cumsum(self.pk, axis=1)

def rvs(self):
def rvs(self, n=None):
"""
Returns a random sample from the discrete distribution.
A vector is returned containing a single draw from each row of
`xk`, using the probabilities of the corresponding row of `pk`
Parameters
----------
n : not used
Present for signature compatibility
"""

n = self.xk.shape[0]
Expand Down
21 changes: 15 additions & 6 deletions statsmodels/stats/mediation.py
Expand Up @@ -56,6 +56,9 @@ class Mediation(object):
Keyword arguments to use when fitting the outcome model.
mediator_fit_kwargs : dict-like
Keyword arguments to use when fitting the mediator model.
outcome_predict_kwargs : dict-like
Keyword arguments to use when calling predict on the outcome
model.
Returns a ``MediationResults`` object.
Expand Down Expand Up @@ -120,7 +123,8 @@ class Mediation(object):
"""

def __init__(self, outcome_model, mediator_model, exposure, mediator=None,
moderators=None, outcome_fit_kwargs=None, mediator_fit_kwargs=None):
moderators=None, outcome_fit_kwargs=None, mediator_fit_kwargs=None,
outcome_predict_kwargs=None):

self.outcome_model = outcome_model
self.mediator_model = mediator_model
Expand All @@ -133,9 +137,11 @@ def __init__(self, outcome_model, mediator_model, exposure, mediator=None,
self.mediator = mediator

self._outcome_fit_kwargs = (outcome_fit_kwargs if outcome_fit_kwargs
is not None else {})
is not None else {})
self._mediator_fit_kwargs = (mediator_fit_kwargs if mediator_fit_kwargs
is not None else {})
is not None else {})
self._outcome_predict_kwargs = (outcome_predict_kwargs if
outcome_predict_kwargs is not None else {})

# We will be changing these so need to copy.
self._outcome_exog = outcome_model.exog.copy()
Expand Down Expand Up @@ -302,14 +308,17 @@ def fit(self, method="parametric", n_rep=1000):
predicted_outcomes = [[None, None], [None, None]]
for tm in 0, 1:
mex = self._get_mediator_exog(tm)
kwargs = {"exog": mex}
if hasattr(mediator_result, "scale"):
kwargs["scale"] = mediator_result.scale
gen = self.mediator_model.get_distribution(mediation_params,
mediator_result.scale,
exog=mex)
**kwargs)
potential_mediator = gen.rvs(mex.shape[0])

for te in 0, 1:
oex = self._get_outcome_exog(te, potential_mediator)
po = self.outcome_model.predict(outcome_params, oex)
po = self.outcome_model.predict(outcome_params, oex,
**self._outcome_predict_kwargs)
predicted_outcomes[tm][te] = po

for t in 0, 1:
Expand Down

0 comments on commit 0b36b58

Please sign in to comment.