Skip to content

Commit

Permalink
v0.24.6
Browse files Browse the repository at this point in the history
  • Loading branch information
CamDavidsonPilon committed May 4, 2020
1 parent 290d583 commit 027ffd7
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 40 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,14 @@
## Changelog

#### 0.24.6 - unreleased

##### New features
- At the cost of some performance, convergence is improved in many models.
- New `lifelines.plotting.plot_interval_censored_lifetimes` for plotting interval censored data

##### Bug fixes
- fixed bug where `cdf_plot` and `qq_plot` were not factoring in the weights correctly.

#### 0.24.5 - 2020-05-01

##### New features
Expand Down
43 changes: 25 additions & 18 deletions lifelines/fitters/__init__.py
Expand Up @@ -487,26 +487,31 @@ def _fit_model(self, Ts, E, entry, weights, show_progress=True):
with warnings.catch_warnings():
warnings.simplefilter("ignore")

results = minimize(
value_and_grad(negative_log_likelihood), # pylint: disable=no-value-for-parameter
self._initial_values,
jac=True,
method=self._scipy_fit_method,
args=(Ts, E, entry, weights),
bounds=self._bounds,
options={**{"disp": show_progress}, **self._scipy_fit_options},
)
minimizing_results, minimizing_ll = None, np.inf
for method in [self._scipy_fit_method, "Powell"]:
results = minimize(
value_and_grad(negative_log_likelihood), # pylint: disable=no-value-for-parameter
self._initial_values,
jac=True,
method=method,
args=(Ts, E, entry, weights),
bounds=self._bounds,
options={**{"disp": show_progress}, **self._scipy_fit_options},
)
if results.fun < minimizing_ll:
minimizing_ll = results.fun
minimizing_results = results

# convergence successful.
if results.success:
if minimizing_results and minimizing_results.success:
# pylint: disable=no-value-for-parameter
hessian_ = hessian(negative_log_likelihood)(results.x, Ts, E, entry, weights)
hessian_ = hessian(negative_log_likelihood)(minimizing_results.x, Ts, E, entry, weights)
# see issue https://github.com/CamDavidsonPilon/lifelines/issues/801
hessian_ = (hessian_ + hessian_.T) / 2
return results.x, -results.fun * weights.sum(), hessian_ * weights.sum()
return minimizing_results.x, -minimizing_results.fun * weights.sum(), hessian_ * weights.sum()

# convergence failed.
print(results)
print(minimizing_results)
if self._KNOWN_MODEL:
raise utils.ConvergenceError(
dedent(
Expand Down Expand Up @@ -1795,7 +1800,7 @@ def _fit_model(self, likelihood, Ts, Xs, E, weights, entries, show_progress=Fals
if show_progress:
print(minimum_results)

if minimum_results.success:
if minimum_results is not None and minimum_results.success:
sum_weights = weights.sum()
hessian_ = hessian(self._neg_likelihood_with_penalty_function)(minimum_results.x, Ts, E, weights, entries, Xs)
# See issue https://github.com/CamDavidsonPilon/lifelines/issues/801
Expand All @@ -1807,16 +1812,18 @@ def _fit_model(self, likelihood, Ts, Xs, E, weights, entries, show_progress=Fals
raise utils.ConvergenceError(
dedent(
"""\
Fitting did not converge. Try checking the following:
Fitting did not converge. Try the following:
0. Are there any lifelines warnings outputted during the `fit`?
1. Inspect your DataFrame: does everything look as expected?
2. Try scaling your duration vector down, i.e. `df["{duration_col}"] = df["{duration_col}"]/100`
2. Is there high-collinearity in the dataset? Try using the variance inflation factor (VIF) to find redundant variables.
3. Try using an alternate minimizer: ``fitter._scipy_fit_method = "SLSQP"``.
4. Trying adding a small penalizer (or changing it, if already present). Example: `%s(penalizer=0.01).fit(...)`.
4. Trying adding a small penalizer (or changing it, if already present). Example: `{fitter_name}(penalizer=0.01).fit(...)`.
5. Are there any extreme outliers? Try modeling them or dropping them to see if it helps convergence.
"""
% self._class_name
""".format(
duration_col=self.duration_col, fitter_name=self._class_name
)
)
)

Expand Down
2 changes: 1 addition & 1 deletion lifelines/fitters/weibull_fitter.py
Expand Up @@ -96,7 +96,7 @@ class WeibullFitter(KnownModelParametricUnivariateFitter):
_scipy_fit_options = {"ftol": 1e-14}

def _create_initial_point(self, Ts, E, entry, weights):
return np.array([utils.coalesce(*Ts).std(), 1.0])
return np.array([utils.coalesce(*Ts).mean(), 1.5])

def _cumulative_hazard(self, params, times):
lambda_, rho_ = params
Expand Down
16 changes: 10 additions & 6 deletions lifelines/plotting.py
Expand Up @@ -88,11 +88,11 @@ def cdf_plot(model, timeline=None, ax=None, **plot_kwargs):

if CensoringType.is_left_censoring(model):
empirical_kmf = KaplanMeierFitter().fit_left_censoring(
model.durations, model.event_observed, label=COL_EMP, timeline=timeline
model.durations, model.event_observed, label=COL_EMP, timeline=timeline, weights=model.weights, entry=model.entry
)
elif CensoringType.is_right_censoring(model):
empirical_kmf = KaplanMeierFitter().fit_right_censoring(
model.durations, model.event_observed, label=COL_EMP, timeline=timeline
model.durations, model.event_observed, label=COL_EMP, timeline=timeline, weights=model.weights, entry=model.entry
)
elif CensoringType.is_interval_censoring(model):
raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")
Expand Down Expand Up @@ -198,7 +198,7 @@ def rmst_plot(model, model2=None, t=np.inf, ax=None, text_position=None, **plot_
)

ax.text(
text_position[0], text_position[1], "RMST(%s) -\n RMST(%s)=%.3f" % (model._label, model2._label, rmst - rmst2),
text_position[0], text_position[1], "RMST(%s) -\n RMST(%s)=%.3f" % (model._label, model2._label, rmst - rmst2)
) # dynamically pick this.
else:
rmst = restricted_mean_survival_time(model, t=t)
Expand Down Expand Up @@ -255,14 +255,18 @@ def qq_plot(model, ax=None, **plot_kwargs):
COL_THEO = "fitted %s quantiles" % dist

if CensoringType.is_left_censoring(model):
kmf = KaplanMeierFitter().fit_left_censoring(model.durations, model.event_observed, label=COL_EMP)
kmf = KaplanMeierFitter().fit_left_censoring(
model.durations, model.event_observed, label=COL_EMP, weights=model.weights, entry=model.entry
)
elif CensoringType.is_right_censoring(model):
kmf = KaplanMeierFitter().fit_right_censoring(model.durations, model.event_observed, label=COL_EMP)
kmf = KaplanMeierFitter().fit_right_censoring(
model.durations, model.event_observed, label=COL_EMP, weights=model.weights, entry=model.entry
)
elif CensoringType.is_interval_censoring(model):
raise NotImplementedError("lifelines does not have a non-parametric interval model yet.")

q = np.unique(kmf.cumulative_density_.values[:, 0])
# this is equivalent to the old code `qth_survival_times(q, kmf.cumulative_density, cdf=True)`

quantiles = qth_survival_times(1 - q, kmf.survival_function_)
quantiles[COL_THEO] = dist_object.ppf(q)
quantiles = quantiles.replace([-np.inf, 0, np.inf], np.nan).dropna()
Expand Down
12 changes: 12 additions & 0 deletions lifelines/tests/test_estimation.py
Expand Up @@ -984,6 +984,18 @@ def test_interval_censoring_with_excepted_results(self):
npt.assert_allclose(wf.rho_, 1.272946, rtol=1e-5)
npt.assert_allclose(wf.lambda_, 7646.68135, rtol=1e-5)

def test_interval_censoring_against_reliasoft(self):
x_left = [0.1, 30, 60, 90, 120, 150, 180, 210, 240]
x_right = [30, 60, 90, 120, 150, 180, 210, 240, np.inf]
number = [2, 5, 1, 9, 6, 6, 5, 2, 2645]
censor = [0, 0, 0, 0, 0, 0, 0, 0, 0]

wf = WeibullFitter()
wf.fit_interval_censoring(x_left, x_right, event_observed=censor, weights=number)

npt.assert_allclose(wf.rho_, 1.215, rtol=1e-2)
npt.assert_allclose(wf.lambda_, 8270.88, rtol=1e-2)


class TestGeneralizedGammaFitter:
def test_exponential_data_inference(self):
Expand Down
32 changes: 17 additions & 15 deletions lifelines/tests/test_plotting.py
Expand Up @@ -53,19 +53,15 @@ def setup_method(self, method):

self.plt = plt

def test_parametric_univariate_fitters_has_hazard_plotting_methods(
self, block, known_parametric_univariate_fitters
):
def test_parametric_univariate_fitters_has_hazard_plotting_methods(self, block, known_parametric_univariate_fitters):
positive_sample_lifetimes = np.arange(1, 100)
for fitter in known_parametric_univariate_fitters:
f = fitter().fit(positive_sample_lifetimes)
assert f.plot_hazard() is not None
self.plt.title("test_parametric_univariate_fitters_has_hazard_plotting_methods")
self.plt.show(block=block)

def test_parametric_univaraite_fitters_has_cumhazard_plotting_methods(
self, block, known_parametric_univariate_fitters
):
def test_parametric_univaraite_fitters_has_cumhazard_plotting_methods(self, block, known_parametric_univariate_fitters):
positive_sample_lifetimes = np.arange(1, 100)
for fitter in known_parametric_univariate_fitters:
f = fitter().fit(positive_sample_lifetimes)
Expand All @@ -74,9 +70,7 @@ def test_parametric_univaraite_fitters_has_cumhazard_plotting_methods(
self.plt.title("test_parametric_univaraite_fitters_has_cumhazard_plotting_methods")
self.plt.show(block=block)

def test_parametric_univariate_fitters_has_survival_plotting_methods(
self, block, known_parametric_univariate_fitters
):
def test_parametric_univariate_fitters_has_survival_plotting_methods(self, block, known_parametric_univariate_fitters):
positive_sample_lifetimes = np.arange(1, 100)
for fitter in known_parametric_univariate_fitters:
f = fitter().fit(positive_sample_lifetimes)
Expand Down Expand Up @@ -538,9 +532,7 @@ def test_aalen_additive_fit_no_censor(self, block):
timeline = np.linspace(0, 70, 10000)
hz, coef, X = generate_hazard_rates(n, d, timeline)
X.columns = coef.columns
cumulative_hazards = pd.DataFrame(
cumulative_integral(coef.values, timeline), index=timeline, columns=coef.columns
)
cumulative_hazards = pd.DataFrame(cumulative_integral(coef.values, timeline), index=timeline, columns=coef.columns)
T = generate_random_lifetimes(hz, timeline)
X["T"] = T
X["E"] = np.random.binomial(1, 1, n)
Expand All @@ -563,9 +555,7 @@ def test_aalen_additive_fit_with_censor(self, block):
timeline = np.linspace(0, 70, 10000)
hz, coef, X = generate_hazard_rates(n, d, timeline)
X.columns = coef.columns
cumulative_hazards = pd.DataFrame(
cumulative_integral(coef.values, timeline), index=timeline, columns=coef.columns
)
cumulative_hazards = pd.DataFrame(cumulative_integral(coef.values, timeline), index=timeline, columns=coef.columns)
T = generate_random_lifetimes(hz, timeline)
T[np.isinf(T)] = 10
X["T"] = T
Expand Down Expand Up @@ -686,6 +676,18 @@ def test_qq_plot_left_censoring_with_known_distribution(self, block):
self.plt.suptitle("test_qq_plot_left_censoring_with_known_distribution")
self.plt.show(block=block)

def test_qq_plot_with_weights_and_entry(self, block):
from lifelines.utils import survival_events_from_table

df = pd.DataFrame(index=[60, 171, 263, 427, 505, 639])
df["death"] = [1, 1, 1, 0, 1, 0]
df["censored"] = [0, 0, 0, 3, 0, 330]
T, E, W = survival_events_from_table(df, observed_deaths_col="death", censored_col="censored")
wf = WeibullFitter().fit(T, E, weights=W, entry=0.0001 * np.ones_like(T))
ax = qq_plot(wf)
self.plt.suptitle("test_qq_plot_with_weights_and_entry")
self.plt.show(block=block)

def test_qq_plot_right_censoring_with_known_distribution(self, block):
N = 3000
T_actual = scipy.stats.fisk(8, 0, 1).rvs(N)
Expand Down

0 comments on commit 027ffd7

Please sign in to comment.