From c1419ecd974a6a447e58002d8932d2527f9e82e0 Mon Sep 17 00:00:00 2001
From: Paul Buerkner <paul.buerkner@gmail.com>
Date: Wed, 22 Jul 2020 18:45:59 +0200
Subject: [PATCH] officially support the 'cox' family #230

---
 NAMESPACE                   |  1 +
 NEWS.md                     |  2 ++
 R/families.R                | 15 +++++++++------
 R/family-lists.R            |  2 +-
 man/brmsfamily.Rd           | 13 ++++++++++---
 vignettes/brms_families.Rmd | 18 +++++++++++++-----
 6 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index 64fefd16b..59c6a1d80 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -323,6 +323,7 @@ export(cor_lagsar)
 export(cor_ma)
 export(cor_sar)
 export(cosy)
+export(cox)
 export(cratio)
 export(cs)
 export(cse)
diff --git a/NEWS.md b/NEWS.md
index c7dd7259f..05ebbaf29 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,8 @@
 
 ### New Features
 
+* Support the Cox proportional hazards model for 
+time-to-event data via family `cox`. (#230, #962)
 * Support method `loo_moment_match`, which can be used to
 update a `loo` object when Pareto k estimates are large.
 
diff --git a/R/families.R b/R/families.R
index 055215b17..e3ba7eb7d 100644
--- a/R/families.R
+++ b/R/families.R
@@ -82,9 +82,10 @@
 #'   \code{sratio} ('stopping ratio'), and \code{acat} ('adjacent category') 
 #'   leads to ordinal regression.} 
 #'   
-#'   \item{Families \code{Gamma}, \code{weibull}, \code{exponential}, 
-#'   \code{lognormal}, \code{frechet}, and \code{inverse.gaussian} can be 
-#'   used (among others) for survival regression.}
+#'   \item{Families \code{Gamma}, \code{weibull}, \code{exponential},
+#'   \code{lognormal}, \code{frechet}, \code{inverse.gaussian}, and \code{cox}
+#'   (Cox proportional hazards model) can be used (among others) for
+#'   time-to-event regression also known as survival regression.}
 #'   
 #'   \item{Families \code{weibull}, \code{frechet}, and \code{gen_extreme_value}
 #'   ('generalized extreme value') allow for modeling extremes.}
@@ -152,6 +153,9 @@
 #'   \item{Family \code{von_mises} supports \code{tan_half} and 
 #'   \code{identity}.}
 #'   
+#'   \item{Family \code{cox} supports \code{log}, \code{identity},
+#'   and \code{softplus} for the proportional hazards parameter.}
+#'   
 #'   \item{Family \code{wiener} supports \code{identity}, \code{log}, 
 #'   and \code{softplus} for the main parameter which represents the
 #'   drift rate.}
@@ -636,9 +640,8 @@ zero_inflated_asym_laplace <- function(link = "identity", link_sigma = "log",
               link_zi = link_zi)
 }
 
-# do not export yet!
-# @rdname brmsfamily
-# @export
+#' @rdname brmsfamily
+#' @export
 cox <- function(link = "log", bhaz = NULL) {
   slink <- substitute(link)
   .brmsfamily("cox", link = link, bhaz = bhaz)
diff --git a/R/family-lists.R b/R/family-lists.R
index 47ba17f65..31fcbb708 100644
--- a/R/family-lists.R
+++ b/R/family-lists.R
@@ -296,7 +296,7 @@
 
 .family_cox <- function() {
   list(
-    links = c("log", "identity"),
+    links = c("log", "identity", "softplus"),
     dpars = c("mu"), type = "real",
     ybounds = c(0, Inf), closed = c(TRUE, NA),
     ad = c("weights", "subset", "cens", "trunc"),
diff --git a/man/brmsfamily.Rd b/man/brmsfamily.Rd
index d6c4024cb..7f1d7a561 100644
--- a/man/brmsfamily.Rd
+++ b/man/brmsfamily.Rd
@@ -19,6 +19,7 @@
 \alias{dirichlet}
 \alias{von_mises}
 \alias{asym_laplace}
+\alias{cox}
 \alias{hurdle_poisson}
 \alias{hurdle_negbinomial}
 \alias{hurdle_gamma}
@@ -100,6 +101,8 @@ von_mises(link = "tan_half", link_kappa = "log")
 
 asym_laplace(link = "identity", link_sigma = "log", link_quantile = "logit")
 
+cox(link = "log", bhaz = NULL)
+
 hurdle_poisson(link = "log")
 
 hurdle_negbinomial(link = "log", link_shape = "log", link_hu = "logit")
@@ -240,9 +243,10 @@ Below, we list common use cases for the different families.
   \code{sratio} ('stopping ratio'), and \code{acat} ('adjacent category') 
   leads to ordinal regression.} 
   
-  \item{Families \code{Gamma}, \code{weibull}, \code{exponential}, 
-  \code{lognormal}, \code{frechet}, and \code{inverse.gaussian} can be 
-  used (among others) for survival regression.}
+  \item{Families \code{Gamma}, \code{weibull}, \code{exponential},
+  \code{lognormal}, \code{frechet}, \code{inverse.gaussian}, and \code{cox}
+  (Cox proportional hazards model) can be used (among others) for
+  time-to-event regression also known as survival regression.}
   
   \item{Families \code{weibull}, \code{frechet}, and \code{gen_extreme_value}
   ('generalized extreme value') allow for modeling extremes.}
@@ -310,6 +314,9 @@ Below, we list common use cases for the different families.
   \item{Family \code{von_mises} supports \code{tan_half} and 
   \code{identity}.}
   
+  \item{Family \code{cox} supports \code{log}, \code{identity},
+  and \code{softplus} for the proportional hazards parameter.}
+  
   \item{Family \code{wiener} supports \code{identity}, \code{log}, 
   and \code{softplus} for the main parameter which represents the
   drift rate.}
diff --git a/vignettes/brms_families.Rmd b/vignettes/brms_families.Rmd
index 1e09e5cd7..896bbe378 100644
--- a/vignettes/brms_families.Rmd
+++ b/vignettes/brms_families.Rmd
@@ -94,11 +94,11 @@ $$
 with location parameter $\mu \in [0, 1]$ and positive shape parameter $\alpha$.
 -->
 
-## Survival models
+## Time-to-event models
 
-With survival models we mean all models that are defined on the positive reals
-only, that is $y \in \mathbb{R}^+$. The density of the **lognormal** family is
-given by
+With time-to-event models we mean all models that are defined on the positive
+reals only, that is $y \in \mathbb{R}^+$. The density of the **lognormal**
+family is given by
 $$
 f(y) = \frac{1}{\sqrt{2\pi}\sigma x} \exp\left(-\frac{1}{2}\left(\frac{\log(y) - \mu}{\sigma}\right)^2\right)
 $$ 
@@ -122,7 +122,15 @@ is set to $1$ for either the gamma or Weibull distribution. The density of the
 $$
 f(y) = \left(\frac{\alpha}{2 \pi y^3}\right)^{1/2} \exp \left(\frac{-\alpha (y - \mu)^2}{2 \mu^2 y} \right)
 $$
-where $\alpha$ is a positive shape parameter.
+where $\alpha$ is a positive shape parameter. The **cox** family implements Cox
+proportional hazards model which assumes a hazard function of the form $h(y) =
+h_0(y) \mu$ with baseline hazard $h_0(y)$ expressed via M-splines (which
+integrate to I-splines) in order to ensure monotonicity. The density of the cox
+model is then given by
+$$
+f(y) = h(y) S(y)
+$$
+where $S(y)$ is the survival function implied by $h(y)$.
 
 ## Extreme value models