Skip to content

Commit

Permalink
Add the nocenter option to coxph. Pass R CMD check
Browse files Browse the repository at this point in the history
  • Loading branch information
Terry Therneau committed Feb 15, 2021
1 parent 9c2a4be commit cd13496
Show file tree
Hide file tree
Showing 36 changed files with 321 additions and 232 deletions.
9 changes: 7 additions & 2 deletions R/agexact.fit.R
@@ -1,6 +1,6 @@
agexact.fit <- function(x, y, strata, offset, init, control,
weights, method, rownames,
resid=TRUE)
resid=TRUE, nocenter=NULL)
{
if (!is.matrix(x)) stop("Invalid formula for cox fitting function")
if (!is.null(weights) && any(weights!=1))
Expand Down Expand Up @@ -44,6 +44,10 @@ agexact.fit <- function(x, y, strata, offset, init, control,
}
else init <- rep(0,nvar)

# 2012 change: individually choose which variable to rescale
# default: leave 0/1 variables along
if (is.null(nocenter)) zero.one <- rep(FALSE, ncol(x))
else zero.one <- apply(x, 2, function(z) all(z %in% nocenter))
agfit <- .C(Cagexact, iter= as.integer(control$iter.max),
as.integer(n),
as.integer(nvar), sstart, sstop,
Expand All @@ -60,7 +64,8 @@ agexact.fit <- function(x, y, strata, offset, init, control,
integer(2*n),
as.double(control$eps),
as.double(control$toler.chol),
sctest=double(1))
sctest=double(1),
ifelse(zero.one, 0L, 1L))

var <- matrix(agfit$imat,nvar,nvar)
coef <- agfit$coef
Expand Down
18 changes: 13 additions & 5 deletions R/agreg.fit.R
@@ -1,6 +1,6 @@
# Automatically generated from the noweb directory
agreg.fit <- function(x, y, strata, offset, init, control,
weights, method, rownames, resid=TRUE)
weights, method, rownames, resid=TRUE, nocenter=NULL)
{
nvar <- ncol(x)
event <- y[,3]
Expand Down Expand Up @@ -61,6 +61,11 @@ agreg.fit <- function(x, y, strata, offset, init, control,
if (length(init) != nvar) stop("Wrong length for inital values")
}

# 2021 change: pass in per covariate centering. This gives
# us more freedom to experiment. Default is to leave 0/1 variables alone
if (is.null(nocenter)) zero.one <- rep(FALSE, ncol(x))
zero.one <- apply(x, 2, function(z) all(z %in% nocenter))

# the returned value of agfit$coef starts as a copy of init, so make sure
# is is a vector and not a matrix; as.double suffices.
# Solidify the storage mode of other arguments
Expand All @@ -75,7 +80,10 @@ agreg.fit <- function(x, y, strata, offset, init, control,
as.integer(maxiter),
as.double(control$eps),
as.double(control$toler.chol),
as.integer(1)) # internally rescale
ifelse(zero.one, 0L, 1L))
# agfit4 centers variables within strata, so does not return a vector
# of means. Use a fill in consistent with other coxph routines
agmeans <- ifelse(zero.one, 0, colMeans(x))

vmat <- agfit$imat
coef <- agfit$coef
Expand All @@ -98,7 +106,7 @@ agreg.fit <- function(x, y, strata, offset, init, control,
"; beta may be infinite. "))
}
}
lp <- as.vector(x %*% coef + offset - sum(coef * colMeans(x)))
lp <- as.vector(x %*% coef + offset - sum(coef * agmeans))
if (resid) {
score <- as.double(exp(lp))
residuals <- .Call(Cagmart3, nused,
Expand Down Expand Up @@ -134,7 +142,7 @@ agreg.fit <- function(x, y, strata, offset, init, control,
iter = agfit$iter,
linear.predictors = as.vector(lp),
residuals = residuals,
means = colMeans(x),
means = agmeans,
first = agfit$u,
info = flag,
method= method,
Expand All @@ -146,7 +154,7 @@ agreg.fit <- function(x, y, strata, offset, init, control,
score = agfit$sctest,
iter = agfit$iter,
linear.predictors = as.vector(lp),
means = colMeans(x),
means = agmeans,
first = agfit$u,
info = flag,
method = method,
Expand Down
12 changes: 9 additions & 3 deletions R/coxexact.fit.R
Expand Up @@ -3,7 +3,7 @@
# case-control data.
coxexact.fit <- function(x, y, strata, offset, init, control,
weights, method, rownames,
resid=TRUE)
resid=TRUE, nocenter=NULL)
{
if (!is.matrix(x)) stop("Invalid formula for cox fitting function")
if (!is.null(weights) && any(weights!=1))
Expand Down Expand Up @@ -50,10 +50,16 @@ coxexact.fit <- function(x, y, strata, offset, init, control,
# Prescale the data set to improve numerical accuracy.
# We will undo the scaling before finishing up.
newx <- scale(x[sorted,])
# newx <- scale(x, scale=NULL) #debug
rescale <- attr(newx, "scaled:scale")
means <- attr(newx, "scaled:center")

if (!is.null(nocenter)){
zero.one <- apply(x, 2, function(z) all(z %in% nocenter))
for (i in which(zero.one)) {
newx[,i] <- x[sorted,i]
rescale[i] <- 1.0
means[i] <- 0.0
}
}
cfit <- .Call(Ccoxexact,
as.integer(maxiter),
as.double(y), # interger data? Just in case.
Expand Down
15 changes: 11 additions & 4 deletions R/coxpenal.fit.R
Expand Up @@ -3,7 +3,7 @@
#
coxpenal.fit <- function(x, y, strata, offset, init, control,
weights, method, rownames,
pcols, pattr, assign) {
pcols, pattr, assign, nocenter=NULL) {
eps <- control$eps
n <- nrow(y)
if (is.matrix(x)) nvar <- ncol(x)
Expand Down Expand Up @@ -283,7 +283,12 @@ coxpenal.fit <- function(x, y, strata, offset, init, control,

## need the current environment for callbacks
rho<-environment()


# 2012 change: individually choose which variable to recenter
# default: leave 0/1 variables along
if (is.null(nocenter)) zero.one <- rep(FALSE, ncol(x))
zero.one <- apply(x, 2, function(z) all(z %in% nocenter))

#
# Have C store the data, and get the loglik for beta=initial, frailty=0
#
Expand All @@ -308,7 +313,8 @@ coxpenal.fit <- function(x, y, strata, offset, init, control,
as.integer(nfrail),
as.integer(frailx),
#R callback additions
f.expr1,f.expr2,rho)
f.expr1,f.expr2,rho,
ifelse(zero.one, 0L, 1L))
else coxfit <- .C(Ccoxfit5a,
as.integer(n),
as.integer(nvar),
Expand All @@ -327,7 +333,8 @@ coxpenal.fit <- function(x, y, strata, offset, init, control,
as.integer(full.imat),
as.integer(nfrail),
as.integer(frailx),
f.expr1,f.expr2,rho)
f.expr1,f.expr2,rho,
ifelse(zero.one, 0L, 1L))

loglik0 <- coxfit$loglik
means <- coxfit$means
Expand Down
30 changes: 19 additions & 11 deletions R/coxph.R
Expand Up @@ -4,7 +4,7 @@ coxph <- function(formula, data, weights, subset, na.action,
init, control, ties= c("efron", "breslow", "exact"),
singular.ok =TRUE, robust,
model=FALSE, x=FALSE, y=TRUE, tt, method=ties,
id, cluster, istate, statedata,...) {
id, cluster, istate, statedata, nocenter=c(-1, 0, 1), ...) {

ties <- match.arg(ties)
Call <- match.call()
Expand Down Expand Up @@ -545,23 +545,31 @@ coxph <- function(formula, data, weights, subset, na.action,
fit <- coxpenal.fit(X, Y, istrat, offset, init=init,
control,
weights=weights, method=method,
row.names(mf), pcols, pattr, assign)
row.names(mf), pcols, pattr, assign,
nocenter= nocenter)
}
else {
rname <- row.names(mf)
if (multi) rname <- rname[xstack$rindex]
if( method=="breslow" || method =="efron") {
if (grepl('right', type)) fitter <- get("coxph.fit")
else fitter <- get("agreg.fit")
if (grepl('right', type))
fit <- coxph.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
else fit <- agreg.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
}
else if (method=='exact') {
if (type== "right") fitter <- get("coxexact.fit")
else fitter <- get("agexact.fit")
if (type== "right")
fit <- coxexact.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
else fit <- agexact.fit(X, Y, istrat, offset, init, control,
weights=weights, method=method,
rname, nocenter=nocenter)
}
else stop(paste ("Unknown method", method))

rname <- row.names(mf)
if (multi) rname <- rname[xstack$rindex]
fit <- fitter(X, Y, istrat, offset, init, control, weights=weights,
method=method, rname)
}
if (is.character(fit)) {
fit <- list(fail=fit)
Expand Down
10 changes: 8 additions & 2 deletions R/coxph.fit.R
@@ -1,5 +1,6 @@
coxph.fit <- function(x, y, strata, offset, init, control,
weights, method, rownames, resid=TRUE)
weights, method, rownames, resid=TRUE,
nocenter=NULL)
{
n <- nrow(y)
if (is.matrix(x)) nvar <- ncol(x)
Expand Down Expand Up @@ -49,6 +50,11 @@ coxph.fit <- function(x, y, strata, offset, init, control,
else init <- rep(0,nvar)
}

# 2012 change: individually choose which variable to rescale
# default: leave 0/1 variables along
if (is.null(nocenter)) zero.one <- rep(FALSE, ncol(x))
else zero.one <- apply(x, 2, function(z) all(z %in% nocenter))

storage.mode(weights) <- storage.mode(init) <- "double"
coxfit <- .Call(Ccoxfit6,
as.integer(maxiter),
Expand All @@ -62,7 +68,7 @@ coxph.fit <- function(x, y, strata, offset, init, control,
as.double(control$eps),
as.double(control$toler.chol),
as.vector(init),
as.integer(1)) # internally rescale
ifelse(zero.one, 0L, 1L))

if (nullmodel) {
if (resid) {
Expand Down
1 change: 1 addition & 0 deletions R/survfit.coxph.R
Expand Up @@ -35,6 +35,7 @@ survfit.coxph <-
ctype <- c(1,1,2)[temp1]
}
else if (!(ctype %in% 1:2)) stop ("ctype must be 1 or 2")
if (!(stype %in% 1:2)) stop("stype must be 1 or 2")

if (!se.fit) conf.type <- "none"
else conf.type <- match.arg(conf.type)
Expand Down
14 changes: 8 additions & 6 deletions R/survfit.coxphms.R
Expand Up @@ -26,7 +26,8 @@ function(formula, newdata, se.fit=TRUE, conf.int=.95, individual=FALSE,
index <- which(ctemp >0)
baselinecoef[2, index] <- exp(object$coef[ctemp[index]])
}
}
} else phbase <- rep(FALSE, nrow(object$cmap))

# process options, set up Y and the model frame, deal with start.time
Terms <- terms(object)
robust <- !is.null(object$naive.var) # did the coxph model use robust var?
Expand Down Expand Up @@ -371,7 +372,8 @@ function(formula, newdata, se.fit=TRUE, conf.int=.95, individual=FALSE,
else {
if (is.factor(strata)) ustrata <- levels(strata)
else ustrata <- sort(unique(strata))
nstrata <- length(ustrata)
nstrata <- length(cifit$strata)
itemp <- rep(1:nstrata, cifit$strata)
timelist <- split(cifit$time, itemp)
ustrata <- names(cifit$strata)
tfit <- vector("list", nstrata)
Expand All @@ -386,8 +388,8 @@ function(formula, newdata, se.fit=TRUE, conf.int=.95, individual=FALSE,

# do.call(rbind) doesn't work for arrays, it loses a dimension
ntime <- length(cifit$time)
cifit$pstate <- array(0., dim=c(ntime, dim(survlist[[1]]$pstate)[2:3]))
cifit$cumhaz <- array(0., dim=c(ntime, dim(survlist[[1]]$cumhaz)[2:3]))
cifit$pstate <- array(0., dim=c(ntime, dim(tfit[[1]]$pstate)[2:3]))
cifit$cumhaz <- array(0., dim=c(ntime, dim(tfit[[1]]$cumhaz)[2:3]))
rtemp <- split(seq(along=cifit$time), itemp)
for (i in 1:nstrata) {
cifit$pstate[rtemp[[i]],,] <- tfit[[i]]$pstate
Expand Down Expand Up @@ -431,10 +433,10 @@ multihaz <- function(y, x, position, weight, risk, istrat, ctype, stype,
}

temp <- matrix(cn[,5] / denom1, ncol = fit$ntrans)
hazard <- temp[,bcoef[,1]] * rep(bcoef[,2], each=nrow(temp))
hazard <- temp[,bcoef[1,]] * rep(bcoef[2,], each=nrow(temp))
if (se.fit) {
temp <- matrix(cn[,5] / denom2, ncol = fit$ntrans)
varhaz <- temp[,bcoef[,1]] * rep(bcoef[,2]^2, each=nrow(temp))
varhaz <- temp[,bcoef[1,]] * rep(bcoef[2,]^2, each=nrow(temp))
}

# Expand the result, one "hazard set" for each row of x2
Expand Down
11 changes: 10 additions & 1 deletion inst/NEWS.Rd
Expand Up @@ -10,9 +10,18 @@
first rather than the last.
\item Add the last test case for residuals.survfit (auc for tstart,
tstop data), and yes, it uncovered an error.
\item Survit.coxph for a multistate fit with strata error, an array
\item Error in survit.coxph for a multistate fit with strata, an array
dimension was wrong (but contents correct). It led to downstream
problems with print and plot. Pointed out by Matthew Hall.
\item Add ability for a multistate coxph model to have shared
(proportional) baselines for transitions.
\item The coxph routine no longer recenters 0/1 predictors (fit$mean
will be 0), per the default value of the new \code{nocenter}
argument. Application of survfit to a coxph model without specifying
newdata remains a bad idea, but this change makes it a little
less bad since for factors the curve will refer to the reference
level. The results of predict(fit) are now more defensible for a
factor variable.
}}
\section{Changes in version 3.2-8}{
\itemize{
Expand Down
7 changes: 5 additions & 2 deletions man/agreg.fit.Rd
Expand Up @@ -11,9 +11,9 @@
}
\usage{
agreg.fit(x, y, strata, offset, init, control, weights, method,
rownames, resid=TRUE)
rownames, resid=TRUE, nocenter=NULL)
coxph.fit(x, y, strata, offset, init, control, weights, method,
rownames, resid=TRUE)
rownames, resid=TRUE, nocenter=NULL)
}
\arguments{
\item{x}{Matix of predictors. This should \emph{not} include an
Expand All @@ -31,6 +31,9 @@ rownames, resid=TRUE)
\item{rownames}{this is only needed for a NULL model, in which case it
contains the rownames (if any) of the original data.}
\item{resid}{compute and return residuals.}
\item{nocenter}{an optional list of values. Any column of the X matrix
whose values lie strictly within that set will not be recentered.
Note that the coxph function has (-1, 0, 1) as the default.}
}
\details{
This routine does no checking that arguments are the proper length or
Expand Down
9 changes: 7 additions & 2 deletions man/coxph.Rd
Expand Up @@ -20,7 +20,7 @@ coxph(formula, data=, weights, subset,
ties=c("efron","breslow","exact"),
singular.ok=TRUE, robust,
model=FALSE, x=FALSE, y=TRUE, tt, method=ties,
id, cluster, istate, statedata, ...)
id, cluster, istate, statedata, nocenter=c(-1, 0, 1), ...)
}
\arguments{
\item{formula}{
Expand Down Expand Up @@ -107,6 +107,8 @@ coxph(formula, data=, weights, subset,
}
\item{tt}{optional list of time-transform functions.}
\item{method}{alternate name for the \code{ties} argument.}
\item{nocenter}{columns of the X matrix whose values lie strictly
within this set are not recentered}
\item{...}{Other arguments will be passed to \code{\link{coxph.control}}
}
}
Expand Down Expand Up @@ -143,6 +145,9 @@ of which applies to an interval of observation (start, stop].
The routine internally scales and centers data to avoid overflow in
the argument to the exponential function. These actions do not change
the result, but lead to more numerical stability.
Any column of the X matrix whose values lie within \code{nocenter} list
are not recentered. The practical consequence of the default is to not
recenter dummy variables corresponding to factors.
However, arguments to offset are not scaled since there are situations
where a large offset value is a purposefully used.
In general, however, users should not avoid very large numeric values
Expand Down Expand Up @@ -196,7 +201,7 @@ cluster function would not be noticed.
(Full qualification of the coxph call itself may be protective, however.)
Second, and more importantly, the call just above will not give the
correct answer. The specials are recognized by their name, and
`survival::cluster' is not the same as `cluster'; the above model would
\code{survival::cluster} is not the same as \code{cluster}; the above model would
treat \code{inst} as an ordinary variable.
A similar issue arises from using \code{stats::offset} as a term, in
either survival or glm models.
Expand Down
4 changes: 2 additions & 2 deletions man/survival-internal.Rd
Expand Up @@ -26,10 +26,10 @@ match.ratetable(R, ratetable)
coxpenal.df(hmat, hinv, fdiag, assign.list, ptype, nvar, pen1,
pen2, sparse)
coxpenal.fit(x, y, strata, offset, init, control, weights, method,
rownames, pcols, pattr, assign)
rownames, pcols, pattr, assign, nocenter)
coxph.wtest(var, b, toler.chol = 1e-09)
agexact.fit(x, y, strata, offset, init, control, weights, method,
rownames, resid=TRUE)
rownames, resid=TRUE, nocenter=NULL)
survfitCI(X, Y, weights, id, cluster, robust, istate,
stype=1, ctype=1,
se.fit=TRUE,
Expand Down

0 comments on commit cd13496

Please sign in to comment.