#' Fit a HTLR Model
#'
#' This function trains linear logistic regression models with Hamiltonian Monte Carlo (HMC) within restricted Gibbs sampling.
#'
#' @param X Input matrix, of dimension nobs by nvars; each row is an observation vector.
#'
#' @param y Vector of response variables. Must be coded as non-negative integers,
#' e.g., 1, 2, \ldots, C for C classes; the label 0 is also allowed.
#'
#' @param fsel Subset of features selected before fitting, e.g., by univariate screening.
#' @param stdx Logical; if \code{TRUE}, the original feature values are standardized to have \code{mean = 0}
#' and \code{sd = 1}.
#'
#' @param iter A positive integer specifying the number of iterations (including warmup).
#' @param warmup A positive integer specifying the number of warmup (aka burn-in) iterations.
#' The number of warmup iterations should not be larger than \code{iter}; the default is \code{iter / 2}.
#'
#' @param thin A positive integer specifying the period for saving samples.
#'
#' @param leap The length of the leapfrog trajectory in the sampling phase.
#' @param leap.warm The length of the leapfrog trajectory in the burn-in phase.
#' @param leap.stepsize The integrator step size used in the Hamiltonian simulation.
#'
#' @param cut Coefficients smaller than this threshold are fixed in each HMC updating step.
#'
#' @param init The initial state of Markov Chain; it accepts three forms:
#' \itemize{
#' \item a previously fitted \code{fithtlr} object,
#' \item a user-supplied initial coefficient matrix of dimension (p + 1) by K, where p is the
#' number of features and K is the number of classes in \code{y} minus 1 (see the sketch below this list),
#' \item a character string matching one of the following:
#' \itemize{
#' \item "lasso" - (Default) Use Lasso initial state with \code{lambda} chosen by
#' cross-validation. Users may specify their own candidate \code{lambda} values via
#' optional argument \code{lasso.lambda}. Further customized Lasso initial
#' states can be generated by \code{\link{lasso_deltas}}.
#' \item "bcbc" - Use initial state generated by package \code{BCBCSF}
#' (Bias-corrected Bayesian classification). Further customized BCBCSF initial
#' states can be generated by \code{\link{bcbcsf_deltas}}. WARNING: this type of
#' initial state can be used for continuous features such as gene expression profiles,
#' but it should not be used for categorical features such as SNP profiles.
#' \item "random" - Use random initial values sampled from N(0, 1).
#' }
#'
#' }
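#'
#' As a minimal sketch (not taken from the package documentation; it assumes a binary
#' response, so K = 1, and predefined \code{X} and \code{y}), a user-supplied initial
#' coefficient matrix could be passed as:
#' \preformatted{
#' p <- 100                                        # number of selected features
#' init.mat <- matrix(0, nrow = p + 1, ncol = 1)   # (p + 1) x K matrix
#' fit <- htlr(X, y, fsel = 1:p, init = init.mat, iter = 20, thin = 1)
#' }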
#'
#' @param prior The prior to be applied to the model. Either a list of hyperparameter settings
#' returned by \code{\link{htlr_prior}}, or one of the character strings "t" (Student-t),
#' "ghs" (horseshoe), and "neg" (normal-exponential-gamma).
#'
#' @param df The degrees of freedom of the t/ghs/neg prior for coefficients. Ignored if a
#' configuration list from \code{\link{htlr_prior}} is passed to \code{prior}.
#'
#' @param verbose Logical; set to \code{TRUE} to track MCMC sampling iterations.
#'
#' @param rep.legacy Logical; if \code{TRUE}, the output produced by \code{HTLR} versions up to
#' legacy-3.1-1 is reproduced. This is typically slower than the non-legacy mode on
#' multi-core machines. Default is \code{FALSE}.
#'
#' @param keep.warmup.hist Logical; warmup iterations are not recorded by default; set to \code{TRUE} to keep them.
#'
#' @param ... Other optional parameters (a usage sketch follows this list):
#' \itemize{
#' \item rda.alpha - A user supplied alpha value for \code{\link{bcbcsf_deltas}}. Default: 0.2.
#' \item lasso.lambda - A user supplied lambda sequence for \code{\link{lasso_deltas}}.
#' Default: \{.01, .02, \ldots, .05\}. Will be ignored if \code{rep.legacy} is set to \code{TRUE}.
#' }
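#'
#' For instance (a minimal sketch, assuming predefined \code{X} and \code{y}), a custom
#' \code{lasso.lambda} sequence could be supplied through \code{...} as:
#' \preformatted{
#' fit <- htlr(X, y, init = "lasso",
#'             lasso.lambda = seq(0.05, 0.01, by = -0.01),
#'             iter = 20, thin = 1)
#' }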
#'
#' @return An object with S3 class \code{htlr.fit}.
#'
#' @references
#' Longhai Li and Weixin Yao (2018). Fully Bayesian Logistic Regression
#' with Hyper-Lasso Priors for High-dimensional Feature Selection.
#' \emph{Journal of Statistical Computation and Simulation} 2018, 88:14, 2827-2851.
#'
#' @export
#'
#' @examples
#' set.seed(12345)
#' data("colon")
#'
#' ## fit HTLR models with selected features, note that the chain length setting is for demo only
#'
#' ## using t prior with 1 df and log-scale fixed to -10
#' fit.t <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
#'               prior = htlr_prior("t", df = 1, logw = -10),
#'               init = "bcbc", iter = 20, thin = 1)
#'
#' ## using NEG prior with 1 df and log-scale fixed to -10
#' fit.neg <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
#'                 prior = htlr_prior("neg", df = 1, logw = -10),
#'                 init = "bcbc", iter = 20, thin = 1)
#'
#' ## using horseshoe prior with 1 df and auto-selected log-scale
#' fit.ghs <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
#'                 prior = "ghs", df = 1, init = "bcbc",
#'                 iter = 20, thin = 1)
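#'
#' ## sketch only: restart sampling from a previously fitted object by passing
#' ## it as the initial state (assumes 'fit.t' from above)
#' \dontrun{
#' fit.t2 <- htlr(X = colon$X, y = colon$y, fsel = 1:100,
#'                prior = htlr_prior("t", df = 1, logw = -10),
#'                init = fit.t, iter = 20, thin = 1)
#' }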
#'
htlr <- function(X, y,
                 fsel = 1:ncol(X),
                 stdx = TRUE,
                 prior = "t",
                 df = 1,
                 iter = 2000,
                 warmup = floor(iter / 2),
                 thin = 1,
                 init = "lasso",
                 leap = 50,
                 leap.warm = floor(leap / 10),
                 leap.stepsize = 0.3,
                 cut = 0.05,
                 verbose = FALSE,
                 rep.legacy = FALSE,
                 keep.warmup.hist = FALSE,
                 ...)
{
  stopifnot(iter > warmup, warmup > 0, thin > 0, leap > 0, leap.warm > 0)

  # pick up optional arguments supplied through '...'
  dots <- list(...)
  if ("rda.alpha" %in% names(dots)) {
    rda.alpha <- dots$rda.alpha
    if (!identical(init, "bcbc"))
      warning("not using 'BCBCSF' init, input 'rda.alpha' will be ignored")
  } else {
    rda.alpha <- 0.2
  }
  if ("lasso.lambda" %in% names(dots)) {
    lasso.lambda <- dots$lasso.lambda
    if (!identical(init, "lasso"))
      warning("not using 'LASSO' init, input 'lasso.lambda' will be ignored")
    else if (rep.legacy)
      warning("rep.legacy == TRUE, input 'lasso.lambda' will be ignored")
  } else {
    lasso.lambda <- seq(.05, .01, by = -.01)
  }

  # a character string such as "t", "ghs" or "neg" is expanded into a full
  # hyperparameter list via htlr_prior()
  if (is.character(prior))
    prior <- htlr_prior(prior, df)

  # htlr_fit() will take care of input checking
  htlr_fit(X_tr = X, y_tr = y, fsel = fsel, stdzx = stdx,
           ptype = prior$ptype, alpha = prior$alpha, s = prior$logw,
           #eta = prior$eta,
           sigmab0 = prior$sigmab0,
           iters_h = warmup, iters_rmc = (iter - warmup), thin = thin,
           leap_L = leap, leap_L_h = leap.warm, leap_step = leap.stepsize,
           hmc_sgmcut = cut, initial_state = init,
           silence = !verbose, rep.legacy = rep.legacy,
           keep.warmup.hist = keep.warmup.hist)
}