/
plm.R
290 lines (274 loc) · 18.2 KB
/
plm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
#' Power-law model with variance that varies with stage.
#'
#' plm is used to fit a discharge rating curve for paired measurements of stage and discharge using a power-law model with variance that varies with stage as described in Hrafnkelsson et al. (2022). See "Details" for a more elaborate description of the model.
#' @param formula an object of class "formula", with discharge column name as response and stage column name as a covariate, i.e. of the form \code{y}~\code{x} where \code{y} is discharge in m\eqn{^3/}s and \code{x} is stage in m (it is very important that the data is in the correct units).
#' @param data data.frame containing the variables specified in formula.
#' @param c_param stage for which there is zero discharge. If NULL, it is treated as unknown in the model and inferred from the data.
#' @param h_max maximum stage to which the rating curve should extrapolate to. If NULL, the maximum stage value in the data is selected as an upper bound.
#' @param parallel logical value indicating whether to run the MCMC in parallel or not. Defaults to TRUE.
#' @param num_cores integer between 1 and 4 (number of MCMC chains) indicating how many cores to use. Only used if parallel=TRUE. If NULL, the number of cores available on the device is detected automatically.
#' @param forcepoint logical vector of the same length as the number of rows in data. If an element at index \eqn{i} is TRUE it indicates that the rating curve should be forced through the \eqn{i}-th measurement. Use with care, as this will strongly influence the resulting rating curve.
#'
#' @details The power-law model, which is commonly used in hydraulic practice, is of the form
#' \deqn{Q=a(h-c)^{b}}
#' where \eqn{Q} is discharge, \eqn{h} is stage and \eqn{a}, \eqn{b} and \eqn{c} are unknown constants.\cr\cr
#' The power-law model is here inferred by using a Bayesian hierarchical model. The model is on a logarithmic scale
#' \deqn{\log(Q_i) = \log(a) + b \log(h_i - c) + \varepsilon_i, i = 1,...,n}
#' where \eqn{\varepsilon_i} follows a normal distribution with mean zero and variance \eqn{\sigma_\varepsilon(h_i)^2} that varies with stage. The error variance, \eqn{\sigma_\varepsilon^2(h)}, of the log-discharge data is modeled as an exponential of a B-spline curve, that is, a linear combination of six B-spline basis functions that are defined over the range of the stage observations. An efficient posterior simulation is achieved by sampling from the joint posterior density of the hyperparameters of the model, and then sampling from the density of the latent parameters conditional on the hyperparameters.\cr\cr
#' Bayesian inference is based on the posterior density and summary statistics such as the posterior mean and 95\% posterior intervals are based on the posterior density. Analytical formulas for these summary statistics are intractable in most cases and thus they are computed by generating samples from the posterior density using a Markov chain Monte Carlo simulation.
#' @return plm returns an object of class "plm". An object of class "plm" is a list containing the following components: \cr
#' \item{\code{rating_curve}}{a data frame with 2.5\%, 50\% and 97.5\% percentiles of the posterior predictive distribution of the rating curve.}
#' \item{\code{rating_curve_mean}}{a data frame with 2.5\%, 50\% and 97.5\% percentiles of the posterior distribution of the mean of the rating curve. Additionally contains columns with r_hat and the effective number of samples for each parameter as defined in Gelman et al. (2013).}
#' \item{\code{param_summary}}{a data frame with 2.5\%, 50\% and 97.5\% percentiles of the posterior distribution of latent- and hyperparameters.}
#' \item{\code{sigma_eps_summary}}{a data frame with 2.5\%, 50\% and 97.5\% percentiles of the posterior of \eqn{\sigma_{\varepsilon}}.}
#' \item{\code{Deviance_summary}}{a data frame with 2.5\%, 50\% and 97.5\% percentiles of the posterior distribution of the deviance.}
#' \item{\code{rating_curve_posterior}}{a matrix containing the full thinned posterior samples of the posterior predictive distribution of the rating curve (excluding burn-in).}
#' \item{\code{rating_curve_mean_posterior}}{a matrix containing the full thinned posterior samples of the posterior distribution of the mean of the rating curve (excluding burn-in).}
#' \item{\code{a_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{a}.}
#' \item{\code{b_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{b}.}
#' \item{\code{c_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{c}.}
#' \item{\code{sigma_eps_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\sigma_{\varepsilon}}.}
#' \item{\code{eta_1_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_1}.}
#' \item{\code{eta_2_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_2}.}
#' \item{\code{eta_3_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_3}.}
#' \item{\code{eta_4_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_4}.}
#' \item{\code{eta_5_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_5}.}
#' \item{\code{eta_6_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of \eqn{\eta_6}.}
#' \item{\code{Deviance_posterior}}{a numeric vector containing the full thinned posterior samples of the posterior distribution of the deviance excluding burn-in samples.}
#' \item{\code{D_hat}}{deviance at the median value of the parameters.}
#' \item{\code{effective_num_param_DIC}}{effective number of parameters, which is calculated as median(Deviance_posterior) minus D_hat.}
#' \item{\code{DIC}}{Deviance Information Criterion for the model, calculated as D_hat plus 2*effective_num_parameters_DIC.}
#' \item{\code{lppd}}{log pointwise predictive probability of the observed data under the model}
#' \item{\code{effective_num_param_WAIC}}{effective number of parameters, which is calculated by summing up the posterior variance of the log predictive density for each data point.}
#' \item{\code{WAIC}}{Watanabe-Akaike information criterion for the model, defined as -2*( lppd - effective_num_param_WAIC ).}
#' \item{\code{autocorrelation}}{a data frame with the autocorrelation of each parameter for different lags.}
#' \item{\code{acceptance_rate}}{proportion of accepted samples in the thinned MCMC chain (excluding burn-in).}
#' \item{\code{formula}}{object of type "formula" provided by the user.}
#' \item{\code{data}}{data provided by the user, ordered by stage.}
#' \item{\code{run_info}}{information about the input arguments and the specific parameters used in the MCMC chain.}
#' @references Gelman, A., Carlin, J. B., Stern, H. S., Dunson, D. B., Vehtari, A., and Rubin, D. B. (2013). Bayesian Data Analysis, Third Edition. Chapman & Hall/CRC Texts in Statistical Science. Taylor & Francis.
#' @references Hrafnkelsson, B., Sigurdarson, H., and Gardarsson, S. M. (2022). Generalization of the power-law rating curve using hydrodynamic theory and Bayesian hierarchical modeling, Environmetrics, 33(2):e2711.
#' @references Spiegelhalter, D., Best, N., Carlin, B., Van Der Linde, A. (2002). Bayesian measures of model complexity and fit. Journal of the Royal Statistical Society: Series B (Statistical Methodology) 64(4), 583–639.
#' @references Watanabe, S. (2010). Asymptotic equivalence of Bayes cross validation and widely applicable information criterion in singular learning theory. J. Mach. Learn. Res. 11, 3571–3594.
#' @seealso \code{\link{summary.plm}} for summaries, \code{\link{predict.plm}} for prediction. It is also useful to look at \code{\link{spread_draws}} and \code{\link{plot.plm}} to help visualize the full posterior distributions.
#' @examples
#' \donttest{
#' data(spanga)
#' set.seed(1)
#' plm.fit <- plm(formula=Q~W,data=spanga,num_cores=2)
#' summary(plm.fit)
#' }
#' @export
plm <- function(formula,data,c_param=NULL,h_max=NULL,parallel=TRUE,num_cores=NULL,forcepoint=rep(FALSE,nrow(data))){
#argument checking
stopifnot(inherits(formula,'formula'))
stopifnot(inherits(data,'data.frame'))
stopifnot(is.null(c_param) | is.double(c_param))
stopifnot(is.null(h_max) | is.double(h_max))
stopifnot(is.null(num_cores) | is.numeric(num_cores))
stopifnot(length(forcepoint)==nrow(data) & is.logical(forcepoint))
formula_args <- all.vars(formula)
stopifnot(length(formula_args)==2 & all(formula_args %in% names(data)))
model_dat <- as.data.frame(data[,all.vars(formula)])
forcepoint <- forcepoint[order(model_dat[,2,drop=TRUE])]
model_dat <- model_dat[order(model_dat[,2,drop=TRUE]),]
Q <- model_dat[,1,drop=TRUE]
h <- model_dat[,2,drop=TRUE]
if(!is.null(c_param) && min(h)<c_param) stop('c_param must be lower than the minimum stage value in the data')
if(any(Q<=0)) stop('All discharge measurements must but strictly greater than zero. If you know the stage of zero discharge, use c_param.')
MCMC_output_list <- plm.inference(y=log(Q),h=h,c_param=c_param,h_max=h_max,parallel=parallel,forcepoint=forcepoint,num_cores=num_cores)
param_names <- get_param_names('plm',c_param)
result_obj=list()
attr(result_obj, "class") <- "plm"
result_obj$a_posterior = MCMC_output_list$x[1,]
result_obj$b_posterior = MCMC_output_list$x[2,]
if(is.null(c_param)){
result_obj$c_posterior <- MCMC_output_list$theta[1,]
result_obj$sigma_eta_posterior <- MCMC_output_list$theta[2,]
for(i in 3:nrow(MCMC_output_list$theta)){
result_obj[[paste0('eta_',i-2,'_posterior')]] <- MCMC_output_list$theta[i,]
}
}else{
result_obj$c_posterior <- NULL
result_obj$sigma_eta_posterior <- MCMC_output_list$theta[1,]
for(i in 2:nrow(MCMC_output_list$theta)){
result_obj[[paste0('eta_',i-1,'_posterior')]] <- MCMC_output_list$theta[i,]
}
}
unique_h_idx <- !duplicated(MCMC_output_list$h)
h_unique <- unique(MCMC_output_list$h)
h_unique_order <- order(h_unique)
h_unique_sorted <- h_unique[h_unique_order]
h_idx_data <- match(h,h_unique_sorted)
result_obj$rating_curve_posterior <- exp(MCMC_output_list$y_post_pred[unique_h_idx,][h_unique_order,])
result_obj$rating_curve_mean_posterior <- exp(MCMC_output_list$y_post[unique_h_idx,][h_unique_order,])
result_obj$sigma_eps_posterior <- sqrt(MCMC_output_list$sigma_eps[unique_h_idx,][h_unique_order,])
result_obj$Deviance_posterior <- c(MCMC_output_list$D)
#summary objects
result_obj$rating_curve <- get_MCMC_summary(result_obj$rating_curve_posterior,h=h_unique_sorted)
result_obj$rating_curve_mean <- get_MCMC_summary(result_obj$rating_curve_mean_posterior,h=h_unique_sorted)
result_obj$sigma_eps_summary <- get_MCMC_summary(result_obj$sigma_eps_posterior,h=h_unique_sorted)
result_obj$param_summary <- get_MCMC_summary(rbind(MCMC_output_list$x[1,],MCMC_output_list$x[2,],MCMC_output_list$theta))
result_obj$param_summary$eff_n_samples <- MCMC_output_list$effective_num_samples
result_obj$param_summary$r_hat <- MCMC_output_list$r_hat
row.names(result_obj$param_summary) <- param_names
result_obj$Deviance_summary <- get_MCMC_summary(MCMC_output_list$D)
#Deviance calculations
result_obj$D_hat <- MCMC_output_list$D_hat
result_obj$effective_num_param_DIC <- result_obj$Deviance_summary[,'median']-result_obj$D_hat
result_obj$DIC <- result_obj$D_hat + 2*result_obj$effective_num_param_DIC
#WAIC calculations
waic_list <- calc_waic(result_obj,model_dat)
result_obj$lppd <- waic_list$lppd
result_obj$effective_num_param_WAIC <- waic_list$p_waic
result_obj$WAIC <- waic_list$waic
#Rhat and autocorrelation
autocorrelation_df <- as.data.frame(t(MCMC_output_list$autocorrelation))
names(autocorrelation_df) <- param_names
autocorrelation_df$lag <- 1:nrow(autocorrelation_df)
result_obj$autocorrelation <- autocorrelation_df[,c('lag',param_names)]
# store other information
result_obj$acceptance_rate <- MCMC_output_list[['acceptance_rate']]
result_obj$formula <- formula
result_obj$data <- model_dat
result_obj$run_info <- MCMC_output_list$run_info
return(result_obj)
}
#' @importFrom stats optim
plm.inference <- function(y,h,c_param=NULL,h_max=NULL,parallel=TRUE,forcepoint=rep(FALSE,length(h)),num_cores=NULL,num_chains=4,nr_iter=20000,burnin=2000,thin=5){
RC <- get_model_components('plm',y,h,c_param,h_max,forcepoint,h_min=NULL)
output_list <- get_MCMC_output_list(theta_m=RC$theta_m,RC=RC,density_fun=RC$density_fun,
unobserved_prediction_fun=RC$unobserved_prediction_fun,
parallel=parallel,num_cores=num_cores,num_chains=num_chains,nr_iter=nr_iter,
burnin=burnin,thin=thin)
output_list$D_hat <- plm.calc_Dhat(output_list$theta,RC)
if(is.null(RC$c)){
output_list$theta[1,] <- RC$h_min-exp(output_list$theta[1,])
output_list$theta[2,] <- exp(output_list$theta[2,])
output_list$theta[3:8,] <- RC$P%*%output_list$theta[3:8,]
}else{
output_list$theta[1,] <- exp(output_list$theta[1,])
output_list$theta[2:7,] <- RC$P%*%output_list$theta[2:7,]
}
output_list$x[1,] <- exp(output_list$x[1,])
output_list[['h']] <- c(RC$h,RC$h_u)
output_list[['acceptance_rate']] <- sum(output_list[['acceptance_vec']])/ncol(output_list[['acceptance_vec']])
output_list[['run_info']] <- list('c_param'=c_param,'h_max'=h_max,'forcepoint'=forcepoint,'nr_iter'=nr_iter,'num_chains'=num_chains,'burnin'=burnin,'thin'=thin)
return(output_list)
}
#' @importFrom stats rnorm dlnorm
plm.density_evaluation_known_c <- function(theta,RC){
log_sig_eta <- theta[1]
eta_1 <- theta[2]
z <- theta[3:7]
lambda=c(RC$P%*%as.matrix(c(eta_1,exp(log_sig_eta)*z)))
l=c(log(RC$h-RC$c))
varr=c(RC$epsilon*exp(RC$B%*%lambda))
if(any(varr>10^2)) return(list(p=-1e9)) # to avoid numerical instability
Sig_eps=diag(varr)
X=cbind(1,l)
L=t(chol(X%*%RC$Sig_x%*%t(X)+Sig_eps+diag(nrow(Sig_eps))*RC$nugget))
w=solve(L,RC$y-X%*%RC$mu_x)
p=-0.5%*%t(w)%*%w-sum(log(diag(L)))+
pri('eta_1',eta_1=eta_1,lambda_eta_1=RC$lambda_eta_1) +
pri('eta_minus1',z=z) +
pri('sigma_eta',log_sig_eta=log_sig_eta,lambda_seta=RC$lambda_seta)
W=solve(L,X%*%RC$Sig_x)
x_u=RC$mu_x+t(chol(RC$Sig_x))%*%rnorm(nrow(RC$mu_x))
sss=(X%*%x_u)-RC$y+sqrt(varr)*as.matrix(rnorm(RC$n))
x=as.matrix(x_u-t(W)%*%solve(L,sss))
yp=X%*%x
#posterior predictive draw
ypo=yp+as.matrix(rnorm(RC$n))*sqrt(varr)
D=-2*sum( log(dlnorm(exp(RC$y[1:RC$n,]),yp,sqrt(varr))) )
return(list("p"=p,"x"=x,"y_post"=yp,"y_post_pred"=ypo,"sigma_eps"=varr,"D"=D))
}
#' @importFrom stats rnorm dlnorm
plm.density_evaluation_unknown_c <- function(theta,RC){
zeta <- theta[1]
log_sig_eta <- theta[2]
eta_1 <- theta[3]
z <- theta[4:8]
lambda=c(RC$P%*%as.matrix(c(eta_1,exp(log_sig_eta)*z)))
l=c(log(RC$h-RC$h_min+exp(zeta)))
varr=c(RC$epsilon*exp(RC$B%*%lambda))
if(any(varr>10^2)) return(list(p=-1e9)) # to avoid numerical instability
Sig_eps=diag(varr)
X=cbind(1,l)
L=t(chol(X%*%RC$Sig_x%*%t(X)+Sig_eps+diag(nrow(Sig_eps))*RC$nugget))
w=solve(L,RC$y-X%*%RC$mu_x)
p=-0.5%*%t(w)%*%w-sum(log(diag(L))) +
pri('c', zeta = zeta, lambda_c = RC$lambda_c) +
pri('eta_1',eta_1=eta_1,lambda_eta_1=RC$lambda_eta_1) +
pri('eta_minus1',z=z) +
pri('sigma_eta',log_sig_eta=log_sig_eta,lambda_seta=RC$lambda_seta)
W=solve(L,X%*%RC$Sig_x)
x_u=RC$mu_x+t(chol(RC$Sig_x))%*%rnorm(nrow(RC$mu_x))
sss=(X%*%x_u)-RC$y+sqrt(varr)*as.matrix(rnorm(RC$n))
x=as.matrix(x_u-t(W)%*%solve(L,sss))
yp=X%*%x
#posterior predictive draw
ypo=yp+as.matrix(rnorm(RC$n))*sqrt(varr)
D=-2*sum( log(dlnorm(exp(RC$y[1:RC$n,]),yp,sqrt(varr))) )
return(list("p"=p,"x"=x,"y_post"=yp,"y_post_pred"=ypo,"sigma_eps"=varr,"D"=D))
}
#' @importFrom stats dlnorm
plm.calc_Dhat <- function(theta,RC){
theta_median <- apply(theta,1,median)
if(!is.null(RC$c)){
theta_median <- c(log(RC$h_min-RC$c),theta_median)
}
zeta <- theta_median[1]
log_sig_eta <- theta_median[2]
eta_1 <- theta_median[3]
z <- theta_median[4:8]
lambda=c(RC$P%*%as.matrix(c(eta_1,exp(log_sig_eta)*z)))
l=c(log(RC$h-RC$h_min+exp(zeta)))
varr=c(RC$epsilon*exp(RC$B%*%lambda))
if(any(varr>10^2)) return(list(p=-1e9)) # to avoid numerical instability
Sig_eps=diag(varr)
X=cbind(1,l)
L=t(chol(X%*%RC$Sig_x%*%t(X)+Sig_eps+diag(nrow(Sig_eps))*RC$nugget))
w=solve(L,RC$y-X%*%RC$mu_x)
x=RC$mu_x+RC$Sig_x%*%(t(X)%*%solve(t(L),w))
yp=(X %*% x)[1:RC$n,]
D=-2*sum( log(dlnorm(exp(RC$y[1:RC$n,]),yp,sqrt(varr))) )
return(D)
}
#' @importFrom stats rnorm
plm.predict_u_known_c <- function(theta,x,RC){
log_sig_eta2 <- theta[1]
eta_1 <- theta[2]
z <- theta[3:7]
lambda=c(RC$P%*%as.matrix(c(eta_1,exp(log_sig_eta2)*z)))
m <- length(RC$h_u)
#calculate spline variance from B_splines
varr_u <- c(exp(RC$B_u %*% lambda))
l <- log(RC$h_u-RC$c)
X <- cbind(rep(1,m),l)
#sample from the posterior of discharge y
yp_u <- c(X%*%x)
ypo_u <- yp_u + rnorm(m) * sqrt(varr_u)
return(list('y_post'=yp_u,'y_post_pred'=ypo_u,'sigma_eps'=varr_u))
}
#' @importFrom stats rnorm
plm.predict_u_unknown_c <- function(theta,x,RC){
#collecting parameters from the MCMC sample
zeta=theta[1]
log_sig_eta2 <- theta[2]
eta_1 <- theta[3]
z <- theta[4:8]
lambda=c(RC$P%*%as.matrix(c(eta_1,exp(log_sig_eta2)*z)))
m=length(RC$h_u)
#calculate spline variance from B_splines
varr_u <- c(exp(RC$B_u %*% lambda))
above_c <- RC$h_min-exp(zeta) < RC$h_u
m_above_c <- sum(above_c)
#building blocks of the explanatory matrix X calculated
l=log(RC$h_u[above_c]-RC$h_min+exp(zeta))
X=cbind(rep(1,m_above_c),l)
#sample from the posterior of discharge y
yp_u <- c(X%*%x)
ypo_u = yp_u + rnorm(m_above_c) * sqrt(varr_u[above_c])
return(list('y_post'=c(rep(-Inf,m-m_above_c),yp_u),'y_post_pred'=c(rep(-Inf,m-m_above_c),ypo_u),'sigma_eps'=varr_u))
}