-
Notifications
You must be signed in to change notification settings - Fork 5
/
do.bootstrap.cress.robust.r
155 lines (137 loc) · 7.02 KB
/
do.bootstrap.cress.robust.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#' Bootstrapping function without model selection for a model of class 'gamMRSea'
#'
#' This fuction performs a specified number of bootstrapping iterations using CReSS/SALSA for fitting the count model. See below for details.
#'
#' @param predictionGrid The prediction grid data
#' @param model.obj The best fitting \code{CReSS} model for the original count data. Should be geeglm or a Poisson/Binomial GLM (not quasi).
#' @param splineParams The object describing the parameters for fitting the one and two dimensional splines
#' @param g2k (N x k) matrix of distances between all prediction points (N) and all knot points (k)
#' @param rename A vector of column names for which a new column needs to be created for the bootstrapped data. This defaults to \code{segment.id} for line transects (which is required for \code{create.bootcount.data}), others might be added.
#' A new column with new ids will automatically be created for the column listed in \code{resample}. In case of nearshore data, this argument is ignored.
#' @param B Number of bootstrap iterations
#' @param name Analysis name. Required to avoid overwriting previous bootstrap results. This name is added at the beginning of "predictionboot.RData" when saving bootstrap predictions.
#' @param seed Set the seed for the bootstrap sampling process.
#' @param nCores Set the number of computer cores for the bootstrap process to use (default = 1). The more cores the faster the proces but be wary of over using the cores on your computer. If \code{nCores} > (number of computer cores - 2), the function defaults to \code{nCores} = (number of computer cores - 2). Note: On a Mac computer the parallel code does not compute so use nCores=1.
#'
#' @details
#'
#' The following steps are performed for each iteration:
#'
#' - coefficients are resampled from a multivariate normal distribution defined by MLE and COV from the best fitting count model
#'
#' - predictions are made to the prediction data using the resampled coefficients
#'
#' @return
#' The function returns a matrix of bootstrap predictions. The number of rows is equal to the number of rows in predictionGrid. The number of columns is equal to \code{B}. The matrix may be very large and so is stored directly into the working directory as a workspace object: '"name"predictionboot.RObj'. The object inside is called \code{bootPreds}.
#'
#'
#'
#' @export
do.bootstrap.cress.robust<-function(model.obj, predictionGrid, splineParams=NULL, g2k=NULL, B, robust=T,name=NULL, seed=12345, nCores=1, cat.message=TRUE){
require(Matrix)
require(mvtnorm)
#require(mvtnorm)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~ PARALLEL CODE ~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(nCores>1){
require(parallel)
if(cat.message) {cat('Code in parallel: No progress guide printed')}
computerCores <- getOption("cl.cores", detectCores())
if(nCores>(computerCores-2)){nCores<-(computerCores-2)}
myCluster <- makeCluster(nCores) ; myCluster # initialise the cluster
clusterExport(myCluster, ls(), envir=environment()) # export all objects to each cluster
# export directory and functions to each cluster
clusterEvalQ(myCluster, {
#setwd('../Dropbox/SNH/Nhats/')
#setwd(dir())
require(mrds)
require(MRSea)
require(geepack)
require(mvtnorm)
require(mgcv)
require(splines)
})
# only do parametric boostrap if no data re-sampling and no nhats provided
Routputs<-parLapply(myCluster, 1:B, function(b){
set.seed(seed + b)
# setting up CReSS related variables
attributes(model.obj$formula)$.Environment<-environment()
radii<-splineParams[[1]]$radii
d2k<-splineParams[[1]]$dist
radiusIndices<-splineParams[[1]]$radiusIndices
aR<- splineParams[[1]]$invInd[splineParams[[1]]$knotPos]
est<- coefficients(model.obj)
if(robust==T){
vbeta<-summary(model.obj)$cov.robust
samplecoeff<-NULL
try(samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd')), silent = TRUE)
if(is.null(samplecoeff)){
vbeta<-as.matrix(nearPD(as.matrix(summary(model.obj)$cov.robust))$mat)
samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd'))
}
}
else{
vbeta<-summary(model.obj)$cov.scaled
samplecoeff<-NULL
try(samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd')), silent = TRUE)
if(is.null(samplecoeff)){
vbeta<-as.matrix(nearPD(as.matrix(summary(model.obj)$cov.scaled))$mat)
samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd'))
}
}
# make predictions
dists<- g2k
rpreds<-predict.gamMRSea(newdata=predictionGrid, g2k=dists, object=model.obj, type='response', coeff=samplecoeff)
bootPreds<- rpreds
return(bootPreds)
})
stopCluster(myCluster)
bootPreds<- matrix(unlist(Routputs), ncol=B)
} # end ncores >1 loop
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~ SINGLE CORE CODE ~~~~~~~~~~~~~~~~~~~~~~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(nCores==1){
# object for storing the predictions
bootPreds<- matrix(NA, nrow=nrow(predictionGrid), ncol=B)
if(cat.message) {cat('Not in parallel: progress guide printed below \n')}
attributes(model.obj$formula)$.Environment<-environment()
radii<-splineParams[[1]]$radii
d2k<-splineParams[[1]]$dist
radiusIndices<-splineParams[[1]]$radiusIndices
aR<- splineParams[[1]]$invInd[splineParams[[1]]$knotPos]
for(b in 1:B){#
set.seed(seed + b)
#cat('Parametric boostrp only')
if(cat.message) {if((b/10)%%1 == 0){cat(b, '\n')}else{cat('.')}}
# sample from multivariate normal
est<- coefficients(model.obj)
if(robust==T){
vbeta<-summary(model.obj)$cov.robust
samplecoeff<-NULL
try(samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd')), silent = TRUE)
if(is.null(samplecoeff)){
vbeta<-as.matrix(nearPD(as.matrix(summary(model.obj)$cov.robust))$mat)
samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd'))
}
}
else{
vbeta<-summary(model.obj)$cov.scaled
samplecoeff<-NULL
try(samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd')), silent = TRUE)
if(is.null(samplecoeff)){
vbeta<-as.matrix(nearPD(as.matrix(summary(model.obj)$cov.scaled))$mat)
samplecoeff<- as.numeric(rmvnorm(1,est,vbeta, method='svd'))
}
}
# make predictions
dists<- g2k
rpreds<-predict.gamMRSea(newdata=predictionGrid, g2k=dists, object=model.obj, type='response', coeff=samplecoeff)
bootPreds[,b]<- rpreds
}
} # end ncores=1 loop
# save predictions - and other data? e.g. parameter values?
#save(bootPreds, file=paste(name,"predictionboot.RData",sep=""), compress='bzip2')
return(bootPreds)
} # end of function