-
Notifications
You must be signed in to change notification settings - Fork 3
/
ALFAM2mod.R
283 lines (233 loc) · 9.88 KB
/
ALFAM2mod.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# Function for running the model.
#
# Builds the primary parameters (f0, r1, r2, r3, f5) from the secondary
# parameters in `pars` and the predictor columns in `dat` (via calcPParms()),
# then calls calcEmis() once per group and binds the results together.
#
# Arguments:
#   dat             Data frame with the time column, the application column and
#                   any predictor columns.
#   pars            Named numeric vector (or list, which is unlisted) of
#                   secondary parameters. Each name is a predictor name (a
#                   column in dat, or 'int' for an intercept) followed by a
#                   digit that selects the primary parameter: 0 = f0, 1 = r1,
#                   2 = r2, 3 = r3, 5 = f5 (used at the incorporation event).
#                   The prefix form (e.g. 'f0.int', 'r1.air.temp') is converted
#                   to the suffix form. Parameters whose predictor is not a
#                   column in dat are dropped with a warning.
#                   NOTE(review): names ending in 4 pass the name check but are
#                   not assigned to any primary parameter below - confirm
#                   whether r4 is meant to be supported.
#   app.name        Name of the total pool (a0 + u0) column.
#   time.name       Name of the time column (passed to calcEmis() as ct).
#   time.incorp     NULL for no incorporation, otherwise a numeric value or the
#                   name of a column with the incorporation time. Only the
#                   first value in each group is used; NA means no incorporation
#                   for that group.
#   group           Name of the group column, or NULL to treat dat as one group.
#   center          If TRUE, the values in cmns are subtracted from the columns
#                   of dat with matching names.
#   cmns            Named vector of centering values.
#   check.NA        If TRUE, stop when any primary parameter is NA.
#   pass.col        Names of columns in dat to copy to the front of the output.
#   add.incorp.rows If TRUE, rows added at the incorporation time are kept in
#                   the output instead of being dropped.
#   parallel        If TRUE, groups are processed on a SOCK cluster.
#   n.cpus          Number of cluster workers used when parallel is TRUE.
#
# Returns a data frame in the original row order of dat: pass-through columns
# (if requested), the group column (named after `group` if given, otherwise
# 'group'), then the columns returned by calcEmis().
ALFAM2mod <- function(
  dat,
  pars = c(
    int0           = -0.91400,
    int1           = -1.16256,
    int2           = -1.02444,
    int3           = -2.92947,
    app.methodos0  = -0.98384,
    app.rate0      = -0.01602,
    man.dm0        =  0.40164,
    incorpdeep5    = -3.08108,
    incorpshallow5 = -0.91376,
    app.methodbc1  =  0.62870,
    man.dm1        = -0.07974,
    air.temp1      =  0.04909,
    wind.1m1       =  0.04876,
    air.temp3      =  0.01344,
    incorpdeep3    = -0.74621,
    app.methodos3  = -0.20088,
    rain.rate2     =  0.38434
  ),
  app.name = 'TAN.app',
  time.name = 'ct',
  time.incorp = NULL, # NULL with no incorporation, otherwise numeric or column name. If column name value should be NA for no incorporation (w groups)
  group = NULL,
  center = TRUE,
  cmns = c(app.rate = 40,
           man.dm   = 6,
           man.tan  = 1.2,
           man.ph   = 7.5,
           air.temp = 13,
           wind.1m  = 2.7,
           lwind    = 0.43,
           crop.z   = 10),
  check.NA = TRUE,
  pass.col = NULL,
  add.incorp.rows = FALSE,
  parallel = FALSE,
  n.cpus = 1
) {

  #### NTS: not package-ready
  if (parallel) {
    requireNamespace("parallel")
  }

  # NTS: Work needed here.
  # Add checks for all arguments
  checkArgClassValue(dat, expected.class = 'data.frame')
  checkArgClassValue(pars, expected.class = c('numeric', 'list'))
  checkArgClassValue(time.incorp, expected.class = c('character', 'numeric', 'integer', 'NULL'))

  # If pars was given as list, change to vector
  if (is.list(pars)) {
    pars <- unlist(pars)
  }

  # Convert prefix-style names (e.g. f0.int) to suffix style (int0)
  if (any(ch_nms <- grepl("[fr]{1}[0-5]{1}[.]", names(pars)))) {
    names(pars)[ch_nms] <- gsub("^[fr]([0-5])[.](.*)", "\\2\\1", names(pars)[ch_nms])
  }

  # Check that all names for p end with a number
  if (any(!grepl('[0-9]$', names(pars)))) stop('One or more entries in argument "pars" cannot be assigned to parameters f0, r1, r2, r3, r4, r5.\n Make sure that the naming is correct. Either append the corresponding number (0 to 5) at the name endings (e.g. int0)\n or prepend the parameter separated by a dot (e.g. f0.int) or provide an appropriately named list as argument.')

  # Check predictor names to make sure they don't match reserved names (group, incorporation, etc.)
  # -> possibly extend names as done below?

  # Copy pass-through columns under a reserved name so later changes to dat
  # (centering, etc.) do not affect the values that are returned
  if (!is.null(pass.col)) {
    dat[, paste0("pass_me.through_", pass.col)] <- dat[, pass.col]
  }

  # If there is no grouping variable, add one to simplify code below (only one set, for groups)
  if (is.null(group)) {
    dat$group <- 0
  } else {
    dat$group <- as.character(dat[, group])
  }

  # Center numeric predictors
  if (center) {
    # Columns that will be centered
    c_cols <- names(cmns)[names(cmns) %in% names(dat)]
    # drop = FALSE keeps a data frame even when only one column matches;
    # sweep() needs an object with dimensions
    if (length(c_cols) > 0) {
      dat[, c_cols] <- sweep(dat[, c_cols, drop = FALSE], 2, cmns[c_cols])
    }
  }

  # Original order (for sorting before return)
  dat$orig.order <- seq_len(nrow(dat))

  # Extend dat data frame with incorporation time if needed
  dat$ievent <- dat$added.row <- FALSE # NTS: problem if dat already has column with this name

  if (!is.null(time.incorp)) {

    # Add numeric time.incorp to data frame dat (column needed to handle groups)
    if (is.numeric(time.incorp)) {
      dat$time.incorp <- time.incorp
      time.incorp <- 'time.incorp' # NTS: really a name, change arg name to t.incorp.name?
    }

    # Add incorporation times -> hac: how about using a vector as input argument: e.g. c(group1=4, group2=2,...)
    # or even a list: e.g. list(group1=c("deep",4),group2=c("shallow",10))
    for (i in sort(unique(dat$group))) {
      dd <- dat[dat$group == i, ]
      tt <- dd[1, time.incorp]
      if (!is.na(tt)) {
        # If exact time is already present, no need to add row
        if (!tt %in% dd[, time.name]) {
          # New row is a copy of the group's first row, moved to time tt
          irow <- dd[1, ]
          irow$added.row <- TRUE
          irow[, time.name] <- tt
          dat <- rbind(dat, irow)
        }
        # Identify time of incorporation event
        dat[dat$group == i & dat[, time.name] == tt, 'ievent'] <- TRUE
      }
    }
  }

  # Sort (time must increase for calcEmis())
  dat <- dat[order(dat$group, dat[, time.name]), ]

  # Drop parameters for missing predictors
  p.orig <- pars
  ppnames <- gsub('[0-9]$', '', names(pars))
  predpres <- ppnames %in% names(dat) | ppnames == 'int'
  pars <- pars[predpres]
  if (any(!predpres)) {
    warning('Missing predictors. These secondary parameters have been dropped: ', paste(names(p.orig)[!predpres], collapse = ', '))
  }

  # Associate (secondary) parameters with primary parameters (r1, etc.)
  which0 <- grep('0$', names(pars)) # For f0
  which1 <- grep('1$', names(pars)) # For r1
  which2 <- grep('2$', names(pars)) # For r2
  which3 <- grep('3$', names(pars)) # For r3
  which5 <- grep('5$', names(pars)) # For a to u transfer at specific times, incorporation, will be applied once only!

  names(pars) <- gsub('[0-9]$', '', names(pars))

  # Every parameter must have been assigned to exactly one primary parameter
  if (!all(ww <- sort(c(which0, which1, which2, which3, which5)) == seq_along(pars))) {
    stop('Something wrong with p. ', paste(ww, collapse = ', '))
  }

  # Make sure parameter names can be found in dat
  if (any(ncheck <- !(names(pars) %in% c('int', names(dat))))) stop ('Names in parameter vector pars not in dat (or not "int"): ', paste(names(pars)[ncheck], collapse = ', '))

  # Calculate primary parameters (zero by default)
  zv <- rep(0, nrow(dat))
  if (length(which0) > 0) f0 <- calcPParms(pars[which0], dat, tr = 'logistic') else f0 <- zv
  if (length(which1) > 0) r1 <- calcPParms(pars[which1], dat) else r1 <- zv
  if (length(which2) > 0) r2 <- calcPParms(pars[which2], dat) else r2 <- zv
  if (length(which3) > 0) r3 <- calcPParms(pars[which3], dat) else r3 <- zv
  if (length(which5) > 0) f5 <- calcPParms(pars[which5], dat, tr = 'logistic') else f5 <- zv

  # f5 only applies when incorporation occurs (0 or 1 time per group), otherwise, 100% stays in f/a
  f5[!dat$ievent] <- 1

  if (check.NA && any(is.na(c(f0, r1, r2, r3, f5)))) {
    cat('Missing values in predictors:\n')
    # Count NAs per predictor column; drop = FALSE so that a single
    # predictor is still handled column-wise
    pred.names <- unique(names(pars)[!grepl('^int', names(pars))])
    print(vapply(dat[, pred.names, drop = FALSE], function(x) sum(is.na(x)), integer(1)))
    stop('NA values in primary parameters. Look for missing values in predictor variables (in dat) and double-check parameters agaist dat column names')
  }

  # After calculating f5, set incorporation predictor variables to FALSE for times before incorporation occurred
  # NOTE(review): the primary parameters were already computed above and only
  # they (not the predictor columns) are handed to calcEmis() below, so this
  # change to dat does not appear to influence the result - confirm intent.
  if (!is.null(time.incorp)) {
    for (i in sort(unique(dat$group))) {
      dd <- dat[dat$group == i, ]
      tt <- dd[1, time.incorp]
      if (!is.na(tt)) {
        # NTS: problematic
        dat[dat$group == i & dat[, time.name] <= tt, grepl('incorp', names(dat)) & names(dat) %in% gsub('[0-9]$', '', names(pars))] <- FALSE
      } else {
        # NTS: does this really fix problem wehn there is no incorp?
        f5[dat$group == i] <- 1
      }
    }
  }

  # ToDo:
  # - clean above for loop and f0 r1 etc parameters

  # keep incorp rows? Clearing the flag means no row is dropped below
  if (add.incorp.rows) {
    dat[, "added.row"] <- rep(FALSE, nrow(dat))
  }

  # One data frame per group, with the primary parameters attached as columns
  s.dat <- split(cbind(dat, "__f0" = f0, "__r1" = r1, "__r2" = r2, "__r3" = r3, "__f5" = f5), dat$group)

  # Check for duplicate ct (applies to both the serial and the parallel path)
  for (sub.dat in s.dat) {
    if (any(duplicated(sub.dat[, time.name]))) {
      stop('Look for 998123b in pmod.R. Duplicated ct values.')
    }
  }

  # Calculate emission for a single group and prepend the group column
  run_group <- function(sub.dat) {
    ce <- calcEmis(
      ct = sub.dat[, time.name],
      # Calculate a0 and u0 (f5 transfers done in calcEmis())
      a0 = sub.dat[1, "__f0"] * sub.dat[1, app.name],
      u0 = (1 - sub.dat[1, "__f0"]) * sub.dat[1, app.name],
      r1 = sub.dat[, "__r1"],
      r2 = sub.dat[, "__r2"],
      r3 = sub.dat[, "__r3"],
      f5 = sub.dat[, "__f5"],
      ievent = sub.dat$ievent,
      drop.rows = sub.dat$added.row
    )
    data.frame(group = sub.dat[!sub.dat$added.row, "group"], ce, row.names = NULL, check.names = FALSE)
  }

  if (parallel) {
    # Start cluster; on.exit stops it on normal return as well as on error
    cl <- parallel::makeCluster(n.cpus, type = "SOCK")
    on.exit(parallel::stopCluster(cl), add = TRUE)
    # Largest groups first for efficiency; results are written back to the
    # positions of the original group order
    s.nr <- vapply(s.dat, nrow, integer(1))
    do.nr <- order(s.nr, decreasing = TRUE)
    e.list <- vector("list", length(s.dat))
    # parallel::clusterExport(cl,c("calcEmis","time.name","app.name"))
    e.list[do.nr] <- parallel::clusterApply(cl, s.dat[do.nr], run_group)
  } else {
    # unname() so rbind does not build row names from the group names
    e.list <- unname(lapply(s.dat, run_group))
  }

  # rbind e.list to data.frame
  e <- do.call("rbind", e.list)

  # rename 'group' column
  if (!is.null(group)) {
    names(e)[1] <- group
  }

  # Sort to match original order. The rows of e correspond one-to-one to the
  # rows of dat that are not dropped, so the same permutation is used for
  # the pass-through columns below.
  keep <- !dat$added.row
  o.rows <- order(dat$orig.order[keep])
  e <- e[o.rows, ]

  # Add pass-through column if requested
  if (!is.null(pass.col)) {
    pass <- dat[keep, paste0("pass_me.through_", pass.col), drop = FALSE][o.rows, , drop = FALSE]
    names(pass) <- pass.col
    rownames(pass) <- NULL
    e <- data.frame(pass, e)
  }

  return(e)
}