-
Notifications
You must be signed in to change notification settings - Fork 15
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
findHzGaps()
, fillHzGaps()
function to find / fill gaps in horizon depths
#205
Comments
Functional, tested, documented (but not optimized) |
fillHzGaps() is not yet optimized, so this option is disabled by default.
@brownag any suggestions on how to optimize |
Here is an attempt--some 3x faster on library(aqp, warn = FALSE)
#> This is aqp 1.27
# original code, slightly modified relative to master
fillHzGaps <- function(x, flag = FALSE) {
idn <- idname(x)
hzidn <- hzidname(x)
htb <- horizonDepths(x)
hznames <- horizonNames(x)
ids.top.bottom.idx <- match(c(idn, hzidn, htb), hznames)
h <- horizons(x)
hs <- split(h, h[[idn]])
h.filled <- lapply(hs, function(i) {
n <- nrow(i)
s <- 1:(n-1)
.top <- i[[htb[1]]]
.bottom <- i[[htb[2]]]
idx <- which(.bottom[s] != .top[s + 1])
if(length(idx) > 0) {
gap.top <- .bottom[idx]
gap.bottom <- .top[idx + 1]
hz.template <- i[1, ids.top.bottom.idx]
hz.template[[htb[1]]] <- gap.top
hz.template[[htb[2]]] <- gap.bottom
hz.template[[hzidn]] <- NA
res <- data.table::rbindlist(list(i, hz.template), fill = TRUE)
res <- as.data.frame(res)
return(res)
} else {
return(i)
}
})
h.filled <- do.call('rbind', h.filled)
o <- order(h.filled[[idn]], h.filled[[htb[1]]])
h.filled <- h.filled[o, ]
idx <- which(is.na(h.filled[[hzidn]]))
if(length(idx) > 0) {
m <- max(as.numeric(h[[hzidn]]), na.rm = TRUE)
s <- seq(
from = m + 1,
to = m + length(idx),
by = 1
)
if(flag) {
h.filled[['.filledGap']] <- FALSE
h.filled[['.filledGap']][idx] <- TRUE
}
h.filled[[hzidn]][idx] <- as.character(s)
}
# note: this is the right place to deal with hzid
h.filled$hzID <- as.character(1:nrow(h.filled))
replaceHorizons(x) <- aqp:::.as.data.frame.aqp(h.filled, aqp_df_class(x))
hzidname(x) <- "hzID"
return(x)
}
# new code
fillHzGaps_2 <- function(x, flag = FALSE) {
idn <- idname(x)
hzidn <- hzidname(x)
htb <- horizonDepths(x)
hznames <- horizonNames(x)
hcnames <- c(idn, hzidn, htb)
h <- horizons(x)
lead.idx <- 2:nrow(h)
lag.idx <- 1:(nrow(h) - 1)
# identify bad horizons
bad.idx <- which(h[[htb[2]]][lag.idx] != h[[htb[1]]][lead.idx]
& h[[idn]][lag.idx] == h[[idn]][lead.idx])
# create template data.frame
hz.template <- h[bad.idx, ]
# replace non-ID/depth column values with NA
hz.template[, hznames[!hznames %in% hcnames]] <- NA
# fill gaps
hz.template[[htb[1]]] <- h[[htb[2]]][bad.idx] # replace top with (overlying) bottom
hz.template[[htb[2]]] <- h[[htb[1]]][bad.idx + 1] # replace bottom with (underlying) top
# flag if needed
if (flag) {
h[['.filledGap']] <- FALSE
hz.template[['.filledGap']] <- TRUE
}
# combine original data with filled data
res <- rbind(h, hz.template)
# ID + top depth sort
res <- res[order(res[[idn]], res[[htb[1]]]),]
# re-calculate unique hzID (note: AFTER reorder)
res$hzID <- as.character(1:nrow(res))
# replace horizons (use df class in object x)
replaceHorizons(x) <- aqp:::.as.data.frame.aqp(res, aqp_df_class(x))
# use the autocalculated hzID (in case user had e.g. phiid, chiid set)
hzidname(x) <- "hzID"
return(x)
}
# create sample dataset
data(sp4)
depths(sp4) <- id ~ top + bottom
# introduce gaps
idx <- c(2, 8, 12)
sp4$top[idx] <- NA
# check
horizons(sp4)[idx, ]
#> id name top bottom K Mg Ca CEC_7 ex_Ca_to_Mg sand silt clay CF
#> 2 colusa ABt NA 8 0.2 23.7 5.6 21.4 0.23 42 31 27 0.27
#> 8 kings Bt1 NA 13 0.6 12.1 7.0 18.0 0.51 36 49 15 0.75
#> 12 mariposa Bt2 NA 34 0.3 44.3 6.2 34.1 0.14 36 33 31 0.71
#> hzID
#> 2 2
#> 8 8
#> 12 12
# remove problematic horizons
x <- HzDepthLogicSubset(sp4, byhz = TRUE)
#> dropping horizons with invalid depth logic, see `metadata(x)$removed.horizons`
# benchmark filling gaps
bench::mark(horizons(fillHzGaps(x, flag = TRUE)),
horizons(fillHzGaps_2(x, flag = TRUE)),
min_iterations = 100)
#> # A tibble: 2 x 6
#> expression min median `itr/sec` mem_alloc
#> <bch:expr> <bch:> <bch:> <dbl> <bch:byt>
#> 1 horizons(fillHzGaps(x, flag = TRUE)) 5.3ms 5.7ms 171. 667KB
#> 2 horizons(fillHzGaps_2(x, flag = TRUE)) 1.47ms 1.53ms 633. 237KB
#> # ... with 1 more variable: `gc/sec` <dbl> @dylanbeaudette I might have missed something in your logic for filling in the All we need is to assign the values in Basically your solution returns something like (using But I think it should return: Like I said, I may not have fully understood your intention... BUT if this was not intentional... I suggest we add the following test to test-fillHzGaps.R, where
If you would prefer the new filled layers to start counting from the previous See #207 for a commit with my suggested changes incorporated. |
Nice work, good idea using lag/lead index + logical vectors. I originally clobbered the |
Need a function to reliably "fill" horizon gaps in existing data or as a product of filtering on `checkHzDepthLogic(..., byhz = TRUE)
fillHzGaps()
findHzGaps()
data.table
optimizationExample using latest {aqp}.
The text was updated successfully, but these errors were encountered: