Skip to content

Commit

Permalink
Merge pull request #124 from pvanlaake/main
Browse files Browse the repository at this point in the history
Make tidync understand CF time dimension
  • Loading branch information
mdsumner committed Oct 25, 2023
2 parents 2336352 + 712f181 commit 42af413
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 38 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ Imports:
RNetCDF (>= 1.9-1),
rlang,
tibble,
tidyr
tidyr,
CFtime
RoxygenNote: 7.2.2
Suggests: ggplot2,
knitr,
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,5 @@ importFrom(rlang,.data)
importFrom(stats,setNames)
importFrom(tibble,as_tibble)
importFrom(utils,head)
importFrom(CFtime,CFtime)
importFrom(CFtime,CFtimestamp)
79 changes: 51 additions & 28 deletions R/hyper_array.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ hyper_slice <- function(x, select_var = NULL, ...,
hyper_array(x = x, select_var = select_var, ...,
raw_datavals = raw_datavals, force = force, drop = drop)
}

#' @name hyper_array
#' @export
hyper_array.tidync <- function(x, select_var = NULL, ...,
Expand All @@ -84,7 +85,7 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
## hack to get the order of the indices of the dimension
ordhack <- 1 + as.integer(unlist(strsplit(gsub("D", "",
dplyr::filter(x$grid, .data$grid == active(x)) %>%
dplyr::slice(1L) %>%
# dplyr::slice(1L) %>% THERE'S ONLY EVER ONE ACTIVE GRID
dplyr::pull(.data$grid)), ",")))
dimension <- x[["dimension"]] %>% dplyr::slice(ordhack)
## ensure dimension is in order of the dims in these vars
Expand All @@ -110,21 +111,6 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
varnames <- select_var
}

## naughty internal function using scope for
## x, START, COUNT, con, raw_datavals, drop
get_vara <- function(vara) {
## issue #119
suppressWarnings(con <- ncdf4::nc_open(x$source$source[1]))
on.exit(ncdf4::nc_close(con), add = TRUE)
ncdf4::ncvar_get(con, vara,
start = START, count = COUNT,
raw_datavals = raw_datavals, collapse_degen = drop)
}
mess <- sprintf("pretty big extraction, (%i*%i values [%s]*%i",
as.integer(prod( COUNT)), length(varnames),
paste( COUNT, collapse = ", "),
length(varnames))

#browser()
opt <- getOption("tidync.large.data.check")
if (!isTRUE(opt)) {
Expand All @@ -134,31 +120,68 @@ hyper_array.tidync <- function(x, select_var = NULL, ...,
interactive() && !force) {
message("please confirm data extraction, Y(es) to proceed ... use 'force = TRUE' to avoid size check\n ( see '?hyper_array')")

mess <- sprintf("pretty big extraction, (%i*%i values [%s]*%i)",
as.integer(prod( COUNT)), length(varnames),
paste( COUNT, collapse = ", "),
length(varnames))
yes <- utils::askYesNo(mess)
if (!yes) {
stop("extraction cancelled by user", call. = FALSE)
## return(invisible(NULL))
}
}
transforms <- active_axis_transforms(x)
datalist <- lapply(varnames, get_vara)


## which of the variables for read are NC_CHAR? (they have to be split)
charvars <- variable$type[match(varnames, variable$name)] == "NC_CHAR"
if (any(charvars)) {
idx <- which(charvars)
for (i in seq_along(idx)) {

ii <- idx[i]
datalist[[ii]] <- array(unlist(strsplit(datalist[[ii]], "")),
dimension$count)
## Avoid opening file on disk multiple times for multiple variables
con <- suppressWarnings(ncdf4::nc_open(x$source$source[1]))
on.exit(ncdf4::nc_close(con), add = TRUE)
datalist <- lapply(varnames, function(vara) {
ncdf4::ncvar_get(con, vara, start = START, count = COUNT,
raw_datavals = raw_datavals, collapse_degen = FALSE)
})

## Get dimension names from the transforms. Use "timestamp" instead of "time"
transforms <- active_axis_transforms(x)
dn <- lapply(transforms, function(trans) {
ts <- suppressWarnings(trans[["timestamp"]])
if (is.null(ts)) trans[[1]][trans$selected] else ts[trans$selected]
})

## If some (but not all) of the variables defined on the grid are NC_CHAR then
## the NC_CHAR variables read here have to be split into characters to
## maintain consistent dimensionality with arrays of other data types
## (disregarding the esoteric possibility that a grid is used both for numeric
## data and for some text application).
## If all variables defined on the grid are NC_CHAR then don't split the read
## variables here but drop the first dimension from dn before applying
## dimnames. This is related to how NC_CHAR data is stored in NetCDF files.
## The result is the string array as read directly from the file, with reduced
## array dimensions.
grid_vars <- unlist(x$grid$variables[which(x$grid$grid == active(x))])
var_dt <- x$variable$type[which(x$variable$name %in% grid_vars)]
if (all(var_dt == "NC_CHAR")) dn <- dn[-1]
else if (any(var_dt == "NC_CHAR")) {
char_vars <- variable$type[match(varnames, variable$name)] == "NC_CHAR"
if (any(char_vars)) {
idx <- which(char_vars)
for (i in seq_along(idx)) {
ii <- idx[i]
datalist[[ii]] <- array(unlist(strsplit(datalist[[ii]], "")),
dimension$count)
}
}
}

## Apply dimnames
datalist <- lapply(datalist, function(d) {dimnames(d) <- dn; d})

## Drop any degenerate dimensions, if requested and needed
if (drop && any(lengths(dn) == 1)) datalist <- lapply(datalist, drop)

structure(datalist, names = varnames,
transforms = transforms,
source = x$source, class = "tidync_data")
}

#' @name hyper_array
#' @export
hyper_array.character <- function(x, select_var = NULL, ...,
Expand Down
14 changes: 6 additions & 8 deletions R/hyper_tibble.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,15 @@ hyper_tibble.tidync<- function(x, ..., na.rm = TRUE, force = FALSE) {
if (na.rm) all_na <- Reduce(`&`, lapply(slabs,
function(a) is.na(as.vector(a))))
total_prod <- prod(dim(slabs[[1]]))
out <- tibble::as_tibble(lapply(slabs, as.vector))
out <- tibble::as_tibble(lapply(slabs, as.vector))

prod_dims <- 1
trans <- attr(slabs, "transforms")
dn <- dimnames(slabs[[1]])
nm <- names(dn)

for (i in seq_along(trans)) {
nm <- names(trans)[i]
nr <- sum(trans[[i]]$selected)
out[[nm]] <- rep(dplyr::filter(trans[[nm]], .data$selected)[[nm]],
each = prod_dims, length.out = total_prod)
prod_dims <- prod_dims * nr
for (i in seq_along(nm)) {
out[[nm[i]]] <- rep(dn[[i]], each = prod_dims, length.out = total_prod)
prod_dims <- prod_dims * length(dn[[i]])
}
if (na.rm) out <- dplyr::filter(out, !all_na)
out
Expand Down
20 changes: 20 additions & 0 deletions R/hyper_transforms.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,16 +68,36 @@ hyper_transforms.default <- function(x, all = FALSE, ...) {
transforms <- vector("list", nrow(dims))
names(transforms) <- dims$name

all_atts <- mutate(x$attribute, low_name = tolower(.data$name))

for (i in seq_along(transforms)) {
ll <- list(value = ifelse(rep(dims$coord_dim[i], dims$length[i]),
nc_get(source$source, dims$name[i]), seq_len(dims$length[i])))
axis <- tibble::as_tibble(ll)
names(axis) <- dims$name[i]

## axis might have a column called "i"
## tidync/issues/74
id_value <- dims$dimension[i]
dim_name <- dims$name[i]
dim_coord <- dims$coord_dim[i]

## Add timestamp for any "time" dimension. Since not all files have a
## "calendar" attribute or "axis == "T"", just try to create a CFtime
## instance from the "units" attribute and a "calendar" if present
## tidync/issues/54
dim_atts <- all_atts %>% dplyr::filter(.data$variable == dim_name)
units <- unlist(dim_atts$value[which(dim_atts$low_name == "units")])
if (!(is.null(units))) {
cal_idx <- which(dim_atts$low_name == "calendar")
if (length(cal_idx) == 0) calendar <- "standard"
else calendar <- unlist(dim_atts$value[cal_idx])
try({
cft <- CFtime::CFtime(units, calendar, axis[[1]])
axis$timestamp = CFtime::CFtimestamp(cft)
}, silent = TRUE)
}

axis <- mutate(axis,
index = row_number(),
id = id_value,
Expand Down
3 changes: 2 additions & 1 deletion R/tidync.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ tidync.character <- function(x, what, ...) {
axis = meta$axis,
grid = meta$grid,
dimension = meta$dimension,
variable = variable)
variable = variable,
attribute = meta$attribute)
out$transforms <- hyper_transforms(out, all = TRUE)

out <- structure(out, class = "tidync")
Expand Down

0 comments on commit 42af413

Please sign in to comment.