Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

convert to RNetCDF and bring in crs handling fixes #87 #88

Closed
wants to merge 8 commits into from
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Imports:
methods,
parallel,
classInt (>= 0.2-2),
ncdf4,
RNetCDF,
ncmeta (>= 0.0.3),
rlang,
units
Expand Down
5 changes: 3 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,9 @@ export(st_warp)
export(st_xy2sfc)
import(sf)
import(units)
importFrom(RNetCDF,close.nc)
importFrom(RNetCDF,open.nc)
importFrom(RNetCDF,var.get.nc)
importFrom(abind,abind)
importFrom(abind,adrop)
importFrom(abind,asub)
Expand All @@ -106,8 +109,6 @@ importFrom(methods,as)
importFrom(methods,new)
importFrom(methods,slot)
importFrom(methods,slotNames)
importFrom(ncdf4,nc_open)
importFrom(ncdf4,ncvar_get)
importFrom(ncmeta,nc_meta)
importFrom(parallel,parApply)
importFrom(rlang,"%||%")
Expand Down
153 changes: 113 additions & 40 deletions R/ncdf.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#' @importFrom ncdf4 nc_open ncvar_get
#' @importFrom RNetCDF open.nc var.get.nc close.nc
#' @importFrom ncmeta nc_meta
#' @importFrom stats setNames
NULL
Expand All @@ -10,15 +10,21 @@ NULL
}


#' read ncdf file into stars object
#' Read NetCDF file into stars object
#'
#' Read data from a file using the NetCDF library directly.
#' Read data from a file or URL using the RNetCDF library directly.
#'
#' The following logic is applied to coordinate axes. If any coordinate axes have
#' regularly spaced coordinates they are reduced to the
#' The following logic is applied to coordinate variables. If any dimensions have
#' regularly spaced coordinate variables they are reduced to the
#' offset/delta form with 'affine = c(0, 0)', otherwise the values of the coordinates
#' are stored. If the data has two or more dimensions and the first two are regular
#' are stored and used to define an irregular rectilinear grid.
#'
#' If the data has two or more dimensions and the first two are regular
#' they are nominated as the 'raster' for plotting.
#'
#' If a grid_mapping is found in the NetCDF file, an attempt to convert it to a
#' projection is made. If no grid_mapping is found, WGS84 lat/lon is assumped.
#'
#' @examples
#' f <- system.file("nc/reduced.nc", package = "stars")
#' read_ncdf(f)
Expand All @@ -29,68 +35,106 @@ NULL
#' @param .x NetCDF file or source
#' @param ... ignored
#' @param var variable name or names (they must be on matching grids)
#' @param ncsub matrix of start, count columns
#' @param ncsub matrix of start, count columns. NA count gets all. Axis order must match that of the variable that will be requested.
#' @param curvilinear length two character vector with names of subdatasets holding longitude and latitude values for all raster cells.
#' @details
#' If `var` is not set the first set of variables on a shared grid is used.
#' It's supposed to be the grid with the most dimensions, but there's no control
#' yet (see `ncmeta::nc_grids(.x)` for the current assumption).
#' If `var` is not set, variables that are not coordinate variables are used.
#' Data sources with non-coordinate variables that use different axes or axis orders are not supported.
#'
#' \code{start} and \code{count} columns of ncsub must correspond to the variable dimemsion (nrows)
#' and be valid index using `ncdf4::ncvar_get` convention (start is 1-based).
#' and be valid index using `RNetCDF::var.get.nc` convention (start is 1-based).
#' @export
read_ncdf = function(.x, ..., var = NULL, ncsub = NULL, curvilinear = character(0)) {
meta = ncmeta::nc_meta(.x)
# Don't want scalar
# todo handle choice of grid
nas <- is.na(meta$axis$dimension)
if (any(nas)) meta$axis$dimension[nas] <- -1

coord_vars <- ncmeta::nc_coord_var(.x) # coord_vars as determined by metadata/attributes

cds <- check_cds(coord_vars, meta) # eliminates 2d coord vars when 1d vars exist.

coord_vars <- dplyr::filter(coord_vars,
apply(coord_vars[, c("X", "Y", "Z", "T")], 1,
function(x) any(x %in% cds$variable)))

if (is.null(var)) {
ix <- 1
if (meta$grid$grid[ix] == "S") {
ix <- which(!meta$grid$grid == "S")[1L]

if (length(ix) < 1) stop("only scalar variables found, not yet supported")
}
var = meta$grid$variable[meta$grid$grid[ix] == meta$grid$grid]
var <- meta$variable$name[!meta$variable$name %in% cds$variable &
!meta$variable$name %in% coord_vars$bounds &
meta$variable$ndims > 1]
if(any(meta$grid$grid == "S")) var <- var[var != meta$grid$variable[meta$grid$grid == "S"]]
}
##
dims_index = meta$axis$dimension[meta$axis$variable == var[1L]]

nas <- is.na(meta$axis$dimension)
if (any(nas)) meta$axis$dimension[nas] <- -1

# This ensures that dims and ncsub are valid for all variables we are looking at.
dims_index <- unique(lapply(var, function(.v) meta$axis$dimension[meta$axis$variable == .v]))
dims_index <- dims_index[sapply(dims_index, function(x) all(x != -1))]

if(length(dims_index) > 1) stop("Variables with different axis orders found, select one to continue.") #nocov

dims_index <- dims_index[[1]] # dims_index is in the axis order used by all data variables
dims = meta$dimension[match(dims_index, meta$dimension$id), ]

coord_var_axis <- coord_vars[!coord_vars$variable %in% var, ]
coord_vars <- coord_vars[coord_vars$variable %in% var, ] # only data variables

XYZT_dim <- suppressWarnings(sapply(c("X", "Y", "Z", "T"),
function(axis)
meta$axis[meta$axis$variable ==
coord_var_axis[[axis]][!is.na(coord_var_axis[[axis]])], ]$dimension))
XYZT_dim <- XYZT_dim[lengths(XYZT_dim) > 0]

reorder_var <- c()
if(all(!dims$id == as.numeric(XYZT_dim))) {
warning("Found non-canonical axis order in NetCDF unexpected bahavior may result.")
reorder_var <- match(dims$id, as.numeric(XYZT_dim))
}

## need to validate existing ncsub here
if (is.null(ncsub)) {
ncsub = cbind(start = 1, count = dims$length)
rownames(ncsub) = dims$name
if(prod(ncsub[, "count"]) * length(var) > 1e+08) stop("very large data requested, are you sure?")
} else {
## needs more attention
if (nrow(dims) != nrow(ncsub)) stop("input ncsub doesn't match available dims") # nocov
if (any(ncsub[, "start"] < 1) || any((ncsub[, "count"] - ncsub[, "start"] + 1) > dims$length)) stop("start or count out of bounds") # nocov
not_na <- !is.na(ncsub[, "count"])
if (any(ncsub[, "start"] < 1) ||
any((ncsub[, "count"][not_na] -
ncsub[, "start"][not_na] + 1) > dims$length)) stop("start or count out of bounds") # nocov
}

nc = ncdf4::nc_open(.x, suppress_dimvals = TRUE)
on.exit(ncdf4::nc_close(nc), add = TRUE)
out = lapply(var, function(.v) ncdf4::ncvar_get(nc,
varid = .v,
start = ncsub[, "start", drop = TRUE],
count = ncsub[, "count", drop = TRUE],
collapse_degen = FALSE))
nc = RNetCDF::open.nc(.x)
on.exit(RNetCDF::close.nc(nc), add = TRUE)
out = lapply(var, function(.v) RNetCDF::var.get.nc(nc,
variable = .v,
start = ncsub[, "start", drop = TRUE],
count = ncsub[, "count", drop = TRUE],
collapse = FALSE,
unpack = TRUE))
out = setNames(out, var)
## cannot assume we have coord dims
## - so create them as 1:length if needed

# If any counts are NA to get all this sets them to true size.
# Given assumptions above, this must be true.
ncsub[, "count"] <- dim(out[[1]])

## cannot assume we have 1d coord variables
## - so create them as 1:length(dim) if needed
coords = setNames(vector("list", length(dims$name)), dims$name)

for (ic in seq_along(coords)) {
if (!is.null(ncsub)) {
subidx <- seq(ncsub[ic, "start"], length = ncsub[ic, "count"])
} else {
subidx <- seq_len(length(coords[[ic]]))
}
## create_dimvar means we can var_get it
if (nc$dim[[dims$name[ic]]]$create_dimvar) {
coords[[ic]] <- ncdf4::ncvar_get(nc, varid = dims$name[ic])[subidx]

## if there is a coordinate variable that matches the dimension name
if (dims$name[ic] %in% c(coord_vars$X, coord_vars$Y, coord_vars$Z, coord_vars$T)) {
coords[[ic]] <- RNetCDF::var.get.nc(nc, variable = dims$name[ic])[subidx] # could use start/count here too?
# otherwise coordinate variables are bound to variables and need to be handled differently.
} else {
coords[[ic]] <- subidx##seq_len(dims$length[ic])
coords[[ic]] <- subidx
}
}

Expand Down Expand Up @@ -127,14 +171,43 @@ read_ncdf = function(.x, ..., var = NULL, ncsub = NULL, curvilinear = character(

ret = st_stars(out, dimensions)
if (length(curvilinear) == 2) {
curvi_coords = lapply(curvilinear, function(.v) ncdf4::ncvar_get(nc,
varid = .v,
curvi_coords = lapply(curvilinear, function(.v) RNetCDF::var.get.nc(nc,
variable = .v,
## note there subtle subsetting into x,y
start = ncsub[1:2, "start", drop = TRUE],
count = ncsub[1:2, "count", drop = TRUE],
collapse_degen = FALSE))
collapse = FALSE, unpack = TRUE))
names(curvi_coords) <- c("x", "y")
ret = st_as_stars(ret, curvilinear = curvi_coords)
}

try({
gm_atts <- ncmeta::nc_grid_mapping_atts(.x)

if(length(unique(gm_atts$name[gm_atts$name == "grid_mapping_name"])) > 1) {
warning("Multiple grid mappings found for this NetCDF source. Not setting CRS.")
}

if("data_variable" %in% names(gm_atts)) {
gm_atts <- gm_atts[gm_atts$data_variable == gm_atts$data_variable[1],
c("name", "value")]
}

sf::st_crs(ret) <- sf::st_crs(ncmeta::nc_gm_to_prj(gm_atts))
})

ret
}

check_cds <- function(coord_vars, meta) {
cds <- unique(as.vector(unlist(coord_vars[, c("X", "Y", "Z", "T")])))
cds <- cds[!is.na(cds)]
cds <- tibble::tibble(variable = cds) %>%
dplyr::left_join(coord_vars, by = c("variable"))
if(sum(!is.na(cds$X)) > 1 | sum(!is.na(cds$Y)) > 1 | sum(!is.na(cds$Z)) > 1 | sum(!is.na(cds$T)) > 1)
cds <- dplyr::filter(cds, variable %in% X |
variable %in% Y |
variable %in% Z |
variable %in% T)
return(cds)
}
Binary file added inst/nc/EURO-CORDEX_81_DOMAIN000.nc
Binary file not shown.
Binary file added inst/nc/bcsd_obs_1999.nc
Binary file not shown.
Binary file added inst/nc/bcsd_obs_1999_borked.nc
Binary file not shown.
24 changes: 14 additions & 10 deletions man/read_ncdf.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 0 additions & 6 deletions tests/netcdf.R

This file was deleted.

75 changes: 0 additions & 75 deletions tests/netcdf.Rout.save

This file was deleted.

Loading