-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
st_as_stars for tidync #68
Comments
Better
## devtools::install_github("hypertidy/tidync@ropensci-review")
st_as_stars.tidync <- function(x, ...) {
## x is a tidync
## ignore unit details for the moment
data <- lapply(tidync::hyper_slice(x, drop = FALSE),
units::as_units)
## this needs to be a bit easier ...
transforms <- tidync:::active_axis_transforms(x)
dims <- lapply(names(transforms), function(trname) {
transform <- transforms[[trname]]
values <- transform[[trname]]
stars:::create_dimension(
values = values)
})
names(dims) <- names(transforms)
## let's fake it to see if it can work
names(dims)[1:2] <- c("x", "y")
geotransform_xy <- c(dims[[1]]$offset, dims[[1]]$delta, 0, dims[[2]]$offset, 0, dims[[2]]$delta)
dims[[1]]$geotransform <- dims[[2]]$geotransform <- geotransform_xy
structure(data, dimensions = structure(dims, class = "dimensions"), class = "stars")
}
f <- system.file("nc/reduced.nc", package = "stars")
st <- read_stars(f)
x <- tidync::tidync(f)
plot(st_as_stars(x))
f <- system.file("tests/data/test.nc4", package = "easyncdf")
x <- tidync::tidync(f) %>% tidync::activate("D3,D2,D1,D0")
plot(st_as_stars(x))
## this works
plot(st_as_stars(x %>% tidync::hyper_filter(ens = index < 5)))
## but not this
plot(st_as_stars(x %>% tidync::hyper_filter(ens = index < 5, lon = lon > 236))) |
This is right now Key problem is the extraction of metadata, and special case for values where there's only one. st_as_stars.tidync <- function(x, ...) {
## x is a tidync
## ignore unit details for the moment
data <- lapply(tidync::hyper_slice(x, drop = FALSE),
units::as_units)
## this needs to be a bit easier ...
transforms <- tidync:::active_axis_transforms(x)
dims <- lapply(names(transforms), function(trname) {
transform <- transforms[[trname]] %>% dplyr::filter(selected)
values <- transform[[trname]]
if (length(values) > 1) {
stars:::create_dimension(
values = values)
} else {
## a hack for now when there's only one value
structure(list(from = values, to = values,
offset = values, delta = NA_real_,
geotransform = rep(NA_real_, 6),
refsys = NA_character_,
point = NA,
values = NULL), class = "dimension")
}
})
names(dims) <- names(transforms)
## let's fake it to see if it can work
names(dims)[1:2] <- c("x", "y")
geotransform_xy <- c(dims[[1]]$offset, dims[[1]]$delta, 0, dims[[2]]$offset, 0, dims[[2]]$delta)
dims[[1]]$geotransform <- dims[[2]]$geotransform <- geotransform_xy
structure(data, dimensions = structure(dims, class = "dimensions"), class = "stars")
}
library(stars)
f <- raadfiles::altimetry_daily_files()$fullname[1]
x <- tidync::tidync(f)
## this works
plot(st_as_stars(x %>% tidync::hyper_filter(longitude = index > 1000,
latitude = latitude < -20)))
|
This also works f <- system.file("tests/data/test.nc4", package = "easyncdf")
x <- tidync::tidync(f) %>% tidync::activate("D3,D2,D1,D0")
plot(st_as_stars(x))
## this works
plot(st_as_stars(x %>% tidync::hyper_filter(ens = index <= 1)))
but, hyper_slice is still dropping degenerate dims so that has to stop. |
It's ok in "ropensci-review" branch, the drop = FALSE is passed down to the "collapse_degen" arg of RNetCDF. |
Do forget all content above, this is working: ## devtools::install_github("hypertidy/tidync")
st_as_stars.tidync <- function(x, ...) {
## x is a tidync
## ignore unit details
## and ... passes select_var to hyper_array
data <- lapply(tidync::hyper_array(x, drop = FALSE, ...),
units::as_units)
## this needs to be a bit easier ...
transforms <- tidync:::active_axis_transforms(x)
dims <- lapply(names(transforms), function(trname) {
transform <- transforms[[trname]] %>% dplyr::filter(selected)
values <- transform[[trname]]
if (length(values) > 1) {
stars:::create_dimension(
values = values)
} else {
## a hack for now when there's only one value
structure(list(from = values, to = values,
offset = values, delta = NA_real_,
geotransform = rep(NA_real_, 6),
refsys = NA_character_,
point = NA,
values = NULL), class = "dimension")
}
})
names(dims) <- names(transforms)
## I have to fake the first to dims to be x/y, and to work as a geotransform
## rather than give values (degenerate rectilinear) as NetCDF usually delivers
names(dims)[1:2] <- c("x", "y")
geotransform_xy <- c(dims[[1]]$offset, dims[[1]]$delta, 0, dims[[2]]$offset, 0, dims[[2]]$delta)
dims[[1]]$geotransform <- dims[[2]]$geotransform <- geotransform_xy
structure(data, dimensions = structure(dims, class = "dimensions"), class = "stars")
}
library(stars)
easyfile <- system.file("tests/data/test.nc4", package = "easyncdf")
easyx <- tidync::tidync(easy) %>% tidync::activate("D3,D2,D1,D0")
plot(st_as_stars(easyx %>% hyper_filter(ens = index == 2)))
## ftp.sltac.cls.fr/Core/SEALEVEL_GLO_PHY_L4_REP_OBSERVATIONS_008_047/dataset-duacs-rep-global-merged-allsat-phy-l4-v3/1993/dt_global_allsat_phy_l4_19930101_20170110.nc
altfile <- raadfiles::altimetry_daily_files()$fullname[1]
altx <- tidync(altfile)
## these are the cell intervals, on dimension nv
altx %>% activate("lat_bnds") %>% st_as_stars()
altx %>% activate("lon_bnds") %>% st_as_stars()
library(dplyr)
altx %>%
hyper_filter(longitude = between(longitude, 100, 155),
latitude = between(latitude, -50, -25)) %>%
st_as_stars(select_var = c("ugos", "vgos")) %>%
mutate(eke = sqrt(ugos^2 + vgos^2)) %>%
dplyr::select(eke) %>% plot(breaks = "equal", col = viridis::viridis(64))
EDIT: functionality now in master branch, and hyper_array does job of old hyper_slice |
The stars follow ups for me:
|
Yes.
NetCDF cannot do this, it's a major limitation in the API. EDIT: wrong |
Great, helpful! I've felt like there's some obscure API function I don't understand that maybe allows it. GDAL must cleverly buffer it somehow, it's pretty good at this. Do you have definitive doc or discussion somewhere about it? (Presumably it's partly why HDF5 has become the preferred lib in many modern applications? And, presumably, NetCDF4 types can be seen with HDF5 functionality that does allow this, but we're mostly seeing them through the classic interpretation). |
Whoops, I was 100% wrong. Strides are actually defined: see here. The Fortran and C APIs have this implemented, I have no idea why it is not so in |
However, the Python documentation says:
So maybe that's why. |
Hi Michael: Way at the top you state that netCDF is y-up. Actually not so, and that is one of the problems gdal has. Most netCDF files are y-up, but i can give you examples that are not, such as the Hadley SST or some VIIRS datasets. A well-constructed netCDF file should contain this info in the metadata, and the trick is to read that info, rather than assume which direction "y" goes. |
What do you mean by "y-up"? You mean increasing/decreasing y values? |
From Michael's start of this thread: geotransform is not correct, what I thought would work does not (NetCDF is y-up so GDAL corrects for this) I was assuming this means the y values are increasing, which they are in most, but not all, netCDF files (in most cases this is latitude). I can give several well-known counter examples to this. The trick to properly dealing with well-constructed netCDF files (say fully CF compliant) is not to assume about the coordinate variables, but to use the information in the file to know about them. That is the one of the benefits of self-documenting files. For quite awhile gdal had problems with any number of netcdf files (they came out flipped) because it did assume how the y-values ran. |
I know it's not generally true, it's just more likely and I tire of endless caveats when writing casually. It's just true for this example. I don't care for tools that think this can be automated anyway, ultimately it's the user interpretation that matters and tidync will try to stay out of dictating how things be used. For stars in its current state I have to make some shortcuts while I learn what's there. |
Still need to sort out curvilinear case, and there's something wrong with the object it doesn't print correctly st_as_stars.tidync <- function(x, ...) {
## x is a tidync
## ignore unit details for the moment
data <- lapply(tidync::hyper_array(x, drop = FALSE),
units::as_units)
## this needs to be a bit easier ...
transforms <- tidync:::active_axis_transforms(x)
dims <- lapply(names(transforms), function(trname) {
transform <- transforms[[trname]] %>% dplyr::filter(selected)
values <- transform[[trname]]
if (length(values) > 1) {
stars:::create_dimension(
values = values)
} else {
## a hack for now when there's only one value
structure(list(from = values, to = values,
offset = values, delta = NA_real_,
geotransform = rep(NA_real_, 6),
refsys = NA_character_,
point = NA,
values = NULL),
class = "dimension")
}
})
names(dims) <- names(transforms)
if (length(transforms)>= 2L) {
r <- structure(list(affine = c(0, 0),
dimensions = names(dims)[1:2],
curvilinear = FALSE, class = "stars_raster"))
attr(dims, "raster") <- r
}
geotransform_xy <- c(dims[[1]]$offset, dims[[1]]$delta, 0, dims[[2]]$offset, 0, dims[[2]]$delta)
dims[[1]]$geotransform <- dims[[2]]$geotransform <- geotransform_xy
structure(data, dimensions = structure(dims, class = "dimensions"),
class = "stars")
}
f <- "~/Git/rasterwise/extdata/large-mem/pp_ens_mean_0.25deg_reg_v19.0e.nc"
library(tidync)
tnc <- tidync(f)
x <- tnc %>% hyper_filter(time = index < 16) %>% st_as_stars.tidync()
library(stars)
#> Loading required package: abind
#> Loading required package: sf
#> Linking to GEOS 3.7.0, GDAL 2.4.0, PROJ 5.2.0
plot(x) Created on 2019-04-19 by the reprex package (v0.2.1) |
Here's a first stab.
Subsetting and then creating stars works, could make a nice pipeline for lazy-read from NetCDF without GDAL.
But plotting doesn't, probably the geotransform is not generate properly.
The text was updated successfully, but these errors were encountered: