-
Notifications
You must be signed in to change notification settings - Fork 3
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
listdir in HttpStore and use consolidated metadata #82
Changes from all commits
92d6aed
170c5ec
e331db4
ef6697f
247d8e5
1093637
29d8787
03d4436
f5a4476
9b9b557
25cb4a8
58286b7
2fa35a1
43805d5
1c24e2d
dd54b1d
0515f8e
559cef3
b5fb774
a072824
6fd6178
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
* text=auto | ||
tests/fixtures/**/* -diff |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,4 +12,5 @@ vignettes/data/ | |
*.Rproj | ||
tests/testthat/test_data | ||
.ipynb_checkpoints/ | ||
*.ipynb | ||
*.ipynb | ||
.Renviron |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -808,13 +808,15 @@ zarr_save_array <- function(store, arr, ...) { | |
zarr_open <- function(store = NA, mode = NA, path = NA, ...) { | ||
kwargs <- list(...) | ||
|
||
store <- normalize_store_arg(store) | ||
path <- normalize_storage_path(path) | ||
|
||
if(is_na(mode)) { | ||
mode <- "a" | ||
if(inherits(store, "HttpStore")) | ||
mode <- "r" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. changes here get zarr_open working properly with HttpStore |
||
} | ||
|
||
store <- normalize_store_arg(store) | ||
path <- normalize_storage_path(path) | ||
|
||
if(mode %in% c("w", "w-", "x")) { | ||
if("shape" %in% names(kwargs)) { | ||
return(zarr_open_array(store=store, mode=mode, path=path, ...)) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -245,6 +245,9 @@ normalize_store_arg <- function(store, storage_options=NA, mode=NA) { | |
} | ||
|
||
if(is.character(store)) { | ||
if(grepl("^https?://", store)) { | ||
return(HttpStore$new(store)) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needed to explicitly check for http / https to get HttpStores to open with convenience functions |
||
if(grepl("://", store, fixed=TRUE) || grepl("::", store, fixed=TRUE)) { | ||
# TODO: return FSStore | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ Store <- R6::R6Class("Store", | |
erasable = NULL, | ||
listable = NULL, | ||
store_version = NULL, | ||
zmetadata = NULL, | ||
#' @keywords internal | ||
listdir_from_keys = function(path) { | ||
# TODO | ||
|
@@ -115,7 +116,12 @@ Store <- R6::R6Class("Store", | |
#' @return A boolean value. | ||
contains_item = function(key) { | ||
|
||
} | ||
}, | ||
#' @description | ||
#' Get consolidated metadata if it exists. | ||
get_consolidated_metadata = function() { | ||
return(private$zmetadata) | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This just returns NULL unless the initialize function sets zmetadata to something. |
||
) | ||
) | ||
|
||
|
@@ -355,16 +361,31 @@ HttpStore <- R6::R6Class("HttpStore", | |
options = NULL, | ||
headers = NULL, | ||
client = NULL, | ||
zmetadata = NULL, | ||
mem_get = NULL, | ||
cache_time_seconds = 3600, | ||
make_request = function(item) { | ||
# Remove leading slash if necessary. | ||
if(substr(item, 1, 1) == "/") { | ||
key <- substr(item, 2, length(item)) | ||
} else { | ||
key <- item | ||
} | ||
key <- item_to_key(item) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was used in a couple places so I move it to utils |
||
|
||
# mem_get caches in memory on a per-session basis. | ||
res <- private$mem_get(private$client, | ||
paste(private$base_path, key, sep="/")) | ||
|
||
res <- private$client$get(paste(private$base_path, key, sep="/")) | ||
return(res) | ||
}, | ||
get_zmetadata = function() { | ||
res <- private$make_request(".zmetadata") | ||
|
||
if(res$status_code == 200) { | ||
out <- tryCatch({ | ||
jsonlite::fromJSON(res$parse("UTF-8")) | ||
}, error = \(e) { | ||
warning("\n\nError parsing .zmetadata:\n\n", e) | ||
NULL | ||
}) | ||
} else out <- NULL | ||
|
||
return(out) | ||
} | ||
), | ||
public = list( | ||
|
@@ -376,8 +397,8 @@ HttpStore <- R6::R6Class("HttpStore", | |
initialize = function(url, options = NA, headers = NA) { | ||
super$initialize() | ||
# Remove trailing slash if necessary. | ||
if(substr(url, length(url), length(url)) == "/") { | ||
private$url <- substr(url, 1, length(url)-1) | ||
if(substr(url, nchar(url), nchar(url)) == "/") { | ||
private$url <- substr(url, 1, nchar(url)-1) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This just stuck out as obviously wrong There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for catching this |
||
} else { | ||
private$url <- url | ||
} | ||
|
@@ -395,6 +416,11 @@ HttpStore <- R6::R6Class("HttpStore", | |
opts = private$options, | ||
headers = private$headers | ||
) | ||
|
||
private$mem_get <- memoise::memoise(function(client, path) client$get(path), | ||
~memoise::timeout(private$cache_time_seconds)) | ||
|
||
private$zmetadata <- private$get_zmetadata() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. replaces NULL with .zmetadata if it can be retrieved. |
||
}, | ||
#' @description | ||
#' Get an item from the store. | ||
|
@@ -409,8 +435,50 @@ HttpStore <- R6::R6Class("HttpStore", | |
#' @param item The item key. | ||
#' @return A boolean value. | ||
contains_item = function(item) { | ||
res <- private$make_request(item) | ||
return(res$status_code == 200) | ||
|
||
# use consolidated metadata if it exists | ||
if(!is.null(try_from_zmeta(item_to_key(item), self))) { | ||
return(TRUE) | ||
} else if(!is.null(self$get_consolidated_metadata())) { | ||
return(FALSE) | ||
} else { | ||
res <- private$make_request(item) | ||
|
||
return(res$status_code == 200) | ||
} | ||
|
||
}, | ||
#' @description | ||
#' Fetches .zmetadata from the store evaluates its names | ||
#' @return character vector of unique keys that do note start with a `.`. | ||
listdir = function() { | ||
|
||
if(!is.null(private$zmetadata)) { | ||
tryCatch({ | ||
out <- names(private$zmetadata$metadata) |> | ||
stringr::str_subset("^\\.", negate = TRUE) |> | ||
stringr::str_split("/") |> | ||
vapply(\(x) head(x, 1), "") |> | ||
unique() | ||
}, error = \(e) warning("\n\nError parsing .zmetadata:\n\n", e)) | ||
} else { | ||
out <- NULL | ||
message(".zmetadata not found for this http store. Can't listdir") | ||
} | ||
|
||
return(out) | ||
|
||
}, | ||
#' @description | ||
#' Get cache time of http requests. | ||
get_cache_time_seconds = function() { | ||
return(private$cache_time_seconds) | ||
}, | ||
#' @description | ||
#' Set cache time of http requests. | ||
#' @param seconds number of seconds until cache is invalid -- 0 for no cache | ||
set_cache_time_seconds = function(seconds) { | ||
private$cache_time_seconds <- seconds | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we support environment variables / setting / clearing with some global action? It would be nice to be able to manually modify cache behavior from way up the calling stack where you don't have access to modify the store. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this idea - it could be left as a follow-up issue to complete or done here, either way is fine with me There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm tight on time today -- I'll open an issue to follow up on it. |
||
} | ||
) | ||
) | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -79,10 +79,18 @@ ZarrArray <- R6::R6Class("ZarrArray", | |
#' @description | ||
#' (Re)load metadata from store. | ||
load_metadata_nosync = function() { | ||
|
||
mkey <- paste0(private$key_prefix, ARRAY_META_KEY) | ||
meta_bytes <- private$store$get_item(mkey) | ||
meta <- private$store$metadata_class$decode_array_metadata(meta_bytes) | ||
|
||
meta <- try_from_zmeta(mkey, private$store) | ||
|
||
if(is.null(meta)) { | ||
meta_bytes <- private$store$get_item(mkey) | ||
meta <- private$store$metadata_class$decode_array_metadata(meta_bytes) | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Put this here since this is where a list is retrieved and the zmeta cache is already in that form. |
||
private$meta <- meta | ||
|
||
if(is.list(meta$shape)) { | ||
private$shape <- as.integer(meta$shape) | ||
} else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -136,19 +136,34 @@ ZarrGroup <- R6::R6Class("ZarrGroup", | |
stop("ContainsArrayError(path)") | ||
} | ||
|
||
# initialize metadata | ||
meta_bytes <- tryCatch({ | ||
m_key <- paste0(private$key_prefix, GROUP_META_KEY) | ||
meta_bytes <- store$get_item(m_key) | ||
}, error = function(cond) { | ||
if(is_key_error(cond)) { | ||
stop("GroupNotFoundError(path) in Group$new") | ||
} else { | ||
stop(cond$message) | ||
} | ||
}) | ||
private$meta <- private$store$metadata_class$decode_group_metadata(meta_bytes) | ||
|
||
m_key <- paste0(private$key_prefix, GROUP_META_KEY) | ||
|
||
# use consolidated metadata if exists | ||
meta <- try_from_zmeta(m_key, store) | ||
|
||
if(!is.null(meta)) { | ||
|
||
private$meta <- meta | ||
|
||
} else { | ||
|
||
# initialize metadata | ||
meta_bytes <- tryCatch({ | ||
|
||
meta_bytes <- store$get_item(m_key) | ||
|
||
}, error = function(cond) { | ||
if(is_key_error(cond)) { | ||
stop("GroupNotFoundError(path) in Group$new") | ||
} else { | ||
stop(cond$message) | ||
} | ||
}) | ||
|
||
private$meta <- private$store$metadata_class$decode_group_metadata(meta_bytes) | ||
|
||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is mostly reindentation -- just trying to pull the info from zmeta. |
||
# setup attributes | ||
a_key <- paste0(private$key_prefix, ATTRS_KEY) | ||
private$attrs <- Attributes$new(store, key = a_key, read_only = read_only, cache = cache_attrs, synchronizer = synchronizer) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{ | ||
"CDI": "Climate Data Interface version 1.5.6 (http://code.zmaw.de/projects/cdi)", | ||
"CDO": "Climate Data Operators version 1.5.6.1 (http://code.zmaw.de/projects/cdo)", | ||
"Conventions": "CF-1.0", | ||
"History": "Translated to CF-1.0 Conventions by Netcdf-Java CDM (CFGridWriter2)\nOriginal Dataset = bcsd_obs; Translation Date = 2019-01-03T20:03:59.756Z", | ||
"Metadata_Conventions": "Unidata Dataset Discovery v1.0", | ||
"NCO": "netCDF Operators version 4.7.6 (Homepage = http://nco.sf.net, Code = http://github.com/nco/nco)", | ||
"acknowledgment": "Maurer, E.P., A.W. Wood, J.C. Adam, D.P. Lettenmaier, and B. Nijssen, 2002, A Long-Term Hydrologically-Based Data Set of Land Surface Fluxes and States for the Conterminous United States, J. Climate 15(22), 3237-3251", | ||
"cdm_data_type": "Grid", | ||
"date_created": "2014", | ||
"date_issued": "2015-11-01", | ||
"geospatial_lat_max": 37.0625, | ||
"geospatial_lat_min": 33.0625, | ||
"geospatial_lon_max": -74.9375, | ||
"geospatial_lon_min": -84.9375, | ||
"history": "Mon Jan 7 18:59:08 2019: ncks -4 -L3 bcsd_obs_1999_two_var.nc bcsd_obs_1999_two_var.nc.comp\nThu May 08 12:07:18 2014: cdo monsum gridded_obs/daily/gridded_obs.daily.Prcp.1950.nc gridded_obs/monthly/gridded_obs.monthly.pr.1950.nc", | ||
"id": "cida.usgs.gov/bcsd_obs", | ||
"institution": "Varies, see http://gdo-dcp.ucllnl.org/downscaled_cmip_projections/", | ||
"keywords": "Atmospheric Temperature, Air Temperature Atmosphere, Precipitation, Rain, Maximum Daily Temperature, Minimum Daily Temperature", | ||
"keywords_vocabulary": "GCMD Science Keywords", | ||
"license": "Freely available", | ||
"naming_authority": "cida.usgs.gov", | ||
"processing_level": "Gridded meteorological observations", | ||
"publisher_email": "dblodgett@usgs.gov", | ||
"publisher_name": "Center for Integrated Data Analytics", | ||
"publisher_url": "https://www.cida.usgs.gov/", | ||
"summary": "These are the monthly observational data used for BCSD downscaling. See: http://gdo-dcp.ucllnl.org/downscaled_cmip_projections/dcpInterface.html#About for more information.", | ||
"time_coverage_end": "1999-12-15T00:00", | ||
"time_coverage_resolution": "P1M", | ||
"time_coverage_start": "1950-01-15T00:00", | ||
"title": "Monthly Gridded Meteorological Observations" | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"zarr_format": 2 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this ensures the cache gets cleared when DESCRIPTION changes