Skip to content

Commit

Permalink
using hoardr for cache path setup, rework pg_data to use hoardr object
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed Sep 18, 2018
1 parent a2278df commit caa3cb3
Show file tree
Hide file tree
Showing 12 changed files with 176 additions and 66 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Description: Tools to interact with the 'Pangaea' Database
(<https://www.pangaea.de>), including functions for searching for data,
fetching 'datasets' by 'dataset' 'ID', and working with the 'Pangaea'
'OAI-PMH' service.
Version: 0.6.1.9100
Version: 0.6.2.9100
Authors@R: c(person("Scott", "Chamberlain",
email = "myrmecocystus@gmail.com", role = c("aut","cre")),
person("Kara", "Woo", email = "woo.kara@gmail.com", role = "aut"),
Expand All @@ -22,10 +22,10 @@ Language: en-US
Imports:
crul (>= 0.4.0),
jsonlite (>= 1.5),
rappdirs (>= 0.3.1),
xml2 (>= 1.1.1),
oai (>= 0.2.2),
tibble (>= 1.1),
hoardr (>= 0.2.0),
png
Suggests:
knitr,
Expand Down
3 changes: 2 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

S3method(print,pangaea)
S3method(print,pg_identify)
export(pg_cache)
export(pg_cache_clear)
export(pg_cache_list)
export(pg_data)
Expand All @@ -14,12 +15,12 @@ export(pg_list_sets)
export(pg_search)
export(pg_search_es)
importFrom(crul,HttpClient)
importFrom(hoardr,hoard)
importFrom(oai,get_records)
importFrom(oai,id)
importFrom(oai,list_identifiers)
importFrom(oai,list_metadataformats)
importFrom(oai,list_sets)
importFrom(rappdirs,user_cache_dir)
importFrom(tibble,as_data_frame)
importFrom(xml2,read_html)
importFrom(xml2,xml_attr)
Expand Down
51 changes: 45 additions & 6 deletions R/caching.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,45 @@
env <- new.env(parent = emptyenv())

.onLoad <- function(libname, pkgname) {
path <- rappdirs::user_cache_dir("pangaear")
env$path <- path
}
#' @title Caching
#'
#' @description Manage cached `pangaear` files with \pkg{hoardr}
#'
#' @export
#' @name pg_cache
#'
#' @details The default cache directory is
#' `paste0(rappdirs::user_cache_dir(), "/R/pangaear")`, but you can set
#' your own path using `pg_cache$cache_path_set()`
#'
#' `pg_cache$delete()` only accepts 1 file name, while
#' `pg_cache$delete_all()` doesn't accept any names, but deletes all files.
#' For deleting many specific files, use `pg_cache$delete()` in a [lapply()]
#' type call
#'
#' @section Useful user functions:
#' \itemize{
#' \item `pg_cache$cache_path_get()` get cache path
#' \item `pg_cache$cache_path_set()` set cache path
#' \item `pg_cache$list()` returns a character vector of full
#' path file names
#' \item `pg_cache$files()` returns file objects with metadata
#' \item `pg_cache$details()` returns files with details
#' \item `pg_cache$delete()` delete specific files
#' \item `pg_cache$delete_all()` delete all files, returns nothing
#' }
#'
#' @examples \dontrun{
#' pg_cache
#'
#' # list files in cache
#' pg_cache$list()
#'
#' # delete certain database files
#' # pg_cache$delete("file path")
#' # pg_cache$list()
#'
#' # delete all files in cache
#' # pg_cache$delete_all()
#' # pg_cache$list()
#'
#' # set a different cache path from the default
#' }
NULL
8 changes: 8 additions & 0 deletions R/onload.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# env <- new.env(parent = emptyenv())
pg_cache <- NULL # nocov start

# Package load hook: creates the hoardr cache client and publishes it as
# `pg_cache`.
#
# libname, pkgname: standard `.onLoad` arguments; not used in the body.
.onLoad <- function(libname, pkgname) {
  x <- hoardr::hoard()
  # register "pangaear" as the cache path on the hoardr client
  x$cache_path_set("pangaear")
  # `<<-` rebinds the `pg_cache` placeholder (initialised to NULL above)
  # rather than creating a variable local to this function
  pg_cache <<- x
} # nocov end
2 changes: 1 addition & 1 deletion R/pangaear-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#' @importFrom xml2 read_html xml_find_all xml_attr xml_text xml_find_first
#' xml_parent
#' @importFrom tibble as_data_frame
#' @importFrom rappdirs user_cache_dir
#' @importFrom hoardr hoard
#' @name pangaear-package
#' @aliases pangaear
#' @docType package
Expand Down
26 changes: 9 additions & 17 deletions R/pg_cache.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,13 @@
#' cache path clear
#' @export
#' @rdname pg_data
pg_cache_clear <- function(doi = NULL, prompt = TRUE) {
if (is.null(doi)) {
files <- list.files(env$path, full.names = TRUE)
resp <- if (prompt) {
readline(sprintf("Sure you want to clear all %s files? [y/n]: ",
length(files)))
} else {
"y"
}
if (resp == "y") unlink(files, force = TRUE) else NULL
} else {
files <- file.path(env$path, rdoi(doi))
unlink(files, force = TRUE)
}
#' @rdname pg_cache_clear-defunct
#' @param ... ignored
pg_cache_clear <- function(...) {
  # Defunct stub: arguments are ignored and every call errors, pointing the
  # caller at the replacement methods on `pg_cache`.
  note <- "defunct, see pg_cache$delete() and pg_cache$delete_all()"
  .Defunct(msg = note)
}

#' cache list
#' @export
#' @rdname pg_data
pg_cache_list <- function() list.files(env$path)
#' @rdname pg_cache_list-defunct
#' @param ... ignored
pg_cache_list <- function(...) {
  # Defunct stub: arguments are ignored and every call errors, pointing the
  # caller at `pg_cache$list()`.
  note <- "defunct, see pg_cache$list()"
  .Defunct(msg = note)
}
34 changes: 17 additions & 17 deletions R/pg_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@
#' @param overwrite (logical) Overwrite a file if one is found with the same name
#' @param mssgs (logical) print information messages. Default: `TRUE`
#' @param ... Curl options passed on to [crul::HttpClient]
#' @param prompt (logical) Prompt before clearing all files in cache? No prompt
#' used when DOIs passed in. Default: `TRUE`
#' @return One or more items of class pangaea, each with the doi, parent doi
#' (if many dois within a parent doi), url, citation, path, and data object.
#' Data object depends on what kind of file it is. For tabular data, we print
Expand All @@ -22,8 +20,9 @@
#' @author Naupaka Zimmerman, Scott Chamberlain
#' @references <https://www.pangaea.de>
#' @details Data files are stored in an operating system appropriate location.
#' Run `rappdirs::user_cache_dir("pangaear")` to get the storage location
#' on your machine.
#' Run `pg_cache$cache_path_get()` to get the storage location
#' on your machine. See [pg_cache] for more information, including how to
#' set a different base path for downloaded files.
#'
#' Some files/datasets require the user to be logged in. For now we
#' just pass on these - that is, give back nothing other than metadata.
Expand All @@ -44,23 +43,23 @@
#'
#' # Manipulating the cache
#' ## list files in the cache
#' pg_cache_list()
#' pg_cache$list()
#'
#' ## clear all data
#' # pg_cache_clear()
#' pg_cache_list()
#' # pg_cache$delete_all()
#' pg_cache$list()
#'
#' ## clear a single dataset by DOI
#' pg_data(doi='10.1594/PANGAEA.812093')
#' pg_cache_list()
#' pg_cache$list()
#' pg_cache$delete(grep("PANGAEA_812093", pg_cache$list(), value = TRUE))
#' pg_cache_list()
#' pg_cache$list()
#'
#' ## clear more than 1 dataset by DOI
#' lapply(c('10.1594/PANGAEA.746398','10.1594/PANGAEA.746400'), pg_data)
#' pg_cache_list()
#' pg_cache$list()
#' old <- grep("746398|746400", pg_cache$list(), value = TRUE)
#' invisible(lapply(old, pg_cache$delete))
#' pg_cache_list()
#' pg_cache$list()
#'
#' # search for datasets, then pass in DOIs
#' (searchres <- pg_search(query = 'birds', count = 20))
Expand All @@ -82,7 +81,7 @@ pg_data <- function(doi, overwrite = TRUE, mssgs = TRUE, ...) {
citation <- attr(dois, "citation")
if (mssgs) message("Downloading ", length(dois), " datasets from ", doi)
invisible(lapply(dois, function(x) {
if ( !is_pangaea(env$path, x) ) {
if ( !is_pangaea(pg_cache$cache_path_get(), x) ) {
pang_GET(url = paste0(base(), x), doi = x, overwrite, ...)
}
}))
Expand All @@ -103,7 +102,8 @@ print.pangaea <- function(x, ...) {
}

pang_GET <- function(url, doi, overwrite, ...){
dir.create(env$path, showWarnings = FALSE, recursive = TRUE)
bpath <- pg_cache$cache_path_get()
dir.create(bpath, showWarnings = FALSE, recursive = TRUE)

cli <- crul::HttpClient$new(url = url,
opts = list(followlocation = TRUE, ...))
Expand Down Expand Up @@ -133,12 +133,12 @@ pang_GET <- function(url, doi, overwrite, ...){
)
switch(
res$response_headers$`content-type`,
`image/png` = png::writePNG(png::readPNG(res$content), file.path(env$path, fname)),
`image/png` = png::writePNG(png::readPNG(res$content), file.path(bpath, fname)),
`text/tab-separated-values;charset=UTF-8` = {
writeLines(res$parse("UTF-8"), file.path(env$path, fname))
writeLines(res$parse("UTF-8"), file.path(bpath, fname))
},
`application/zip` = {
path <- file(file.path(env$path, fname), "wb")
path <- file(file.path(bpath, fname), "wb")
writeBin(res$content, path)
close(path)
}
Expand All @@ -147,7 +147,7 @@ pang_GET <- function(url, doi, overwrite, ...){

process_pg <- function(x, doi, citation) {
lapply(x, function(m) {
file <- list.files(env$path, pattern = gsub("/|\\.", "_", m),
file <- list.files(pg_cache$cache_path_get(), pattern = gsub("/|\\.", "_", m),
full.names = TRUE)
if (length(file) == 0) {
list(
Expand Down
50 changes: 50 additions & 0 deletions man/pg_cache.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/pg_cache_clear-defunct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 14 additions & 0 deletions man/pg_cache_list-defunct.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 12 additions & 20 deletions man/pg_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit caa3cb3

Please sign in to comment.