From d470c0bbf45e05a833693ff9e9536d13cb8e5de7 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 5 Oct 2020 18:50:39 -0500 Subject: [PATCH 01/25] Use memoryCache from cache package --- DESCRIPTION | 7 +++++-- NAMESPACE | 1 + R/memoise.R | 24 +++++++++++++++++------- man/memoise.Rd | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2e44920..8be0ebd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: memoise Title: Memoisation of Functions Version: 1.1.0.9000 -Authors@R: +Authors@R: c(person(given = "Hadley", family = "Wickham", role = "aut", @@ -29,7 +29,8 @@ License: MIT + file LICENSE URL: https://github.com/r-lib/memoise BugReports: https://github.com/r-lib/memoise/issues Imports: - digest (>= 0.6.3) + digest (>= 0.6.3), + cache Suggests: aws.s3, covr, @@ -37,5 +38,7 @@ Suggests: googleCloudStorageR, httr, testthat +Remotes: + wch/cache Encoding: UTF-8 RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 159abbb..45c4745 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,5 +13,6 @@ export(is.memoized) export(memoise) export(memoize) export(timeout) +import(cache) importFrom(digest,digest) importFrom(stats,setNames) diff --git a/R/memoise.R b/R/memoise.R index e9347ab..5d51e28 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -115,7 +115,14 @@ #' memA4 <- memoise(a, ~timeout(10)) #' memA4(2) #' @importFrom stats setNames -memoise <- memoize <- function(f, ..., envir = environment(f), cache = cache_memory(), omit_args = c()) { +#' @import cache +memoise <- memoize <- function( + f, + ..., + envir = environment(f), + cache = cache::memoryCache(), + omit_args = c()) +{ f_formals <- formals(args(f)) if(is.memoised(f)) { stop("`f` must not be memoised.", call. 
= FALSE) @@ -142,14 +149,17 @@ memoise <- memoize <- function(f, ..., envir = environment(f), cache = cache_mem args <- c(lapply(called_args, eval, parent.frame()), lapply(default_args, eval, envir = environment())) - hash <- encl$`_cache`$digest( - c(as.character(body(encl$`_f`)), args, - lapply(encl$`_additional`, function(x) eval(x[[2L]], environment(x)))) + hash <- digest::digest( + c( + as.character(body(encl$`_f`)), + args, + lapply(encl$`_additional`, function(x) eval(x[[2L]], environment(x))) + ), + algo = "xxhash64" ) - if (encl$`_cache`$has_key(hash)) { - res <- encl$`_cache`$get(hash) - } else { + res <- encl$`_cache`$get(hash) + if (cache::is.key_missing(res)) { # modify the call to use the original function and evaluate it mc[[1L]] <- encl$`_f` res <- withVisible(eval(mc, parent.frame())) diff --git a/man/memoise.Rd b/man/memoise.Rd index 883ef0a..30885c0 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -9,7 +9,7 @@ memoise( f, ..., envir = environment(f), - cache = cache_memory(), + cache = cache::memoryCache(), omit_args = c() ) } From d3824f7c2088517231c5c943d9cd8e801c2ad336 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 5 Oct 2020 19:22:31 -0500 Subject: [PATCH 02/25] Add function to wrap old-style caches --- R/memoise.R | 11 ++++++++--- R/old_cache.R | 30 ++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) create mode 100644 R/old_cache.R diff --git a/R/memoise.R b/R/memoise.R index 5d51e28..436fe22 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -180,6 +180,11 @@ memoise <- memoize <- function( envir <- baseenv() } + # Handle old-style memoise cache objects + if (is_old_cache(cache)) { + cache <- wrap_old_cache(cache) + } + memo_f_env <- new.env(parent = envir) memo_f_env$`_cache` <- cache memo_f_env$`_f` <- f @@ -292,7 +297,7 @@ has_cache <- function(f) { # Modify the function body of the function to simply return TRUE and FALSE # rather than get or set the results of the cache body <- body(f) - body[[10]] 
<- quote(if (encl$`_cache`$has_key(hash)) return(TRUE) else return(FALSE)) + body[[11]] <- quote(return(encl$`_cache`$exists(hash))) body(f) <- body f @@ -319,8 +324,8 @@ drop_cache <- function(f) { # Modify the function body of the function to simply drop the key # and return TRUE if successfully removed body <- body(f) - body[[10]] <- quote(if (encl$`_cache`$has_key(hash)) { - encl$`_cache`$drop_key(hash) + body[[11]] <- quote(if (encl$`_cache`$exists(hash)) { + encl$`_cache`$remove(hash) return(TRUE) } else { return(FALSE) diff --git a/R/old_cache.R b/R/old_cache.R new file mode 100644 index 0000000..23a1097 --- /dev/null +++ b/R/old_cache.R @@ -0,0 +1,30 @@ +# Wrap an old-style cache so that the external API is consistent with that from +# the cache package. +wrap_old_cache <- function(x) { + if (!is_old_cache(x)) { + stop("`x` must be an old-style cache.", call. = FALSE) + } + + list( + reset = x$reset, + set = x$set, + get = function(key) { + if (!x$has_key(key)) { + return(key_missing()) + } + x$get(key) + }, + exists = x$has_key, + remove = x$drop_key + ) +} + +# Returns TRUE if it's an old-style cache. 
+is_old_cache <- function(x) { + is.function(x$reset) && + is.function(x$digest) && + is.function(x$set) && + is.function(x$get) && + is.function(x$has_key) && + is.function(x$drop_key) +} From b222fbe892638ebc42dfc694f26b8444ef511ccf Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 5 Oct 2020 19:25:37 -0500 Subject: [PATCH 03/25] Precompute hash for function --- R/memoise.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/memoise.R b/R/memoise.R index 436fe22..24bce7e 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -151,7 +151,7 @@ memoise <- memoize <- function( hash <- digest::digest( c( - as.character(body(encl$`_f`)), + encl$`_f_hash`, args, lapply(encl$`_additional`, function(x) eval(x[[2L]], environment(x))) ), @@ -188,6 +188,7 @@ memoise <- memoize <- function( memo_f_env <- new.env(parent = envir) memo_f_env$`_cache` <- cache memo_f_env$`_f` <- f + memo_f_env$`_f_hash` <- digest(f, algo = "sha512") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args environment(memo_f) <- memo_f_env From e6bd2fab5de8d496fb3e9775fcc2acaeb45a22c7 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 6 Oct 2020 10:57:21 -0500 Subject: [PATCH 04/25] Allow passing algorithm to memoise --- R/memoise.R | 13 +++++++++++-- R/old_cache.R | 1 + man/memoise.Rd | 3 ++- 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index 24bce7e..04141de 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -121,7 +121,8 @@ memoise <- memoize <- function( ..., envir = environment(f), cache = cache::memoryCache(), - omit_args = c()) + omit_args = c(), + algo = "spookyhash") { f_formals <- formals(args(f)) if(is.memoised(f)) { @@ -155,7 +156,7 @@ memoise <- memoize <- function( args, lapply(encl$`_additional`, function(x) eval(x[[2L]], environment(x))) ), - algo = "xxhash64" + algo = encl$`_algo` ) res <- encl$`_cache`$get(hash) @@ -182,7 +183,14 @@ memoise <- memoize <- function( # Handle old-style memoise cache 
objects if (is_old_cache(cache)) { + algo <- cache cache <- wrap_old_cache(cache) + # Old-style caches include their own digest algorithm, so rewrite + # digest::digest(xx, algo = encl$`_algo`) + # to: + # encl$`_cache`$digest(xx) + body(memo_f)[[9]][[3]][[1]] <- quote(encl$`_cache`$digest) + body(memo_f)[[9]][[3]][[3]] <- NULL } memo_f_env <- new.env(parent = envir) @@ -191,6 +199,7 @@ memoise <- memoize <- function( memo_f_env$`_f_hash` <- digest(f, algo = "sha512") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args + memo_f_env$`_algo` <- algo environment(memo_f) <- memo_f_env class(memo_f) <- c("memoised", "function") diff --git a/R/old_cache.R b/R/old_cache.R index 23a1097..fe89f74 100644 --- a/R/old_cache.R +++ b/R/old_cache.R @@ -6,6 +6,7 @@ wrap_old_cache <- function(x) { } list( + digest = x$digest, reset = x$reset, set = x$set, get = function(key) { diff --git a/man/memoise.Rd b/man/memoise.Rd index 30885c0..20a3dad 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -10,7 +10,8 @@ memoise( ..., envir = environment(f), cache = cache::memoryCache(), - omit_args = c() + omit_args = c(), + algo = "spookyhash" ) } \arguments{ From f250f647f230e1c5945cd54e84def5cefee0dde6 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 7 Oct 2020 10:18:27 -0500 Subject: [PATCH 05/25] Remove use of `::` in memoised wrapper --- R/memoise.R | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index 04141de..0389c8b 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -150,7 +150,8 @@ memoise <- memoize <- function( args <- c(lapply(called_args, eval, parent.frame()), lapply(default_args, eval, envir = environment())) - hash <- digest::digest( + # Use getNamespace() instead of `::`, because the latter is slow. 
+ hash <- getNamespace("digest")$digest( c( encl$`_f_hash`, args, @@ -160,7 +161,7 @@ memoise <- memoize <- function( ) res <- encl$`_cache`$get(hash) - if (cache::is.key_missing(res)) { + if (inherits(res, "key_missing")) { # modify the call to use the original function and evaluate it mc[[1L]] <- encl$`_f` res <- withVisible(eval(mc, parent.frame())) @@ -200,6 +201,7 @@ memoise <- memoize <- function( memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args memo_f_env$`_algo` <- algo + environment(memo_f) <- memo_f_env class(memo_f) <- c("memoised", "function") From b0267f406bb285fe2f4c610e3978ab5bf013aad9 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 13 Oct 2020 11:02:51 -0500 Subject: [PATCH 06/25] Remove source refs --- R/memoise.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/memoise.R b/R/memoise.R index 0389c8b..70225de 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -197,7 +197,11 @@ memoise <- memoize <- function( memo_f_env <- new.env(parent = envir) memo_f_env$`_cache` <- cache memo_f_env$`_f` <- f - memo_f_env$`_f_hash` <- digest(f, algo = "sha512") + # Precompute hash of function. This saves work because when this is added to + # the list of objects to hash, it doesn't need to serialize and hash the + # entire function. Also remove source refs because they can result in spurious + # differences (when the function is the same but the source refs differ). 
+ memo_f_env$`_f_hash` <- digest(removeSource(f), algo = "sha256") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args memo_f_env$`_algo` <- algo From 3a5b8c344adc9bd0461145d9326d66931ff49575 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 13 Oct 2020 11:04:40 -0500 Subject: [PATCH 07/25] Extract formals with a default at creation time --- R/memoise.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/memoise.R b/R/memoise.R index 70225de..ccb9033 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -138,7 +138,7 @@ memoise <- memoize <- function( called_args <- as.list(mc)[-1] # Formals with a default - default_args <- Filter(function(x) !identical(x, quote(expr = )), as.list(formals())) + default_args <- encl$`_default_args` # That has not been called default_args <- default_args[setdiff(names(default_args), names(called_args))] @@ -205,6 +205,8 @@ memoise <- memoize <- function( memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args memo_f_env$`_algo` <- algo + # Formals with a default value + memo_f_env$`_default_args` <- Filter(function(x) !identical(x, quote(expr = )), f_formals) environment(memo_f) <- memo_f_env From d01f1d9de417eef9a80aae227d08055a6320a74c Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 16 Oct 2020 12:42:55 -0500 Subject: [PATCH 08/25] Add keys() method to old-style cache wrapper --- R/old_cache.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/old_cache.R b/R/old_cache.R index fe89f74..9a330bd 100644 --- a/R/old_cache.R +++ b/R/old_cache.R @@ -16,7 +16,8 @@ wrap_old_cache <- function(x) { x$get(key) }, exists = x$has_key, - remove = x$drop_key + remove = x$drop_key, + keys = x$keys ) } From ffa50c6195e04cc715c859ef6d8bdde37b288fff Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 19 Oct 2020 11:46:01 -0500 Subject: [PATCH 09/25] Don't hash environment for function --- R/memoise.R | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) 
diff --git a/R/memoise.R b/R/memoise.R index ccb9033..3438709 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -199,9 +199,8 @@ memoise <- memoize <- function( memo_f_env$`_f` <- f # Precompute hash of function. This saves work because when this is added to # the list of objects to hash, it doesn't need to serialize and hash the - # entire function. Also remove source refs because they can result in spurious - # differences (when the function is the same but the source refs differ). - memo_f_env$`_f_hash` <- digest(removeSource(f), algo = "sha256") + # entire function. This does not include the environment or source refs. + memo_f_env$`_f_hash` <- digest(list(formals(f), body(f)), algo = "sha256") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args memo_f_env$`_algo` <- algo From 60ee253d5f5f7b8f637cefd4e4ec28d18238b26c Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 19 Oct 2020 11:46:30 -0500 Subject: [PATCH 10/25] cache_s3: keys() returns unnamed vector --- R/cache_s3.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/cache_s3.R b/R/cache_s3.R index 975d277..1667985 100644 --- a/R/cache_s3.R +++ b/R/cache_s3.R @@ -61,7 +61,7 @@ cache_s3 <- function(cache_name, algo = "sha512", compress = FALSE) { cache_keys <- function() { items <- lapply(aws.s3::get_bucket(bucket = cache_name), `[[`, "Key") - unlist(Filter(Negate(is.null), items)) + as.character(unlist(Filter(Negate(is.null), items))) } list( From 898026621f3c43f761e37ec4a6ce3912e80714a4 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 23 Oct 2020 21:13:50 -0500 Subject: [PATCH 11/25] Use cache_mem --- R/memoise.R | 2 +- man/memoise.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index 3438709..cf5a5f4 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -120,7 +120,7 @@ memoise <- memoize <- function( f, ..., envir = environment(f), - cache = cache::memoryCache(), + cache = cache::cache_mem(), omit_args = c(), algo 
= "spookyhash") { diff --git a/man/memoise.Rd b/man/memoise.Rd index 20a3dad..ebbcb95 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -9,7 +9,7 @@ memoise( f, ..., envir = environment(f), - cache = cache::memoryCache(), + cache = cache::cache_mem(), omit_args = c(), algo = "spookyhash" ) From 0965e25a205c1018ce925f0e0ae522e953101fe1 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Tue, 27 Oct 2020 10:56:32 -0500 Subject: [PATCH 12/25] Use cachem package --- DESCRIPTION | 4 ++-- NAMESPACE | 1 - R/memoise.R | 9 +++++---- R/old_cache.R | 2 ++ README.Rmd | 18 +++++++++++++++++- man/memoise.Rd | 7 ++++--- 6 files changed, 30 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 8be0ebd..fc37206 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,7 +30,7 @@ URL: https://github.com/r-lib/memoise BugReports: https://github.com/r-lib/memoise/issues Imports: digest (>= 0.6.3), - cache + cachem Suggests: aws.s3, covr, @@ -39,6 +39,6 @@ Suggests: httr, testthat Remotes: - wch/cache + wch/cachem Encoding: UTF-8 RoxygenNote: 7.1.1 diff --git a/NAMESPACE b/NAMESPACE index 45c4745..159abbb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,5 @@ export(is.memoized) export(memoise) export(memoize) export(timeout) -import(cache) importFrom(digest,digest) importFrom(stats,setNames) diff --git a/R/memoise.R b/R/memoise.R index cf5a5f4..24413eb 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -1,3 +1,5 @@ +#' Memoise a function +#' #' \code{mf <- memoise(f)} creates \code{mf}, a memoised copy of #' \code{f}. A memoised copy is basically a #' lazier version of the same function: it saves the answers of @@ -49,12 +51,12 @@ #' } #' } #' @name memoise -#' @title Memoise a function. #' @param f Function of which to create a memoised copy. #' @param ... optional variables to use as additional restrictions on #' caching, specified as one-sided formulas (no LHS). See Examples for usage. #' @param envir Environment of the returned function. 
-#' @param cache Cache function. +#' @param cache Cache object. The default is a [cachem::cache_mem()] with a max +#' size of 1 GB. #' @param omit_args Names of arguments to ignore when calculating hash. #' @seealso \code{\link{forget}}, \code{\link{is.memoised}}, #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} @@ -115,12 +117,11 @@ #' memA4 <- memoise(a, ~timeout(10)) #' memA4(2) #' @importFrom stats setNames -#' @import cache memoise <- memoize <- function( f, ..., envir = environment(f), - cache = cache::cache_mem(), + cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), algo = "spookyhash") { diff --git a/R/old_cache.R b/R/old_cache.R index 9a330bd..24a3320 100644 --- a/R/old_cache.R +++ b/R/old_cache.R @@ -1,5 +1,7 @@ # Wrap an old-style cache so that the external API is consistent with that from # the cache package. + +#' @importFrom cachem key_missing wrap_old_cache <- function(x) { if (!is_old_cache(x)) { stop("`x` must be an old-style cache.", call. = FALSE) diff --git a/README.Rmd b/README.Rmd index d6efc5a..94e5902 100644 --- a/README.Rmd +++ b/README.Rmd @@ -1,5 +1,7 @@ --- output: github_document +editor_options: + chunk_output_type: console --- @@ -57,7 +59,21 @@ And you can test whether a function is memoised with `is.memoised()`. ## Caches -By default, memoise uses an in-memory cache. But you can customise this with the `cache` arugment and another built-in cache: +By default, memoise uses an in-memory cache, using `cache_mem()` from the [cachem](https://github.com/wch/cachem) package. + +The `cache` argument can be used to pick a custom size: + +- `cachem::cache_disk()` allows caching using files on a local filesystem. This is useful for preserving the cache between R sessions as well as sharing between systems when using a shared or synced files system such as Dropbox or Google Drive. 
+ +Both `cachem::cache_mem()` and `cachem::cache_disk()` support automatic pruning by default; this means that they will not keep growing past a certain size, and eventually older items will be removed from the cache. + +Before version 1.2, memoise used different caching objects, which did not have built-in limits, and had a slightly different API. These caching objects can still be used, but we recommend that + + + + +As of + - `cache_filesystem()` allows caching using files on a local filesystem. This is useful for preserving the cache between R sessions as well as sharing between systems when using a shared or synced files system such as Dropbox or Google Drive. diff --git a/man/memoise.Rd b/man/memoise.Rd index ebbcb95..dc5f7a3 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -3,13 +3,13 @@ \name{memoise} \alias{memoise} \alias{memoize} -\title{Memoise a function.} +\title{Memoise a function} \usage{ memoise( f, ..., envir = environment(f), - cache = cache::cache_mem(), + cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), algo = "spookyhash" ) @@ -22,7 +22,8 @@ caching, specified as one-sided formulas (no LHS). See Examples for usage.} \item{envir}{Environment of the returned function.} -\item{cache}{Cache function.} +\item{cache}{Cache object. 
The default is a [cachem::cache_mem()] with a max +size of 1 GB.} \item{omit_args}{Names of arguments to ignore when calculating hash.} } From 2ffe0d89ae4d5d808f29fc27d4593e44fbfe634b Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 19:55:54 -0500 Subject: [PATCH 13/25] Add more collision tests --- NAMESPACE | 1 + R/memoise.R | 3 +- tests/testthat/test-memoise.R | 63 +++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 1 deletion(-) diff --git a/NAMESPACE b/NAMESPACE index 159abbb..841bf41 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,5 +13,6 @@ export(is.memoized) export(memoise) export(memoize) export(timeout) +importFrom(cachem,key_missing) importFrom(digest,digest) importFrom(stats,setNames) diff --git a/R/memoise.R b/R/memoise.R index 24413eb..c19870b 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -201,7 +201,8 @@ memoise <- memoize <- function( # Precompute hash of function. This saves work because when this is added to # the list of objects to hash, it doesn't need to serialize and hash the # entire function. This does not include the environment or source refs. - memo_f_env$`_f_hash` <- digest(list(formals(f), body(f)), algo = "sha256") + # The as.character() is there to ensure source refs are not included. 
+ memo_f_env$`_f_hash` <- digest(list(formals(f), as.character(body(f))), algo = "sha256") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args memo_f_env$`_algo` <- algo diff --git a/tests/testthat/test-memoise.R b/tests/testthat/test-memoise.R index 46edb01..bb26a4a 100644 --- a/tests/testthat/test-memoise.R +++ b/tests/testthat/test-memoise.R @@ -111,6 +111,69 @@ test_that("symbol collision", { expect_equal(cachem(), 5) }) +test_that("different body avoids collisions", { + # Same args, different body + m <- cachem::cache_mem() + times2 <- memoise(function(x) { x * 2 }, cache = m) + times4 <- memoise(function(x) { x * 4 }, cache = m) + + expect_identical(times2(10), 20) + expect_equal(m$size(), 1) + expect_identical(times4(10), 40) + expect_equal(m$size(), 2) +}) + +test_that("different formals avoids collisions", { + # Different formals (even if not used) avoid collisions, because formals + # are used in key. + m <- cachem::cache_mem() + f <- function(x, y) { x * 2 } + times2 <- memoise(function(x, y) { x * 2 }, cache = m) + times2a <- memoise(function(x, y = 1) { x * 2 }, cache = m) + + expect_identical(times2(10), 20) + expect_equal(m$size(), 1) + expect_identical(times2a(10), 20) + expect_equal(m$size(), 2) +}) + +test_that("same body results in collisions", { + # Two identical memoised functions should result in cache hits so that cache + # can be shared more easily. + # https://github.com/r-lib/memoise/issues/58 + m <- cachem::cache_mem() + times2 <- memoise(function(x, y) { x * 2 }, cache = m) + times2a <- memoise(function(x, y) { x * 2 }, cache = m) + + expect_identical(times2(10), 20) + expect_identical(times2a(10), 20) + expect_equal(m$size(), 1) +}) + +test_that("same body results in collisions", { + # Even though t2 and t4 produce different results, the memoised versions, + # times2 and times4, have cache collisions because the functions have the same + # body and formals. It would be nice if we could somehow avoid this. 
+ m <- cachem::cache_mem() + + t2 <- local({ + n <- 2 + function(x) x * n + }) + t4 <- local({ + n <- 4 + function(x) x * n + }) + + times2 <- memoise(t2, cache = m) + times4 <- memoise(t4, cache = m) + + expect_identical(times2(10), 20) + expect_identical(times4(10), 20) # Bad (but expected) cache collision! + expect_equal(m$size(), 1) +}) + + test_that("visibility", { vis <- function() NULL invis <- function() invisible() From 51606de29b49c6b9104176b45c8ac1fcfea32846 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:14:09 -0500 Subject: [PATCH 14/25] Update memoise docs --- R/memoise.R | 11 ++++------- man/memoise.Rd | 11 ++++------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index c19870b..01ea099 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -56,7 +56,7 @@ #' caching, specified as one-sided formulas (no LHS). See Examples for usage. #' @param envir Environment of the returned function. #' @param cache Cache object. The default is a [cachem::cache_mem()] with a max -#' size of 1 GB. +#' size of 512 MB. #' @param omit_args Names of arguments to ignore when calculating hash. #' @seealso \code{\link{forget}}, \code{\link{is.memoised}}, #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} @@ -109,13 +109,10 @@ #' memA(2) #' memA <- memoise(a) #' memA(2) -#' # Making a memoized automatically time out after 10 seconds. -#' memA3 <- memoise(a, ~{current <- as.numeric(Sys.time()); (current - current %% 10) %/% 10 }) -#' memA3(2) #' -#' # The timeout function is an easy way to do the above. -#' memA4 <- memoise(a, ~timeout(10)) -#' memA4(2) +#' # Make a memoized result automatically time out after 10 seconds. 
+#' memA3 <- memoise(a, cache = cachem::cache_mem(max_age = 10)) +#' memA3(2) #' @importFrom stats setNames memoise <- memoize <- function( f, diff --git a/man/memoise.Rd b/man/memoise.Rd index dc5f7a3..b4fba40 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -23,7 +23,7 @@ caching, specified as one-sided formulas (no LHS). See Examples for usage.} \item{envir}{Environment of the returned function.} \item{cache}{Cache object. The default is a [cachem::cache_mem()] with a max -size of 1 GB.} +size of 512 MB.} \item{omit_args}{Names of arguments to ignore when calculating hash.} } @@ -126,13 +126,10 @@ memA2(2) # Different cache, different outcome memA(2) memA <- memoise(a) memA(2) -# Making a memoized automatically time out after 10 seconds. -memA3 <- memoise(a, ~{current <- as.numeric(Sys.time()); (current - current \%\% 10) \%/\% 10 }) -memA3(2) -# The timeout function is an easy way to do the above. -memA4 <- memoise(a, ~timeout(10)) -memA4(2) +# Make a memoized result automatically time out after 10 seconds. +memA3 <- memoise(a, cache = cachem::cache_mem(max_age = 10)) +memA3(2) } \seealso{ \code{\link{forget}}, \code{\link{is.memoised}}, From 70f1ed61904130e1fc1721cb03d3dbadabf9fad5 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:14:55 -0500 Subject: [PATCH 15/25] Update README --- README.Rmd | 52 ++++++++++++++++++++++++-------- README.md | 87 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 108 insertions(+), 31 deletions(-) diff --git a/README.Rmd b/README.Rmd index 94e5902..a12cf69 100644 --- a/README.Rmd +++ b/README.Rmd @@ -44,14 +44,20 @@ f <- function(x) { mean(x) } mf <- memoise(f) +``` +```{r eval=FALSE} system.time(mf(1:10)) +#> user system elapsed +#> 0.002 0.000 1.003 system.time(mf(1:10)) +#> user system elapsed +#> 0.000 0.000 0.001 ``` You can clear `mf`'s cache with: -```{r} +```{r eval=FALSE} forget(mf) ``` @@ -59,30 +65,50 @@ And you can test whether a function is memoised with `is.memoised()`. 
## Caches -By default, memoise uses an in-memory cache, using `cache_mem()` from the [cachem](https://github.com/wch/cachem) package. +By default, memoise uses an in-memory cache, using `cache_mem()` from the [cachem](https://github.com/r-lib/cachem) package. `cachem::cache_disk()` allows caching using files on a local filesystem. -The `cache` argument can be used to pick a custom size: +Both `cachem::cache_mem()` and `cachem::cache_disk()` support automatic pruning by default; this means that they will not keep growing past a certain size, and eventually older items will be removed from the cache. The default size for a `cache_mem()` is 512 MB, and the default size for a `cache_disk()` is 1 GB, but this can be customized by specifying `max_size`: -- `cachem::cache_disk()` allows caching using files on a local filesystem. This is useful for preserving the cache between R sessions as well as sharing between systems when using a shared or synced files system such as Dropbox or Google Drive. +```{r} +# 100 MB limit +cm <- cachem::cache_mem(max_size = 100 * 1024^2) -Both `cachem::cache_mem()` and `cachem::cache_disk()` support automatic pruning by default; this means that they will not keep growing past a certain size, and eventually older items will be removed from the cache. +mf <- memoise(f, cache = cm) +``` -Before version 1.2, memoise used different caching objects, which did not have built-in limits, and had a slightly different API. These caching objects can still be used, but we recommend that +You can also change the maximum age of items in the cache with `max_age`: +```{r} +# Expire items in cache after 15 minutes +cm <- cachem::cache_mem(max_age = 15 * 60) +mf <- memoise(f, cache = cm) +``` +By default, a `cache_disk()` uses a subdirectory of the R process's temp directory, but it is possible to specify the directory. This is useful for persisting a cache across R sessions, sharing a cache among different processes, or even for synchronizing across the network. 
-As of +```{r, eval = FALSE} +# Store in "R-myapp" directory inside of user-level cache directory +cd <- cachem::cache_disk(rappdirs::user_cache_dir("R-myapp")) +# Store in Dropbox +cdb <- cachem::cache_disk("~/Dropbox/.rcache") +``` -- `cache_filesystem()` allows caching using files on a local filesystem. This is useful for preserving the cache between R sessions as well as sharing between systems when using a shared or synced files system such as Dropbox or Google Drive. +A single cache object can be shared among multiple memoised functions. By default, the cache key includes not only the arguments to the function, but also the body of the function. This essentially eliminates the possibility of a cache collision, even if two memoised functions are called with the same arguments. - ```{r, eval = FALSE} - fc <- cache_filesystem("~/.cache") +```{r} +m <- cachem::cache_mem() - # Store in Dropbox - dbc <- cache_filesystem("~/Dropbox/.rcache") - ``` +times2 <- memoise(function(x) { x * 2 }, cache = m) +times4 <- memoise(function(x) { x * 4 }, cache = m) + +times2(10) +times4(10) +``` + + +Before version 1.2, memoise used different caching objects, which did not have automatic pruning and had a slightly different API. These caching objects can still be used, but we recommend using the caching objects from cachem when possible. The following cache objects do not currently have an equivalent in cachem. - `cache_s3()` allows caching on [Amazon S3](https://aws.amazon.com/s3/) Requires you to specify a bucket using `cache_name`. When creating buckets, they must be unique among all s3 users when created. 
diff --git a/README.md b/README.md index 5e7eada..47db549 100644 --- a/README.md +++ b/README.md @@ -37,40 +37,91 @@ f <- function(x) { mean(x) } mf <- memoise(f) +``` +``` r system.time(mf(1:10)) -#> user system elapsed -#> 0.000 0.000 1.003 +#> user system elapsed +#> 0.002 0.000 1.003 system.time(mf(1:10)) -#> user system elapsed -#> 0.031 0.001 0.032 +#> user system elapsed +#> 0.000 0.000 0.001 ``` You can clear `mf`’s cache with: ``` r forget(mf) -#> [1] TRUE ``` And you can test whether a function is memoised with `is.memoised()`. ## Caches -By default, memoise uses an in-memory cache. But you can customise this -with the `cache` arugment and another built-in cache: +By default, memoise uses an in-memory cache, using `cache_mem()` from +the [cachem](https://github.com/r-lib/cachem) package. +`cachem::cache_disk()` allows caching using files on a local filesystem. - - `cache_filesystem()` allows caching using files on a local - filesystem. This is useful for preserving the cache between R - sessions as well as sharing between systems when using a shared or - synced files system such as Dropbox or Google Drive. - - ``` r - fc <- cache_filesystem("~/.cache") - - # Store in Dropbox - dbc <- cache_filesystem("~/Dropbox/.rcache") - ``` +Both `cachem::cache_mem()` and `cachem::cache_disk()` support automatic +pruning by default; this means that they will not keep growing past a +certain size, and eventually older items will be removed from the cache. 
+The default size for a `cache_mem()` is 512 MB, and the default size for a +`cache_disk()` is 1 GB, but this can be customized by specifying +`max_size`: + +``` r +# 100 MB limit +cm <- cachem::cache_mem(max_size = 100 * 1024^2) + +mf <- memoise(f, cache = cm) +``` + +You can also change the maximum age of items in the cache with +`max_age`: + +``` r +# Expire items in cache after 15 minutes +cm <- cachem::cache_mem(max_age = 15 * 60) + +mf <- memoise(f, cache = cm) +``` + +By default, a `cache_disk()` uses a subdirectory of the R process’s temp +directory, but it is possible to specify the directory. This is useful +for persisting a cache across R sessions, sharing a cache among +different processes, or even for synchronizing across the network. + +``` r +# Store in "R-myapp" directory inside of user-level cache directory +cd <- cachem::cache_disk(rappdirs::user_cache_dir("R-myapp")) + +# Store in Dropbox +cdb <- cachem::cache_disk("~/Dropbox/.rcache") +``` + +A single cache object can be shared among multiple memoised functions. +By default, the cache key includes not only the arguments to the +function, but also the body of the function. This essentially eliminates +the possibility of a cache collision, even if two memoised functions are +called with the same arguments. + +``` r +m <- cachem::cache_mem() + +times2 <- memoise(function(x) { x * 2 }, cache = m) +times4 <- memoise(function(x) { x * 4 }, cache = m) + +times2(10) +#> [1] 20 +times4(10) +#> [1] 40 +``` + +Before version 1.2, memoise used different caching objects, which did +not have automatic pruning and had a slightly different API. These +caching objects can still be used, but we recommend using the caching +objects from cachem when possible. The following cache objects do not +currently have an equivalent in cachem. 
- `cache_s3()` allows caching on [Amazon S3](https://aws.amazon.com/s3/) Requires you to specify a bucket From d7ce69c5b70ac288a8fa6b2629cdb06190869a8e Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:25:32 -0500 Subject: [PATCH 16/25] Bump digest dependency for spookyhash --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index fc37206..c6bf9c0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,7 +29,7 @@ License: MIT + file LICENSE URL: https://github.com/r-lib/memoise BugReports: https://github.com/r-lib/memoise/issues Imports: - digest (>= 0.6.3), + digest (>= 0.6.25), cachem Suggests: aws.s3, From 54977c87f615733e4c65fc84df50e15cc294ddd6 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:30:07 -0500 Subject: [PATCH 17/25] Bump version and update NEWS --- DESCRIPTION | 2 +- NEWS.md | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6bf9c0..40fe2cc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: memoise Title: Memoisation of Functions -Version: 1.1.0.9000 +Version: 1.1.0.9001 Authors@R: c(person(given = "Hadley", family = "Wickham", diff --git a/NEWS.md b/NEWS.md index adec4f2..b462f10 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# Version 1.1.0.9000 +# Version 1.1.0.9001 + +* Memoise now uses caching objects from the cachem package. These caches support automatic pruning, so that they won't grow indefinitely. (#112) * Name clashes between function arguments and variables defined when memoising no longer occur (@egnha, #43). @@ -7,10 +9,10 @@ * Add `compress` option for non-memory caches (@coolbutuseless, #71). 
-* Use absolute path in cache file system backend, so user can change working +* Use absolute path in cache file system backend, so user can change working directory after using relative path (@xhdong-umd, #51, #65) -* Add `drop_cache()` to drop the cached result for particular arguments +* Add `drop_cache()` to drop the cached result for particular arguments (@richardkunze, #78) * Suppress messages of `aws.s3::head_object` within `cache_s3`'s `cache_has_key` From 140d3eb7a680b55a17251bedf3abdd91b2f3f00a Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:31:40 -0500 Subject: [PATCH 18/25] Use r-lib/cachem remote --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 40fe2cc..54c6408 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -39,6 +39,6 @@ Suggests: httr, testthat Remotes: - wch/cachem + r-lib/cachem Encoding: UTF-8 RoxygenNote: 7.1.1 From b1c628d5e9cd7fcac957bca918c516a2d7324573 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 30 Oct 2020 20:41:16 -0500 Subject: [PATCH 19/25] Document algo arg --- R/memoise.R | 2 ++ man/memoise.Rd | 3 +++ 2 files changed, 5 insertions(+) diff --git a/R/memoise.R b/R/memoise.R index 01ea099..68419b1 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -57,6 +57,8 @@ #' @param envir Environment of the returned function. #' @param cache Cache object. The default is a [cachem::cache_mem()] with a max #' size of 512 MB. +#' @param algo Hashing algorithm to use for cache keys. This is passed to +#' [digest::digest()]. #' @param omit_args Names of arguments to ignore when calculating hash. #' @seealso \code{\link{forget}}, \code{\link{is.memoised}}, #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} diff --git a/man/memoise.Rd b/man/memoise.Rd index b4fba40..4ea9aee 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -26,6 +26,9 @@ caching, specified as one-sided formulas (no LHS). 
See Examples for usage.} size of 512 MB.} \item{omit_args}{Names of arguments to ignore when calculating hash.} + +\item{algo}{Hashing algorithm to use for cache keys. This is passed to +[digest::digest()].} } \description{ \code{mf <- memoise(f)} creates \code{mf}, a memoised copy of From 4ee2f9562af5be515948d3cfcaf95c8aa3d46829 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Mon, 2 Nov 2020 09:34:56 -0600 Subject: [PATCH 20/25] Add example with shared cache --- R/memoise.R | 5 +++++ man/memoise.Rd | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/R/memoise.R b/R/memoise.R index 68419b1..95ca667 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -103,6 +103,11 @@ #' memA(2) # Still the same outcome #' memA2(2) # Different cache, different outcome #' +#' # Multiple memoized functions can share a cache. +#' cm <- cachem::cache_mem(max_size = 50 * 1024^2) +#' memA <- memoise(a, cache = cm) +#' memB <- memoise(b, cache = cm) +#' #' # Don't do the same memoisation assignment twice: a brand-new #' # memoised function also means a brand-new cache, and *that* #' # you could as easily and more legibly achieve using forget(). diff --git a/man/memoise.Rd b/man/memoise.Rd index 4ea9aee..d986032 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -121,6 +121,11 @@ memA2 <- memoise(a) memA(2) # Still the same outcome memA2(2) # Different cache, different outcome +# Multiple memoized functions can share a cache. +cm <- cachem::cache_mem(max_size = 50 * 1024^2) +memA <- memoise(a, cache = cm) +memB <- memoise(b, cache = cm) + # Don't do the same memoisation assignment twice: a brand-new # memoised function also means a brand-new cache, and *that* # you could as easily and more legibly achieve using forget(). 
From 5852d802140c797353ba38c75e1fd9d735c93857 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Fri, 18 Dec 2020 14:20:13 -0600 Subject: [PATCH 21/25] Allow user to pass in hashing function --- R/memoise.R | 34 ++++++++++++++-------------------- man/memoise.Rd | 6 +++--- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index 95ca667..01bc8c4 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -57,8 +57,8 @@ #' @param envir Environment of the returned function. #' @param cache Cache object. The default is a [cachem::cache_mem()] with a max #' size of 512 MB. -#' @param algo Hashing algorithm to use for cache keys. This is passed to -#' [digest::digest()]. +#' @param hash A function which takes an R object as input and returns a string +#' which is used as a cache key. #' @param omit_args Names of arguments to ignore when calculating hash. #' @seealso \code{\link{forget}}, \code{\link{is.memoised}}, #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} @@ -127,7 +127,7 @@ memoise <- memoize <- function( envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), - algo = "spookyhash") + hash = function(x) digest::digest(x, algo = "spookyhash")) { f_formals <- formals(args(f)) if(is.memoised(f)) { @@ -155,22 +155,20 @@ memoise <- memoize <- function( args <- c(lapply(called_args, eval, parent.frame()), lapply(default_args, eval, envir = environment())) - # Use getNamespace() instead of `::`, because the latter is slow. 
- hash <- getNamespace("digest")$digest( + key <- `_hash`( c( encl$`_f_hash`, args, lapply(encl$`_additional`, function(x) eval(x[[2L]], environment(x))) - ), - algo = encl$`_algo` + ) ) - res <- encl$`_cache`$get(hash) + res <- encl$`_cache`$get(key) if (inherits(res, "key_missing")) { # modify the call to use the original function and evaluate it mc[[1L]] <- encl$`_f` res <- withVisible(eval(mc, parent.frame())) - encl$`_cache`$set(hash, res) + encl$`_cache`$set(key, res) } if (res$visible) { @@ -189,17 +187,14 @@ memoise <- memoize <- function( # Handle old-style memoise cache objects if (is_old_cache(cache)) { - algo <- cache + # Old-style caches include their own digest algorithm, so use that instead + # of whatever is passed in. + hash <- cache$digest cache <- wrap_old_cache(cache) - # Old-style caches include their own digest algorithm, so rewrite - # digest::digest(xx, algo = encl$`_algo`) - # to: - # encl$`_cache`$digest(xx) - body(memo_f)[[9]][[3]][[1]] <- quote(encl$`_cache`$digest) - body(memo_f)[[9]][[3]][[3]] <- NULL } memo_f_env <- new.env(parent = envir) + memo_f_env$`_hash` <- hash memo_f_env$`_cache` <- cache memo_f_env$`_f` <- f # Precompute hash of function. 
This saves work because when this is added to @@ -209,7 +204,6 @@ memoise <- memoize <- function( memo_f_env$`_f_hash` <- digest(list(formals(f), as.character(body(f))), algo = "sha256") memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args - memo_f_env$`_algo` <- algo # Formals with a default value memo_f_env$`_default_args` <- Filter(function(x) !identical(x, quote(expr = )), f_formals) @@ -320,7 +314,7 @@ has_cache <- function(f) { # Modify the function body of the function to simply return TRUE and FALSE # rather than get or set the results of the cache body <- body(f) - body[[11]] <- quote(return(encl$`_cache`$exists(hash))) + body[[11]] <- quote(return(encl$`_cache`$exists(key))) body(f) <- body f @@ -347,8 +341,8 @@ drop_cache <- function(f) { # Modify the function body of the function to simply drop the key # and return TRUE if successfully removed body <- body(f) - body[[11]] <- quote(if (encl$`_cache`$exists(hash)) { - encl$`_cache`$remove(hash) + body[[10]] <- quote(if (encl$`_cache`$exists(key)) { + encl$`_cache`$remove(key) return(TRUE) } else { return(FALSE) diff --git a/man/memoise.Rd b/man/memoise.Rd index d986032..987cd37 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -11,7 +11,7 @@ memoise( envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), - algo = "spookyhash" + hash = function(x) digest::digest(x, algo = "spookyhash") ) } \arguments{ @@ -27,8 +27,8 @@ size of 512 MB.} \item{omit_args}{Names of arguments to ignore when calculating hash.} -\item{algo}{Hashing algorithm to use for cache keys. 
This is passed to -[digest::digest()].} +\item{hash}{A function which takes an R object as input and returns a string +which is used as a cache key.} } \description{ \code{mf <- memoise(f)} creates \code{mf}, a memoised copy of From bfd861ce864567bfb6cf0d646aa7da606094d328 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 6 Jan 2021 10:46:30 -0600 Subject: [PATCH 22/25] Use rlang::hash and move digest to Suggests --- DESCRIPTION | 3 ++- NAMESPACE | 1 - R/cache_filesystem.R | 1 + R/cache_gcs.R | 1 + R/cache_memory.R | 1 + R/cache_s3.R | 1 + R/memoise.R | 5 ++--- man/memoise.Rd | 2 +- 8 files changed, 9 insertions(+), 6 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 54c6408..7bca071 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -29,9 +29,10 @@ License: MIT + file LICENSE URL: https://github.com/r-lib/memoise BugReports: https://github.com/r-lib/memoise/issues Imports: - digest (>= 0.6.25), + rlang (>= 0.4.10), cachem Suggests: + digest, aws.s3, covr, googleAuthR, diff --git a/NAMESPACE b/NAMESPACE index 841bf41..82acdd3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -14,5 +14,4 @@ export(memoise) export(memoize) export(timeout) importFrom(cachem,key_missing) -importFrom(digest,digest) importFrom(stats,setNames) diff --git a/R/cache_filesystem.R b/R/cache_filesystem.R index 1fb4b0f..153f8d4 100644 --- a/R/cache_filesystem.R +++ b/R/cache_filesystem.R @@ -26,6 +26,7 @@ #' @export #' @inheritParams cache_memory cache_filesystem <- function(path, algo = "xxhash64", compress = FALSE) { + if (!(requireNamespace("digest"))) { stop("Package `digest` must be installed for `cache_filesystem()`.") } # nocov if (!dir.exists(path)) { dir.create(path, showWarnings = FALSE) diff --git a/R/cache_gcs.R b/R/cache_gcs.R index 701e9fb..7addb2d 100644 --- a/R/cache_gcs.R +++ b/R/cache_gcs.R @@ -22,6 +22,7 @@ cache_gcs <- function(cache_name = googleCloudStorageR::gcs_get_global_bucket(), algo = "sha512", compress = FALSE) { + if (!(requireNamespace("digest"))) { stop("Package 
`digest` must be installed for `cache_gcs()`.") } # nocov if (!(requireNamespace("googleCloudStorageR"))) { stop("Package `googleCloudStorageR` must be installed for `cache_gcs()`.") } # nocov path <- tempfile("memoise-") diff --git a/R/cache_memory.R b/R/cache_memory.R index 8f8f065..3baecf1 100644 --- a/R/cache_memory.R +++ b/R/cache_memory.R @@ -5,6 +5,7 @@ #' \code{\link[digest]{digest}} for available algorithms. #' @export cache_memory <- function(algo = "sha512") { + if (!(requireNamespace("digest"))) { stop("Package `digest` must be installed for `cache_memory()`.") } # nocov cache <- NULL cache_reset <- function() { diff --git a/R/cache_s3.R b/R/cache_s3.R index 1667985..fd2e74f 100644 --- a/R/cache_s3.R +++ b/R/cache_s3.R @@ -22,6 +22,7 @@ cache_s3 <- function(cache_name, algo = "sha512", compress = FALSE) { + if (!(requireNamespace("digest"))) { stop("Package `digest` must be installed for `cache_s3()`.") } # nocov if (!(requireNamespace("aws.s3"))) { stop("Package `aws.s3` must be installed for `cache_s3()`.") } # nocov if (!(aws.s3::bucket_exists(cache_name))) { diff --git a/R/memoise.R b/R/memoise.R index 01bc8c4..a2f94f3 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -64,7 +64,6 @@ #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} #' @aliases memoise memoize #' @export memoise memoize -#' @importFrom digest digest #' @examples #' # a() is evaluated anew each time. memA() is only re-evaluated #' # when you call it with a new set of parameters. @@ -127,7 +126,7 @@ memoise <- memoize <- function( envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), - hash = function(x) digest::digest(x, algo = "spookyhash")) + hash = rlang::hash) { f_formals <- formals(args(f)) if(is.memoised(f)) { @@ -201,7 +200,7 @@ memoise <- memoize <- function( # the list of objects to hash, it doesn't need to serialize and hash the # entire function. This does not include the environment or source refs. 
# The as.character() is there to ensure source refs are not included. - memo_f_env$`_f_hash` <- digest(list(formals(f), as.character(body(f))), algo = "sha256") + memo_f_env$`_f_hash` <- rlang::hash(list(formals(f), as.character(body(f)))) memo_f_env$`_additional` <- additional memo_f_env$`_omit_args` <- omit_args # Formals with a default value diff --git a/man/memoise.Rd b/man/memoise.Rd index 987cd37..e49709f 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -11,7 +11,7 @@ memoise( envir = environment(f), cache = cachem::cache_mem(max_size = 1024 * 1024^2), omit_args = c(), - hash = function(x) digest::digest(x, algo = "spookyhash") + hash = rlang::hash ) } \arguments{ From 760e5ce43e27d9fc8ea602ba6bed0ac533bd097b Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 6 Jan 2021 10:46:48 -0600 Subject: [PATCH 23/25] Add Winston to Authors --- DESCRIPTION | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DESCRIPTION b/DESCRIPTION index 7bca071..a38eb60 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,6 +10,10 @@ Authors@R: family = "Hester", role = c("aut", "cre"), email = "jim.hester@rstudio.com"), + person(given = "Winston", + family = "Chang", + role = "aut", + email = "winston@rstudio.com"), person(given = "Kirill", family = "Müller", role = "aut", From 18ee275f537e0bab3fcf2446b346e83d60c5f572 Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 6 Jan 2021 10:50:04 -0600 Subject: [PATCH 24/25] Update cache size in doc --- R/memoise.R | 2 +- man/memoise.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/memoise.R b/R/memoise.R index a2f94f3..0b51674 100644 --- a/R/memoise.R +++ b/R/memoise.R @@ -56,7 +56,7 @@ #' caching, specified as one-sided formulas (no LHS). See Examples for usage. #' @param envir Environment of the returned function. #' @param cache Cache object. The default is a [cachem::cache_mem()] with a max -#' size of 512 MB. +#' size of 1024 MB. 
#' @param hash A function which takes an R object as input and returns a string #' which is used as a cache key. #' @param omit_args Names of arguments to ignore when calculating hash. diff --git a/man/memoise.Rd b/man/memoise.Rd index e49709f..341b316 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -23,7 +23,7 @@ caching, specified as one-sided formulas (no LHS). See Examples for usage.} \item{envir}{Environment of the returned function.} \item{cache}{Cache object. The default is a [cachem::cache_mem()] with a max -size of 512 MB.} +size of 1024 MB.} \item{omit_args}{Names of arguments to ignore when calculating hash.} From fe1d38064ecb2f7d575a0f1ca49d49f61b64001a Mon Sep 17 00:00:00 2001 From: Winston Chang Date: Wed, 6 Jan 2021 10:54:21 -0600 Subject: [PATCH 25/25] Update NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index b462f10..3169a72 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,6 @@ # Version 1.1.0.9001 -* Memoise now uses caching objects from the cachem package. These caches support automatic pruning, so that they won't grow indefinitely. (#112) +* Memoise now uses caching objects from the cachem package by default. These caches support automatic pruning, so that they won't grow indefinitely. The older-style cache objects in the memoise package are still supported, but we suggest using new-style caches from cachem. (#112) * Name clashes between function arguments and variables defined when memoising no longer occur (@egnha, #43).