diff --git a/.travis.yml b/.travis.yml
index 352761a..a0894be 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,10 +1,19 @@
 env:
   global:
     - secure: "QrhNWrS/nyBZKsydo2exbMSp2huBaCkRBAt0hWTYoY+9pSfbbNa9OD2elsRWUi1RztK9oiubxK3KoHOGOP3L6N4CiRJ2zIX4Z25OkDagphdeAXTAtAPG6as3+ucz/Ujl8shEvHuhrUeeuKvtNsVJQ0b0DHIDRXkzizDg7yWJiNk="
+    - MEMOISE_PG_USER=postgres
+    - MEMOISE_PG_TABLE=cache
+    - MEMOISE_PG_DBNAME=cache
 
 language: r
 sudo: false
 cache: packages
 
+services:
+  - postgresql
+
+before_script:
+  - psql -U postgres -c "CREATE DATABASE cache;"
+  - psql -U postgres cache -c "CREATE TABLE cache (key varchar(255) PRIMARY KEY, val text);"
 after_success:
   - Rscript -e 'covr::codecov()'
diff --git a/DESCRIPTION b/DESCRIPTION
index 94b3e04..ae6595a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,6 +19,8 @@ Suggests:
     aws.s3,
     httr,
     covr,
-    googleCloudStorageR
+    googleCloudStorageR,
+    RPostgreSQL,
+    DBI
 License: MIT + file LICENSE
-RoxygenNote: 6.1.0
+RoxygenNote: 6.1.1
diff --git a/NAMESPACE b/NAMESPACE
index 159abbb..33a8a32 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,6 +4,7 @@ S3method(print,memoised)
 export(cache_filesystem)
 export(cache_gcs)
 export(cache_memory)
+export(cache_postgresql)
 export(cache_s3)
 export(drop_cache)
 export(forget)
diff --git a/NEWS.md b/NEWS.md
index 02e39c3..3c67c0d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,5 @@
 # Version 1.1.0.9000
 
+* Add PostgreSQL support via `cache_postgresql()` (#? - @mattyb)
 * Name clashes between function arguments and variables defined when memoising no longer occur (#43, @egnha).
 * Add Google Cloud Storage support via `cache_gcs()` (#59 - @MarkEdmondson1234)
diff --git a/R/cache_postgresql.R b/R/cache_postgresql.R
new file mode 100644
index 0000000..8074403
--- /dev/null
+++ b/R/cache_postgresql.R
@@ -0,0 +1,134 @@
+#' PostgreSQL Cache
+#' PostgreSQL-backed cache, for remote caching.
+#'
+#' Create a table with key and val columns.
+#' CREATE TABLE r_cache (
+#'   key VARCHAR(128) PRIMARY KEY,
+#'   val TEXT
+#' )
+#'
+#' @examples
+#'
+#' \dontrun{
+#' pg <- cache_postgresql(pg_con, "r_cache")
+#' mem_runif <- memoise(runif, cache = pg)
+#' }
+#'
+#'
+#' @param pg_con An RPostgreSQL connection object
+#' @param table_name A postgres table name with val and key fields. May include a schema.
+#' @param compress Argument passed to \code{saveRDS}. One of FALSE, "gzip",
+#' "bzip2" or "xz". Default: FALSE.
+#' @inheritParams cache_memory
+#' @export
+cache_postgresql <- function(pg_con, table_name, algo = "sha512", compress = FALSE) {
+
+  path <- tempfile("memoise-")
+  dir.create(path)
+
+  if (!(requireNamespace("RPostgreSQL"))) { stop("Package `RPostgreSQL` must be installed for `cache_postgresql()`.") } # nocov
+  if (!(requireNamespace("DBI"))) { stop("Package `DBI` must be installed for `cache_postgresql()`.") } # nocov
+
+  cache_reset <- function() {
+    DBI::dbExecute(
+      pg_con,
+      paste0("DELETE FROM ", table_name)
+    )
+  }
+
+  cache_set <- function(key, value) {
+    temp_file <- file.path(path, key)
+    on.exit(unlink(temp_file))
+    saveRDS(value, file = temp_file, compress = compress, ascii = TRUE)
+    size <- file.info(temp_file)$size
+    encoded <- readChar(temp_file, size, useBytes = TRUE)
+    try({
+      DBI::dbExecute(
+        pg_con,
+        DBI::sqlInterpolate(
+          pg_con,
+          paste0(
+            "INSERT INTO ", table_name,
+            " VALUES (?key, ?encoded)"
+          ),
+          key = key,
+          encoded = encoded
+        )
+      )
+    })
+  }
+
+  cache_get <- function(key) {
+    temp_file <- file.path(path, key)
+    on.exit(unlink(temp_file))
+    rs <- DBI::dbGetQuery(
+      pg_con,
+      DBI::sqlInterpolate(
+        pg_con,
+        paste0(
+          "SELECT val FROM ", table_name,
+          " WHERE key = ?key"
+        ),
+        key = key
+      )
+    )
+    writeChar(rs[1][[1]], temp_file)
+
+    readRDS(temp_file)
+  }
+
+  cache_has_key <- function(key) {
+    rs <- NULL
+    try({
+      rs <- DBI::dbGetQuery(
+        pg_con,
+        DBI::sqlInterpolate(
+          pg_con,
+          paste0(
+            "SELECT 1 FROM ", table_name,
+            " WHERE key = ?key"
+          ),
+          key = key
+        )
+      )
+    })
+    if (!is.null(rs) && nrow(rs) == 1) TRUE else FALSE
+  }
+
+  cache_drop_key <- function(key) {
+    DBI::dbExecute(
+      pg_con,
+      DBI::sqlInterpolate(
+        pg_con,
+        paste0(
+          "DELETE FROM ", table_name,
+          " WHERE key = ?key"
+        ),
+        key = key
+      )
+    )
+  }
+
+  cache_keys <- function() {
+    items <- DBI::dbGetQuery(
+      pg_con,
+      DBI::sqlInterpolate(
+        pg_con,
+        paste0(
+          "SELECT key FROM ", table_name
+        )
+      )
+    )
+    items$key
+  }
+
+  list(
+    digest = function(...) digest::digest(..., algo = algo),
+    reset = cache_reset,
+    set = cache_set,
+    get = cache_get,
+    has_key = cache_has_key,
+    drop_key = cache_drop_key,
+    keys = cache_keys
+  )
+}
diff --git a/README.md b/README.md
index 52deeb9..b82c3be 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,12 @@ devtools::install_github("r-lib/memoise")
 * `cache_filesystem()` allows caching using files on a local filesystem. You can
   point this to a shared file such as dropbox or google drive to share caches
   between systems.
-* `cache_s3()` allows caching on [Amazon S3](https://aws.amazon.com/s3/)
+* `cache_s3()`, `cache_gcs()`, and `cache_postgresql()` allow caching on remote
+  services, accessible from multiple app instances. Note that `forget()` on these
+  backends forgets values for all functions sharing the cache; you can use
+  multiple caches to avoid this.
+* `cache_s3()`, `cache_gcs()`, and `cache_postgresql()` do not create separate caches
+  if you memoise the same function more than once; the copies share one cache.
 
 ## AWS S3
 
@@ -96,3 +101,18 @@ mrunif <- memoise(runif, cache = gcs)
 mrunif(10) # First run, saves cache
 mrunif(10) # Loads cache, results should be identical
 ```
+
+## PostgreSQL
+
+`cache_postgresql()` saves the cache to a PostgreSQL table. It requires an `RPostgreSQL` connection and a table created ahead of time:
+
+```r
+library(RPostgreSQL)
+# Create a PostgreSQL connection
+pg_con <- dbConnect(PostgreSQL())
+
+pg <- cache_postgresql(pg_con, "cache_table")
+mrunif <- memoise(runif, cache = pg)
+mrunif(10) # First run, saves cache
+mrunif(10) # Loads cache, results should be identical
+```
diff --git a/man/cache_postgresql.Rd b/man/cache_postgresql.Rd
new file mode 100644
index 0000000..b8ed628
--- /dev/null
+++ b/man/cache_postgresql.Rd
@@ -0,0 +1,36 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/cache_postgresql.R
+\name{cache_postgresql}
+\alias{cache_postgresql}
+\title{PostgreSQL Cache
+PostgreSQL-backed cache, for remote caching.}
+\usage{
+cache_postgresql(pg_con, table_name, algo = "sha512", compress = FALSE)
+}
+\arguments{
+\item{pg_con}{An RPostgreSQL connection object}
+
+\item{table_name}{A postgres table name with val and key fields. May include a schema.}
+
+\item{algo}{The hashing algorithm used for the cache, see
+\code{\link[digest]{digest}} for available algorithms.}
+
+\item{compress}{Argument passed to \code{saveRDS}. One of FALSE, "gzip",
+"bzip2" or "xz". Default: FALSE.}
+}
+\description{
+Create a table with key and val columns.
+CREATE TABLE r_cache (
+  key VARCHAR(128) PRIMARY KEY,
+  val TEXT
+)
+}
+\examples{
+
+\dontrun{
+pg <- cache_postgresql(pg_con, "r_cache")
+mem_runif <- memoise(runif, cache = pg)
+}
+
+
+}
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
index af8164c..28bbb39 100644
--- a/tests/testthat/helper.R
+++ b/tests/testthat/helper.R
@@ -16,6 +16,17 @@ skip_without_aws_credentials <- function() {
   testthat::skip("No AWS Credentials")
 }
 
+skip_without_postgres_credentials <- function() {
+  # Sys.setenv("MEMOISE_PG_USER" = "", "MEMOISE_PG_PASSWORD" = "")
+  # Sys.setenv("MEMOISE_PG_DBNAME" = "", "MEMOISE_PG_HOST" = "")
+  # Sys.setenv("MEMOISE_PG_TABLE" = ".")
+  if (nzchar(Sys.getenv("MEMOISE_PG_TABLE"))) {
+    return(invisible(TRUE))
+  }
+
+  testthat::skip("No PostgreSQL Credentials")
+}
+
 skip_on_travis_pr <- function() {
   if (identical(Sys.getenv("TRAVIS"), "true") && !identical(Sys.getenv("TRAVIS_PULL_REQUEST", "false"), "false")) {
     return(testthat::skip("On Travis PR"))
diff --git a/tests/testthat/test-postgresql.R b/tests/testthat/test-postgresql.R
new file mode 100644
index 0000000..171e5d7
--- /dev/null
+++ b/tests/testthat/test-postgresql.R
@@ -0,0 +1,55 @@
+context("postgresql")
+
+setup_cache <- function() {
+  pg_con <- DBI::dbConnect(
+    RPostgreSQL::PostgreSQL(),
+    user = Sys.getenv("MEMOISE_PG_USER"),
+    password = Sys.getenv("MEMOISE_PG_PASSWORD"),
+    dbname = Sys.getenv("MEMOISE_PG_DBNAME"),
+    host = Sys.getenv("MEMOISE_PG_HOST")
+  )
+
+  cache_postgresql(pg_con, Sys.getenv("MEMOISE_PG_TABLE"))
+}
+
+test_that("using a postgresql cache works", {
+  skip_without_postgres_credentials()
+
+  pg <- setup_cache()
+
+  i <- 0
+  fn <- function() { i <<- i + 1; i }
+  fnm <- memoise(fn, cache = pg)
+  on.exit(forget(fnm))
+
+  expect_equal(fn(), 1)
+  expect_equal(fn(), 2)
+  expect_equal(fnm(), 3)
+  expect_equal(fnm(), 3)
+  expect_equal(fn(), 4)
+  expect_equal(fnm(), 3)
+
+  expect_false(forget(fn))
+  expect_true(forget(fnm))
+  expect_equal(fnm(), 5)
+
+  expect_true(drop_cache(fnm)())
+  expect_equal(fnm(), 6)
+
+  expect_true(is.memoised(fnm))
+  expect_false(is.memoised(fn))
+  drop_cache(fnm)()
+})
+
+test_that("two functions with the same arguments produce different caches", {
+  skip_without_postgres_credentials()
+
+  pg <- setup_cache()
+
+  f1 <- memoise(function() 1, cache = pg)
+  f2 <- memoise(function() 2, cache = pg)
+
+  expect_equal(f1(), 1)
+  expect_equal(f2(), 2)
+  drop_cache(f1)()
+})
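
Below is a short, self-contained sketch for exercising the new backend outside the test suite. It is illustrative only: the connection arguments, the `cache` database, and the `r_cache` table name are placeholders to adjust for your environment, and `cache_postgresql()` is the function added by this patch (install the patched package first).

```r
# Illustrative usage of cache_postgresql() against a local PostgreSQL server.
# Connection settings, the "cache" database, and the "r_cache" table name are
# placeholders; adjust them for your setup.
library(DBI)
library(RPostgreSQL)
library(memoise)

pg_con <- dbConnect(PostgreSQL(), user = "postgres", dbname = "cache")

# Create the key/val table the cache expects; sha512 digests are 128 hex chars.
dbExecute(pg_con, "
  CREATE TABLE IF NOT EXISTS r_cache (
    key VARCHAR(128) PRIMARY KEY,
    val TEXT
  )
")

pg <- cache_postgresql(pg_con, "r_cache")

slow_sqrt <- function(x) { Sys.sleep(1); sqrt(x) }
fast_sqrt <- memoise(slow_sqrt, cache = pg)

system.time(fast_sqrt(64))  # ~1s: computed, serialised with saveRDS(ascii = TRUE), stored as TEXT
system.time(fast_sqrt(64))  # fast: value fetched from the table and read back with readRDS()

forget(fast_sqrt)  # clears the whole table, i.e. every function sharing this cache
dbDisconnect(pg_con)
```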