Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add google cloud storage funcs #722

Merged
merged 13 commits into from
Jan 10, 2022
3 changes: 3 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Suggests:
fst (>= 0.9.2),
future (>= 1.19.1),
future.callr (>= 0.6.0),
googleCloudStorageR (>= 0.6.0),
gt (>= 0.2.2),
keras (>= 2.2.5.0),
markdown (>= 1.1),
Expand All @@ -90,6 +91,8 @@ Suggests:
torch (>= 0.1.0),
usethis (>= 1.6.3),
visNetwork (>= 2.0.9)
Remotes:
MarkEdmondson1234/googleCloudStorageR,
wlandau marked this conversation as resolved.
Show resolved Hide resolved
Encoding: UTF-8
Language: en-US
VignetteBuilder: knitr
Expand Down
106 changes: 106 additions & 0 deletions R/utils_gcp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Semi-automated tests of Google Cloud Storage integration live in tests/gcp/. # nolint
# These tests should not be fully automated because they
# automatically create S3 buckets and upload data,
# which could put an unexpected and unfair burden on
# external contributors from the open source community.
# nocov start
gcp_gcs_exists <- function(key,
bucket = gcp_gcs_bucket(),
version = NULL) {
tryCatch(
gcp_gcs_head_true(
key = key,
bucket = bucket,
version = version
),
error = function(condition) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there an error condition that would allow gcp_gcs_exists() to tell if the object really does not exist (as opposed to a configuration error etc.)? When testing locally, gcp_gcs_exists() at first incorrectly returned FALSE (silently) because I had the wrong version of googleCloudStorageR installed and gcs_get_object() could not accept a generation argument. Detecting HTML 400 errors has been helpful for AWS:

http_400 = function(condition) {

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rlang::abort() has a class argument to give custom classes to errors which can then be detected with tryCatch().

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FALSE
}
)
}

# to keep lines shorter
gcp_gcs_bucket <- function(){
googleCloudStorageR::gcs_get_global_bucket()
}

gcp_gcs_head <- function(
key,
bucket = gcp_gcs_bucket(),
version = NULL
) {
suppressMessages(
googleCloudStorageR::gcs_get_object(
key,
bucket = bucket,
meta = TRUE,
generation = version
)
)

MarkEdmondson1234 marked this conversation as resolved.
Show resolved Hide resolved
}

gcp_gcs_head_true <- function(
key,
bucket = gcp_gcs_bucket(),
version = NULL
) {
gcp_gcs_head(
key = key,
bucket = bucket,
version = version
)
TRUE
}

gcp_gcs_download <- function(
file,
key,
bucket = gcp_gcs_bucket(),
version = NULL
) {

googleCloudStorageR::gcs_get_object(
wlandau marked this conversation as resolved.
Show resolved Hide resolved
key,
bucket = bucket,
saveToDisk = file,
overwrite = TRUE,
generation = version
)

}


gcp_gcs_upload <- function(
file,
key,
bucket = gcp_gcs_bucket(),
metadata = list(),
predefined_acl = c(
"private", "bucketLevel", "authenticatedRead",
"bucketOwnerFullControl", "bucketOwnerRead",
"projectPrivate", "publicRead",
"default")
) {

predefined_acl <- match.arg(predefined_acl)

meta <- NULL
if(length(metadata) > 0){
meta <- googleCloudStorageR::gcs_metadata_object(
object_name = key,
metadata = metadata
)
}

googleCloudStorageR::gcs_upload(
MarkEdmondson1234 marked this conversation as resolved.
Show resolved Hide resolved
file,
bucket = bucket,
name = key,
object_metadata = meta,
predefinedAcl = predefined_acl
)

}

# nocov end
195 changes: 195 additions & 0 deletions tests/gcp/test-utils_gcp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
tar_test("gcp_gcs_exists()", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"
MarkEdmondson1234 marked this conversation as resolved.
Show resolved Hide resolved

googleCloudStorageR::gcs_create_bucket(bucket, projectId = projectId)
on.exit(gcp_gcs_delete_bucket(bucket))
expect_false(gcp_gcs_exists(key = "x", bucket = bucket))
tmp <- tempfile()
writeLines("x", tmp)
googleCloudStorageR::gcs_upload(tmp, bucket = bucket, name = "x")
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))
})

tar_test("gcp_gcs_head()", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"

googleCloudStorageR::gcs_create_bucket(bucket, projectId = projectId)
on.exit(gcp_gcs_delete_bucket(bucket))
expect_false(gcp_gcs_exists(key = "x", bucket = bucket))
tmp <- tempfile()
writeLines("x", tmp)
googleCloudStorageR::gcs_upload(tmp, bucket = bucket, name = "x")
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))

head <- gcp_gcs_head(key = "x", bucket = bucket)
expect_true(inherits(head, "gcs_objectmeta"))
expect_true(is.character(head$etag))
expect_true(nzchar(head$etag))
})

tar_test("gcp_gcs_download()", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"

googleCloudStorageR::gcs_create_bucket(bucket, projectId = projectId)
on.exit(gcp_gcs_delete_bucket(bucket))
expect_false(gcp_gcs_exists(key = "x", bucket = bucket))
tmp <- tempfile()
writeLines("x", tmp)
googleCloudStorageR::gcs_upload(tmp, bucket = bucket, name = "x")
tmp2 <- tempfile()
expect_false(file.exists(tmp2))
gcp_gcs_download(file = tmp2, key = "x", bucket = bucket)
expect_equal(readLines(tmp2), "x")
})

tar_test("gcp_gcs_upload() without headers", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"

googleCloudStorageR::gcs_create_bucket(bucket, projectId = projectId)
on.exit(gcp_gcs_delete_bucket(bucket))

expect_false(gcp_gcs_exists(key = "x", bucket = bucket))
tmp <- tempfile()
writeLines("x", tmp)
gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket
)
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))
})

tar_test("gcp_gcs_upload() and download with metadata", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"

googleCloudStorageR::gcs_create_bucket(bucket, projectId = projectId)
on.exit(gcp_gcs_delete_bucket(bucket))

expect_false(gcp_gcs_exists(key = "x", bucket = bucket))
tmp <- tempfile()
writeLines("x", tmp)
gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket,
metadata = list("custom" = "custom_metadata")
)
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))
head <- gcp_gcs_head(key = "x", bucket = bucket)
expect_equal(head$metadata$custom, "custom_metadata")
tmp2 <- tempfile()
expect_false(file.exists(tmp2))
gcp_gcs_download(file = tmp2, key = "x", bucket = bucket)
expect_equal(readLines(tmp2), "x")
})

tar_test("gcp_gcs upload twice, get the correct version", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"
googleCloudStorageR::gcs_create_bucket(bucket,
projectId = projectId,
versioning = TRUE)
on.exit(gcp_gcs_delete_bucket(bucket))

tmp <- tempfile()
writeLines("first", tmp)
head_first <- gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket,
metadata = list("custom" = "first-meta")
)
v1 <- head_first$generation
writeLines("second", tmp)
head_second <- gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket,
metadata = list("custom" = "second-meta")
)
v2 <- head_second$generation
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))
expect_true(gcp_gcs_exists(key = "x", bucket = bucket, version = v1))
expect_true(gcp_gcs_exists(key = "x", bucket = bucket, version = v2))
expect_false(gcp_gcs_exists(key = "x", bucket = bucket, version = "v3"))
h1 <- gcp_gcs_head(key = "x", bucket = bucket, version = v1)
h2 <- gcp_gcs_head(key = "x", bucket = bucket, version = v2)
expect_equal(h1$generation, v1)
expect_equal(h2$generation, v2)
expect_equal(h1$metadata$custom, "first-meta")
expect_equal(h2$metadata$custom, "second-meta")
unlink(tmp)
gcp_gcs_download(file = tmp, key = "x", bucket = bucket, version = v1)
expect_equal(readLines(tmp), "first")
gcp_gcs_download(file = tmp, key = "x", bucket = bucket, version = v2)
expect_equal(readLines(tmp), "second")
})

tar_test("gcp_gcs_upload: upload twice, get the correct version", {
skip_if_no_gcp()
auth_gcp()
bucket <- random_bucket_name()
# needs to be a GCP project the tester auth has access to
projectId <- "gcp-setup-demo"
googleCloudStorageR::gcs_create_bucket(bucket,
projectId = projectId,
versioning = TRUE)
on.exit(gcp_gcs_delete_bucket(bucket))

tmp <- tempfile()
writeLines("first", tmp)
head_first <- gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket,
metadata = list("custom" = "first-meta")
)
v1 <- head_first$generation
writeLines("second", tmp)
head_second <- gcp_gcs_upload(
file = tmp,
key = "x",
bucket = bucket,
metadata = list("custom" = "second-meta")
)
v2 <- head_second$generation
expect_true(gcp_gcs_exists(key = "x", bucket = bucket))
expect_true(gcp_gcs_exists(key = "x", bucket = bucket, version = v1))
expect_true(gcp_gcs_exists(key = "x", bucket = bucket, version = v2))
expect_false(gcp_gcs_exists(key = "x", bucket = bucket, version = "v3"))
h1 <- gcp_gcs_head(key = "x", bucket = bucket, version = v1)
h2 <- gcp_gcs_head(key = "x", bucket = bucket, version = v2)
expect_equal(h1$generation, v1)
expect_equal(h2$generation, v2)
expect_equal(h1$metadata$custom, "first-meta")
expect_equal(h2$metadata$custom, "second-meta")
unlink(tmp)
gcp_gcs_download(file = tmp, key = "x", bucket = bucket, version = v1)
expect_equal(readLines(tmp), "first")
gcp_gcs_download(file = tmp, key = "x", bucket = bucket, version = v2)
expect_equal(readLines(tmp), "second")
})

39 changes: 39 additions & 0 deletions tests/testthat/helper-gcp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
skip_if_no_gcp <- function() {
skip_if(Sys.getenv("GCS_AUTH_FILE") == "")
skip_if_not_installed("googleCloudStorageR")
skip_if_offline()
skip_on_cran()
}

auth_gcp <- function() {
# the auth service email in this file needs GCP "Storage Admin" IAM role
googleCloudStorageR::gcs_auth(Sys.getenv("GCS_AUTH_FILE"))
}

gcp_gcs_delete_bucket <- function(bucket){
# have to delete all objects in a bucket first
os <- googleCloudStorageR::gcs_list_objects(
bucket,
versions = TRUE,
detail = "full")

safe_delete <- function(x, bucket, version = NULL){
tryCatch({
googleCloudStorageR::gcs_delete_object(x,
bucket = bucket,
generation = version)
}, error = function(ex) {
NULL
})
}

lapply(os$name, safe_delete, bucket = bucket)
mapply(safe_delete,
x = os$name, version = os$generation,
MoreArgs = list(bucket = bucket))

bb <- googleCloudStorageR::gcs_get_bucket(bucket)

googleCloudStorageR::gcs_delete_bucket(bb$name)

}
MarkEdmondson1234 marked this conversation as resolved.
Show resolved Hide resolved
1 change: 1 addition & 0 deletions tests/testthat/test-tar_make_clustermq.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ tar_test("custom script and store args with callr function", {
tar_test("bootstrap builder for shortcut", {
skip_on_cran()
skip_on_os("windows")
skip_if_not_installed("clustermq")
wlandau marked this conversation as resolved.
Show resolved Hide resolved
tar_script({
options(clustermq.scheduler = "multicore")
list(
Expand Down