From b1aecacc4fb522a18e36b642e7633515940c2d51 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 14:39:31 -0300 Subject: [PATCH 1/6] Improve how we download stuff: increase timeout + download to temp then copy to avoid corrupted files living in the cache dir. --- R/dataset-cifar.R | 2 +- R/utils.R | 14 ++++++++++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/R/dataset-cifar.R b/R/dataset-cifar.R index 53fa8511..7f4dcb59 100644 --- a/R/dataset-cifar.R +++ b/R/dataset-cifar.R @@ -83,7 +83,7 @@ cifar10_dataset <- torch::dataset( p <- download_and_cache(self$url) if (!tools::md5sum(p) == self$md5) - runtime_error("Corrupt file!") + runtime_error(sprintf("Corrupt file! Delete the file in '%s' and try again.", p)) utils::untar(p, exdir = self$root) }, diff --git a/R/utils.R b/R/utils.R index 6bd80456..ede6427e 100644 --- a/R/utils.R +++ b/R/utils.R @@ -6,8 +6,18 @@ download_and_cache <- function(url, redownload = FALSE) { fs::dir_create(cache_path) path <- file.path(cache_path, fs::path_file(url)) - if (!file.exists(path) || redownload) - utils::download.file(url, path, mode = "wb") + if (!file.exists(path) || redownload) { + # we should first download to a temporary file because + # download problems could cause hard to debug errors. + tmp <- tempfile(fileext = fs::path_ext(path)) + on.exit({try({fs::file_delete(tmp)}, silent = TRUE)}, add = TRUE) + + withr::with_options( + list(timeout = 600), + utils::download.file(url, tmp, mode = "wb") + ) + fs::file_move(tmp, path) + } path } From c821bf18174ba8e4957f3ab0e751fd2f677f0c15 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 14:56:30 -0300 Subject: [PATCH 2/6] Handle all datasets with `download_and_cache`. 
--- R/dataset-mnist.R | 8 +++----- R/tiny-imagenet-dataset.R | 3 ++- R/utils.R | 6 +++++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/R/dataset-mnist.R b/R/dataset-mnist.R index 65c95f51..0e066873 100644 --- a/R/dataset-mnist.R +++ b/R/dataset-mnist.R @@ -64,13 +64,11 @@ mnist_dataset <- dataset( filename <- tail(strsplit(r[1], "/")[[1]], 1) destpath <- file.path(self$raw_folder, filename) - withr::with_options( - list(timeout = 600), - utils::download.file(r[1], destfile = destpath) - ) + p <- download_and_cache(r[1], prefix = class(self)[1]) + fs::file_copy(p, destpath) if (!tools::md5sum(destpath) == r[2]) - runtime_error("MD5 sums are not identical for file: {r[1}.") + runtime_error("MD5 sums are not identical for file: {r[1]}.") } diff --git a/R/tiny-imagenet-dataset.R b/R/tiny-imagenet-dataset.R index 0205d62b..f09180e3 100644 --- a/R/tiny-imagenet-dataset.R +++ b/R/tiny-imagenet-dataset.R @@ -41,7 +41,8 @@ tiny_imagenet_dataset <- torch::dataset( rlang::inform("Downloding tiny imagenet dataset!") - download.file(self$url, raw_path) + p <- download_and_cache(self$url) + fs::file_copy(p, raw_path) rlang::inform("Download complete. 
Now unzipping.") diff --git a/R/utils.R b/R/utils.R index ede6427e..1f145b30 100644 --- a/R/utils.R +++ b/R/utils.R @@ -1,9 +1,13 @@ -download_and_cache <- function(url, redownload = FALSE) { +download_and_cache <- function(url, redownload = FALSE, prefix = NULL) { cache_path <- rappdirs::user_cache_dir("torch") fs::dir_create(cache_path) + if (!is.null(prefix)) { + cache_path <- file.path(cache_path, prefix) + } + try(fs::dir_create(cache_path, recurse = TRUE), silent = TRUE) path <- file.path(cache_path, fs::path_file(url)) if (!file.exists(path) || redownload) { From 87545e672921f984c1c3341c5825786b75c70d0c Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 15:04:36 -0300 Subject: [PATCH 3/6] Trigger build --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 04336e59..ea16d3d0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: torchvision Title: Models, Datasets and Transformations for Images -Version: 0.4.0.9000 +Version: 0.4.0.9001 Authors@R: c( person(given = "Daniel", family = "Falbel", From dbf7dad3bbfadbe1d931cb8a6f55f7942f17e6d9 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 15:23:40 -0300 Subject: [PATCH 4/6] Add dependency on withr. --- DESCRIPTION | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ea16d3d0..82943801 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,8 @@ RoxygenNote: 7.1.2 Suggests: testthat, magick, - coro + coro, + withr Imports: torch (>= 0.3.0), fs, From e9374a09a340540c4cf9cbf925d213adcbca6535 Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 15:38:37 -0300 Subject: [PATCH 5/6] Try to fix suspected memory error. 
--- tests/testthat/test-models-vgg.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-models-vgg.R b/tests/testthat/test-models-vgg.R index 200d505a..d035bfd7 100644 --- a/tests/testthat/test-models-vgg.R +++ b/tests/testthat/test-models-vgg.R @@ -23,7 +23,7 @@ test_that("vgg models works", { for (m in vggs) { model <- m(pretrained = TRUE) - expect_tensor_shape(model(torch_ones(5, 3, 224, 224)), c(5, 1000)) + expect_tensor_shape(model(torch_ones(1, 3, 224, 224)), c(1, 1000)) rm(model) gc() From 8182802de702f1f4ec2b989c56800282e25c44db Mon Sep 17 00:00:00 2001 From: Daniel Falbel Date: Fri, 28 Jan 2022 16:11:36 -0300 Subject: [PATCH 6/6] Skip VGG on CI --- tests/testthat/test-models-vgg.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-models-vgg.R b/tests/testthat/test-models-vgg.R index d035bfd7..3b02c061 100644 --- a/tests/testthat/test-models-vgg.R +++ b/tests/testthat/test-models-vgg.R @@ -18,8 +18,9 @@ test_that("vgg models works", { } - skip_on_os(os = "mac") # not downloading a bunch of files locally. - skip_on_os(os = "windows") # not downloading a bunch of files locally. + skip_on_ci() # unfortunately we don't have enough RAM on CI for that. + #skip_on_os(os = "mac") # not downloading a bunch of files locally. + #skip_on_os(os = "windows") # not downloading a bunch of files locally. for (m in vggs) { model <- m(pretrained = TRUE)