diff --git a/NEWS.md b/NEWS.md index 074155557..4ffe698a2 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,7 +10,9 @@ Also, there are some updates. ### Bug fixes -- `$len()` now correctly includes `null` values in the count (#1044). +* `$len()` now correctly includes `null` values in the count (#1044). +* In some read/scan functions, downloading files could fail if the URL was too + long. This is now fixed (#1049, @DyfanJones). ### Other improvements diff --git a/R/io_csv.R b/R/io_csv.R index dfadc407c..4002eb6fc 100644 --- a/R/io_csv.R +++ b/R/io_csv.R @@ -134,7 +134,9 @@ pl_scan_csv = function( return(RPolarsRNullValues$new_named(unlist(null_values))) } - stop("null_values arg must be a string OR unamed char vec OR named char vec") + Err_plain( + "null_values arg must be a string OR unamed char vec OR named char vec" + ) |> unwrap("in pl$scan_csv():") })() args$null_values = RNullValues @@ -187,6 +189,7 @@ pl_read_csv = function( unwrap("in pl$read_csv():") } +cache_temp_file = new.env(parent = new.env()) check_is_link = function(path, reuse_downloaded, raise_error = FALSE) { # do nothing let path fail on rust side if (is.na(path)) { @@ -213,16 +216,17 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) { # try download file if valid url if (!is.null(con)) { close(con) - tmp_file = paste0(tempdir(), "/", make.names(actual_url)) - if (isFALSE(reuse_downloaded) || isFALSE(file.exists(tmp_file))) { - download.file(url = actual_url, destfile = tmp_file) - message(paste("tmp file placed in \n", tmp_file)) + if (is.null(cache_temp_file[[actual_url]])) + cache_temp_file[[actual_url]] <- tempfile() + if (isFALSE(reuse_downloaded) || isFALSE(file.exists(cache_temp_file[[actual_url]]))) { + download.file(url = actual_url, destfile = cache_temp_file[[actual_url]]) + message(paste("tmp file placed in \n", cache_temp_file[[actual_url]])) } - path = tmp_file # redirect path to tmp downloaded file + path = cache_temp_file[[actual_url]] # redirect path to tmp downloaded file } else { if (raise_error) { - stop("failed to locate file at path/url: ", path) + Err_plain(paste("failed to locate file at path/url:", path)) |> unwrap() } # do nothing let path fail on rust side path = NULL @@ -235,7 +239,8 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) { list_to_datatype_vector = function(x) { if (!is.list(x) || !is_named(x)) { - stop("could not interpret dtypes, must be a named list of DataTypes") + Err_plain("could not interpret dtypes, must be a named list of DataTypes") |> + unwrap() } datatype_vector = RPolarsDataTypeVector$new() # mutable mapply( @@ -247,7 +252,8 @@ list_to_datatype_vector = function(x) { type = DataType$new(type) } if (!inherits(type, "RPolarsDataType")) { - stop("arg dtypes must be a named list of dtypes or dtype names") + Err_plain("arg dtypes must be a named list of dtypes or dtype names") |> + unwrap() } datatype_vector$push(name, type) } diff --git a/tests/testthat/test-csv-read.R b/tests/testthat/test-csv-read.R index 9a4d874cd..bc2fa5cc8 100644 --- a/tests/testthat/test-csv-read.R +++ b/tests/testthat/test-csv-read.R @@ -181,3 +181,19 @@ test_that("bad paths", { "failed to locate file" ) }) + +test_that("cache url tempfile", { + skip_if_offline() + url <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BenderlyZwick.csv" + local_mocked_bindings( + download.file = function(...) invisible(NULL), + ) + check_is_link(url, reuse_downloaded = TRUE) + attempt_1 = cache_temp_file[[url]] + + check_is_link(url, reuse_downloaded = TRUE) + attempt_2 = cache_temp_file[[url]] + + expect_true(!is.null(cache_temp_file[[url]])) + expect_equal(attempt_1, attempt_2) +})