Skip to content

Commit

Permalink
fix: store URLs in environment to avoid issues with long filenames (#…
Browse files Browse the repository at this point in the history
…1049)

Co-authored-by: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com>
  • Loading branch information
DyfanJones and etiennebacher committed Apr 23, 2024
1 parent 636daa0 commit 5754d06
Show file tree
Hide file tree
Showing 3 changed files with 34 additions and 10 deletions.
4 changes: 3 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ Also, there are some updates.

### Bug fixes

- `$len()` now correctly includes `null` values in the count (#1044).
* `$len()` now correctly includes `null` values in the count (#1044).
* In some read/scan functions, downloading files could fail if the URL was too
long. This is now fixed (#1049, @DyfanJones).

### Other improvements

Expand Down
24 changes: 15 additions & 9 deletions R/io_csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ pl_scan_csv = function(
return(RPolarsRNullValues$new_named(unlist(null_values)))
}

stop("null_values arg must be a string OR unamed char vec OR named char vec")
Err_plain(
"null_values arg must be a string OR unamed char vec OR named char vec"
) |> unwrap("in pl$scan_csv():")
})()

args$null_values = RNullValues
Expand Down Expand Up @@ -187,6 +189,7 @@ pl_read_csv = function(
unwrap("in pl$read_csv():")
}

cache_temp_file = new.env(parent = new.env())
check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {
# do nothing let path fail on rust side
if (is.na(path)) {
Expand All @@ -213,16 +216,17 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {
# try download file if valid url
if (!is.null(con)) {
close(con)
tmp_file = paste0(tempdir(), "/", make.names(actual_url))
if (isFALSE(reuse_downloaded) || isFALSE(file.exists(tmp_file))) {
download.file(url = actual_url, destfile = tmp_file)
message(paste("tmp file placed in \n", tmp_file))
if (is.null(cache_temp_file[[actual_url]]))
cache_temp_file[[actual_url]] <- tempfile()
if (isFALSE(reuse_downloaded) || isFALSE(file.exists(cache_temp_file[[actual_url]]))) {
download.file(url = actual_url, destfile = cache_temp_file[[actual_url]])
message(paste("tmp file placed in \n", cache_temp_file[[actual_url]]))
}

path = tmp_file # redirect path to tmp downloaded file
path = cache_temp_file[[actual_url]] # redirect path to tmp downloaded file
} else {
if (raise_error) {
stop("failed to locate file at path/url: ", path)
Err_plain(paste("failed to locate file at path/url:", path)) |> unwrap()
}
# do nothing let path fail on rust side
path = NULL
Expand All @@ -235,7 +239,8 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {

list_to_datatype_vector = function(x) {
if (!is.list(x) || !is_named(x)) {
stop("could not interpret dtypes, must be a named list of DataTypes")
Err_plain("could not interpret dtypes, must be a named list of DataTypes") |>
unwrap()
}
datatype_vector = RPolarsDataTypeVector$new() # mutable
mapply(
Expand All @@ -247,7 +252,8 @@ list_to_datatype_vector = function(x) {
type = DataType$new(type)
}
if (!inherits(type, "RPolarsDataType")) {
stop("arg dtypes must be a named list of dtypes or dtype names")
Err_plain("arg dtypes must be a named list of dtypes or dtype names") |>
unwrap()
}
datatype_vector$push(name, type)
}
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-csv-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,19 @@ test_that("bad paths", {
"failed to locate file"
)
})

test_that("cache url tempfile", {
skip_if_offline()
url <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BenderlyZwick.csv"
local_mocked_bindings(
download.file = function(...) invisible(NULL),
)
check_is_link(url, reuse_downloaded = TRUE)
attempt_1 = cache_temp_file[[url]]

check_is_link(url, reuse_downloaded = TRUE)
attempt_2 = cache_temp_file[[url]]

expect_true(!is.null(cache_temp_file[[url]]))
expect_equal(attempt_1, attempt_2)
})

0 comments on commit 5754d06

Please sign in to comment.