Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: store URLs in environment to avoid issues with long filenames #1049

Merged
merged 12 commits into from
Apr 23, 2024
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Bug fixes

* `$len()` now correctly includes `null` values in the count (#1044).
* cache temporary file name for url file download (#1046)
etiennebacher marked this conversation as resolved.
Show resolved Hide resolved

## Polars R Package 0.16.0

Expand Down
24 changes: 15 additions & 9 deletions R/io_csv.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,9 @@ pl_scan_csv = function(
return(RPolarsRNullValues$new_named(unlist(null_values)))
}

stop("null_values arg must be a string OR unamed char vec OR named char vec")
Err_plain(
"null_values arg must be a string OR unamed char vec OR named char vec"
) |> unwrap(" in pl$scan_csv():")
DyfanJones marked this conversation as resolved.
Show resolved Hide resolved
})()

args$null_values = RNullValues
Expand Down Expand Up @@ -187,6 +189,7 @@ pl_read_csv = function(
unwrap("in pl$read_csv():")
}

cache_temp_file = new.env(parent = new.env())
check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {
# do nothing let path fail on rust side
if (is.na(path)) {
Expand All @@ -213,16 +216,17 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {
# try download file if valid url
if (!is.null(con)) {
close(con)
tmp_file = paste0(tempdir(), "/", make.names(actual_url))
if (isFALSE(reuse_downloaded) || isFALSE(file.exists(tmp_file))) {
download.file(url = actual_url, destfile = tmp_file)
message(paste("tmp file placed in \n", tmp_file))
if (is.null(cache_temp_file[[actual_url]]))
cache_temp_file[[actual_url]] <- tempfile()
if (isFALSE(reuse_downloaded) || isFALSE(file.exists(cache_temp_file[[actual_url]]))) {
download.file(url = actual_url, destfile = cache_temp_file[[actual_url]])
message(paste("tmp file placed in \n", cache_temp_file[[actual_url]]))
}

path = tmp_file # redirect path to tmp downloaded file
path = cache_temp_file[[actual_url]] # redirect path to tmp downloaded file
} else {
if (raise_error) {
stop("failed to locate file at path/url: ", path)
Err_plain(paste("failed to locate file at path/url:", path)) |> unwrap()
}
# do nothing let path fail on rust side
path = NULL
Expand All @@ -235,7 +239,8 @@ check_is_link = function(path, reuse_downloaded, raise_error = FALSE) {

list_to_datatype_vector = function(x) {
if (!is.list(x) || !is_named(x)) {
stop("could not interpret dtypes, must be a named list of DataTypes")
Err_plain("could not interpret dtypes, must be a named list of DataTypes") |>
unwrap()
}
datatype_vector = RPolarsDataTypeVector$new() # mutable
mapply(
Expand All @@ -247,7 +252,8 @@ list_to_datatype_vector = function(x) {
type = DataType$new(type)
}
if (!inherits(type, "RPolarsDataType")) {
stop("arg dtypes must be a named list of dtypes or dtype names")
Err_plain("arg dtypes must be a named list of dtypes or dtype names") |>
unwrap()
}
datatype_vector$push(name, type)
}
Expand Down
16 changes: 16 additions & 0 deletions tests/testthat/test-csv-read.R
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,19 @@ test_that("bad paths", {
"failed to locate file"
)
})

test_that("cache url tempfile", {
skip_if_offline()
url <- "https://vincentarelbundock.github.io/Rdatasets/csv/AER/BenderlyZwick.csv"
local_mocked_bindings(
download.file = function(...) invisible(NULL),
)
check_is_link(url, reuse_downloaded = TRUE)
attempt_1 = cache_temp_file[[url]]

check_is_link(url, reuse_downloaded = TRUE)
attempt_2 = cache_temp_file[[url]]

expect_true(!is.null(cache_temp_file[[url]]))
expect_equal(attempt_1, attempt_2)
})