Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transition from httr to httr2 #198

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Imports:
dbplyr (>= 1.1.0),
dplyr (>= 0.7.4),
httr (>= 1.3.1),
httr2,
lubridate (>= 1.6.0),
rappdirs (>= 0.3.1),
readr (>= 1.1.1),
Expand Down
36 changes: 21 additions & 15 deletions R/download.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,15 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
new_hydat <- hy_remote()
# Make the download URL
url <- paste0(hy_base_url(), "Hydat_sqlite3_", new_hydat, ".zip")
response <- httr::HEAD(url)
httr::stop_for_status(response)
size <- round(as.numeric(httr::headers(response)[["Content-Length"]]) / 1000000, 0)
req <- httr2::request(url)
req <- httr2::req_method(req, "HEAD")
req <- tidyhydat_agent(req)
req <- tidyhydat_perform(req)
httr2::resp_check_status(req)

size <- round(as.numeric(
httr2::resp_header(req, "Content-Length")
) / 1000000, 0)


## Do we need to download a new version?
Expand All @@ -77,11 +83,10 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
if (!dl_overwrite) {
info("HYDAT is updated on a quarterly basis, check again soon for an updated version.")
}

if (new_hydat != existing_hydat & ask) { # New DB available or no local DB at all
msg <- paste0(
"Downloading HYDAT will take up to 10 minutes (",
size, " MB). \nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?"
"This version of HYDAT is ", size, "MB in size and will take some time to download.
\nThis will remove any older versions of HYDAT, if applicable. \nIs that okay?"
)
ans <- ask(msg)
} else {
Expand All @@ -106,12 +111,10 @@ download_hydat <- function(dl_hydat_here = NULL, ask = TRUE) {
tmp <- tempfile("hydat_", fileext = ".zip")

## Download the zip file
res <- httr::GET(
url, httr::write_disk(tmp), httr::progress("down"),
httr::user_agent("https://github.com/ropensci/tidyhydat")
)
on.exit(file.remove(tmp), add = TRUE)
httr::stop_for_status(res)
hydb_req <- httr2::request(url)
hydb_req <- tidyhydat_agent(hydb_req)
resp <- tidyhydat_perform(hydb_req, path = tmp)
httr2::resp_check_status(resp)

## Extract the file to a temporary dir
if (file.exists(tmp)) info("Extracting HYDAT")
Expand Down Expand Up @@ -152,10 +155,13 @@ hy_remote <- function() {
# Run network check
network_check(hy_base_url())

x <- httr::GET(hy_base_url())
httr::stop_for_status(x)
req <- httr2::request(hy_base_url())
req <- tidyhydat_perform(req)
resp <- httr2::resp_check_status(req)


raw_date <- substr(
gsub("^.*\\Hydat_sqlite3_", "", httr::content(x, "text")),
gsub("^.*\\Hydat_sqlite3_", "", httr2::resp_body_string(req)),
1, 8
)

Expand Down
74 changes: 44 additions & 30 deletions R/realtime-webservice.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,10 @@
#' @export


realtime_ws <- function(station_number, parameters = NULL,
start_date = Sys.Date() - 30, end_date = Sys.Date()) {
# if (length(station_number) >= 300) {
# stop("Only 300 stations are supported for one request. If more stations are required,
# a separate request should be issued to include the excess stations. This second request can
# be issued on the same token if it isn't required.")
# }
realtime_ws <- function(station_number,
parameters = NULL,
start_date = Sys.Date() - 30,
end_date = Sys.Date()) {

if (is.null(parameters)) parameters <- c(46, 16, 52, 47, 8, 5, 41, 18)

Expand All @@ -93,17 +90,26 @@ realtime_ws <- function(station_number, parameters = NULL,


if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", start_date)) {
stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}

if (!grepl("[0-9]{4}-[0-1][0-9]-[0-3][0-9] [0-2][0-9]:[0-5][0-9]:[0-5][0-9]", end_date)) {
stop("Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats", call. = FALSE)
stop(
"Invalid date format. start_date need to be in either YYYY-MM-DD or YYYY-MM-DD HH:MM:SS formats",
call. = FALSE
)
}


if (!is.null(start_date) & !is.null(end_date)) {
if (lubridate::ymd_hms(end_date) < lubridate::ymd_hms(start_date)) {
stop("start_date is after end_date. Try swapping values.", call. = FALSE)
stop(
"start_date is after end_date. Try swapping values.",
call. = FALSE
)
}
}

Expand All @@ -114,64 +120,72 @@ realtime_ws <- function(station_number, parameters = NULL,

## Build link for GET
baseurl <- "https://wateroffice.ec.gc.ca/services/real_time_data/csv/inline?"


station_string <- paste0("stations[]=", station_number, collapse = "&")
parameters_string <- paste0("parameters[]=", parameters, collapse = "&")
date_string <- paste0("start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19))
date_string <- paste0(
"start_date=", substr(start_date, 1, 10), "%20", substr(start_date, 12, 19),
"&end_date=", substr(end_date, 1, 10), "%20", substr(end_date, 12, 19)
)

## paste them all together
url_for_GET <- paste0(
query_url <- paste0(
baseurl,
station_string, "&",
parameters_string, "&",
date_string
)

## Get data
get_ws <- httr::GET(url_for_GET, httr::user_agent("https://github.com/ropensci/tidyhydat"))
req <- httr2::request(query_url)
req <- tidyhydat_agent(req)
resp <- httr2::req_perform(req)

## Give webservice some time
Sys.sleep(1)


## Check the GET status
httr::stop_for_status(get_ws)

if (httr::headers(get_ws)$`content-type` != "text/csv; charset=utf-8") {
stop("GET response is not a csv file")
## Check the respstatus
httr2::resp_check_status(resp)


if (httr2::resp_headers(resp)$`Content-Type` != "text/csv; charset=utf-8") {
stop("Response is not a csv file")
}

## Turn it into a tibble and specify correct column classes
csv_df <- httr::content(
get_ws,
type = "text/csv",
encoding = "UTF-8",
col_types = "cTidcci")
csv_df <- readr::read_csv(
httr2::resp_body_string(resp),
col_types = "cTidccc"
)


## Check here to see if csv_df has any data in it
if (nrow(csv_df) == 0) {
stop("No data exists for this station query")
}

## Rename columns to reflect tidyhydat naming
colnames(csv_df) <- c("STATION_NUMBER","Date","Parameter","Value","Grade","Symbol","Approval")
colnames(csv_df) <- c("STATION_NUMBER", "Date", "Parameter", "Value", "Grade", "Symbol", "Approval")

csv_df <- dplyr::left_join(
csv_df,
dplyr::select(tidyhydat::param_id, -Name_Fr),
by = c("Parameter")
)
csv_df <- dplyr::select(csv_df, STATION_NUMBER, Date, Name_En, Value, Unit,
Grade, Symbol, Approval, Parameter, Code)
csv_df <- dplyr::select(
csv_df, STATION_NUMBER, Date, Name_En, Value, Unit,
Grade, Symbol, Approval, Parameter, Code
)

## What stations were missed?
differ <- setdiff(unique(station_number), unique(csv_df$STATION_NUMBER))
if (length(differ) != 0) {
if (length(differ) <= 10) {
message("The following station(s) were not retrieved: ", paste0(differ, sep = " "))
message("Check station number for typos or if it is a valid station in the network")
}
else {
} else {
message("More than 10 stations from the initial query were not returned. Ensure realtime and active status are correctly specified.")
}
} else {
Expand All @@ -180,7 +194,7 @@ realtime_ws <- function(station_number, parameters = NULL,

p_differ <- setdiff(unique(parameters), unique(csv_df$Parameter))
if (length(p_differ) != 0) {
message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " "))
message("The following valid parameter(s) were not retrieved for at least one station you requested: ", paste0(p_differ, sep = " "))
} else {
message("All parameters successfully retrieved")
}
Expand Down
4 changes: 2 additions & 2 deletions R/realtime_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ plot.realtime <- function(x = NULL, Parameter = c("Flow", "Level"), ...) {
Parameter <- match.arg(Parameter)

if (length(unique(x$STATION_NUMBER)) > 1L) {
stop("realtime plot methods only work with objects that contain one station", call. = FALSE)
stop("realtime plots only work with objects that contain one station", call. = FALSE)
}

if (is.null(x)) stop("Station(s) not present in the datamart")
if (is.null(x)) stop("Station not present in the datamart")

## Catch mis labelled parameter
if (Parameter == "Level" && ((nrow(x[x$Parameter == "Level", ]) == 0) | all(is.na(x[x$Parameter == "Level", ]$Value)))) {
Expand Down
20 changes: 18 additions & 2 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,12 @@ ask <- function(...) {
# issues and fail with an informative error
# message on where to download HYDAT.
#' @noRd
network_check <- function(url) {
tryCatch(httr::GET(url),
network_check <- function(url, proxy_url = NULL, proxy_port = NULL) {
req <- httr2::request(base_url = url)
if (!is.null(proxy_url) && !is.null(proxy_port)) {
req <- httr2::req_proxy(req, url = proxy_url, port = proxy_port)
}
tryCatch(httr2::req_perform(req),
error = function(e) {
if (grepl("Timeout was reached:", e$message)) {
stop(paste0("Could not connect to HYDAT source. Check your connection settings.
Expand All @@ -199,6 +203,12 @@ network_check <- function(url) {
)
}

tidyhydat_agent <- function(req) {
httr2::req_user_agent(
req,
string = "https://github.com/ropensci/tidyhydat")
}


#' Convenience function to pull station number from tidyhydat functions
#'
Expand Down Expand Up @@ -246,4 +256,10 @@ hy_expected_tbls <- function() {
is_mac <- function() {
system_info <- Sys.info()
grepl("darwin", tolower(system_info["sysname"]))
}

tidyhydat_perform <- function(req, ...) {
req <- httr2::req_retry(req, max_tries = 5)
req <- httr2::req_progress(req)
httr2::req_perform(req, ...)
}
7 changes: 7 additions & 0 deletions tests/testthat/_snaps/utils.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# downloading hydat fails behind a proxy server with informative error message

Could not connect to HYDAT source. Check your connection settings.
Try downloading HYDAT_sqlite3 from this url:
[http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/]
and unzipping the saved file to this directory: /Users/samalbers/Library/Application Support/tidyhydat

14 changes: 4 additions & 10 deletions tests/testthat/test_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,11 @@ test_that("hy_version returns a dataframe and works", {

test_that("downloading hydat fails behind a proxy server with informative error message", {
skip_on_cran()
httr::set_config(httr::use_proxy("64.251.21.73", 8080), override = TRUE)
skip_on_ci()
base_url_cmc <- "http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/"
expect_error(tidyhydat:::network_check(base_url_cmc), message = paste0(
"Error: Could not connect to HYDAT source.",
"Check your connection settings.",
"Try downloading HYDAT_sqlite3 from this url: ",
"[http://collaboration.cmc.ec.gc.ca/cmc/hydrometrics/www/]",
"and unzipping the saved file to this directory: ",
hy_dir()
))
httr::reset_config()
expect_snapshot_error(
tidyhydat:::network_check(base_url_cmc, "64.251.21.73", 8080)
)
})


Expand Down
Loading