From b0adc8124b2c6499c7a4bc9cc92a92949ce7b7bf Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Sun, 28 Jul 2019 00:07:57 -0700 Subject: [PATCH 1/3] better error messages when statcan server is unavailable --- R/cansim_helpers.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/R/cansim_helpers.R b/R/cansim_helpers.R index bbf81ebd..515e151d 100644 --- a/R/cansim_helpers.R +++ b/R/cansim_helpers.R @@ -42,6 +42,10 @@ response_status_code_translation <- list( "8"="Invalid number of reference periods" ) +response_error_translation <- list( + "503"="StatCan website is currently unavailable" +) + get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA){ if (!is.na(path)) { response <- purrr::safely(httr::GET)(url,httr::timeout(timeout),httr::write_disk(path,overwrite = TRUE)) @@ -56,6 +60,10 @@ get_with_timeout_retry <- function(url,timeout=200,retry=3,path=NA){ message("Got timeout from StatCan, giving up") response=response$result } + } else if (response$result$status_code %in% names(response_error_translation)){ + stop(sprintf("%s\nReturned status code %s",response_error_translation[[as.character(response$result$status_code)]], response$result$status_code),call.=FALSE) + } else if (response$result$status_code != 200){ + stop(sprintf("Problem downloading data, returned status code %s.",response$result$status_code),call.=FALSE) } else { response=response$result } From 99aac31ab5ab84bd1232d1f01d34220175fc02d6 Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Wed, 14 Aug 2019 14:42:14 -0700 Subject: [PATCH 2/3] Fix issue with new 6000 row limit. --- R/cansim_vectors.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/cansim_vectors.R b/R/cansim_vectors.R index a58776f0..44b2ee26 100644 --- a/R/cansim_vectors.R +++ b/R/cansim_vectors.R @@ -1,3 +1,5 @@ +MAX_PERIODS = 6000 + extract_vector_data <- function(data1){ vf=list("DECIMALS"="decimals", "VALUE"="value", @@ -104,7 +106,7 @@ get_cansim_vector<-function(vectors, start_time, end_time=Sys.Date(), use_ref_da else result <- tibble::tibble() } else { - result <- get_cansim_vector_for_latest_periods(vectors,periods=10000) %>% + result <- get_cansim_vector_for_latest_periods(vectors,periods=MAX_PERIODS) %>% filter(as.Date(.data$REF_DATE)>=start_time,as.Date(.data$REF_DATE)<=end_time) } result @@ -124,6 +126,10 @@ get_cansim_vector<-function(vectors, start_time, end_time=Sys.Date(), use_ref_da #' #' @export get_cansim_vector_for_latest_periods<-function(vectors, periods=1){ + if (periods*length(vectors)>MAX_PERIODS) { + periods=pmin(periods,floor(as.numeric(MAX_PERIODS)/length(vectors))) + warning(paste0("Can access at most ",MAX_PERIODS," data points, capping value to ",periods," periods per vector.")) + } vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it url="https://www150.statcan.gc.ca/t1/wds/rest/getDataFromVectorsAndLatestNPeriods" vectors_string=paste0("[",paste(purrr::map(as.character(vectors),function(x)paste0('{"vectorId":',x,',"latestN":',periods,'}')),collapse = ", "),"]") From 4ae06b9ccf077219d30bc4d2eb92598a37ad7e0e Mon Sep 17 00:00:00 2001 From: Jens von Bergmann Date: Thu, 15 Aug 2019 16:41:56 -0700 Subject: [PATCH 3/3] Update documentation. --- DESCRIPTION | 2 +- NEWS.md | 8 +++ R/cansim_vectors.R | 63 +++++++++--------- docs/LICENSE-text.html | 4 +- docs/LICENSE.html | 4 +- docs/articles/cansim.html | 18 ++--- docs/articles/index.html | 4 +- docs/articles/listing_cansim_tables.html | 6 +- .../figure-html/unnamed-chunk-4-1.png | Bin 193972 -> 196058 bytes docs/articles/retrieving_cansim_vectors.html | 6 +- docs/articles/working_with_hierarchies.html | 6 +- docs/authors.html | 4 +- docs/index.html | 6 +- docs/news/index.html | 47 +++++++++---- .../add_provincial_abbreviations.html | 4 +- .../adjust_cansim_values_by_variable.html | 4 +- docs/reference/cansim_old_to_new.html | 4 +- docs/reference/categories_for_level.html | 4 +- docs/reference/correspondence.html | 4 +- docs/reference/get_cansim.html | 4 +- docs/reference/get_cansim_changed_tables.html | 4 +- .../get_cansim_column_categories.html | 4 +- docs/reference/get_cansim_column_list.html | 4 +- docs/reference/get_cansim_cube_metadata.html | 4 +- ...t_cansim_data_for_table_coord_periods.html | 4 +- docs/reference/get_cansim_ndm.html | 4 +- docs/reference/get_cansim_table_info.html | 4 +- docs/reference/get_cansim_table_list.html | 4 +- .../reference/get_cansim_table_list_page.html | 4 +- docs/reference/get_cansim_table_notes.html | 4 +- docs/reference/get_cansim_table_overview.html | 4 +- docs/reference/get_cansim_table_subject.html | 4 +- docs/reference/get_cansim_table_survey.html | 4 +- docs/reference/get_cansim_table_url.html | 4 +- docs/reference/get_cansim_vector.html | 4 +- .../get_cansim_vector_for_latest_periods.html | 4 +- docs/reference/get_cansim_vector_info.html | 4 +- docs/reference/index.html | 4 +- docs/reference/list_cansim_tables.html | 4 +- docs/reference/normalize_cansim_values.html | 4 +- .../reference/parse_and_fold_in_metadata.html | 4 +- docs/reference/search_cansim_tables.html | 4 +- docs/reference/view_cansim_webpage.html | 4 +- 43 files changed, 161 insertions(+), 133 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index f4b90c37..af95262f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cansim Type: Package Title: Accessing Statistics Canada Data Table and Vectors -Version: 0.3.0 +Version: 0.3.1 Authors@R: c( person("Jens", "von Bergmann", email = "jens@mountainmath.ca", role = c("cre")), person("Dmitry", "Shkolnik", email = "shkolnikd@gmail.com", role = c("aut"))) diff --git a/NEWS.md b/NEWS.md index 4cc6d43a..11c40bda 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,11 @@ +## cansim 0.3.1 + +### Major changes +- Fixes issues arising from StatCan changing their API row limit + +### Minor changes +- Optimize vector retrieval by REF_DATE + ## cansim 0.3.0 ### Major changes diff --git a/R/cansim_vectors.R b/R/cansim_vectors.R index 44b2ee26..2e11402e 100644 --- a/R/cansim_vectors.R +++ b/R/cansim_vectors.R @@ -10,8 +10,10 @@ extract_vector_data <- function(data1){ "frequencyCode"="frequencyCode", "SCALAR_ID"="scalarFactorCode") result <- purrr::map(data1,function(d){ + vdp <- d$object$vectorDataPoint + if (length(vdp)==0) {return(NULL)} value_data = lapply(vf,function(f){ - x=purrr::map(d$object$vectorDataPoint,function(cc)cc[[f]]) + x=purrr::map(vdp,function(cc)cc[[f]]) x[sapply(x, is.null)] <- NA unlist(x) }) %>% @@ -77,37 +79,36 @@ rename_vectors <- function(data,vectors){ #' @export get_cansim_vector<-function(vectors, start_time, end_time=Sys.Date(), use_ref_date=TRUE){ start_time=as.Date(start_time) - end_time=as.Date(end_time) - if (!use_ref_date) { - time_format="%Y-%m-%dT%H:%m" - vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it - url="https://www150.statcan.gc.ca/t1/wds/rest/getBulkVectorDataByRange" - vectors_string=paste0('"vectorIds":[',paste(purrr::map(as.character(vectors),function(x)paste0('"',x,'"')),collapse = ", "),"]") - time_string=paste0('"startDataPointReleaseDate": "',strftime(start_time,time_format), - '","endDataPointReleaseDate": "',strftime(end_time,time_format),'"') - response <- post_with_timeout_retry(url, body=paste0("{",vectors_string,",",time_string,"}")) - if (is.null(response)) return(response) - if (response$status_code!=200) { - stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response)) - } - data <- httr::content(response) - data1 <- Filter(function(x)x$status=="SUCCESS",data) - data2 <- Filter(function(x)x$status!="SUCCESS",data) - if (length(data2)>0) { - message(paste0("Failed to load data for ",length(data2)," vector(s).")) - data2 %>% purrr::map(function(x){ - message(paste0("Problem downloading data: ",response_status_code_translation[as.character(x$object$responseStatusCode)])) - }) - } + original_end_time=as.Date(end_time) + if (use_ref_date) end_time=Sys.Date() else end_time=original_end_time + time_format="%Y-%m-%dT%H:%m" + vectors=gsub("^v","",vectors) # allow for leading "v" by conditionally stripping it + url="https://www150.statcan.gc.ca/t1/wds/rest/getBulkVectorDataByRange" + vectors_string=paste0('"vectorIds":[',paste(purrr::map(as.character(vectors),function(x)paste0('"',x,'"')),collapse = ", "),"]") + time_string=paste0('"startDataPointReleaseDate": "',strftime(start_time,time_format), + '","endDataPointReleaseDate": "',strftime(end_time,time_format),'"') + response <- post_with_timeout_retry(url, body=paste0("{",vectors_string,",",time_string,"}")) + if (is.null(response)) return(response) + if (response$status_code!=200) { + stop("Problem downloading data, status code ",response$status_code,"\n",httr::content(response)) + } + data <- httr::content(response) + data1 <- Filter(function(x)x$status=="SUCCESS",data) + data2 <- Filter(function(x)x$status!="SUCCESS",data) + if (length(data2)>0) { + message(paste0("Failed to load data for ",length(data2)," vector(s).")) + data2 %>% purrr::map(function(x){ + message(paste0("Problem downloading data: ",response_status_code_translation[as.character(x$object$responseStatusCode)])) + }) + } - if (length(data1)>0) - result <- extract_vector_data(data1) %>% - rename_vectors(vectors) - else - result <- tibble::tibble() - } else { - result <- get_cansim_vector_for_latest_periods(vectors,periods=MAX_PERIODS) %>% - filter(as.Date(.data$REF_DATE)>=start_time,as.Date(.data$REF_DATE)<=end_time) + if (length(data1)>0) + result <- extract_vector_data(data1) %>% rename_vectors(vectors) + else + result <- tibble::tibble() + if (use_ref_date) { + result <- result %>% + filter(as.Date(.data$REF_DATE)>=start_time,as.Date(.data$REF_DATE)<=original_end_time) } result } diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 93236908..13e95873 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -41,7 +41,7 @@ - + @@ -82,7 +82,7 @@ cansim - 0.3.0 + 0.3.1 diff --git a/docs/LICENSE.html b/docs/LICENSE.html index f8b2ae62..d2e28b73 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -41,7 +41,7 @@ - + @@ -82,7 +82,7 @@ cansim - 0.3.0 + 0.3.1 diff --git a/docs/articles/cansim.html b/docs/articles/cansim.html index 938f64d3..cf0565f7 100644 --- a/docs/articles/cansim.html +++ b/docs/articles/cansim.html @@ -18,7 +18,7 @@ - +