Dev 0.1.9.9999 (#95)

* update renv * development version number * fix examples * epa moved clusters to different API call * update mocked test data * update news * update readme rendering * add 502 error message * valid arguments for echo * checkout v4 * use stat="sf_coordinates" * prettymapr * use `get_facilities` endpoint and update examples * update examples * update vignettes and mocked files * add prettymapr for vignette * update mocked results * adds covr * adds meta end point tests * get_qid test * arg = sdw instead of cwa * typo * fix #93 * update mocks * update mocks * update covr workflow * clean up unused internal functions * fix codecov secrets * add coverage dependency on xml2 --------- Co-authored-by: Michael Schramm <michael.schramm@agnet.tamu.edu>
mps9506 · May 15, 2024 · dcec5e5 · dcec5e5
1 parent a3b590e
commit dcec5e5
Show file tree

Hide file tree

Showing 64 changed files with 41,603 additions and 64,793 deletions.
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -29,7 +29,7 @@ jobs:
       R_KEEP_PKG_SOURCE: yes
 
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 

diff --git a/.github/workflows/build-packagedown.yaml b/.github/workflows/build-packagedown.yaml
@@ -20,7 +20,7 @@ jobs:
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 

diff --git a/.github/workflows/render-readme.yaml b/.github/workflows/render-readme.yaml
@@ -21,7 +21,7 @@ jobs:
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
 
       - uses: r-lib/actions/setup-pandoc@v2
 
@@ -32,15 +32,10 @@ jobs:
       - name: Install system dependencies
         run: |
           # install spatial dependencies
-          sudo add-apt-repository ppa:ubuntugis/ubuntugis-unstable
-          sudo apt update
-          sudo apt install \
-            libudunits2-dev \
-            libgdal-dev \
-            libgeos-dev \
-            libproj-dev \
-            libharfbuzz-dev \
-            libfribidi-dev
+          sudo apt-get install libgdal-dev libproj-dev libgeos-dev libudunits2-dev
+          # install systemfont and ragg dependencies
+          sudo apt-get install libharfbuzz-dev libfribidi-dev
+
 
       - uses: r-lib/actions/setup-renv@v2
       - uses: r-lib/actions/setup-r-dependencies@v2

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -0,0 +1,60 @@
+# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
+# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
+on:
+  push:
+    branches: [main, master]
+  pull_request:
+    branches: [main, master]
+
+name: test-coverage
+
+jobs:
+  test-coverage:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+      CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: r-lib/actions/setup-r@v2
+        with:
+          use-public-rspm: true
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          extra-packages: any::covr
+          needs: coverage
+
+      - name: Test coverage
+        run: |
+          cov <- covr::package_coverage(
+            quiet = FALSE,
+            clean = FALSE,
+            install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package")
+          )
+          covr::to_cobertura(cov)
+        shell: Rscript {0}
+
+      - uses: codecov/codecov-action@v4
+        with:
+          fail_ci_if_error: ${{ github.event_name != 'pull_request' && true || false }}
+          file: ./cobertura.xml
+          plugin: noop
+          disable_search: true
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Show testthat output
+        if: always()
+        run: |
+          ## --------------------------------------------------------------------
+          find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true
+        shell: bash
+
+      - name: Upload test results
+        if: failure()
+        uses: actions/upload-artifact@v4
+        with:
+          name: coverage-test-failures
+          path: ${{ runner.temp }}/package
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: echor
 Type: Package
 Title: Access EPA 'ECHO' Data
-Version: 0.1.9
+Version: 0.1.9.9999
 Authors@R: 
     person(given = "Michael",
            family = "Schramm",
@@ -18,7 +18,7 @@ Description: An R interface to United States Environmental
     from <https://echo.epa.gov/>. 
 License: MIT + file LICENSE
 Encoding: UTF-8
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
 Imports:
   curl,
   dplyr,
@@ -48,6 +48,9 @@ Config/Needs/website:
   here,
   kableExtra,
   knitr,
+  prettymapr,
   raster,
   rmarkdown,
   sf
+Config/Needs/coverage: 
+  xml2
diff --git a/NAMESPACE b/NAMESPACE
@@ -32,5 +32,4 @@ importFrom(tibble,is_tibble)
 importFrom(tibble,tibble)
 importFrom(tidyr,gather_)
 importFrom(tidyr,pivot_longer)
-importFrom(tidyr,unnest_wider)
 importFrom(utils,URLencode)
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,7 @@
+# echor 0.1.9.9999 (in development)
+* Update examples in `echoWaterGetFacilityInfo()`. The allowable values for `p_pcomp` were changed by EPA resulting in zero values returned. (fixes #94)
+* There is no longer an (unknown) upper limit on the values returned when requesting an `sf` dataframe. Previously, ECHO returned "clusters" instead of records when a large number of records were requested. ECHO no provides a different endpoint to request clusters. This package does not currently provide a function to access the clusters endpoint.
+
 # echor 0.1.9
 * when server responses != 200 or 202, functions return an invisible NULL with a message instead of an error and message. (fixes #87)
 * removed geojsonsf dependency.

diff --git a/R/air.R b/R/air.R
@@ -17,17 +17,17 @@
 #' ## These examples require an internet connection to run
 #'
 #' ## Retrieve table of facilities by bounding box
-#' echoAirGetFacilityInfo(xmin = '-96.407563',
-#' ymin = '30.554395',
-#' xmax = '-96.25947',
-#' ymax = '30.751984',
+#' echoAirGetFacilityInfo(p_c1lon = '-96.407563',
+#' p_c1lat = '30.554395',
+#' p_c2lon = '-96.25947',
+#' p_c2lat = '30.751984',
 #' output = 'df')
 #'
 #' ## Retrieve a simple features dataframe by bounding box
-#' spatialdata <- echoAirGetFacilityInfo(xmin = '-96.407563',
-#' ymin = '30.554395',
-#' xmax = '-96.25947',
-#' ymax = '30.751984',
+#' spatialdata <- echoAirGetFacilityInfo(p_c1lon = '-96.407563',
+#' p_c1lat = '30.554395',
+#' p_c2lon = '-96.25947',
+#' p_c2lat = '30.751984',
 #' output = 'sf')
 #'
 #' }
@@ -63,8 +63,12 @@ echoAirGetFacilityInfo <- function(output = "df", verbose = FALSE, ...) {
   queryDots <- queryList(valuesList)
 
   ## build the request URL statement
-  path <- "echo/air_rest_services.get_facility_info"
-  query <- paste("output=JSON", queryDots, sep = "&")
+  path <- "echo/air_rest_services.get_facilities"
+  ## responseset is the maximum number of records return on one page of paginated
+  ## results
+  responseSet <- 1000
+  baseParams <- paste("output=JSON", paste0("responseset=",responseSet), sep="&")
+  query <- paste(baseParams, queryDots, sep = "&")
   getURL <- requestURL(path = path, query = query)
 
   ## Make the request
@@ -87,6 +91,12 @@ echoAirGetFacilityInfo <- function(output = "df", verbose = FALSE, ...) {
 
   info <- httr::content(request)
 
+  ## if query returns an error message, print message and return invisible null
+  if(length(info$Results$Error$ErrorMessage)>0){
+    message(info$Results$Error$ErrorMessage)
+    return(invisible(NULL))
+  }
+
   ## return the query id
   qid <- info[["Results"]][["QueryID"]]
 
@@ -108,57 +118,33 @@ echoAirGetFacilityInfo <- function(output = "df", verbose = FALSE, ...) {
   ## if df return output from air_rest_services.get_download
   if (output == "df") {
 
-    if (n_records <= 100000) {
-
-      buildOutput <- getDownload("caa",
-                                 qid,
-                                 qcolumns,
-                                 col_types = colTypes)
-    } else {
-
-      # number of pages returned is n_records/5000
-      pages <- ceiling(n_records/5000)
-      # create the progress bar
-      pb <- progress_bar$new(total = pages)
-
-      buildOutput <- getQID("caa",
-                            qid,
-                            qcolumns,
-                            page = 1)
-      pb$tick()
-
-      for (i in 2:pages) {
-        buildOutput <- bind_rows(buildOutput,
-                                 getQID("caa",
-                                        qid,
-                                        qcolumns,
-                                        page = i))
-        Sys.sleep(0.5)
-        pb$tick()
-      }
-    }
+    buildOutput <- getDownload("caa",
+                               qid,
+                               qcolumns,
+                               col_types = colTypes)
+
     return(buildOutput)
   }
 
   ## if df return output from air_rest_services.get_geojson
   if (output == "sf") {
 
-    ## if returns clusters, there are to many records to
-    ## return records via geojson and the request needs to
-    ## be more specific. I'm not sure how many records are too
-    ## many. If the length of facilities == 0, it means
-    ## the query either return no records, or the request returned
-    ## clusters and we can stop the function and return a message.
-    if(length(info[["Results"]][["Facilities"]]) == 0) {
-      if(n_records > 0) {
-        message("Too many records to return spatial a object, please subset your request and try again.")
-        return(invisible(NULL))
-      }
-      if(n_records == 0) {
-        message("No records returned in your request")
-        return(invisible(NULL))
-      }
-    }
+    # ## if returns clusters, there are to many records to
+    # ## return records via geojson and the request needs to
+    # ## be more specific. I'm not sure how many records are too
+    # ## many. If the length of facilities == 0, it means
+    # ## the query either return no records, or the request returned
+    # ## clusters and we can stop the function and return a message.
+    # if(length(info[["Results"]][["Facilities"]]) == 0) {
+    #   if(n_records > 0) {
+    #     message("Too many records to return spatial a object, please subset your request and try again.")
+    #     return(invisible(NULL))
+    #   }
+    #   if(n_records == 0) {
+    #     message("No records returned in your request")
+    #     return(invisible(NULL))
+    #   }
+    # }
 
     buildOutput <- getGeoJson("caa",
                               qid,

diff --git a/R/sdw.R b/R/sdw.R
@@ -59,7 +59,6 @@ echoSDWGetMeta <- function(verbose = FALSE){
 #' Uses EPA's ECHO API: \url{https://echo.epa.gov/tools/web-services/facility-search-drinking-water#!/Safe_Drinking_Water/get_sdw_rest_services_get_systems}.
 #' @param verbose Logical, indicating whether to provide processing and retrieval messages. Defaults to FALSE
 #' @param ... Further arguments passed as query parameters in request sent to EPA ECHO's API. For more options see: \url{https://echo.epa.gov/tools/web-services/facility-search-drinking-water#!/Safe_Drinking_Water/get_sdw_rest_services_get_systems} for a complete list of parameter options. Examples provided below.
-#' @importFrom purrr map
 #' @import httr
 #' @import dplyr
 #' @return returns a dataframe
@@ -102,7 +101,11 @@ echoSDWGetSystems <- function(verbose = FALSE, ...) {
 
   ## build the request URL statement
   path <- "echo/sdw_rest_services.get_systems"
-  query <- paste("output=JSON", queryDots, sep = "&")
+  ## responseset is the maximum number of records return on one page of paginated
+  ## results
+  responseSet <- 1000
+  baseParams <- paste("output=JSON", paste0("responseset=",responseSet), sep="&")
+  query <- paste(baseParams, queryDots, sep = "&")
   getURL <- requestURL(path = path, query = query)
 
   ## Make the request
@@ -125,6 +128,13 @@ echoSDWGetSystems <- function(verbose = FALSE, ...) {
 
   info <- httr::content(request)
 
+  ## if query returns an error message, print message and return invisible null
+  if(length(info$Results$Error$ErrorMessage)>0){
+    message(info$Results$Error$ErrorMessage)
+    return(invisible(NULL))
+  }
+
+
   ## return the query id
   qid <- info[["Results"]][["QueryID"]]
 
@@ -150,36 +160,10 @@ echoSDWGetSystems <- function(verbose = FALSE, ...) {
 
   colTypes <- columnsToParse(program = "sdw", colNums)
 
-  ## if <= 100000 records use getDownload
-  if (n_records <= 100000) {
-
-    buildOutput <- getDownload("sdw",
-                               qid,
-                               qcolumns,
-                               col_types = colTypes)
-  } else {
-
-    # number of pages returned is n_records/5000
-    pages <- ceiling(n_records/5000)
-    # create the progress bar
-    pb <- progress_bar$new(total = pages)
-
-    buildOutput <- getQID("cwa",
-                          qid,
-                          qcolumns,
-                          page = 1)
-    pb$tick()
-
-    for (i in 2:pages) {
-      buildOutput <- bind_rows(buildOutput,
-                               getQID("sdw",
-                                      qid,
-                                      qcolumns,
-                                      page = i))
-      Sys.sleep(0.5)
-      pb$tick()
-    }
 
-  }
+  buildOutput <- getDownload("sdw",
+                             qid,
+                             qcolumns,
+                             col_types = colTypes)
   return(buildOutput)
 }