Merge pull request #146 from signaturescience/dev

v0.1.0
signaturescience · Jul 17, 2024 · 6e9fe6d · 6e9fe6d
2 parents a7ec413 + 5679f25
commit 6e9fe6d
Show file tree

Hide file tree

Showing 22 changed files with 469 additions and 99 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,3 +11,4 @@ scratch_dw.R
 ^pkgdown$
 ^doc$
 ^Meta$
+^cran-comments\.md$
diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml
@@ -36,7 +36,7 @@ jobs:
           needs: website
 
       - name: Build site
-        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
+        run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE, examples = FALSE)
         shell: Rscript {0}
 
       - name: Deploy to GitHub pages 🚀

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: rplanes
-Title: Plausibility Analysis of Epidemiological Signals in 'R'
-Version: 0.0.3
+Title: Plausibility Analysis of Epidemiological Signals
+Version: 0.1.0
 Authors@R: 
     c(person(given = "VP",
              family = "Nagraj",
@@ -13,7 +13,7 @@ Authors@R:
       person(given = "Amy",
              family = "Benefield",
              role = "aut"))
-Description: The 'rplanes' package provides functionality to prepare data and and analyze plausibility of both forecasted and reported epidemiological signals. The functions implement a set of plausbility algorithms that are agnostic to geographic and time resolutions and are calculated independently then presented as a combined score.
+Description: Provides functionality to prepare data and analyze plausibility of both forecasted and reported epidemiological signals. The functions implement a set of plausibility algorithms that are agnostic to geographic and time resolutions and are calculated independently then presented as a combined score.
 License: MIT + file LICENSE
 URL: https://signaturescience.github.io/rplanes/
 Depends: 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,19 @@
+# rplanes 0.1.0
+
+## New features
+
+### Performance optimization for `plane_shape()`
+
+In this release, we have introduced a parameter to customize the "method" used to identify shapes within `plane_shape()`. Previously, the function was only able to use a Dynamic Time Warping ("dtw") algorithm to identify shapes in the time series. This approach involved calculations that were computationally expensive, particularly on datasets with multiple locations in the seed. We have introduced a second method that uses a scaled difference approach ("sdiff") to ascertain shapes. The "sdiff" option is set as the default, as it is much more computationally efficient than the "dtw" option. For more details on both of these methods see `?plane_shape()`.
+
+### Interpretation vignette
+
+The package now includes a narrative vignette that discusses how to interpret results from PLANES analysis. Topics include how to apply the weighting scheme in `plane_score()`, strategies to mitigate limitations that may arise from seed data, and considerations for operationally taking action based on plausibility scores.
+
+### More informative warning for missing data
+
+This release introduces messaging that communicates when a location has fewer time steps compared to others in the seed. The warning message is formatted as "{LOCATION} has fewer values than some or all of the locations. This may introduce issues in downstream plausibility analysis."
+
 # rplanes 0.0.3
 
 ## New features

diff --git a/R/planes.R b/R/planes.R
diff --git a/R/utils.R b/R/utils.R
@@ -377,7 +377,7 @@ create_sliding_windows_df <- function(vector, window_size) {
 #'
 #' @description
 #'
-#' This unexported helper is used inside of the individual plausibility component functions (e.g., `plane_diff()`) to validate that the location specified appears in both the input signal and seed.
+#' This unexported helper is used inside of the individual plausibility component functions (e.g., `plane_diff()`) to validate that the location specified appears in both the input signal and seed and that the location has as many values as other locations in the seed.
 #'
 #'
 #' @param location Character vector with location code; the location must appear in input and seed
@@ -398,7 +398,113 @@ valid_location <- function(location, input, seed) {
     stop(sprintf("%s does not appear in the input object. Check that the input was prepared with the location specified.", location))
   }
 
+  ## check to see if the location has fewer than max values
+  all_lengths <-
+    seed %>%
+    purrr::map(., "all_values") %>%
+    purrr::map(., function(x) x[!is.na(x)]) %>%
+    purrr::map_dbl(., length)
+
+  if(!all_lengths[location] == max(all_lengths)) {
+    warning(sprintf("%s has fewer values than some or all of the locations. This may introduce issues in downstream plausibility analysis.", location))
+  }
+
   ## if the validation proceeds this far return TRUE
   return(invisible(TRUE))
 
 }
+
+#' Determine shapes
+#'
+#' @description
+#'
+#' This unexported helper function is used to identify the shape in the `plane_shape()` function's scaled difference ("sdiff") method.
+#'
+#' @param input_data A data frame containing at least two columns, one of which must be named "value" with the value assessed and another named "dates" with the date for the observed data
+#' @param window_size The number of of categorical differences used to define the shape
+#'
+#' @return A vector with the shapes identified. Each element of the vector will include a shape, which is a cluster of categorical differences (of the same size as the specified "window_size") collapsed with ";" (e.g., `c("decrease;stable;stable;stable","stable;stable;stable;increase","stable;stable;increase;increase")`).
+#'
+#'
+get_shapes <- function(input_data, window_size) {
+
+  ## create temporary data with differences for each consecutive observed value
+  ## NOTE: this assumes that data is coming in with date ascending
+  ## the difference is scaled and centered
+  tmp_dat <-
+    input_data %>%
+    dplyr::mutate(diff = .data$value - dplyr::lag(.data$value)) %>%
+    dplyr::mutate(scaled_diff = scale(.data$diff)[,1]) %>%
+    ## chop off first row because the value - lag will be NA
+    dplyr::filter(dplyr::row_number() > 1) %>%
+    ## resort with the dates descending for processing below
+    dplyr::arrange(dplyr::desc(.data$date)) %>%
+    ## describe the categorical difference (inc, decr, stable) with cutter() helper
+    dplyr::mutate(cat_diff = purrr::map_chr(.data$scaled_diff, function(x) cutter(x)))
+
+  ## split the categorical difference into windowed chunks
+  tmp_shapes <-
+    tmp_dat %>%
+    dplyr::pull(.data$cat_diff) %>%
+    to_chunk(., window_size)
+
+  ## the chunking may result in a "remainder" from windows not having enough shapes in the chunk
+  ## need to figure out the remainder and then use that as n rows to trim below
+  rows_to_trim <- nrow(tmp_dat) %% window_size
+
+  ## add a column with the chunked categorical differences created above
+  ## this needs to be reversed to get the order correct and collapsed as a character vector
+  ## also need to rearrange the data ascending by date now
+  ## and lop off the number of "remainder" rows identified above
+  tmp_dat %>%
+    dplyr::mutate(window_shapes = tmp_shapes %>% purrr::map(., rev) %>% purrr::map_chr(., paste0, collapse  = ";")) %>%
+    dplyr::arrange(.data$date) %>%
+    dplyr::filter(dplyr::row_number() > rows_to_trim) %>%
+    dplyr::pull(.data$window_shapes)
+
+}
+
+
+#' Cut into categorical differences
+#'
+#' @description
+#'
+#' This unexported helper function takes an input number for an observed difference and cuts it into a categorical description (e.g., "increase", "decrease", or "stable") of the change.
+#'
+#' @param x Vector of length 1 with scaled difference to be categorized
+#' @param threshold Limit used to define the categorical differences; default is `1`
+#'
+#' @return Character vector of length 1 with the categorical description of difference
+#'
+cutter <- function(x, threshold = 1) {
+  if (x >= threshold) {
+    "increase"
+  } else if (x <= -threshold) {
+    "decrease"
+  } else {
+    "stable"
+  }
+}
+
+#' Chunk a vector
+#'
+#' @description
+#'
+#' This unexported helper function creates a list with contents of a vector spit into chunks. The user can specify how large each chunk should be with the "size" argument.
+#'
+#' @param x Vector to be split into chunks as large as the "size" specified
+#' @param size Width of the chunks for "x" vector
+#'
+#' @return A list with as many elements as the number of chunks created. Each element will include vector with a length equal to the "size" specified.
+#'
+to_chunk <- function(x, size) {
+
+  ## establish the beginning and end indices for the chunks
+  ind1 <- seq(1, length(x), by = 1)
+  ind2 <- ind1 + (size - 1)
+  ## the last of the end indices cant be greater than length of input vector
+  ind2[length(ind2)] <- length(x)
+
+  ## index the input vector in the specified chunk
+  purrr::map2(ind1, ind2, function(start,end) x[start:end])
+}
diff --git a/README.Rmd b/README.Rmd
@@ -19,17 +19,24 @@ knitr::opts_chunk$set(
 
 <!-- badges: start -->
 [![R-CMD-check](https://github.com/signaturescience/rplanes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/signaturescience/rplanes/actions/workflows/R-CMD-check.yaml)
+[![CRAN status](https://www.r-pkg.org/badges/version/rplanes)](https://CRAN.R-project.org/package=rplanes)
 <!-- badges: end -->
 
-> **DEVELOPMENT STATUS**: The `rplanes` package is being actively developed. As of v0.0.2 the package is under alpha release. Users should be aware that there may be significant changes to the API in this phase.
-
 ## Introduction
 
 The `rplanes` package (**pl**ausibility **an**alysis of **e**pidemiological **s**ignals) provides functionality to prepare data and analyze plausibility of both forecasted and reported epidemiological signals. The functions implement a set of plausibility algorithms that are agnostic to geographic and time resolutions and are calculated independently and then presented as a combined score. 
 
 
 ## Installation
 
+The package is available to install from CRAN:
+
+``` r
+install.packages("rplanes")
+```
+
+Alternatively you can install the package from GitHub:
+
 ``` r
 #install.packages("remotes")
 remotes::install_github("signaturescience/rplanes", build_vignettes=TRUE)

diff --git a/README.md b/README.md
@@ -8,13 +8,10 @@
 <!-- badges: start -->
 
 [![R-CMD-check](https://github.com/signaturescience/rplanes/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/signaturescience/rplanes/actions/workflows/R-CMD-check.yaml)
+[![CRAN
+status](https://www.r-pkg.org/badges/version/rplanes)](https://CRAN.R-project.org/package=rplanes)
 <!-- badges: end -->
 
-> **DEVELOPMENT STATUS**: The `rplanes` package is being actively
-> developed. As of v0.0.2 the package is under alpha release. Users
-> should be aware that there may be significant changes to the API in
-> this phase.
-
 ## Introduction
 
 The `rplanes` package (**pl**ausibility **an**alysis of
@@ -26,6 +23,14 @@ independently and then presented as a combined score.
 
 ## Installation
 
+The package is available to install from CRAN:
+
+``` r
+install.packages("rplanes")
+```
+
+Alternatively you can install the package from GitHub:
+
 ``` r
 #install.packages("remotes")
 remotes::install_github("signaturescience/rplanes", build_vignettes=TRUE)

diff --git a/cran-comments.md b/cran-comments.md
@@ -0,0 +1,31 @@
+This is a re-upload of the initial CRAN submission for the package. We have responded to the suggestions from CRAN maintainers:
+
+- We have removed "in R" from the package title.
+- We no longer begin the description field with "this package".
+- We do not have any references to add to the description field.
+- We were previously using "\dontrun" in two examples, one of which demonstrated how to launch a web server with a wrapper function and the other took >45 seconds to execute. We have now switched to using "\donttest" for the long-running example.
+
+## Test environments
+
+- Local MacOS install, R 4.2.3
+- R hub
+    - Linux (Ubuntu 22.04.4 LTS, R-devel)
+    - Windows (Windows Server 2022 x64, R-devel)
+    - MacOS (macOS Ventura 13.6.7, R-devel)
+    - MacOS ARM 64 (macOS Sonoma 14.5, R-devel)
+
+## R CMD check results
+
+- Local `R CMD check`
+  - 0 errors | 0 warnings | 0 notes
+- R hub: 
+    - Linux (Ubuntu 22.04.4 LTS, R-devel)
+      - 0 errors | 0 warnings | 0 notes
+    - Windows (Windows Server 2022 x64, R-devel)
+      - 0 errors | 0 warnings | 0 notes
+    - MacOS (macOS Ventura 13.6.7, R-devel)
+      - 0 errors | 0 warnings | 0 notes
+    - MacOS ARM 64 (macOS Sonoma 14.5, R-devel)
+      - 0 errors | 0 warnings | 0 notes
+
+
diff --git a/inst/app/app.R b/inst/app/app.R
@@ -58,7 +58,9 @@ ui <- navbarPage(title = "rplanes Explorer",
                                                                                                numericInput("sig", "Significance (Trend)", value = 0.1, min = 0, max = 1, step = 0.01))),
                                                                            shinyjs::hidden(div(id = "args_repeat",
                                                                                                numericInput("tol", label = "Tolerance (Repeat)", value = 0, min = 0, max = 50, step = 1),
-                                                                                               numericInput("pre", label = "Prepend Values (Repeat)",  value = 0, min = 0, max = 365, step = 1)))
+                                                                                               numericInput("pre", label = "Prepend Values (Repeat)",  value = 0, min = 0, max = 365, step = 1))),
+                                                                           shinyjs::hidden(div(id = "args_shape",
+                                                                                               radioButtons("method", label = "Method (Shape)", choices = c("sdiff (Default)" = "sdiff", "Dynamic Time Warping" = "dtw"), selected = "sdiff")))
                                                        )),
                                                        actionBttn("run", "Analyze", style = "unite", color = "danger"),
                                                        actionBttn("reset", "Reset", style = "stretch", color = "warning")
@@ -126,6 +128,7 @@ server <- function(input, output, session){
     shinyjs::toggle(id = "weight_choices", condition = {input$custom_weights == "Custom"})
     shinyjs::toggle(id = "args_trend", condition = {"trend" %in% input$components})
     shinyjs::toggle(id = "args_repeat", condition = {"repeat" %in% input$components})
+    shinyjs::toggle(id = "args_shape", condition = {"shape" %in% input$components})
     })
 
   # update scoring options based on user input of observed or forecast comparison
@@ -288,13 +291,13 @@ server <- function(input, output, session){
     scoring <- eventReactive(input$run,{
 
       if (input$tol == 0 & input$pre == 0){
-        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = NULL, tolerance = NULL))
+        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = NULL, tolerance = NULL), shape = list(method = input$method))
       } else if (input$tol == 0){
-        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = input$pre, tolerance = NULL))
+        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = input$pre, tolerance = NULL), shape = list(method = input$method))
       } else if (input$pre == 0){
-        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = NULL, tolerance = input$tol))
+        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = NULL, tolerance = input$tol), shape = list(method = input$method))
       } else {
-        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = input$pre, tolerance = input$tol))
+        comp_args <- list(trend = list(sig_lvl = input$sig), `repeat` = list(prepend = input$pre, tolerance = input$tol), shape = list(method = input$method))
       }
 
       ## handle weights

diff --git a/man/cutter.Rd b/man/cutter.Rd
diff --git a/man/get_shapes.Rd b/man/get_shapes.Rd
diff --git a/man/plane_score.Rd b/man/plane_score.Rd