Updated get_MAP() and expand_behavior() functions; removed get_MAP_in…

…ternal(); updated README to better reflect M3 model; updated new Google Analytics tag; added namespace sourcing for sf functions in Shiny app; updated package citation file
joshcullen · Aug 8, 2023 · f9f330f · f9f330f
1 parent d04a3b8
commit f9f330f
Show file tree

Hide file tree

Showing 12 changed files with 124 additions and 128 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -57,7 +57,7 @@ Suggests:
     datamods,
     viridis
 Language: en-US
-RoxygenNote: 7.1.1
+RoxygenNote: 7.2.3
 LinkingTo: 
     Rcpp,
     RcppArmadillo
diff --git a/NAMESPACE b/NAMESPACE
@@ -12,7 +12,6 @@ export(extract_prop)
 export(filter_time)
 export(find_breaks)
 export(get_MAP)
-export(get_MAP_internal)
 export(get_behav_hist)
 export(get_breakpts)
 export(insert_NAs)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,9 @@
-# bayesmove 0.2.3 (2022-09-26)
+# bayesmove 0.2.3 (2023-08-08)
 * Removed table and inset map from Shiny app
 * Added dropdown option to color mapped points by a selected variable in Shiny app; not available for mapping lines
+* Improved the speed and efficiency of get_MAP()
+* Fixed issue with expand_behavior() where wrong sample size was specified for track segments by ID
+* Updated suggested citations for use of package
 
 # bayesmove 0.2.2 (2021-11-01)
 * Fixed bug with Shiny app where it would return error based on number and/or frequency of unique IDs

diff --git a/R/LDA_helper_functions.R b/R/LDA_helper_functions.R
@@ -188,8 +188,8 @@ get_behav_hist=function(dat, nburn, ngibbs, nmaxclust, var.names) {
 
     behav.list[[i]]<- data.frame(bin = 1:nrow(tmp1), tmp1) %>%
       dplyr::rename_at(dplyr::vars(tidyr::starts_with('X')), ~as.character(1:ncol(tmp1))) %>%
-      tidyr::pivot_longer(-.data$bin, names_to = "behav", values_to = "prop") %>%
-      dplyr::arrange(.data$behav) %>%
+      tidyr::pivot_longer(-bin, names_to = "behav", values_to = "prop") %>%
+      dplyr::arrange(behav) %>%
       dplyr::mutate(var = var.names[i])
   }
 
@@ -262,12 +262,15 @@ expand_behavior=function(dat, theta.estim, obs, nbehav, behav.names, behav.order
   theta.estim1<- data.frame(id = obs$id, tseg = obs$tseg, theta.estim1)
   theta.estim1$id<- as.character(theta.estim1$id)
   names(theta.estim1)<- c("id", "tseg", behav.names)  #define behaviors
-  nobs<- data.frame(id = obs$id, tseg = obs$tseg,
-                    n = dat %>%
-                      dplyr::group_by(.data$id, .data$tseg) %>%
-                      dplyr::tally() %>%
-                      dplyr::ungroup() %>%
-                      dplyr::pull(.data$n))
+
+  nobs<- data.frame(id = obs$id, tseg = obs$tseg)
+  nobs <- dplyr::left_join(x = nobs,
+                           y = dat %>%
+                             dplyr::mutate(id = as.character(id)) %>%
+                             dplyr::group_by(id, tseg) %>%
+                             dplyr::tally() %>%
+                             dplyr::ungroup(),
+                           by = c('id', 'tseg'))
 
 
   for (i in 1:nrow(theta.estim1)) {
@@ -294,15 +297,15 @@ expand_behavior=function(dat, theta.estim, obs, nbehav, behav.names, behav.order
   ind1<- which(names(theta.estim2) != "id")
   theta.estim2<- theta.estim2 %>%
     dplyr::mutate_at(names(theta.estim2)[ind1], as.numeric) %>%
-    dplyr::select(.data$id, .data$tseg, .data$time1, .data$date, dplyr::everything())
+    dplyr::select(id, tseg, time1, date, dplyr::everything())
 
   #Change into long format
   theta.estim.long<- tidyr::pivot_longer(theta.estim2, cols = -c(1:4),
                                          names_to = "behavior", values_to = "prop")
   theta.estim.long$behavior<- factor(theta.estim.long$behavior,
                                      levels = behav.names[behav.order])
   theta.estim.long<- theta.estim.long %>%
-    dplyr::arrange(.data$behavior) %>%
+    dplyr::arrange(behavior) %>%
     dplyr::mutate_at("date", lubridate::as_datetime)
 
   theta.estim.long
@@ -378,8 +381,8 @@ assign_behavior=function(dat.orig, dat.seg.list, theta.estim.long, behav.names)
   for (i in 1:length(dat.seg.list)) {
     sub<- theta.estim.long[theta.estim.long$id == unique(dat.seg.list[[i]]$id),]
     sub<- sub %>%
-      dplyr::arrange(.data$tseg, .data$date, .data$behavior) %>%
-      tidyr::pivot_wider(names_from = .data$behavior, values_from = .data$prop)
+      dplyr::arrange(tseg, date, behavior) %>%
+      tidyr::pivot_wider(names_from = behavior, values_from = prop)
     sub<- sub %>%
       dplyr::mutate(behav = behav.names[apply(sub[,5:ncol(sub)], 1, which.max)])
 

diff --git a/R/segmentation_helper_functions.R b/R/segmentation_helper_functions.R
@@ -549,36 +549,6 @@ traceplot=function(data, type) {
 
 #---------------------------------------------
 
-#' Internal function to find the maximum a posteriori (MAP) estimate of the MCMC
-#' chain
-#'
-#' Internal function to be used by a wrapper.
-#'
-#' @param dat numeric. A vector of log marginal likelihood values for a given
-#'   animal ID.
-#' @param nburn numeric. The size of the burn-in phase after which the MAP
-#'   estimate will be identified.
-#'
-#' @return A numeric value indicating the iteration after the burn-in phase that
-#'   holds the MAP estimate.
-#'
-#'
-#'
-#' @export
-get_MAP_internal=function(dat, nburn) {
-
-  if (which.max(dat[-1]) < nburn) {
-    MAP.est<- dat[-1] %>%
-      order(decreasing = T)
-    MAP.est<- MAP.est[MAP.est > nburn][1]
-  } else {
-    MAP.est<- which.max(dat[-1])
-  }
-
-  return(MAP.est)
-}
-#---------------------------------------------
-
 #' Find the maximum a posteriori (MAP) estimate of the MCMC chain
 #'
 #' Identify the MCMC iteration that holds the MAP estimate. This will be used to
@@ -625,13 +595,12 @@ get_MAP_internal=function(dat, nburn) {
 #' }
 #'
 #' @export
-get_MAP=function(dat, nburn) {
-  MAP.est<- vector()
-  for (i in 1:nrow(dat)) {
-    MAP.est[i]<- get_MAP_internal(dat[i,], nburn)
-  }
+get_MAP = function(dat, nburn) {
 
-  MAP.est
+  tmp <- dat[,(nburn + 2):ncol(dat)]  #subset only columns after burn-in period
+  MAP.est <- as.integer(apply(tmp, 1, function(x) which.max(x)) + nburn)  #find max LML per ID
+
+  return(MAP.est)
 }
 #---------------------------------------------
 

diff --git a/R/shinyapp.R b/R/shinyapp.R
@@ -237,10 +237,10 @@ server <- function(data, epsg) {
         if (input$radio == "lines") {  #if wanting to plot tracks as lines
 
           tracks.sf <- dat.filt.sf %>%
-            st_as_sf(., coords = c('x','y'), crs = 4326) %>%
-            group_by(id) %>%
-            summarize(do_union = FALSE) %>%
-            st_cast("MULTILINESTRING")
+            sf::st_as_sf(coords = c('x','y'), crs = 4326) %>%
+            dplyr::group_by(id) %>%
+            dplyr::summarize(do_union = FALSE) %>%
+            sf::st_cast("MULTILINESTRING")
 
           map1 <- map1 %>%
               addPolylines(data = tracks.sf,
@@ -251,7 +251,7 @@ server <- function(data, epsg) {
         } else {  #if wanting to plot tracks as points
 
           tracks.sf <- dat.filt.sf %>%
-            st_as_sf(., coords = c('x','y'), crs = 4326)
+            sf::st_as_sf(coords = c('x','y'), crs = 4326)
 
           map1 <- map1 %>%
             addCircleMarkers(data = tracks.sf,
@@ -364,16 +364,16 @@ server <- function(data, epsg) {
       if (input$radio == "lines") {  #tracks as lines
 
         tracks.sf <- dat.filt.sf %>%
-          st_as_sf(., coords = c('x','y'), crs = 4326) %>%
-          group_by(id) %>%
-          summarize(do_union = FALSE) %>%
-          st_cast("MULTILINESTRING")
+          sf::st_as_sf(coords = c('x','y'), crs = 4326) %>%
+          dplyr::group_by(id) %>%
+          dplyr::summarize(do_union = FALSE) %>%
+          sf::st_cast("MULTILINESTRING")
 
         df.sf <- df() %>%
-          st_as_sf(., coords = c('x','y'), crs = 4326) %>%
-          group_by(id) %>%
-          summarize(do_union = FALSE) %>%
-          st_cast("MULTILINESTRING")
+          sf::st_as_sf(coords = c('x','y'), crs = 4326) %>%
+          dplyr::group_by(id) %>%
+          dplyr::summarize(do_union = FALSE) %>%
+          sf::st_cast("MULTILINESTRING")
 
 
         tracks.pal <- colorFactor("viridis", factor(df.sf$id))
@@ -404,10 +404,10 @@ server <- function(data, epsg) {
 
         # add full-length tracks per ID
         tracks.sf <- dat.filt.sf %>%
-          st_as_sf(., coords = c('x','y'), crs = 4326)
+          sf::st_as_sf(coords = c('x','y'), crs = 4326)
 
         df.sf <- df() %>%
-          st_as_sf(., coords = c('x','y'), crs = 4326, remove = FALSE)
+          sf::st_as_sf(coords = c('x','y'), crs = 4326, remove = FALSE)
 
         #define color palette depending on whether discrete or continuous var
         if (is.factor(df.sf[[input$track_col_var]]) | is.character(df.sf[[input$track_col_var]])) {

diff --git a/README.Rmd b/README.Rmd
@@ -26,9 +26,9 @@ knitr::opts_chunk$set(
 ## Introduction
 
 The goal of **bayesmove** is to analyze animal movement using a non-parametric Bayesian framework,
-which addresses a number of limitations of existing segmentation methods and state-space models. This package currently offers two different model frameworks on which to make behavioral inference from animal telemetry data: 1) **the mixed-membership method for movement (M4)** that provides segment-level behavioral state estimation, and 2) **observation-level behavioral state estimation**.
+which addresses a number of limitations of existing segmentation methods and state-space models. This package currently offers two different model frameworks on which to make behavioral inference from animal telemetry data: 1) **the mixed-membership method for movement (M4)** that provides *segment-level* behavioral state estimation, and 2) **the mixture model for movement (M3)** that provides *observation-level* behavioral state estimation.
 
-The M4 model is a two-stage framework that first partitions individual tracks into segments (via reversible-jump Markov chain Monte Carlo) and subsequently clusters these segments into latent behavioral states (via non-parametric Latent Dirichlet Allocation). This framework allows the analysis of multiple telemetry and biologging data streams, which must first be discretized into a set of bins before they can be analyzed. The model that makes behavioral inference at the observation-level also requires that data streams are first discretized, but then directly clusters these observations together into behavioral states within a single step (via a non-parametric mixture model). While the outcome is similar to that from state-space and hidden Markov models, this observation-level model does not assume an underlying Markov property or use a mechanistic process (e.g., correlated random walk). Additional details about the M4 method can be found in Cullen et al. (2021) [doi:10.1111/2041-210X.13745](https://doi.org/10.1111/2041-210X.13745).
+The M4 model is a two-stage framework that first partitions individual tracks into segments (via reversible-jump Markov chain Monte Carlo) and subsequently clusters these segments into latent behavioral states (via non-parametric Latent Dirichlet Allocation). This framework allows the analysis of multiple telemetry and biologging data streams, which must first be discretized into a set of bins before they can be analyzed. The observation-level M3 model also requires that data streams are first discretized, but then directly clusters these observations together into behavioral states within a single step (via a non-parametric mixture model). While the outcome is similar to that from state-space and hidden Markov models, this observation-level model does not assume an underlying Markov property or use a mechanistic process (e.g., correlated random walk). Additional details about the M4 method can be found in Cullen et al. (2022) [doi:10.1111/2041-210X.13745](https://doi.org/10.1111/2041-210X.13745), while further details about the M3 method can be found in Valle et al. (2022) [doi:10.1002/eap.2524](https://doi.org/10.1002/eap.2524).
 
 This package also includes features to check model convergence based on the log-likelihood for each MCMC iteration. Model output are often returned in a format that is `tidyverse`-friendly, which allows for easy visualization using `ggplot2`. Additionally, a Shiny app can be launched to dynamically and interactively explore animal movement data, including options to filter and export data from the app.
 

diff --git a/README.md b/README.md
@@ -24,25 +24,28 @@ non-parametric Bayesian framework, which addresses a number of
 limitations of existing segmentation methods and state-space models.
 This package currently offers two different model frameworks on which to
 make behavioral inference from animal telemetry data: 1) **the
-mixed-membership method for movement (M4)** that provides segment-level
-behavioral state estimation, and 2) **observation-level behavioral state
-estimation**.
+mixed-membership method for movement (M4)** that provides
+*segment-level* behavioral state estimation, and 2) **the mixture model
+for movement (M3)** that provides *observation-level* behavioral state
+estimation.
 
 The M4 model is a two-stage framework that first partitions individual
 tracks into segments (via reversible-jump Markov chain Monte Carlo) and
 subsequently clusters these segments into latent behavioral states (via
 non-parametric Latent Dirichlet Allocation). This framework allows the
 analysis of multiple telemetry and biologging data streams, which must
 first be discretized into a set of bins before they can be analyzed. The
-model that makes behavioral inference at the observation-level also
-requires that data streams are first discretized, but then directly
-clusters these observations together into behavioral states within a
-single step (via a non-parametric mixture model). While the outcome is
-similar to that from state-space and hidden Markov models, this
-observation-level model does not assume an underlying Markov property or
-use a mechanistic process (e.g., correlated random walk). Additional
-details about the M4 method can be found in Cullen et al. (2021)
-[doi:10.1111/2041-210X.13745](https://doi.org/10.1111/2041-210X.13745).
+observation-level M3 model also requires that data streams are first
+discretized, but then directly clusters these observations together into
+behavioral states within a single step (via a non-parametric mixture
+model). While the outcome is similar to that from state-space and hidden
+Markov models, this observation-level model does not assume an
+underlying Markov property or use a mechanistic process (e.g.,
+correlated random walk). Additional details about the M4 method can be
+found in Cullen et al. (2022)
+[doi:10.1111/2041-210X.13745](https://doi.org/10.1111/2041-210X.13745),
+while further details about the M3 method can be found in Valle et
+al. (2022) [doi:10.1002/eap.2524](https://doi.org/10.1002/eap.2524).
 
 This package also includes features to check model convergence based on
 the log-likelihood for each MCMC iteration. Model output are often
@@ -80,16 +83,16 @@ remotes::install_github("joshcullen/bayesmove@dev")
 If installing from GitHub, ensure that you have a tool installed for
 compiling C++ code:
 
--   For PC’s running Windows, install
-    [Rtools](https://cran.r-project.org/bin/windows/Rtools/) if you have
-    not already done so.
+- For PCs running Windows, install
+  [Rtools](https://cran.r-project.org/bin/windows/Rtools/) if you have
+  not already done so.
 
--   For Mac’s, install the [Command Line Tools for
-    Xcode](https://developer.apple.com/xcode/resources/) by executing
-    `xcode-select --install` in the terminal; or you can download the
-    latest version from the URL (free developer registration may be
-    required). A full Xcode install uses up a lot of disk space and is
-    not required.
+- For Macs, install the [Command Line Tools for
+  Xcode](https://developer.apple.com/xcode/resources/) by executing
+  `xcode-select --install` in the terminal; or you can download the
+  latest version from the URL (free developer registration may be
+  required). A full Xcode install uses up a lot of disk space and is not
+  required.
 
 ## Support
 

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -39,5 +39,14 @@ articles:
 
 
 template:
-  params:
-    ganalytics: UA-151357058-2
+  includes:
+    in_header: |
+      <!-- Google tag (gtag.js) -->
+      <script async src="https://www.googletagmanager.com/gtag/js?id=G-Z86M853GE5"></script>
+      <script>
+        window.dataLayer = window.dataLayer || [];
+        function gtag(){dataLayer.push(arguments);}
+        gtag('js', new Date());
+
+        gtag('config', 'G-Z86M853GE5');
+       </script>
diff --git a/inst/CITATION b/inst/CITATION
@@ -1,19 +1,52 @@
-note <- sprintf("R package version %s", meta$Version)
 
+citHeader("To cite bayesmove in publications, please use:")
 
-citHeader("To cite bayesmove in publications use:")
+citEntry(entry = "Article",
+  title        = "Identifying latent behavioural states in animal movement with M4, a nonparametric Bayesian method",
+  author       = personList(
+  as.person("Joshua A Cullen"),
+  as.person("Caroline L Poli"),
+  as.person("Robert J Fletcher"),
+  as.person("Denis Valle")
+  ),
+  year         = "2022",
+  journal      = "Methods in Ecology and Evolution",
+  volume       = 13,
+  number       = 2,
+  pages        = "432–446",
+  url          = "https://doi.org/10.1111/2041-210X.13745",
+  doi          = "10.1111/2041-210X.13745",
 
-citEntry(entry = "Manual",
-  title        = "{bayesmove}: Non-Parametric Bayesian Analyses of Animal Movement",
-  author       = personList(as.person("Joshua Cullen"),
-                   as.person("Denis Valle")),
-  year         = "2021",
-  note         = note,
-  url          = "https://joshcullen.github.io/bayesmove/",
+  textVersion  =
+  paste("Cullen JA, Poli CL, Fletcher RJ, Valle D. (2022).",
+        "Identifying latent behavioural states in animal movement with M4, a nonparametric Bayesian method.",
+        "Methods in Ecology and Evolution. 13(2):432–446.",
+        "DOI: 10.1111/2041-210X.13745.")
+)
+
+
+
+citEntry(entry = "Article",
+  title        = "Automatic selection of the number of clusters using Bayesian clustering and sparsity-inducing priors",
+  author       = personList(
+  as.person("Denis Valle"),
+  as.person("Yusuf Jameel"),
+  as.person("Brenda Betancourt"),
+  as.person("Ermias T Azeria"),
+  as.person("Nina Attias"),
+  as.person("Joshua Cullen")
+  ),
+  year         = "2022",
+  journal      = "Ecological Applications",
+  volume       = 32,
+  number       = 3,
+  pages        = "e2524",
+  url          = "https://doi.org/10.1002/eap.2524",
+  doi          = "10.1002/eap.2524",
 
   textVersion  =
-  paste("Cullen JA, Valle DR (2021).",
-        "bayesmove: Non-Parametric Bayesian Analysis of Animal Movement.",
-        "R Package version 0.2.1.",
-        "URL https://joshcullen.github.io/bayesmove/.")
+  paste("Valle D, Jameel Y, Betancourt B, Azeria ET, Attias N, Cullen J. (2022).",
+        "Automatic selection of the number of clusters using Bayesian clustering and sparsity-inducing priors.",
+        "Ecological Applications. 32(3):e2524.",
+        "DOI: 10.1002/eap.2524.")
 )
diff --git a/man/get_MAP_internal.Rd b/man/get_MAP_internal.Rd