Grouping species identity effects

rafamoral · May 4, 2023 · 4524912 · 4524912
1 parent 246e5c1
commit 4524912
Show file tree

Hide file tree

Showing 7 changed files with 201 additions and 48 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,3 +20,4 @@ Description: The 'DImodels' package is suitable for analysing data from biodiver
 License: GPL (>=2)
 NeedsCompilation: no
 Config/testthat/edition: 3
+RoxygenNote: 7.2.3
diff --git a/R/DI.R b/R/DI.R
diff --git a/R/dataDI.R b/R/dataDI.R
@@ -1,4 +1,4 @@
-DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, theta = 1) {
+DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, ID, data, theta = 1) {
   if(any(is.na(data))) {
     stop("The dataset contains missing values. Please remove them prior to analysis.") 
   }
@@ -83,6 +83,7 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the
     ADD$ADD <- 0
   }
   newdata <- data.frame(newdata, ADD$ADD)
+  FGnames <- FG
   ## calculating FG variables
   if(!is.null(FG)) FG <- DI_data_FG(prop = prop, FG = FG, data = data, theta = theta)$FG
 
@@ -91,9 +92,17 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the
   # Add variabes to data
   newdata <- cbind(newdata, FULL)
 
+  # Grouping ID effects 
+  if(missing(ID)){
+    ID <- paste0(prop, "_ID")
+  } 
+  ID_name_check(ID = ID, prop = prop, FG = FGnames)
+  grouped_ID <- group_IDs(data = data, prop = prop, ID = ID)
+  newdata <- cbind(newdata, grouped_ID)
+
   ## return object
   return(list("newdata" = newdata, "y" = y, "block" = block, density = density,
-              "prop" = prop, "treat" = treat, "FG" = FG,
+              "prop" = prop, "treat" = treat, "FG" = FG, ID = unique(ID), 
               "P_int_flag" = P_int_flag, "even_flag" = even_flag, "nSpecies" = length(prop)))
 }
 
@@ -361,6 +370,74 @@ add_name_check <- function(data){
     stop('Certain column names cause internal conflicts when calculating additive interactions. Please rename any columns containing the string \'_add\'.')
   }
 }
+
+ID_name_check <- function(ID, prop, FG){
+  cond1 <- length(grep(":", ID)) > 0
+  cond2 <- any(ID == "_")
+  cond3 <- any(ID == "i")
+  cond4 <- any(ID == "n")
+  cond5 <- any(ID == "f")
+  cond6 <- any(ID == "g")
+  cond7 <- any(ID == "_i")
+  cond8 <- any(ID == "in")
+  cond9 <- any(ID == "nf")
+  cond10 <- any(ID == "fg")
+  cond11 <- any(ID == "g_")
+  cond12 <- any(ID == "_in")
+  cond13 <- any(ID == "inf")
+  cond14 <- any(ID == "nfg")
+  cond15 <- any(ID == "fg_")
+  cond16 <- any(ID == "_inf")
+  cond17 <- any(ID == "infg")
+  cond18 <- any(ID == "nfg_")
+  cond19 <- any(ID == "_infg")
+  cond20 <- any(ID == "infg_")
+  cond21 <- any(ID == "_infg_")
+  cond22 <- length(grep(pattern = "^add", x = ID, ignore.case = TRUE)) > 0
+  cond23 <- length(grep(pattern = "^full", x = ID, ignore.case = TRUE)) > 0
+  cond24 <- length(grep(pattern = "^fg", x = ID, ignore.case = TRUE)) > 0
+  cond25 <- any(ID == "E")
+  cond26 <- any(ID == "AV")
+  if(cond1 | cond2 | cond3 | cond4 | cond5 | cond6 | cond7 |
+     cond8 | cond9 | cond10 | cond11 | cond12 | cond13 | cond14 |
+     cond15 | cond16 | cond17 | cond18 | cond19 | cond20 | cond21 | 
+     cond22 | cond23 | cond24| cond25 | cond26) {
+    stop("Please give your IDs a different name.",
+         " Names should not include colons (':'), or any single or multiple",
+         " character combination of the expressions '_infg_', 'ADD', 'FG', 'FULL', 'E', and 'AV'.",
+         " These expressions are reserved for internal computations.")
+  }
+  if(length(ID)!=length(prop)){
+    stop("'ID' should be a vector of same length as prop vector specifying the grouping structure for the ID effects")
+  }
+
+  if(any(ID %in% FG)){
+    stop("'ID' cannot have any names common with names of functional groups specified by 'FG'. Change the name of groups in either 'ID' or 'FG'")
+  }
+}
+
+group_IDs <- function(data, prop, ID){
+  # Species proportions to sum 
+  props <- data[, prop] 
+
+  # Unique IDs
+  uIDs <- unique(ID) 
+
+  # Number of unique ID terms
+  nIDs <- length(uIDs) 
+
+  sapply(uIDs, function(x){
+    # Index of prop to sum
+    idx <- which(ID == x) 
+    if(length(idx) == 1){
+      # No need to sum if only one element in group
+      result <- props[, idx]
+    } else {
+      # Sum of species proportions
+      result <-  rowSums(props[, idx])
+    }
+  })
+}
 #########################################
 
 DI_data <- function(prop, FG, data, theta = 1, what = c("E","AV","FG","ADD","FULL")) {

diff --git a/R/predictDI.R b/R/predictDI.R
@@ -28,6 +28,7 @@ predict.DI <- function (object, newdata, se.fit = FALSE, type = c("link",
   original_data <- object$original_data
   model_data <- eval(object$model)
   prop_cols <- eval(object$DIcall$prop)
+  ID_cols <- eval(object$DIcall$ID)
 
   if (is.numeric(prop_cols)){
     prop <- names(original_data[, prop_cols])
@@ -96,6 +97,15 @@ predict.DI <- function (object, newdata, se.fit = FALSE, type = c("link",
     updated_newdata <- newdata
   }
 
+  # Grouping ID terms 
+  # If not grouping was specified
+  if(is.null(ID_cols)){
+    ID_cols <- paste0(prop, "_ID")
+  }
+
+  grouped_IDs <- group_IDs(data = updated_newdata, prop = prop, ID = ID_cols)
+  updated_newdata <- cbind(updated_newdata, grouped_IDs)
+
   # Removing the dummy row added
   if (only_one_row) {
     updated_newdata <- updated_newdata[1,]
@@ -211,6 +221,14 @@ predict.DI <- function (object, newdata, se.fit = FALSE, type = c("link",
     if (DImodel_tag == 'ID' | DImodel_tag == 'STR'){
       original_data_updated <- original_data
     }
+    # Add grouped ID terms
+    if(is.null(ID_cols)){
+      ID_cols <- paste0(prop, "_ID")
+    }
+
+    grouped_IDs <- group_IDs(data = original_data_updated, prop = prop, ID = ID_cols)
+    original_data_updated <- cbind(original_data_updated, grouped_IDs)
+
     glm_fit <- glm(fmla, data = original_data_updated, family = object$family)
   }
 
@@ -287,6 +305,21 @@ contrasts_DI <- function(object, contrast, contrast_vars, ...){
     }
 
     og_data <- object$original_data
+    # Addtional check to ensure ID gropings work with contrasts
+    prop_cols <- eval(object$DIcall$prop)
+    ID_cols <- eval(object$DIcall$ID)
+
+    if (is.numeric(prop_cols)){
+      prop <- names(og_data[, prop_cols])
+    } else {
+      prop <- prop_cols
+    }
+    if(is.null(ID_cols)){
+      ID_cols <- paste0(prop, "_ID")
+    }
+
+    grouped_IDs <- group_IDs(data = og_data, prop = prop, ID = ID_cols)
+    og_data <- cbind(og_data, grouped_IDs)
     the_C <- matrix(0, ncol = length(betas))
     colnames(the_C) <- names(betas)
 

diff --git a/man/DI.Rd b/man/DI.Rd
@@ -31,7 +31,7 @@ This function will fit a wide range of Diversity-Interactions (DI) models, one a
 
 \usage{
 DI(y, prop, DImodel, custom_formula, data,
-   block, density, treat, FG, extra_formula,
+   block, density, treat, FG, ID, extra_formula,
    estimate_theta = FALSE, theta = 1)
 }
 
@@ -82,6 +82,14 @@ If species are classified by \emph{g} functional groups, this argument takes a t
 
   \item The \code{FG} argument is defunct when using the \code{custom_formula} argument, since species interactions must be included directly in the \code{custom_formula} argument. 
 }
+}
+
+  \item{ID}{
+  %%      ~~Describe \code{ID} here~~
+  If you wish to group the species identity effects, this argument takes a text list (of length \emph{s}) of the group to which each species identity effect belongs. For example, if there are four species and you wish to group the identity effects of species 1, 2, and 3, and let the identity effect of species 4 be estimated \code{ID} could be c("ID1", "ID1", "ID1", "ID2"). Note that the group names specified in FG and ID should be different.
+  \itemize{
+    \item The \code{ID} argument is defunct when using the \code{custom_formula} argument, since species identity effects must be included directly in the \code{custom_formula} argument.  
+}
 }
   \item{DImodel}{
 %%     ~~Describe \code{DImodel} here~~

diff --git a/tests/testthat/test-DI.R b/tests/testthat/test-DI.R
@@ -36,14 +36,14 @@ test_that("DI works with \"E\" model", {
                density = "density", DImodel = "E",
                extra_formula = ~p1:nitrogen, data = Switzerland)
   expect_equal(mod_E$coef,
-               c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642),
+               c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.40034388165464241),
                ignore_attr = TRUE)
 
   mod_E <- DI(y = "yield", prop = 4:7, block = "nitrogen",
                density = "density", DImodel = "E",
                extra_formula = ~p1:nitrogen, data = Switzerland)
   expect_equal(mod_E$coef,
-               c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642),
+               c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.40034388165464241),
                ignore_attr = TRUE)
 
   mod_E <- DI(y = "yield", prop = 4:7, estimate_theta = T,
@@ -181,6 +181,30 @@ test_that("FG model works with theta", {
 
 })
 
+## Conditions to check ID groupings
+test_that("ID grouping works", {
+  data("Switzerland")
+
+  expect_error(DI(y = "yield", DImodel = "AV", ID = c("G1", "G1"),
+                  prop = 4:7, data = Switzerland),
+               regexp = "'ID' should be a vector of same length as prop")
+
+  expect_error(DI(y = "yield", DImodel = "AV", ID = c("Add1", "Add1", "Add2", "Add3"),
+                  prop = 4:7, data = Switzerland),
+               regexp = "Please give your IDs a different name.")
+
+  expect_error(DI(y = "yield", DImodel = "FG", ID = c("G1", "G2", "G2", "G3"),
+                  FG = c("FG1", "FG6", "FG2", "G3"), prop = 4:7, data = Switzerland),
+               regexp = "'ID' cannot have any names common with names of functional groups specified by 'FG'")
+
+  data("Bell")
+  mod_group <- suppressWarnings(DI(y = "response", DImodel = "AV", ID = rep("ID1", 72),
+                  prop = paste0("p", 1:72), data = Bell, theta = 0.5))
+  rich_mod <- lm(response ~ richness, data = Bell)
+  expect_equal(predict(mod_group),
+               predict(rich_mod))
+})
+
 ## S3 methods for class DI (extract, AIC, BIC)
 test_that("S3 methods work", {
   data("Switzerland")
@@ -320,3 +344,13 @@ test_that("Interior functions in DI_internal work", {
                c(NA, 37.8141225527972, 11.5427716510587))
 })
 
+# Testing richness_vs_DI
+test_that("richness_vs_DI works", {
+  data("Switzerland")
+  ob <- richness_vs_DI(y = "yield", prop = 4:7,
+                       data = Switzerland)
+  my_mod <- DI(y = "yield", prop = 4:7, theta = 0.835712755619118286, 
+               DImodel = "AV", data = Switzerland)
+  expect_equal(predict(ob),
+               predict(my_mod))
+})
diff --git a/tests/testthat/test-predictDI.R b/tests/testthat/test-predictDI.R
@@ -229,7 +229,7 @@ test_that("contrasts function works", {
   # Using contrast_vars
   the_C <- matrix(c(1, 0, 0, 0, 0, 0, 0, 0), nrow = 1)
   colnames(the_C) <- names(mod$coefficients[1:8])
-  expect_equal(contrasts_DI(mod, contrast_vars = list("p1" = c(1))),
+  expect_equal(contrasts_DI(mod, contrast_vars = list("p1_ID" = c(1))),
                multcomp::glht(mod, linfct = the_C , 
                               coef = mod$coef[1:8], vcov = vcov(mod)),
                ignore_attr = TRUE)