From 047c027c6c5f1018ef15c915838d1bf47cc52682 Mon Sep 17 00:00:00 2001 From: Rishabh Vishwakarma Date: Tue, 21 Nov 2023 16:06:27 +0000 Subject: [PATCH] DImodels v1.3 --- DESCRIPTION | 3 +- R/DI.R | 122 ++++---- R/DI_internal.R | 54 ++-- R/autoDI.R | 15 +- R/autoDI_internal.R | 43 +-- R/dataDI.R | 97 +++++- R/predictDI.R | 536 +++++++++++++++++++------------- README.Rmd | 130 +++++++- README.md | 344 ++++++++++++++++---- man/DI.Rd | 36 ++- man/autoDI.Rd | 23 +- man/predict.DI.Rd | 12 +- tests/testthat/test-DI.R | 51 ++- tests/testthat/test-autoDI.R | 2 +- tests/testthat/test-dataDI.R | 7 + tests/testthat/test-predictDI.R | 169 +++++++++- vignettes/DImodels.Rmd | 121 ++++++- 17 files changed, 1355 insertions(+), 410 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index fed0fe2..9d4b69d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: DImodels Type: Package Title: Diversity-Interactions (DI) Models -Version: 1.2.1 +Version: 1.3 Date: 2023-05-20 Authors@R: c(person("Rafael", "de Andrade Moral", role = c("aut", "cre"), email = "rafael.deandrademoral@mu.ie"), person("John", "Connolly", role = "aut"), person("Rishabh", "Vishwakarma", role = "ctb"), person("Caroline", "Brophy", role = "aut")) Author: Rafael de Andrade Moral [aut, cre], John Connolly [aut], Rishabh Vishwakarma [ctb], Caroline Brophy [aut] @@ -20,3 +20,4 @@ Description: The 'DImodels' package is suitable for analysing data from biodiver License: GPL (>=2) NeedsCompilation: no Config/testthat/edition: 3 +RoxygenNote: 7.2.3 diff --git a/R/DI.R b/R/DI.R index 676ebd3..1e3fa02 100644 --- a/R/DI.R +++ b/R/DI.R @@ -1,5 +1,5 @@ -DI <- function(y, prop, DImodel, custom_formula, data, - block, density, treat, FG, extra_formula, +DI <- function(y, prop, DImodel, custom_formula, data, + block, density, treat, ID, FG, extra_formula, estimate_theta = FALSE, theta = 1) { DIcall <- match.call() if(theta < 0){ @@ -84,6 +84,7 @@ DI <- function(y, prop, DImodel, custom_formula, data, if(DImodel == "FG" & is.null(FG)) { stop("The argument FG must be specified alongside DImodel = 'FG'.\n") } + if(!missing(custom_formula)) { if(DImodel != "CUSTOM") { warning("fitting custom DI model using supplied custom_formula instead of model ", DImodel, @@ -93,16 +94,25 @@ DI <- function(y, prop, DImodel, custom_formula, data, model_fit <- DI_CUSTOM(custom_formula = custom_formula, data = data, family = family, estimate_theta = estimate_theta, total = total) } else { - # preparing new data object - data_obj <- DI_data_prepare(y = y, block = block, density = density, prop = prop, treat = treat, FG = FG, data = data, theta = theta) - newdata <- data_obj$newdata - nSpecies <- data_obj$nSpecies + # Ensure there are enough species to fit different DI models + nSpecies <- length(prop) if(nSpecies <= 2 & DImodel == "E") { stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")") } if(nSpecies <= 2 & DImodel == "AV") { stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")") } + if(nSpecies <= 2 & DImodel == "FG") { + stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")") + } + if(nSpecies <= 3 & DImodel == "ADD") { + stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")") + } + + # preparing new data object + data_obj <- DI_data_prepare(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, FG = FG, data = data, theta = theta) + newdata <- data_obj$newdata + nSpecies <- data_obj$nSpecies if(data_obj$P_int_flag & DImodel == "ADD") { stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")") } @@ -117,11 +127,11 @@ DI <- function(y, prop, DImodel, custom_formula, data, ## fitting the model if(DImodel == "FG") { model_fit <- model_fun(y = data_obj$y, block = data_obj$block, density = data_obj$density, prop = data_obj$prop, FG = data_obj$FG, - treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula, + ID = data_obj$ID, treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula, estimate_theta = estimate_theta, nSpecies = nSpecies, total = total, FGnames = FGnames) } else { model_fit <- model_fun(y = data_obj$y, block = data_obj$block, density = data_obj$density, prop = data_obj$prop, FG = data_obj$FG, - treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula, + ID = data_obj$ID, treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula, estimate_theta = estimate_theta, nSpecies = nSpecies, total = total) } } @@ -158,7 +168,7 @@ DI_CUSTOM <- function(custom_formula, data, family, estimate_theta, total) { return(modlist) } -DI_STR <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_STR <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -199,7 +209,7 @@ DI_STR <- function(y, block, density, prop, treat, FG, data, family, estimate_th "theta" = mod_STR)) } -DI_STR_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_STR_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -233,14 +243,14 @@ DI_STR_treat <- function(y, block, density, prop, treat, FG, data, family, estim } } mod_STR_treat <- DI_check_and_fit(fmla = fmla_STR_treat, y = y, - block = block, density = density, treat = treat, - family = family, data = data) + block = block, density = density, treat = treat, + family = family, data = data) mod_STR_treat$DIinternalcall <- DIinternalcall return(list("model" = mod_STR_treat, "theta" = mod_STR_treat)) } -DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_ID <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -262,10 +272,10 @@ DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_the } if(block == "block_zero") { fmla_ID <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), + paste(ID, collapse = "+"), "+", extra_terms)) } else { - fmla_ID <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", block, + fmla_ID <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", block, "+", extra_terms)) } # check design matrix and fit model @@ -277,7 +287,7 @@ DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_the "theta" = mod_ID)) } -DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_ID_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -298,10 +308,10 @@ DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estima y <- paste("cbind(", y, ",", total, "-", y, ")") } if(block == "block_zero") { - fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", treat, + fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", treat, "+", extra_terms)) } else { - fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", block, "+", treat, + fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", block, "+", treat, "+", extra_terms)) } # check design matrix and fit model @@ -313,7 +323,7 @@ DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estima "theta" = mod_ID_treat)) } -DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_AV <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -336,11 +346,11 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the if(!any(names(data) == "AV")) stop("'AV' variable not found in dataset.") if(block == "block_zero") { fmla_AV <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+AV", + paste(ID, collapse = "+"), "+AV", "+", extra_terms)) } else { fmla_AV <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+AV+", block, + paste(ID, collapse = "+"), "+AV+", block, "+", extra_terms)) } # check design matrix and fit model @@ -350,7 +360,7 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the modlist <- list("model" = mod_AV) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_AV, DImodel = "AV", + modlist$theta <- DI_theta(obj = mod_AV, DImodel = "AV", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -359,7 +369,7 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the return(modlist) } -DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_AV_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -381,10 +391,10 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima } if(!any(names(data) == "AV")) stop("'AV' variable not found in dataset.") if(block == "block_zero") { - fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+AV+", treat, + fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+AV+", treat, "+", extra_terms)) } else { - fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+AV+", block, "+", treat, + fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+AV+", block, "+", treat, "+", extra_terms)) } # check design matrix and fit model @@ -394,7 +404,7 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima modlist <- list("model" = mod_AV_treat) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_AV_treat, DImodel = "AV", + modlist$theta <- DI_theta(obj = mod_AV_treat, DImodel = "AV", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -403,7 +413,7 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima return(modlist) } -DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_E <- function(y, block, density, prop, treat, data, FG, ID, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -425,10 +435,10 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet } if(!any(names(data) == "E")) stop("'E' variable not found in dataset.") if(block == "block_zero") { - fmla_E <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E", + fmla_E <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E", "+", extra_terms)) } else { - fmla_E <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", block, + fmla_E <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", block, "+", extra_terms)) } # check design matrix and fit model @@ -438,7 +448,7 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet modlist <- list("model" = mod_E) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_E, DImodel = "E", + modlist$theta <- DI_theta(obj = mod_E, DImodel = "E", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -447,7 +457,7 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet return(modlist) } -DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_E_treat <- function(y, block, density, prop, treat, data, FG, ID, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -469,10 +479,10 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat } if(!any(names(data) == "E")) stop("'E' variable not found in dataset.") if(block == "block_zero") { - fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", treat, + fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", treat, "+", extra_terms)) } else { - fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", block, "+", treat, + fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", block, "+", treat, "+", extra_terms)) } # check design matrix and fit model @@ -482,7 +492,7 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat modlist <- list("model" = mod_E_treat) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_E_treat, DImodel = "E", + modlist$theta <- DI_theta(obj = mod_E_treat, DImodel = "E", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -491,7 +501,7 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat return(modlist) } -DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) { +DI_FG <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -514,10 +524,10 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the if(is.null(FG)) stop("FG matrix not found") #FG_ <- FG if(block == "block_zero") { - fmla_FG <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_", + fmla_FG <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_", "+", extra_terms)) } else { - fmla_FG <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", block, + fmla_FG <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", block, "+", extra_terms)) } # check design matrix and fit model @@ -527,7 +537,7 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the modlist <- list("model" = mod_FG) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_FG, DImodel = "FG", FGnames = FGnames, + modlist$theta <- DI_theta(obj = mod_FG, DImodel = "FG", prop = prop, FGnames = FGnames, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -536,7 +546,7 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the return(modlist) } -DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) { +DI_FG_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -559,10 +569,10 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima if(is.null(FG)) stop("FG matrix not found") #FG_ <- FG if(block == "block_zero") { - fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", treat, + fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", treat, "+", extra_terms)) } else { - fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", block, "+", treat, + fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", block, "+", treat, "+", extra_terms)) } # check design matrix and fit model @@ -572,7 +582,7 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima modlist <- list("model" = mod_FG_treat) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_FG_treat, DImodel = "FG", FGnames = FGnames, + modlist$theta <- DI_theta(obj = mod_FG_treat, DImodel = "FG", prop = prop, FGnames = FGnames, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -581,7 +591,7 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima return(modlist) } -DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_ADD <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -607,12 +617,12 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th stop("P_int variables not found in dataset.") if(block == "block_zero") { fmla_ADD <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(prop, "_add", collapse = "+"), "+", extra_terms)) } else { fmla_ADD <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(prop, "_add", collapse = "+"), "+", block, "+", extra_terms)) } @@ -623,7 +633,7 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th modlist <- list("model" = mod_ADD) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_ADD, DImodel = "ADD", + modlist$theta <- DI_theta(obj = mod_ADD, DImodel = "ADD", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -632,7 +642,7 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th return(modlist) } -DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_ADD_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() if(density != "density_zero") { @@ -658,12 +668,12 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim stop("P_int variables not found in dataset.") if(block == "block_zero") { fmla_ADD_treat <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(prop, "_add", collapse = "+"), "+", treat, "+", extra_terms)) } else { fmla_ADD_treat <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(prop, "_add", collapse = "+"), "+", block, "+", treat, "+", extra_terms)) } @@ -674,7 +684,7 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim modlist <- list("model" = mod_ADD_treat) modlist$model$DIinternalcall <- DIinternalcall if(estimate_theta) { - modlist$theta <- DI_theta(obj = mod_ADD_treat, DImodel = "ADD", + modlist$theta <- DI_theta(obj = mod_ADD_treat, DImodel = "ADD", prop = prop, nSpecies = nSpecies, family = family) modlist$theta$DIinternalcall <- DIinternalcall # RV change @@ -683,7 +693,7 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim return(modlist) } -DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_FULL <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() # Getting the FULL variables @@ -710,12 +720,12 @@ DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_t } if(block == "block_zero") { fmla_FULL <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(full_names, collapse = "+"), "+", extra_terms)) } else { fmla_FULL <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(full_names, collapse = "+"), "+", block, "+", extra_terms)) } @@ -735,7 +745,7 @@ DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_t return(modlist) } -DI_FULL_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { +DI_FULL_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) { DIinternalcall <- match.call() # Getting the FULL variables @@ -761,12 +771,12 @@ DI_FULL_treat <- function(y, block, density, prop, treat, FG, data, family, esti } if(block == "block_zero") { fmla_FULL_treat <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(full_names, collapse = "+"), "+", treat, "+", extra_terms)) } else { fmla_FULL_treat <- as.formula(paste(y, "~", "0+", - paste(prop, collapse = "+"), "+", + paste(ID, collapse = "+"), "+", paste0(full_names, collapse = "+"), "+", block, "+", treat, "+", extra_terms)) } diff --git a/R/DI_internal.R b/R/DI_internal.R index fb01882..d60074a 100644 --- a/R/DI_internal.R +++ b/R/DI_internal.R @@ -44,14 +44,14 @@ DI_check_and_fit <- function(fmla, y, block, density, treat, family, data, FG) { return(mod) } -proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGnames) { +proflik_theta <- function(theta, obj, family, int_terms, prop, DImodel, nSpecies, FGnames) { mm <- model.matrix(obj) if(DImodel %in% c("E","AV")) { data_theta_E_AV <- obj$data data_theta <- data.frame(mm) - data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta - new_E_AV <- DI_data_E_AV_internal(prop = 1:nSpecies, data = data_theta) + #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta + new_E_AV <- DI_data_E_AV(prop = prop, data = data_theta_E_AV, theta = theta) data_theta_E_AV$E <- new_E_AV$E data_theta_E_AV$AV <- new_E_AV$AV fitted_model_theta <- glm(formula(obj), family = family, data = data_theta_E_AV) @@ -64,10 +64,14 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna sigma_hat <- sqrt(sum(fitted_model_theta$residuals^2)/(fitted_model_theta$df.residual) * (n - p)/n) llik <- sum(dnorm(fitted_model_theta$y, mu_hat, sigma_hat, log = TRUE)) } else if(DImodel == "FG") { - data_theta_FG <- obj$data - data_theta <- data.frame(mm) - data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta - new_FG <- DI_data_FG_internal(prop = 1:nSpecies, FG = FGnames, data = data_theta) + #data_theta_FG <- obj$data + data_theta_FG <- data.frame(mm, check.names = FALSE) + colnames(data_theta_FG) <- gsub("`", "", colnames(data_theta_FG)) + # Add original props to calculate interactions + data_theta_FG <- cbind(data_theta_FG, obj$data[, prop]) + + #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta + new_FG <- DI_data_FG(prop = prop, FG = FGnames, data = data_theta_FG[, prop], theta = theta) FG_ <- new_FG$FG ## if we have column names starting with FG_ already ## in data_theta_FG, then substitute columns else it's all good @@ -84,6 +88,10 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna } old_formula <- formula(obj) new_formula <- paste0(old_formula[2], " ~ ", old_formula[3]) + + data_theta_FG$y <- obj$y + names(data_theta_FG)[length(names(data_theta_FG))] <- paste(old_formula[2]) + fitted_model_theta <- glm(as.formula(new_formula), family = family, data = data_theta_FG) #mu_hat <- fitted(fitted_model_theta) #sigma_hat <- sqrt(sum(fitted_model_theta$residuals^2)/(fitted_model_theta$df.residual-1)) @@ -95,8 +103,10 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna llik <- sum(dnorm(fitted_model_theta$y, mu_hat, sigma_hat, log = TRUE)) } else if(DImodel == "ADD") { #data_theta_ADD <- obj$data + # Drop existing _add columns to avoid conflicts data_theta <- data.frame(mm, check.names = FALSE) - new_ADD <- DI_data_ADD_theta(prop = 1:nSpecies, data = data_theta, theta = theta) + data_theta <- cbind(data_theta, obj$data[, prop]) + new_ADD <- DI_data_ADD_theta(prop = prop, data = data_theta, theta = theta) ADD_cols_in_the_data <- int_terms #grep("_add", colnames(data_theta)) if(length(ADD_cols_in_the_data) > 0) { j <- 1 @@ -172,7 +182,8 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) { #options(warn = -1) upper_boundary <- 1.5 theta_info <- get_theta_info(upper_boundary = upper_boundary, DImodel = DImodel, - obj = obj, family = family, int_terms = int_terms, + obj = obj, prop = prop, + family = family, int_terms = int_terms, nSpecies = nSpecies, FGnames = FGnames) #options(warn = 0) theta_hat <- theta_info$theta_hat @@ -184,16 +195,18 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) { if(DImodel %in% c("E","AV")) { data_theta_E_AV <- obj$data data_theta <- data.frame(mm) - data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat - new_E_AV <- DI_data_E_AV_internal(prop = 1:nSpecies, data = data_theta) + #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat + new_E_AV <- DI_data_E_AV(prop = prop, data = data_theta_E_AV, theta = theta_hat) data_theta_E_AV$E <- new_E_AV$E data_theta_E_AV$AV <- new_E_AV$AV mod_theta <- glm(formula(obj), family = family, data = data_theta_E_AV) } else if(DImodel == "FG") { - data_theta_FG <- obj$data - data_theta <- data.frame(mm) - data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat - new_FG <- DI_data_FG_internal(prop = 1:nSpecies, FG = FGnames, data = data_theta) + #data_theta_FG <- obj$data + data_theta_FG <- data.frame(mm, check.names = FALSE) + colnames(data_theta_FG) <- gsub("`", "", colnames(data_theta_FG)) + data_theta_FG <- cbind(data_theta_FG, obj$data[, prop]) + #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat + new_FG <- DI_data_FG(prop = prop, FG = FGnames, theta = theta_hat, data = data_theta_FG) FG_ <- new_FG$FG ## if we have column names starting with FG_ already ## in data_theta_FG, then substitute columns, else it's all good @@ -207,11 +220,14 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) { } old_formula <- formula(obj) new_formula <- paste0(old_formula[2], " ~ ", old_formula[3]) + data_theta_FG$y <- obj$y + names(data_theta_FG)[length(names(data_theta_FG))] <- paste(old_formula[2]) mod_theta <- glm(as.formula(new_formula), family = family, data = data_theta_FG) } else if(DImodel == "ADD") { #data_theta_ADD <- obj$data data_theta <- data.frame(mm, check.names = FALSE) - new_ADD <- DI_data_ADD_theta(prop = 1:nSpecies, data = data_theta, theta = theta_hat) + data_theta <- cbind(data_theta, obj$data[, prop]) + new_ADD <- DI_data_ADD_theta(prop = prop, data = data_theta, theta = theta_hat) ADD_cols_in_the_data <- int_terms #grep("_add", colnames(data_theta)) if(length(ADD_cols_in_the_data) > 0) { j <- 1 @@ -250,15 +266,15 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) { return(mod_theta) } -get_theta_info <- function(upper_boundary, DImodel, obj, family, int_terms, +get_theta_info <- function(upper_boundary, DImodel, obj, prop, family, int_terms, nSpecies, FGnames) { optimum <- optimize(proflik_theta, interval = c(0.00001, upper_boundary), - maximum = TRUE, DImodel = DImodel, obj = obj, family = family, + maximum = TRUE, DImodel = DImodel, obj = obj, prop = prop, family = family, int_terms = int_terms, nSpecies = nSpecies, FGnames = FGnames) theta_hat <- optimum$maximum theta_grid <- seq(0.00001, upper_boundary + 1, length = 100) proflik_theta_vec <- Vectorize(proflik_theta, "theta") - profile_loglik <- proflik_theta_vec(theta = theta_grid, obj = obj, + profile_loglik <- proflik_theta_vec(theta = theta_grid, obj = obj, prop = prop, family = family, int_terms = int_terms, DImodel = DImodel, nSpecies = nSpecies, FGnames = FGnames) diff --git a/R/autoDI.R b/R/autoDI.R index 3aafb2f..ddd3137 100644 --- a/R/autoDI.R +++ b/R/autoDI.R @@ -7,7 +7,7 @@ # selection = "Ftest" to perform F (or X2) tests to select best model, # "AIC" to use select model with smallest AIC -autoDI <- function(y, prop, data, block, density, treat, FG = NULL, +autoDI <- function(y, prop, data, block, density, treat, ID, FG = NULL, selection = c("Ftest","AIC","AICc","BIC","BICc"), step0 = FALSE, step4 = TRUE) { if(missing(y)) stop("You must supply a response variable name or column index through the argument 'y'.\n") @@ -57,6 +57,15 @@ autoDI <- function(y, prop, data, block, density, treat, FG = NULL, if(missing(FG)){ FG <- NULL } + + if(missing(ID)){ + if(is.numeric(prop)){ + ID <- paste0(colnames(data)[prop], "_ID") + } + else { + ID <- paste0(prop, "_ID") + } + } # Step 0 if(step0) { @@ -64,13 +73,13 @@ autoDI <- function(y, prop, data, block, density, treat, FG = NULL, } # Step 1 - step1_model <- autoDI_step1(y = y, block = block, density = density, prop = prop, treat = treat, family = family, data = data, selection = selection) + step1_model <- autoDI_step1(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, family = family, data = data, selection = selection) selected_model <- step1_model$model theta_flag <- step1_model$theta_flag # step 2 - step2_model <- autoDI_step2(y = y, block = block, density = density, prop = prop, treat = treat, family = family, FG = FG, data = data, theta_flag = theta_flag, selection = selection, selected_model = selected_model) + step2_model <- autoDI_step2(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, family = family, FG = FG, data = data, theta_flag = theta_flag, selection = selection, selected_model = selected_model) # step 3 step3_model <- autoDI_step3(selected_model = step2_model, selection = selection, family = family) diff --git a/R/autoDI_internal.R b/R/autoDI_internal.R index 3d435a9..82660c1 100644 --- a/R/autoDI_internal.R +++ b/R/autoDI_internal.R @@ -331,30 +331,30 @@ autoDI_step0 <- function(y, block, density, treat, family, data) { #options(scipen = 0) } -autoDI_step1 <- function(y, block, density, prop, treat, family, data, selection) { +autoDI_step1 <- function(y, block, density, prop, treat, ID, family, data, selection) { model_tag <- ifelse(length(prop) == 2, "FULL", "AV") if(is.na(treat)) { model_name <- paste0(model_tag, "_model") fit_theta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop, - DImodel = model_tag, data = data, estimate_theta = TRUE))) + ID = ID, DImodel = model_tag, data = data, estimate_theta = TRUE))) fit_notheta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop, - DImodel = model_tag, data = data, estimate_theta = FALSE))) + ID = ID, DImodel = model_tag, data = data, estimate_theta = FALSE))) fit_theta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop, - DImodel = model_tag, data = data, estimate_theta = TRUE) + ID = ID, DImodel = model_tag, data = data, estimate_theta = TRUE) fit_notheta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop, - DImodel = model_tag, data = data, estimate_theta = FALSE) + ID = ID, DImodel = model_tag, data = data, estimate_theta = FALSE) } else { model_name <- paste0(model_tag, "_model_treat") fit_theta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop, - treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE))) + ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE))) fit_notheta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop, - treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE))) + ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE))) fit_theta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop, - treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE) + ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE) fit_notheta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop, - treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE) + ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE) } message("\n", strrep("-", getOption("width"))) @@ -392,7 +392,7 @@ autoDI_step1 <- function(y, block, density, prop, treat, family, data, selection } autoDI_step2 <- function(y, block, density, - prop, treat, FG, + prop, treat, ID, FG, family, data, theta_flag, selection, selected_model){ @@ -420,10 +420,10 @@ autoDI_step2 <- function(y, block, density, } ID_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, + prop = prop, treat = treat, ID = ID, data = data, DImodel = 'ID'))) STR_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, + prop = prop, treat = treat, ID = ID, data = data, DImodel = 'STR'))) model_list <- list('STR_model' = STR_model, 'ID_model' = ID_model) @@ -432,28 +432,28 @@ autoDI_step2 <- function(y, block, density, theta_est <- coefficients(selected_model)['theta'] if(AV_flag){ AV_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, theta = theta_est, + prop = prop, treat = treat, ID = ID, theta = theta_est, data = data, DImodel = 'AV'))) model_list[[length(model_list)+1]] <- AV_model names(model_list)[length(model_list)] <- 'AV_model' } if(FG_flag){ FG_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, theta = theta_est, + prop = prop, treat = treat, ID = ID, theta = theta_est, data = data, FG = FG, DImodel = 'FG'))) model_list[[length(model_list)+1]] <- FG_model names(model_list)[length(model_list)] <- 'FG_model' } if(ADD_flag){ ADD_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, theta = theta_est, + prop = prop, treat = treat, ID = ID, theta = theta_est, data = data, DImodel = 'ADD'))) model_list[[length(model_list)+1]] <- ADD_model names(model_list)[length(model_list)] <- 'ADD_model' } if(FULL_flag){ FULL_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, theta = theta_est, + prop = prop, treat = treat, ID = ID, theta = theta_est, data = data, DImodel = 'FULL'))) model_list[[length(model_list)+1]] <- FULL_model names(model_list)[length(model_list)] <- 'FULL_model' @@ -467,28 +467,28 @@ autoDI_step2 <- function(y, block, density, } else { if(AV_flag){ AV_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, estimate_theta = F, + prop = prop, treat = treat, ID = ID, estimate_theta = F, data = data, DImodel = 'AV'))) model_list[[length(model_list)+1]] <- AV_model names(model_list)[length(model_list)] <- 'AV_model' } if(FG_flag){ FG_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, estimate_theta = F, + prop = prop, treat = treat, ID = ID, estimate_theta = F, data = data, FG = FG, DImodel = 'FG'))) model_list[[length(model_list)+1]] <- FG_model names(model_list)[length(model_list)] <- 'FG_model' } if(ADD_flag){ ADD_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, estimate_theta = F, + prop = prop, treat = treat, ID = ID, estimate_theta = F, data = data, DImodel = 'ADD'))) model_list[[length(model_list)+1]] <- ADD_model names(model_list)[length(model_list)] <- 'ADD_model' } if(FULL_flag){ FULL_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, - prop = prop, treat = treat, estimate_theta = F, + prop = prop, treat = treat, ID = ID, estimate_theta = F, data = data, DImodel = 'FULL'))) model_list[[length(model_list)+1]] <- FULL_model names(model_list)[length(model_list)] <- 'FULL_model' @@ -537,13 +537,14 @@ autoDI_step2 <- function(y, block, density, estimate_theta <- theta_flag DImodel <- fit_final$DIcall$DImodel the_final_model <- suppressWarnings(suppressMessages(DI(y = y, prop = prop, block = block, - FG = FG, density = density, treat = treat, + FG = FG, ID = ID, density = density, treat = treat, estimate_theta = estimate_theta, DImodel = DImodel, data = data))) the_final_model$DIcall$prop <- prop the_final_model$DIcall$DImodel <- DImodel the_final_model$DIcall$estimate_theta <- estimate_theta the_final_model$DIcall$FG <- FG + the_final_model$DIcall$ID <- ID the_final_model$DIcall$treat <- treat the_final_model$DIcall$block <- block the_final_model$DIcall$density <- density diff --git a/R/dataDI.R b/R/dataDI.R index 9b9bd88..9464774 100644 --- a/R/dataDI.R +++ b/R/dataDI.R @@ -1,4 +1,4 @@ -DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, theta = 1) { +DI_data_prepare <- function(y, block, density, prop, treat, ID, FG = NULL, data, theta = 1) { if(any(is.na(data))) { stop("The dataset contains missing values. Please remove them prior to analysis.") } @@ -56,6 +56,9 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the ## checking if the Pi's sum to 1 prop_check <- DI_prop_check(Pind = Pind, data = data_check) + if(prop_check == "OOB") { + stop("One or more rows have species proportions with values less than 0 or greater than 1. This must be corrected prior to analysis.\n") + } if(prop_check == "error") { stop("One or more rows have species proportions that do not sum to 1. This must be corrected prior to analysis.\n") } @@ -84,16 +87,31 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the } newdata <- data.frame(newdata, ADD$ADD) ## calculating FG variables - if(!is.null(FG)) FG <- DI_data_FG(prop = prop, FG = FG, data = data, theta = theta)$FG + if(!is.null(FG)) { + FG <- DI_data_FG(prop = prop, FG = FG, data = data, theta = theta)$FG + FG_names <- colnames(FG) + } else { + FG_names <- NULL + } # Calculating the FULL variables FULL <- DI_data_fullpairwise(prop = prop, data = data[, prop], theta = theta) # Add variabes to data newdata <- cbind(newdata, FULL) + ###### RV change######## + # Grouping ID effects + if(missing(ID)){ + ID <- paste0(prop, "_ID") + } + ID_name_check(ID = ID, prop = prop, FG = FG_names) + grouped_ID <- group_IDs(data = data, prop = prop, ID = ID) + newdata <- cbind(newdata, grouped_ID) + ####################### + ## return object return(list("newdata" = newdata, "y" = y, "block" = block, density = density, - "prop" = prop, "treat" = treat, "FG" = FG, + "prop" = prop, "ID" = unique(ID), "treat" = treat, "FG" = FG, "P_int_flag" = P_int_flag, "even_flag" = even_flag, "nSpecies" = length(prop))) } @@ -103,7 +121,7 @@ get_P_indices <- function(prop, data) { prop <- names(data[, prop]) # species proportions P_i (names) } else { vec_grep <- Vectorize(grep, "pattern") - Pind <- as.numeric(vec_grep(paste("\\<", prop, "\\>", sep = ""), + Pind <- as.numeric(vec_grep(paste0("^", prop, "$"),#paste("\\<", prop, "\\>", sep = ""), names(data))) } return(list(Pind = Pind, prop = prop)) @@ -308,6 +326,10 @@ DI_data_fullpairwise <- function(prop, data, theta = 1) { DI_prop_check <- function(Pind, data) { props <- data[,Pind] + # prop values are out of bounds, i.e. > 1 or < 0 + if(any(props > 1 | props < 0)){ + return("OOB") + } pi_sums <- apply(props, 1, sum) if(any(pi_sums > 1.0001) | any(pi_sums < .9999)) { return("error") @@ -361,6 +383,73 @@ add_name_check <- function(data){ stop('Certain column names cause internal conflicts when calculating additive interactions. Please rename any columns containing the string \'_add\'.') } } + +ID_name_check <- function(ID, prop, FG){ + cond1 <- length(grep(":", ID)) > 0 + cond2 <- any(ID == "_") + cond3 <- any(ID == "i") + cond4 <- any(ID == "n") + cond5 <- any(ID == "f") + cond6 <- any(ID == "g") + cond7 <- any(ID == "_i") + cond8 <- any(ID == "in") + cond9 <- any(ID == "nf") + cond10 <- any(ID == "fg") + cond11 <- any(ID == "g_") + cond12 <- any(ID == "_in") + cond13 <- any(ID == "inf") + cond14 <- any(ID == "nfg") + cond15 <- any(ID == "fg_") + cond16 <- any(ID == "_inf") + cond17 <- any(ID == "infg") + cond18 <- any(ID == "nfg_") + cond19 <- any(ID == "_infg") + cond20 <- any(ID == "infg_") + cond21 <- any(ID == "_infg_") + cond22 <- length(grep(pattern = "^add", x = ID, ignore.case = TRUE)) > 0 + cond23 <- length(grep(pattern = "^full", x = ID, ignore.case = TRUE)) > 0 + cond24 <- length(grep(pattern = "^fg", x = ID, ignore.case = TRUE)) > 0 + cond25 <- any(ID == "E") + cond26 <- any(ID == "AV") + if(cond1 | cond2 | cond3 | cond4 | cond5 | cond6 | cond7 | + cond8 | cond9 | cond10 | cond11 | cond12 | cond13 | cond14 | + cond15 | cond16 | cond17 | cond18 | cond19 | cond20 | cond21 | + cond22 | cond23 | cond24| cond25 | cond26) { + stop("Please give your IDs a different name.", + " Names should not include colons (':'), or any single or multiple", + " character combination of the expressions '_infg_', 'ADD', 'FG', 'FULL', 'E', and 'AV'.", + " These expressions are reserved for internal computations.") + } + if(length(ID)!=length(prop)){ + stop("'ID' should be a vector of same length as prop vector specifying the grouping structure for the ID effects") + } + if(any(ID %in% FG)){ + stop("'ID' cannot have any names common with names of functional groups specified by 'FG'. Change the name of groups in either 'ID' or 'FG'") + } +} + +group_IDs <- function(data, prop, ID){ + # Species proportions to sum + props <- data[, prop] + + # Unique IDs + uIDs <- unique(ID) + + # Number of unique ID terms + nIDs <- length(uIDs) + + sapply(uIDs, function(x){ + # Index of prop to sum + idx <- which(ID == x) + if(length(idx) == 1){ + # No need to sum if only one element in group + result <- props[, idx] + } else { + # Sum of species proportions + result <- rowSums(props[, idx]) + } + }) +} ######################################### DI_data <- function(prop, FG, data, theta = 1, what = c("E","AV","FG","ADD","FULL")) { diff --git a/R/predictDI.R b/R/predictDI.R index 8607e06..e0d3f15 100644 --- a/R/predictDI.R +++ b/R/predictDI.R @@ -1,278 +1,365 @@ -predict.DI <- function (object, newdata, se.fit = FALSE, type = c("link", - "response"), ...) +predict.DI <- function (object, newdata, se.fit = FALSE, + interval = c("none", "confidence", "prediction"), + level = 0.95, weights = 1, + type = c("link", "response", "terms"), ...) { + interval <- match.arg(interval) + # Make predictions for raw data if no newdata specified if (missing(newdata)) { if(missing(type)) { type <- "link" } - return(predict.glm(object, se.fit = se.fit, type = type, ...)) - } - - # Getting species columns and DImodel for adding interactions - type <- match.arg(type) - DImodel_tag <- object$DIcall$DImodel - if (is.null(DImodel_tag)){ - DImodel_tag <- 'CUSTOM' - } - - # If custom_formula was used just pass the object to predict.glm to get predictions - if (DImodel_tag == 'CUSTOM'){ - if(missing(type)) { - type <- "link" - } - return(predict.glm(object, newdata = newdata, se.fit = se.fit, type = type, ...)) - } - - original_data <- object$original_data - model_data <- eval(object$model) - prop_cols <- eval(object$DIcall$prop) - - if (is.numeric(prop_cols)){ - prop <- names(original_data[, prop_cols]) + ret_obj <- predict.glm(object, + se.fit = ifelse(interval != "none", TRUE, se.fit), + type = type, ...) } else { - prop <- prop_cols - } - - - if (!is.null(prop) & !all(prop %in% colnames(newdata))){ - prop_in_data <- prop[prop %in% colnames(newdata)] - prop_missing <- prop[!prop %in% prop_in_data] - if(all(is_near(rowSums(newdata[, prop_in_data]), 1))){ - missing_prop_cols <- matrix(0, ncol = length(prop_missing), nrow = nrow(newdata), dimnames = list(NULL, prop_missing)) - newdata <- cbind(newdata, missing_prop_cols) - warning(paste0('Species ', paste0(prop_missing, collapse = ','), ' were not present in newdata. These species proportions are assumed to be 0.')) + # Getting species columns and DImodel for adding interactions + type <- match.arg(type) + DImodel_tag <- object$DIcall$DImodel + if (is.null(DImodel_tag)){ + DImodel_tag <- 'CUSTOM' + } + + # If custom_formula was used just pass the object to predict.glm to get predictions + if (DImodel_tag == 'CUSTOM'){ + if(missing(type)) { + type <- "link" + } + return(predict.glm(object, newdata = newdata, se.fit = se.fit, type = type, ...)) + } + + original_data <- object$original_data + model_data <- eval(object$model) + prop_cols <- eval(object$DIcall$prop) + ID_cols <- eval(object$DIcall$ID) + + if (is.numeric(prop_cols)){ + prop <- names(original_data[, prop_cols]) } else { + prop <- prop_cols + } + + + if (!is.null(prop) & !all(prop %in% colnames(newdata))){ + prop_in_data <- prop[prop %in% colnames(newdata)] + prop_missing <- prop[!prop %in% prop_in_data] + if(all(is_near(rowSums(newdata[, prop_in_data]), 1))){ + missing_prop_cols <- matrix(0, ncol = length(prop_missing), nrow = nrow(newdata), dimnames = list(NULL, prop_missing)) + newdata <- cbind(newdata, missing_prop_cols) + warning(paste0('Species ', paste0(prop_missing, collapse = ','), ' were not present in newdata. These species proportions are assumed to be 0.')) + } else { + stop('The species proportions present in newdata don\'t sum to 1.') + } + } + + if (!is.null(prop) & !all(is_near(rowSums(newdata[, prop]), 1))){ stop('The species proportions present in newdata don\'t sum to 1.') } - } - - if (!is.null(prop) & !all(is_near(rowSums(newdata[, prop]), 1))){ - stop('The species proportions present in newdata don\'t sum to 1.') - } - # DI_data doesn't work if dataframe has a single row - # Adding a dummy row which will be deleted later - only_one_row <- nrow(newdata) == 1 - if (only_one_row) { - newdata <- rbind(newdata, rep(0, ncol(newdata))) - } - - # Adding interactions - theta_flag <- object$coefficients['theta'] - betas <- coef(object) - if (!DImodel_tag %in% c("ID", "STR")) { + # DI_data doesn't work if dataframe has a single row + # Adding a dummy row which will be deleted later + only_one_row <- nrow(newdata) == 1 + if (only_one_row) { + newdata <- rbind(newdata, newdata) + } + + # Adding interactions + theta_flag <- object$coefficients['theta'] + betas <- coef(object) if (!is.na(theta_flag)){ - theta_value <- coef(object)["theta"] - betas <- betas[-length(betas)] + theta_value <- coef(object)["theta"] + betas <- betas[-length(betas)] } else { - theta_value <- 1 - } - extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), - data = newdata, theta = theta_value, what = DImodel_tag) - if (DImodel_tag == "E") { - updated_newdata <- data.frame(newdata, E = extra_variables) - } - if (DImodel_tag == "AV") { - updated_newdata <- data.frame(newdata, AV = extra_variables) - } - if (DImodel_tag == "ADD") { - updated_newdata <- data.frame(newdata, extra_variables) + theta_value <- 1 } - if (DImodel_tag == "FG") { - # colnames(extra_variables) <- paste0("FG_", - # colnames(extra_variables)) + + if (!DImodel_tag %in% c("ID", "STR")) { - # FG model wasn't working for some reason, so had to assign it this way - newdata[, 'FG_'] <- extra_variables + extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), + data = newdata, theta = theta_value, what = DImodel_tag) + if (DImodel_tag == "E") { + updated_newdata <- data.frame(newdata, E = extra_variables) + } + if (DImodel_tag == "AV") { + updated_newdata <- data.frame(newdata, AV = extra_variables) + } + if (DImodel_tag == "ADD") { + updated_newdata <- data.frame(newdata, extra_variables) + } + if (DImodel_tag == "FG") { + # colnames(extra_variables) <- paste0("FG_", + # colnames(extra_variables)) + + # FG model wasn't working for some reason, so had to assign it this way + newdata[, 'FG_'] <- extra_variables + updated_newdata <- newdata + } + if (DImodel_tag == "FULL") { + updated_newdata <- data.frame(newdata, extra_variables, + check.names = FALSE) + } + } else { updated_newdata <- newdata } - if (DImodel_tag == "FULL") { - updated_newdata <- data.frame(newdata, extra_variables, - check.names = FALSE) + + # Grouping ID terms + # If not grouping was specified + if(is.null(ID_cols)){ + ID_cols <- paste0(prop, "_ID") } - } else { - updated_newdata <- newdata - } - - # Removing the dummy row added - if (only_one_row) { - updated_newdata <- updated_newdata[1,] - } - - # Checking for experimental structures - treat <- eval(object$DIcall$treat) - density <- eval(object$DIcall$density) - block <- eval(object$DIcall$block) - - structures <- list('treatment' = treat, - 'density' = density, - 'block' = block) - - for (covariate in structures){ - if (!is.null(covariate) && !is.na(covariate)){ - # If covariate was supplied as numeric in function call, getting its value - if (is.numeric(covariate)){ - covariate <- colnames(original_data)[covariate] - } - - if (is.numeric(original_data[, covariate])){ - if ( !(covariate %in% colnames(updated_newdata))){ - warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate, - '\' = ' , 0)) - updated_newdata[, covariate] <- 0 - } - } else { - # Levels of factor covariate in original data - covariate_levels <- as.factor(unique(original_data[, covariate])) - # If covariate isn't present in newdata, estimating for base level - if ( !(covariate %in% colnames(updated_newdata))){ - warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate, - '\' = ' , levels(covariate_levels)[1])) - updated_newdata[, covariate] <- levels(covariate_levels)[1] - } - - # If levels of covariate in newdata not matching ones in original data, stop prediction - if (! (all(unique(updated_newdata[, covariate]) %in% covariate_levels, na.rm = TRUE))){ - stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.')) + + grouped_IDs <- group_IDs(data = updated_newdata, prop = prop, ID = ID_cols) + updated_newdata <- cbind(updated_newdata, grouped_IDs) + + # Removing the dummy row added + if (only_one_row) { + updated_newdata <- updated_newdata[1,] + } + + # Checking for experimental structures + treat <- eval(object$DIcall$treat) + density <- eval(object$DIcall$density) + block <- eval(object$DIcall$block) + + structures <- list('treatment' = treat, + 'density' = density, + 'block' = block) + + for (covariate in structures){ + if (!is.null(covariate) && !is.na(covariate)){ + # If covariate was supplied as numeric in function call, getting its value + if (is.numeric(covariate)){ + covariate <- colnames(original_data)[covariate] } - # If covariate is supplied as character or numeric, converting to factor - if (!is.factor(updated_newdata[, covariate])){ - updated_newdata[, covariate] <- factor(updated_newdata[, covariate], - levels = levels(covariate_levels)) + if (is.numeric(original_data[, covariate])){ + if ( !(covariate %in% colnames(updated_newdata))){ + warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate, + '\' = ' , 0)) + updated_newdata[, covariate] <- 0 + } + } else { + # Levels of factor covariate in original data + covariate_levels <- as.factor(unique(original_data[, covariate])) + # If covariate isn't present in newdata, estimating for base level + if ( !(covariate %in% colnames(updated_newdata))){ + warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate, + '\' = ' , levels(covariate_levels)[1])) + updated_newdata[, covariate] <- levels(covariate_levels)[1] + } + + # If levels of covariate in newdata not matching ones in original data, stop prediction + if (! (all(unique(updated_newdata[, covariate]) %in% covariate_levels, na.rm = TRUE))){ + stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.')) + } + + # If covariate is supplied as character or numeric, converting to factor + if (!is.factor(updated_newdata[, covariate])){ + updated_newdata[, covariate] <- factor(updated_newdata[, covariate], + levels = levels(covariate_levels)) + } } } } - } - - - # Handling extra formula - extra_formula <- eval(object$DIcall$extra_formula) - - if (! is.null(extra_formula)){ - extra_data <- model.frame(terms(extra_formula), updated_newdata) - # Matching factors in extra_formula to ones in original_data - og_factors <- original_data[, sapply(original_data, function(x){is.factor(x) | is.character(x)})] - common_factors <- intersect(colnames(extra_data), colnames(og_factors)) + # Handling extra formula + extra_formula <- eval(object$DIcall$extra_formula) - if (length(common_factors)!=0){ + if (! is.null(extra_formula)){ - # Levels of all factors in extra_formula - xlevels <- lapply(common_factors, function(x){levels(original_data[,x])}) - names(xlevels) <- common_factors - - for (i in common_factors){ + extra_data <- model.frame(terms(extra_formula), updated_newdata) + + # Matching factors in extra_formula to ones in original_data + og_factors <- original_data[, sapply(original_data, function(x){is.factor(x) | is.character(x)})] + common_factors <- intersect(colnames(extra_data), colnames(og_factors)) + + if (length(common_factors)!=0){ - # If levels of factors in extra_formula in newdata not matching ones in original data, stop prediction - if (! (all(unique(updated_newdata[, i]) %in% xlevels[[i]], na.rm = TRUE))){ - stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.')) - } + # Levels of all factors in extra_formula + xlevels <- lapply(common_factors, function(x){levels(original_data[,x])}) + names(xlevels) <- common_factors - # If factors in extra_formula is supplied as character or numeric, converting to factor - if (!is.factor(updated_newdata[, i])){ - updated_newdata[, i] <- factor(updated_newdata[,i], levels = xlevels[[i]]) + for (i in common_factors){ + + # If levels of factors in extra_formula in newdata not matching ones in original data, stop prediction + if (! (all(unique(updated_newdata[, i]) %in% xlevels[[i]], na.rm = TRUE))){ + stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.')) + } + + # If factors in extra_formula is supplied as character or numeric, converting to factor + if (!is.factor(updated_newdata[, i])){ + updated_newdata[, i] <- factor(updated_newdata[,i], levels = xlevels[[i]]) + } } } + + # Having certain functions in extra_formula like poly, ns, bs, etc. + # cause problems in estimating model.matrix for newdata + + # So my solution is to simply refit the model with glm when + # such functions are used and then make the prediction + + + fmla <- eval(object$DIcheck_formula) + + extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), + data = original_data, theta = theta_value, what = DImodel_tag) + + if (DImodel_tag == "E") { + original_data_updated <- data.frame(original_data, E = extra_variables) + } + if (DImodel_tag == "AV") { + original_data_updated <- data.frame(original_data, AV = extra_variables) + } + if (DImodel_tag == "ADD") { + original_data_updated <- data.frame(original_data, extra_variables) + } + if (DImodel_tag == "FG") { + original_data[, 'FG_'] <- extra_variables + original_data_updated <- original_data + } + if (DImodel_tag == "FULL") { + original_data_updated <- data.frame(original_data, extra_variables, + check.names = FALSE) + } + if (DImodel_tag == 'ID' | DImodel_tag == 'STR'){ + original_data_updated <- original_data + } + + # Grouping ID terms + # If no grouping was specified + if(is.null(ID_cols)){ + ID_cols <- paste0(prop, "_ID") + } + + grouped_IDs <- group_IDs(data = original_data_updated, prop = prop, ID = ID_cols) + original_data_updated <- cbind(original_data_updated, grouped_IDs) + + glm_fit <- lm(fmla, data = original_data_updated) } - # Having certain functions in extra_formula like poly, ns, bs, etc. - # cause problems in estimating model.matrix for newdata - - # So my solution is to simply refit the model with glm when - # such functions are used and then make the prediction + # Calculating response + # Remove backticks from coefficient names for name-matching + names(betas) <- gsub(pattern = '`', replacement = '' , x = names(betas)) - - fmla <- eval(object$DIcheck_formula) - - extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), - data = original_data, theta = theta_value, what = DImodel_tag) - - if (DImodel_tag == "E") { - original_data_updated <- data.frame(original_data, E = extra_variables) - } - if (DImodel_tag == "AV") { - original_data_updated <- data.frame(original_data, AV = extra_variables) - } - if (DImodel_tag == "ADD") { - original_data_updated <- data.frame(original_data, extra_variables) + # glm fmla object from DI_check_and_fit + if (DImodel_tag == 'CUSTOM'){ + fmla <- eval(object$DIcall$custom_formula) + } else { + fmla <- eval(object$DIcheck_formula) } - if (DImodel_tag == "FG") { - original_data[, 'FG_'] <- extra_variables - original_data_updated <- original_data + + if (! is.null(extra_formula)){ + Terms <- delete.response(terms(glm_fit)) + } else { + Terms <- delete.response(terms(formula(fmla))) } - if (DImodel_tag == "FULL") { - original_data_updated <- data.frame(original_data, extra_variables, - check.names = FALSE) - } - if (DImodel_tag == 'ID' | DImodel_tag == 'STR'){ - original_data_updated <- original_data + # Model matrix for predictions of newdata + X_old <- as.data.frame(model.matrix(Terms, data = updated_newdata)) + names(X_old) <- gsub('`','', names(X_old)) + # print(X_old) + # glm formula adds NA for non-estimable levels of factors + # Removing the ones with NA to get only those present in DImodel + common <- intersect(names(betas), names(X_old)) + X_new <- as.data.frame(X_old[, common]) + # + # # Predictions + # y_hat <- as.numeric(X_new %*% betas) + # + # if (type == "response") { + # inv_link <- object$family$linkinv + # y_hat <- inv_link(y_hat) + # } + # if (se.fit) { + # standard_errors <- as.numeric(sqrt(diag(X_new %*% vcov(object) %*% + # t(X_new)))) + # dispersion <- summary(object)$dispersion + # residual.scale <- as.vector(sqrt(dispersion)) + # ret_obj <- list(fit = y_hat, se.fit = standard_errors, residual.scale = residual.scale) + # } + # else { + # ret_obj <- y_hat + # } + + if (! is.null(extra_formula)){ + # Terms <- delete.response(terms(glm_fit)) + ret_obj <- suppressWarnings(predict.lm(glm_fit, newdata = updated_newdata, + se.fit = ifelse(interval != "none", TRUE, se.fit), + type = ifelse(type == "link", "response", type), ...)) + } else { + # Terms <- delete.response(terms(formula(fmla))) + ret_obj <- suppressWarnings(predict.glm(object, newdata = X_new, + se.fit = ifelse(interval != "none", TRUE, se.fit), + type = type, ...)) } - glm_fit <- glm(fmla, data = original_data_updated, family = object$family) } - # Calculating response - # Remove backticks from coefficient names for name-matching - names(betas) <- gsub(pattern = '`', replacement = '' , x = names(betas)) - - # glm fmla object from DI_check_and_fit - if (DImodel_tag == 'CUSTOM'){ - fmla <- eval(object$DIcall$custom_formula) - } else { - fmla <- eval(object$DIcheck_formula) - } - - if (! is.null(extra_formula)){ - Terms <- delete.response(terms(glm_fit)) - } else { - Terms <- delete.response(terms(formula(fmla))) + # Confidence and prediction intervals + if(object$family$family == "gaussian"){ + if(interval != "none"){ + predictor <- ret_obj$fit + ip <- ret_obj$se.fit^2 + # Calculating CI/PI + w <- object$weights + r <- object$residuals + rss <- sum(if (is.null(w)) r^2 else r^2 * w) + df <- object$df.residual + res.var <- rss/df + pred.var <- res.var/weights + + tfrac <- qt((1 - level)/2, df) + hwid <- tfrac * switch(interval, confidence = sqrt(ip), + prediction = sqrt(ip + pred.var)) + if (type != "terms") { + predictor <- cbind(predictor, predictor + hwid %o% + c(1, -1)) + colnames(predictor) <- c("fit", "lwr", "upr") + ret_obj$fit <- predictor + } + # Don't give SE if user didn't ask for it + if(! se.fit){ + ret_obj$se.fit <- NULL + ret_obj$residual.scale <- NULL + ret_obj <- predictor + } + } } - # Model matrix for predictions of newdata - X_old <- as.data.frame(model.matrix(Terms, data = updated_newdata)) - names(X_old) <- gsub('`','', names(X_old)) - #print(X_old) - # glm formula adds NA for non-estimable levles of factors - # Removing the ones with NA to get only those present in DImodel - common <- intersect(names(betas), names(X_old)) - X_new <- as.matrix(X_old[, common]) - # Predictions - y_hat <- as.numeric(X_new %*% betas) - - if (type == "response") { - inv_link <- object$family$linkinv - y_hat <- inv_link(y_hat) - } - if (se.fit) { - standard_errors <- as.numeric(sqrt(diag(X_new %*% vcov(object) %*% - t(X_new)))) - dispersion <- summary(object)$dispersion - residual.scale <- as.vector(sqrt(dispersion)) - ret_obj <- list(fit = y_hat, se.fit = standard_errors, residual.scale = residual.scale) - } - else { - ret_obj <- y_hat - } return(ret_obj) } contrasts_DI <- function(object, contrast, contrast_vars, ...){ if (missing(object) | class(object)[1]!= 'DI'){ - stop('Please provied a DImodels model object') + stop("Please provied a DImodels model object") } if (missing(contrast_vars) & missing(contrast)){ - stop('Provide either one of contrast_vars or constrast') + stop("Provide either one of `contrast_vars` or `constrast`") } if (!missing(contrast_vars) & !missing(contrast)){ - stop('Provide only one of contrast_vars or constrast') + warning("Provide only one of `contrast_vars` or `constrast`. `contrast_vars` will be ignored.") + contrast_vars <- NULL } + og_data <- object$original_data + # prop_cols <- eval(object$DIcall$prop) + # prop <- colnames(og_data)[prop_cols] + # ID_cols <- eval(object$DIcall$ID) + # + # # Grouping ID terms + # # If not grouping was specified + # if(is.null(ID_cols)){ + # ID_cols <- paste0(prop, "_ID") + # } + # + # grouped_IDs <- group_IDs(data = og_data, prop = prop, ID = ID_cols) + # og_data <- cbind(og_data, grouped_IDs) + betas <- coef(object) theta_flag <- object$coefficients['theta'] if (!is.na(theta_flag)){ @@ -280,18 +367,21 @@ contrasts_DI <- function(object, contrast, contrast_vars, ...){ betas <- betas[-length(betas)] } - if (!missing(contrast_vars)){ + if (!missing(contrast_vars) && !is.null(contrast_vars)){ if(!is.list(contrast_vars)){ - stop('Contrast variables should be specified as a nested list') + stop('Contrast variables should be specified as a nested named list') } - og_data <- object$original_data the_C <- matrix(0, ncol = length(betas)) colnames(the_C) <- names(betas) for (var in names(contrast_vars)){ - if (!(var %in% colnames(og_data) | var %in% colnames(betas))){ + # if(var %in% prop) { + # var <- paste0(var, "_ID") + # } + + if (!(var %in% colnames(og_data))){ stop(paste0(var, ' not present in model')) } diff --git a/README.Rmd b/README.Rmd index 4e4f213..44fe864 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,9 +22,22 @@ knitr::opts_chunk$set( [![Codecov test coverage](https://codecov.io/gh/rafamoral/DImodels/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rafamoral/DImodels?branch=main) + The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit. -__Main changes in the package from version 1.0 to version 1.1 and newer__ + +__Main changes in the package from version 1.2 to version 1.3__ + +- The `DI` and `autoDI` functions now have an additional parameter called `ID` which enables the user to group the species identity effects (see examples below). +- The `predict` function now has flexibility to calculate confidence and prediction intervals for the predicted values. + +__Main changes in the package from version 1.1 to version 1.2__ + +- There are two new functions added to the package: + - `predict`: Make predictions from a fitted DI model without having to worry about theta, and the interaction terms in the data. + - `contrasts_DI`: Create contrasts for a DI model. + +__Main changes in the package from version 1.0 to version 1.1__ - `DI_data_prepare` is now superseded by `DI_data` (see examples below) @@ -37,13 +50,6 @@ install.packages("DImodels") library("DImodels") ``` -You can install the development version of DImodels from [GitHub](https://github.com/) with: - -``` r -# install.packages("devtools") -devtools::install_github("rafamoral/DImodels") -``` - ## Accessing an introduction to Diversity-Introductions models It is recommended that users unfamiliar with Diversity-Interactions (DI) models read the introduction to `DImodels`, before using the package. Run the following code to access the documentation. @@ -167,6 +173,36 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE) coef(m1_theta) ``` +### Grouping the species identity effects in the model +The species identity effects in a DI model can be grouped by specifying groups for each species using the `ID` argument. +The `ID` argument functions similar to the `FG` argument and accepts a character list of same length as number of species in the model. The identity effects of species belonging in the same group will be grouped together. + +Grouping all identity effects into a single term +```{r, echo = TRUE} +m1_group <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", "ID1", "ID1", + "ID1", "ID1", "ID1", "ID1")) +coef(m1_group) +``` + +Grouping identity effects of specific species +```{r, echo = TRUE} +m1_group2 <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", + "ID2", "ID2", "ID2", + "ID3", "ID3", "ID3")) +coef(m1_group2) +``` + +Note: Grouping ID effects will not have an effect on the calculation of the interaction effects, they would still be calculated by using all species. + +Read the documentation of `DI` and `autoDI` for more information and examples using the `ID` parameter. +```{r, eval = FALSE} +?DI +?autoDI +``` + + ## Fitting customised models using the `DI` function There are two ways to fit customised models using `DI`; the first is by using the option `DImodel = ` in the `DI` function and adding the argument `extra_formula = ` to it, and the second is to use the `custom_formula` argument in the `DI` function. If species interaction variables (e.g., the FG interactions or the average pairwise interaction) are included in either `extra_formula` or `custom_formula`, they must first be created and included in the dataset. The function `DI_data` can be used to compute several types of species interaction variables. @@ -219,6 +255,84 @@ m3 <- DI(y = "response", summary(m3) ``` +## Making predictions and testing contrasts for DI models +### Predictions using a DI model + +We can make predictions from a DI model just like any other regression model using the `predict` function. The user does not need to worry about adding any interaction terms or adjusting any columns if theta is not equal to 1. Only the species proportions along with any additional experimental structures is needed and all other terms in the model will be calculated for the user. +```{r, echo = TRUE} +# Fit model +m3 <- DI(y = "response", prop = 4:12, + treat = "treatment", DImodel = "AV", + extra_formula = ~ (AV) : treatment, data = sim3a) + +predict_data <- sim3[c(1, 79, 352), 3:12] +# Only species proportions and treatment is needed +print(predict_data) +# Make prediction +predict(m3, newdata = predict_data) +``` + +### Uncertainity around predictions +```{r} +# The interval and level parameters can be used to calculate the +# uncertainty around the predictions + +# Get confidence interval around prediction +predict(m3, newdata = predict_data, interval = "confidence") + +# Get prediction interval around prediction +predict(m3, newdata = predict_data, interval = "prediction") + +# The function returns a 95% interval by default, +# this can be changed using the level argument +predict(m3, newdata = predict_data, + interval = "prediction", level = 0.9) +``` + + +### Contrasts for DI models +The `contrasts_DI` function can be used to compare and formally test for a difference in performance of communities within the same as well as across different experimental structures + +Comparing the performance of the monocultures of different species at treatment A +```{r, echo = TRUE} +contr <- list("p1vsp2" = c(1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + "p3vsp5" = c(0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0), + "p4vsp6" = c(0, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0), + "p7vsp9" = c(0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 0, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +summary(the_C) +``` + +Comparing across the two treatment levels for monoculture of species 1 +```{r, echo = TRUE} +contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +summary(the_C) +``` + +Comparing between two species mixtures +```{r, echo = TRUE} +mixA <- c(0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0, 0, 0, 0, 0) +mixB <- c(0, 0.3333, 0, 0.3333, 0, 0.3333, 0, 0, 0, 0, 0, 0) + +# We have the proportions of the individual species in the mixtures, however +# we still need to calculate the interaction effect for these communities +contr_data <- data.frame(rbind(mixA, mixB)) +colnames(contr_data) <- names(coef(m3)) + +# Adding the interaction effect of the two mixtures +contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV +print(contr_data) + +# We can now subtract the respective values in each column of the two +# mixtures and get our contrast +my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ]) +rownames(my_contrast) <- "mixAvsB" + +the_C <- contrasts_DI(m3, contrast = my_contrast) +summary(the_C) +``` + ## References Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F Isbell, J Levine, A Lüscher, V Picasso, C Roscher, MT Sebastia, M Suter and A Weigelt (2013) An improved model to predict the effects of changing biodiversity levels on ecosystem function. Journal of Ecology, 101, 344-355. diff --git a/README.md b/README.md index 184f9a7..47414a0 100644 --- a/README.md +++ b/README.md @@ -24,8 +24,22 @@ species $p_1$, $p_2$, …, $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit. -**Main changes in the package from version 1.0 to version 1.1 and -newer** +**Main changes in the package from version 1.2 to version 1.3** + +- The `DI` and `autoDI` functions now have an additional parameter + called `ID` which enables the user to group the species identity + effects (see examples below). +- The `predict` function now has flexibility to calculate confidence and + prediction intervals for the predicted values. + +**Main changes in the package from version 1.1 to version 1.2** + +- There are two new functions added to the package: + - `predict`: Make predictions from a fitted DI model without having to + worry about theta, and the interaction terms in the data. + - `contrasts_DI`: Create contrasts for a DI model. + +**Main changes in the package from version 1.0 to version 1.1** - `DI_data_prepare` is now superseded by `DI_data` (see examples below) @@ -39,14 +53,6 @@ install.packages("DImodels") library("DImodels") ``` -You can install the development version of DImodels from -[GitHub](https://github.com/) with: - -``` r -# install.packages("devtools") -devtools::install_github("rafamoral/DImodels") -``` - ## Accessing an introduction to Diversity-Introductions models It is recommended that users unfamiliar with Diversity-Interactions (DI) @@ -292,15 +298,15 @@ summary(auto1) #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) -#> p1 9.7497 0.3666 26.595 < 2e-16 *** -#> p2 8.5380 0.3672 23.253 < 2e-16 *** -#> p3 8.2329 0.3666 22.459 < 2e-16 *** -#> p4 6.3644 0.3665 17.368 < 2e-16 *** -#> p5 10.8468 0.3669 29.561 < 2e-16 *** -#> p6 5.9621 0.4515 13.205 < 2e-16 *** -#> p7 5.4252 0.4516 12.015 < 2e-16 *** -#> p8 7.3204 0.4515 16.213 < 2e-16 *** -#> p9 8.2154 0.4515 18.196 < 2e-16 *** +#> p1_ID 9.7497 0.3666 26.595 < 2e-16 *** +#> p2_ID 8.5380 0.3672 23.253 < 2e-16 *** +#> p3_ID 8.2329 0.3666 22.459 < 2e-16 *** +#> p4_ID 6.3644 0.3665 17.368 < 2e-16 *** +#> p5_ID 10.8468 0.3669 29.561 < 2e-16 *** +#> p6_ID 5.9621 0.4515 13.205 < 2e-16 *** +#> p7_ID 5.4252 0.4516 12.015 < 2e-16 *** +#> p8_ID 7.3204 0.4515 16.213 < 2e-16 *** +#> p9_ID 8.2154 0.4515 18.196 < 2e-16 *** #> FG_bfg_FG1_FG2 3.4395 0.8635 3.983 8.09e-05 *** #> FG_bfg_FG1_FG3 11.5915 0.8654 13.395 < 2e-16 *** #> FG_bfg_FG2_FG3 2.8711 1.2627 2.274 0.02351 * @@ -359,15 +365,15 @@ summary(m1) #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) -#> p1 9.7497 0.3666 26.595 < 2e-16 *** -#> p2 8.5380 0.3672 23.253 < 2e-16 *** -#> p3 8.2329 0.3666 22.459 < 2e-16 *** -#> p4 6.3644 0.3665 17.368 < 2e-16 *** -#> p5 10.8468 0.3669 29.561 < 2e-16 *** -#> p6 5.9621 0.4515 13.205 < 2e-16 *** -#> p7 5.4252 0.4516 12.015 < 2e-16 *** -#> p8 7.3204 0.4515 16.213 < 2e-16 *** -#> p9 8.2154 0.4515 18.196 < 2e-16 *** +#> p1_ID 9.7497 0.3666 26.595 < 2e-16 *** +#> p2_ID 8.5380 0.3672 23.253 < 2e-16 *** +#> p3_ID 8.2329 0.3666 22.459 < 2e-16 *** +#> p4_ID 6.3644 0.3665 17.368 < 2e-16 *** +#> p5_ID 10.8468 0.3669 29.561 < 2e-16 *** +#> p6_ID 5.9621 0.4515 13.205 < 2e-16 *** +#> p7_ID 5.4252 0.4516 12.015 < 2e-16 *** +#> p8_ID 7.3204 0.4515 16.213 < 2e-16 *** +#> p9_ID 8.2154 0.4515 18.196 < 2e-16 *** #> FG_bfg_FG1_FG2 3.4395 0.8635 3.983 8.09e-05 *** #> FG_bfg_FG1_FG3 11.5915 0.8654 13.395 < 2e-16 *** #> FG_bfg_FG2_FG3 2.8711 1.2627 2.274 0.02351 * @@ -395,9 +401,9 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE) #> Fitted model: Functional group effects 'FG' DImodel #> Theta estimate: 0.9681 coef(m1_theta) -#> p1 p2 p3 p4 p5 +#> p1_ID p2_ID p3_ID p4_ID p5_ID #> 9.8128865 8.6069092 8.2968619 6.4287580 10.9110563 -#> p6 p7 p8 p9 FG_bfg_FG1_FG2 +#> p6_ID p7_ID p8_ID p9_ID FG_bfg_FG1_FG2 #> 6.0189395 5.4846833 7.4038925 8.2992262 2.9840924 #> FG_bfg_FG1_FG3 FG_bfg_FG2_FG3 FG_wfg_FG1 FG_wfg_FG2 FG_wfg_FG3 #> 10.6019235 2.3514998 2.3737831 0.3789464 1.8470612 @@ -405,6 +411,61 @@ coef(m1_theta) #> 3.1017864 0.9681005 ``` +### Grouping the species identity effects in the model + +The species identity effects in a DI model can be grouped by specifying +groups for each species using the `ID` argument. The `ID` argument +functions similar to the `FG` argument and accepts a character list of +same length as number of species in the model. The identity effects of +species belonging in the same group will be grouped together. + +Grouping all identity effects into a single term + +``` r +m1_group <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", "ID1", "ID1", + "ID1", "ID1", "ID1", "ID1")) +#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis. +#> Fitted model: Functional group effects 'FG' DImodel +#> Theta estimate: 0.9919 +coef(m1_group) +#> ID1 FG_bfg_FG1_FG2 FG_bfg_FG1_FG3 FG_bfg_FG2_FG3 FG_wfg_FG1 +#> 7.8667702 1.1475018 12.9438529 -1.2235215 5.6141823 +#> FG_wfg_FG2 FG_wfg_FG3 treatmentA theta +#> -5.5214662 1.0205019 3.1017864 0.9919097 +``` + +Grouping identity effects of specific species + +``` r +m1_group2 <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", + "ID2", "ID2", "ID2", + "ID3", "ID3", "ID3")) +#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis. +#> Fitted model: Functional group effects 'FG' DImodel +#> Theta estimate: 0.989 +coef(m1_group2) +#> ID1 ID2 ID3 FG_bfg_FG1_FG2 FG_bfg_FG1_FG3 +#> 8.5288216 7.9537767 7.1357104 0.9665077 13.3434768 +#> FG_bfg_FG2_FG3 FG_wfg_FG1 FG_wfg_FG2 FG_wfg_FG3 treatmentA +#> 0.4940952 4.1543637 -4.4683501 3.4674196 3.1017864 +#> theta +#> 0.9889999 +``` + +Note: Grouping ID effects will not have an effect on the calculation of +the interaction effects, they would still be calculated by using all +species. + +Read the documentation of `DI` and `autoDI` for more information and +examples using the `ID` parameter. + +``` r +?DI +?autoDI +``` + ## Fitting customised models using the `DI` function There are two ways to fit customised models using `DI`; the first is by @@ -437,15 +498,15 @@ summary(m2) #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) -#> p1 9.391824 0.540485 17.377 < 2e-16 *** -#> p2 8.492825 0.540879 15.702 < 2e-16 *** -#> p3 8.406038 0.540471 15.553 < 2e-16 *** -#> p4 6.015296 0.540391 11.131 < 2e-16 *** -#> p5 10.802270 0.378776 28.519 < 2e-16 *** -#> p6 5.917565 0.461482 12.823 < 2e-16 *** -#> p7 5.380703 0.461535 11.658 < 2e-16 *** -#> p8 7.275881 0.461506 15.766 < 2e-16 *** -#> p9 8.170907 0.461471 17.706 < 2e-16 *** +#> p1_ID 10.018491 0.466552 21.473 < 2e-16 *** +#> p2_ID 8.494038 0.467009 18.188 < 2e-16 *** +#> p3_ID 7.970716 0.466536 17.085 < 2e-16 *** +#> p4_ID 6.624476 0.466443 14.202 < 2e-16 *** +#> p5_ID 10.802270 0.378776 28.519 < 2e-16 *** +#> p6_ID 5.917565 0.461482 12.823 < 2e-16 *** +#> p7_ID 5.380703 0.461535 11.658 < 2e-16 *** +#> p8_ID 7.275881 0.461506 15.766 < 2e-16 *** +#> p9_ID 8.170907 0.461471 17.706 < 2e-16 *** #> FG_bfg_FG1_FG2 3.439508 0.865279 3.975 8.38e-05 *** #> FG_bfg_FG1_FG3 11.591458 0.867140 13.367 < 2e-16 *** #> FG_bfg_FG2_FG3 2.871063 1.265295 2.269 0.02381 * @@ -453,10 +514,10 @@ summary(m2) #> FG_wfg_FG2 0.679285 2.360195 0.288 0.77365 #> FG_wfg_FG3 2.416774 2.333420 1.036 0.30097 #> treatmentA 3.190868 0.216493 14.739 < 2e-16 *** -#> `p1:treatmentB` 0.626667 0.668369 0.938 0.34902 -#> `p2:treatmentB` 0.001213 0.668369 0.002 0.99855 -#> `p3:treatmentB` -0.435322 0.668369 -0.651 0.51522 -#> `p4:treatmentB` 0.609180 0.668369 0.911 0.36262 +#> `treatmentA:p1` -0.626667 0.668369 -0.938 0.34902 +#> `treatmentA:p2` -0.001213 0.668369 -0.002 0.99855 +#> `treatmentA:p3` 0.435322 0.668369 0.651 0.51522 +#> `treatmentA:p4` -0.609180 0.668369 -0.911 0.36262 #> --- #> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 #> @@ -501,15 +562,15 @@ summary(m3) #> #> Coefficients: #> Estimate Std. Error t value Pr(>|t|) -#> p1 9.68668 0.40000 24.217 < 2e-16 *** -#> p2 8.47495 0.40053 21.159 < 2e-16 *** -#> p3 8.16990 0.39998 20.426 < 2e-16 *** -#> p4 6.30140 0.39987 15.759 < 2e-16 *** -#> p5 10.78379 0.40031 26.938 < 2e-16 *** -#> p6 5.89908 0.47958 12.301 < 2e-16 *** -#> p7 5.36222 0.47963 11.180 < 2e-16 *** -#> p8 7.25740 0.47960 15.132 < 2e-16 *** -#> p9 8.15243 0.47957 17.000 < 2e-16 *** +#> p1_ID 9.68668 0.40000 24.217 < 2e-16 *** +#> p2_ID 8.47495 0.40053 21.159 < 2e-16 *** +#> p3_ID 8.16990 0.39998 20.426 < 2e-16 *** +#> p4_ID 6.30140 0.39987 15.759 < 2e-16 *** +#> p5_ID 10.78379 0.40031 26.938 < 2e-16 *** +#> p6_ID 5.89908 0.47958 12.301 < 2e-16 *** +#> p7_ID 5.36222 0.47963 11.180 < 2e-16 *** +#> p8_ID 7.25740 0.47960 15.132 < 2e-16 *** +#> p9_ID 8.15243 0.47957 17.000 < 2e-16 *** #> FG_bfg_FG1_FG2 4.00191 1.12383 3.561 0.000415 *** #> FG_bfg_FG1_FG3 11.77389 1.12973 10.422 < 2e-16 *** #> FG_bfg_FG2_FG3 3.83681 1.64287 2.335 0.020027 * @@ -588,6 +649,185 @@ summary(m3) #> Number of Fisher Scoring iterations: 2 ``` +## Making predictions and testing contrasts for DI models + +### Predictions using a DI model + +We can make predictions from a DI model just like any other regression +model using the `predict` function. The user does not need to worry +about adding any interaction terms or adjusting any columns if theta is +not equal to 1. Only the species proportions along with any additional +experimental structures is needed and all other terms in the model will +be calculated for the user. + +``` r +# Fit model +m3 <- DI(y = "response", prop = 4:12, + treat = "treatment", DImodel = "AV", + extra_formula = ~ (AV) : treatment, data = sim3a) +#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis. +#> Fitted model: Average interactions 'AV' DImodel + +predict_data <- sim3[c(1, 79, 352), 3:12] +# Only species proportions and treatment is needed +print(predict_data) +#> treatment p1 p2 p3 p4 p5 p6 p7 p8 +#> 1 A 0 0 0.0000000 0 0.0000000 0.0000000 0.0000000 0.0000000 +#> 79 A 0 0 0.0000000 0 0.5000000 0.0000000 0.0000000 0.5000000 +#> 352 B 0 0 0.1666667 0 0.1666667 0.1666667 0.1666667 0.1666667 +#> p9 +#> 1 1.0000000 +#> 79 0.0000000 +#> 352 0.1666667 +# Make prediction +predict(m3, newdata = predict_data) +#> 1 79 352 +#> 12.83789 14.27503 10.00291 +``` + +### Uncertainity around predictions + +``` r +# The interval and level parameters can be used to calculate the +# uncertainty around the predictions + +# Get confidence interval around prediction +predict(m3, newdata = predict_data, interval = "confidence") +#> fit lwr upr +#> 1 12.83789 12.028716 13.64707 +#> 79 14.27503 13.817612 14.73246 +#> 352 10.00291 9.694552 10.31126 + +# Get prediction interval around prediction +predict(m3, newdata = predict_data, interval = "prediction") +#> fit lwr upr +#> 1 12.83789 10.124779 15.55100 +#> 79 14.27503 11.645310 16.90476 +#> 352 10.00291 7.394976 12.61083 + +# The function returns a 95% interval by default, +# this can be changed using the level argument +predict(m3, newdata = predict_data, + interval = "prediction", level = 0.9) +#> fit lwr upr +#> 1 12.83789 10.562595 15.11319 +#> 79 14.27503 12.069670 16.48040 +#> 352 10.00291 7.815819 12.18999 +``` + +### Contrasts for DI models + +The `contrasts_DI` function can be used to compare and formally test for +a difference in performance of communities within the same as well as +across different experimental structures + +Comparing the performance of the monocultures of different species at +treatment A + +``` r +contr <- list("p1vsp2" = c(1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + "p3vsp5" = c(0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0), + "p4vsp6" = c(0, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0), + "p7vsp9" = c(0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 0, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +#> Generated contrast matrix: +#> p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV treatmentA +#> p1vsp2 1 -1 0 0 0 0 0 0 0 0 0 +#> p3vsp5 0 0 1 0 -1 0 0 0 0 0 0 +#> p4vsp6 0 0 0 1 0 -1 0 0 0 0 0 +#> p7vsp9 0 0 0 0 0 0 1 0 -1 0 0 +#> `AV:treatmentB` +#> p1vsp2 0 +#> p3vsp5 0 +#> p4vsp6 0 +#> p7vsp9 0 +summary(the_C) +#> +#> Simultaneous Tests for General Linear Hypotheses +#> +#> Fit: glm(formula = new_fmla, family = family, data = new_data) +#> +#> Linear Hypotheses: +#> Estimate Std. Error z value Pr(>|z|) +#> p1vsp2 == 0 1.473 0.477 3.088 0.00803 ** +#> p3vsp5 == 0 -2.652 0.477 -5.560 1.08e-07 *** +#> p4vsp6 == 0 1.462 0.477 3.064 0.00870 ** +#> p7vsp9 == 0 -5.521 0.477 -11.573 < 2e-16 *** +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> (Adjusted p values reported -- single-step method) +``` + +Comparing across the two treatment levels for monoculture of species 1 + +``` r +contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +#> Generated contrast matrix: +#> p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV treatmentA +#> treatAvsB 1 0 0 0 0 0 0 0 0 0 1 +#> `AV:treatmentB` +#> treatAvsB 0 +summary(the_C) +#> +#> Simultaneous Tests for General Linear Hypotheses +#> +#> Fit: glm(formula = new_fmla, family = family, data = new_data) +#> +#> Linear Hypotheses: +#> Estimate Std. Error z value Pr(>|z|) +#> treatAvsB == 0 12.8993 0.4116 31.34 <2e-16 *** +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> (Adjusted p values reported -- single-step method) +``` + +Comparing between two species mixtures + +``` r +mixA <- c(0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0, 0, 0, 0, 0) +mixB <- c(0, 0.3333, 0, 0.3333, 0, 0.3333, 0, 0, 0, 0, 0, 0) + +# We have the proportions of the individual species in the mixtures, however +# we still need to calculate the interaction effect for these communities +contr_data <- data.frame(rbind(mixA, mixB)) +colnames(contr_data) <- names(coef(m3)) + +# Adding the interaction effect of the two mixtures +contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV +print(contr_data) +#> p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV +#> mixA 0.25 0.0000 0.25 0.0000 0.25 0.0000 0.25 0 0 0.3750000 +#> mixB 0.00 0.3333 0.00 0.3333 0.00 0.3333 0.00 0 0 0.3332667 +#> treatmentA `AV:treatmentB` +#> mixA 0 0 +#> mixB 0 0 + +# We can now subtract the respective values in each column of the two +# mixtures and get our contrast +my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ]) +rownames(my_contrast) <- "mixAvsB" + +the_C <- contrasts_DI(m3, contrast = my_contrast) +#> Generated contrast matrix: +#> p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV +#> mixAvsB 0.25 -0.3333 0.25 -0.3333 0.25 -0.3333 0.25 0 0 0.04173333 +#> treatmentA `AV:treatmentB` +#> mixAvsB 0 0 +summary(the_C) +#> +#> Simultaneous Tests for General Linear Hypotheses +#> +#> Fit: glm(formula = new_fmla, family = family, data = new_data) +#> +#> Linear Hypotheses: +#> Estimate Std. Error z value Pr(>|z|) +#> mixAvsB == 0 2.0379 0.2599 7.841 4.44e-15 *** +#> --- +#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 +#> (Adjusted p values reported -- single-step method) +``` + ## References Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F diff --git a/man/DI.Rd b/man/DI.Rd index 3669e64..6029393 100644 --- a/man/DI.Rd +++ b/man/DI.Rd @@ -31,7 +31,7 @@ This function will fit a wide range of Diversity-Interactions (DI) models, one a \usage{ DI(y, prop, DImodel, custom_formula, data, - block, density, treat, FG, extra_formula, + block, density, treat, ID, FG, extra_formula, estimate_theta = FALSE, theta = 1) } @@ -72,6 +72,18 @@ When used in conjunction with \code{DImodel}, the treatment will be included in \item The \code{treat} argument is defunct when using the \code{custom_formula} argument, and any treatment must be included directly in the \code{custom_formula} argument. } +} + \item{ID}{ + %% ~~Describe \code{ID} here~~ +This argument takes a text list (of length \emph{s}) dsecirbing groupings for the identity effects of the species. For example, if there are four species and you wish to group the identity effects all four species into a single term: \code{ID} could be \code{ID = c("ID1","ID1","ID1","ID1")}, where "ID1" is the name of the ID group. Similarly if the we wish to have two identity effect groups where identity effect of species 1 and 3, and species 2 and 4 are grouped together: \code{ID} could be \code{ID = c("ID1","ID2","ID1","ID2")}, where "ID1" and "ID2" are the names of the ID groups. These ideas expand to any number of species and any number or combination of groups. Finally, the ID groups do not have to be named "ID1" and "ID2", the user can specify any name for the groups. + + \itemize{ + \item If the \code{ID} argument is not specified, each species will be assumed to have a separate identity effect. + + \item Specify an grouping for the ID does not affect the interaction terms. The interactions are still calculated using the individual species proportions. + + \item The \code{ID} argument is defunct when using the \code{custom_formula} argument, since species identity effects must be included directly in the \code{custom_formula} argument. +} } \item{FG}{ %% ~~Describe \code{FG} here~~ @@ -211,14 +223,32 @@ data(Switzerland) Switzerlandsums <- rowSums(Switzerland[4:7]) summary(Switzerlandsums) +## Fit the a simple AV DImodel with theta = 1 +mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), + DImodel = "AV", data = Switzerland) +summary(mod) + +## Fit the same model but group the 4 species identity effect into 2 groups +mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), + ID = c("ID1", "ID1", "ID2", "ID2"), DImodel = "AV", + data = Switzerland) +summary(mod) + +## Combine the four identity effects into a single term and estimate theta +mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), + ID = c("ID1", "ID1", "ID1", "ID1"), DImodel = "AV", + estimate_theta = TRUE, data = Switzerland) +summary(mod) + ## Fit the FG DImodel, with factors density and treatment, and with theta = 1 m1 <- DI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland) summary(m1) -## Fit the FG DImodel, with factors density and treatment, and theta estimated +## Fit the FG DImodel, with factors density and treatment, and a fixed value of theta not equal to 1 m2 <- DI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", - FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland, estimate_theta = TRUE) + FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland, + theta = 0.5) summary(m2) ## Test if the identity effects interact with nitrogen (and main effect of nitrogen excluded) diff --git a/man/autoDI.Rd b/man/autoDI.Rd index 8722c6c..ca65b3f 100644 --- a/man/autoDI.Rd +++ b/man/autoDI.Rd @@ -11,7 +11,7 @@ This function provides an automated way to fit a (limited) range of Diversity-In } \usage{ -autoDI(y, prop, data, block, density, treat, FG = NULL, +autoDI(y, prop, data, block, density, treat, ID, FG = NULL, selection = c("Ftest","AIC","AICc","BIC","BICc"), step0 = FALSE, step4 = TRUE) } @@ -36,6 +36,18 @@ A vector of \emph{s} column names identifying the species proportions in each ro \item{treat}{ %% ~~Describe \code{prop} here~~ The name of a column in the dataset containing the value of a treatment factor or covariate. The treatment name must be included in quotes, for example, \code{treat = "nitrogen"}. (Only one treatment or covariate is permitted in \code{autoDI}, but see \code{DI} for options involving more than one treatment.) If the treatment is a factor, the variable must already be specified as a factor prior to using \code{autoDI}. +} + \item{ID}{ + %% ~~Describe \code{ID} here~~ +This argument takes a text list (of length \emph{s}) dsecirbing groupings for the identity effects of the species. For example, if there are four species and you wish to group the identity effects all four species into a single term: \code{ID} could be \code{ID = c("ID1","ID1","ID1","ID1")}, where "ID1" is the name of the ID group. Similarly if the we wish to have two identity effect groups where identity effect of species 1 and 3, and species 2 and 4 are grouped together: \code{ID} could be \code{ID = c("ID1","ID2","ID1","ID2")}, where "ID1" and "ID2" are the names of the ID groups. These ideas expand to any number of species and any number or combination of groups. Finally, the ID groups do not have to be named "ID1" and "ID2", the user can specify any name for the groups. + + \itemize{ + \item If the \code{ID} argument is not specified, each species will be assumed to have a separate identity effect. + + \item Specify an grouping for the ID does not affect the interaction terms. The interactions are still calculated using the individual species proportions. + + \item The \code{ID} argument is defunct when using the \code{custom_formula} argument, since species identity effects must be included directly in the \code{custom_formula} argument. +} } \item{FG}{ %% ~~Describe \code{FG} here~~ @@ -185,6 +197,15 @@ The \code{\link{Switzerland}} dataset examples. } selection = "Ftest") summary(auto1) +## Running autoDI after grouping identity effects using the "ID" argument + auto2 <- autoDI(y = "yield", density = "density", + prop = c("p1","p2","p3","p4"), + treat = "nitrogen", ID = c("ID1", "ID1", "ID1", "ID1"), + FG = c("G", "G", "L", "L"), data = Switzerland, + selection = "Ftest") + summary(auto2) + + \donttest{ ## Using column numbers to indicate which columns contain the proportions and including Step 0 auto2 <- autoDI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", diff --git a/man/predict.DI.Rd b/man/predict.DI.Rd index cb19fb5..e53339e 100644 --- a/man/predict.DI.Rd +++ b/man/predict.DI.Rd @@ -8,14 +8,20 @@ Predict Method for Diversity-Interactions (DI) Models Generates predictions for a fitted DI models object and, optionally, the associated standard errors for those predictions. } \usage{ -\method{predict}{DI}(object, newdata, se.fit = FALSE, type = c("link", "response"), ...) +\method{predict}{DI}(object, newdata, se.fit = FALSE, +interval = c("none", "confidence", "prediction"), +level = 0.95, weights = 1, +type = c("link", "response", "terms"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{object}{a \code{\link{DI}} or \code{\link{autoDI}} model object.} \item{newdata}{optionally, a data frame in which to look for variables with which to predict. If omitted, predictions will be made for data used to fit the model.} - \item{se.fit}{logical switch indicating whether to calculate the associated standard errors} - \item{type}{the type of prediction required. The default is on the scale of the linear predictors; the alternative "response" is on the scale of the response variable.} + \item{se.fit}{A logical switch indicating whether to calculate the associated standard errors.} + \item{interval}{The type of interval to be calculated. Accepts one of "none", "confidence" or "prediction", with "none"" being the default.} + \item{level}{The level (1 - \eqn{\alpha}) at which to calculate the interval. Defaults to 0.95, giving a 95\% interval.} + \item{weights}{The variance weights for calculating the prediction intervals. By default all values get the same weight of 1. Can also specify a numeric vector of same length as number of rows in \code{newdata}.} + \item{type}{the type of prediction required. One of "link", "response" or "terms". The default "link" is on the scale of the linear predictors while "response" is on the scale of the response variable. The "terms" option returns a matrix giving the fitted values of each term in the model formula on the linear predictor scale.} \item{...}{further arguments passed to or from other methods. For eg: \code{dispersion} or \code{na.action} arguments from \code{\link[stats]{predict.glm}} function.} } \details{ diff --git a/tests/testthat/test-DI.R b/tests/testthat/test-DI.R index fbe4fe9..335a654 100644 --- a/tests/testthat/test-DI.R +++ b/tests/testthat/test-DI.R @@ -36,14 +36,14 @@ test_that("DI works with \"E\" model", { density = "density", DImodel = "E", extra_formula = ~p1:nitrogen, data = Switzerland) expect_equal(mod_E$coef, - c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642), + c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.4003438816546424), ignore_attr = TRUE) mod_E <- DI(y = "yield", prop = 4:7, block = "nitrogen", density = "density", DImodel = "E", extra_formula = ~p1:nitrogen, data = Switzerland) expect_equal(mod_E$coef, - c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642), + c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.4003438816546424), ignore_attr = TRUE) mod_E <- DI(y = "yield", prop = 4:7, estimate_theta = T, @@ -181,6 +181,53 @@ test_that("FG model works with theta", { }) +## Ensure specific models can't be fit for models with less than four species +test_that("Correct number of species are present", { + data("sim1") + + sim1$p5 <- 1 - sim1$p1 + sim1$p6 <- 1 - sim1$p1 - sim1$p2 + + expect_error(DI(data = sim1, y = "response", + prop = c("p1", "p5"), + DImodel= "E"), + "you must have > 2 species to fit model E") + + expect_error(DI(data = sim1, y = "response", + prop = c("p1", "p5"), + DImodel= "AV"), + "you must have > 2 species to fit model AV") + + expect_error(DI(data = sim1, y = "response", + prop = c("p1", "p5"), + DImodel= "FG", + FG = c("G", "G")), + "you must have > 2 species to fit model FG") + + expect_error(DI(data = sim1, y = "response", + prop = c("p1", "p2", "p6"), + DImodel= "ADD"), + "you must have > 3 species to fit model ADD") + +}) + +## Richness vs DI works +test_that("Richness vs DI works", { + data("Switzerland") + + ## compare the richness model with DI alternatives + t1 <- richness_vs_DI(y = "yield", prop = 4:7, data = Switzerland) + m1 <- DI(y = "yield", prop = 4:7, data = Switzerland, + DImodel = "AV", estimate_theta = TRUE) + expect_equal(t1$coefficients, m1$coefficients) + + ## include the density effects in the linear predictors of the four models + t2 <- richness_vs_DI(y = "yield", prop = 4:7, data = Switzerland, extra_formula = ~ density) + m2 <- DI(y = "yield", prop = 4:7, data = Switzerland, DImodel = "AV", + extra_formula = ~ density, estimate_theta = TRUE) + expect_equal(t2$coefficients, m2$coefficients) +}) + ## S3 methods for class DI (extract, AIC, BIC) test_that("S3 methods work", { data("Switzerland") diff --git a/tests/testthat/test-autoDI.R b/tests/testthat/test-autoDI.R index 5e0afbe..0eaabea 100644 --- a/tests/testthat/test-autoDI.R +++ b/tests/testthat/test-autoDI.R @@ -114,4 +114,4 @@ test_that("Special conditions", { # Drop ADD model if selection F-test and FG is specified expect_message(autoDI(FG = c("G","G","H"), data = sim0, y = "response", prop = 3:5), regexp = "Since 'Ftest' was specified as selection criterion and functional groups were specified, dropping the ADD model as it is not nested within the FG model") -}) \ No newline at end of file +}) diff --git a/tests/testthat/test-dataDI.R b/tests/testthat/test-dataDI.R index e16dfa8..d0c86d6 100644 --- a/tests/testthat/test-dataDI.R +++ b/tests/testthat/test-dataDI.R @@ -120,9 +120,16 @@ test_that("DI_data_prepare works", { expect_error(DI_data_prepare(data = dummy, prop = 1:4, y = 5), regexp = "One or more rows have species proportions that do not sum to 1. This must be corrected prior to analysis") + # Ensure there are no negative proportions + dummy <- rbind(Switzerland[, 4:8], c(-0.1, 0.9, 0.1, 0.1, 0)) + expect_error(DI_data_prepare(data = dummy, prop = 1:4, y = 5), + regexp = "One or more rows have species proportions with values less than 0 or greater than 1. This must be corrected prior to analysis") + + # Warning if any species proportions sum to 0 dummy <- rbind(Switzerland[, 4:8], c(0, 0, 0, 0, 0)) expect_warning(DI_data_prepare(data = dummy, prop = 1:4, y = 5), regexp = "One or more rows in your dataset have ALL proportions equal to zero") + }) diff --git a/tests/testthat/test-predictDI.R b/tests/testthat/test-predictDI.R index 0c4117d..41d3a36 100644 --- a/tests/testthat/test-predictDI.R +++ b/tests/testthat/test-predictDI.R @@ -12,16 +12,20 @@ test_that("predict function works", { expect_equal(predict(mod), mod$fitted.values) # If newdata is specified + exp_pred <- c(13.2703349718639, 13.3043419781524, 17.4404149400718, 15.3497141686398) + names(exp_pred) <- as.character(1:4) expect_equal(predict(mod, newdata = Switzerland[1:4, ]), - c(13.2703349718639, 13.3043419781524, 17.4404149400718, 15.3497141686398)) + exp_pred) # Ensure SE is accurate + exp_se <- c(0.453997370504706, 0.453997370504706, 0.453997370504706, 0.453997370504706) + names(exp_se) <- as.character(1:4) expect_equal(predict(mod, newdata = Switzerland[1:4, ], se.fit = TRUE)$se.fit, - c(0.457828639867846, 0.457828639867846, 0.457828639867846, 0.457828639867846)) + exp_se) # If only one row is specified expect_equal(suppressWarnings(predict(mod, newdata = Switzerland[4, ])), - c(15.3497141686398)) + c("4" = 15.3497141686398)) # If not all factor variables present in model are present in newdata warning will be thrown expect_warning(predict(mod, newdata = Switzerland[1:4, -c(2)]), @@ -60,11 +64,13 @@ test_that("predict function works", { DImodel = "FULL", data = Switzerland) expect_equal(predict(mod_basic, newdata = Switzerland[1:4, ]), - c(12.8725915311115, 12.8424977926434, 17.0259686206005, 14.5222165084828)) + c("1" = 12.8725915311115, "2" = 12.8424977926434, + "3" = 17.0259686206005, "4" = 14.5222165084828)) # Predict using type = "response" expect_equal(predict(mod_basic, newdata = Switzerland[1:4, ], type = "response"), - c(12.8725915311115, 12.8424977926434, 17.0259686206005, 14.5222165084828)) + c("1" = 12.8725915311115, "2" = 12.8424977926434, + "3" = 17.0259686206005, "4" = 14.5222165084828)) # Predict for when additional treatment is numeric swiss_num_treat <- Switzerland @@ -155,7 +161,146 @@ test_that("Prediction works for all interaction structures", { ignore_attr = TRUE) }) +test_that("Prediction works with grouped ID effects", { + # Ensuring predictions work for all interaction structures + data("Switzerland") + swiss_num_treat <- Switzerland + swiss_num_treat$nitrogen <- as.numeric(swiss_num_treat$nitrogen) + + # E model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "E", + ID = c("I1","I1", "I1", "I1"), + theta = 0.5, + extra_formula = ~ p1:nitrogen, + data = swiss_num_treat) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = swiss_num_treat[1:3, ]), + c(13.5089908, 15.7958394, 15.7958394), + ignore_attr = TRUE) + + # AV model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "AV", + ID = c("I1","I2", "I2", "I1"), + estimate_theta = TRUE, + extra_formula = ~ p1:nitrogen, + data = swiss_num_treat) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]), + c(13.2287655, 15.5557315, 15.5557315, 15.4019903), + ignore_attr = TRUE) + + # ADD model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "ADD", + ID = c("I1","I2", "I3", "I4"), + estimate_theta = TRUE, + extra_formula = ~ I1:nitrogen, + data = swiss_num_treat) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]), + c(13.485657, 13.449836, 17.641787, 15.021399), + ignore_attr = TRUE) + + # FG model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "FG", + FG = c("G", "G", "H", "H"), + ID = c("I1","I1", "I2", "I2"), + estimate_theta = TRUE, + extra_formula = ~ p1:nitrogen, + data = Switzerland) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = Switzerland[1:6, ]), + c(13.5117704, 13.5316355, 16.3710272, 16.3710272, 16.7358524, 14.2027248), + ignore_attr = TRUE) + + # FULL model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "FULL", + ID = c("I3","I3", "I3", "I3"), + estimate_theta = TRUE, + extra_formula = ~ p1:nitrogen, + data = swiss_num_treat) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]), + c(13.5538097, 14.3899227, 17.4391819, 15.1266995), + ignore_attr = TRUE) + + # ID model + mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen", + density = "density", DImodel = "ID", + ID = c("I3","I3", "I3", "I3"), + estimate_theta = TRUE, extra_formula = ~ p1:nitrogen, + data = swiss_num_treat) + + # If not all factor variables present in model are present in newdata warning will be thrown + expect_equal(predict(mod_int, newdata = swiss_num_treat[1, ]), + c(12.3940615), + ignore_attr = TRUE) +}) + +# Testing CI and PI work + +test_that("contrasts function works", { + data("sim2") + + mod <- DI(y = "response", DImodel = "FULL", + data = sim2, prop = 3:6) + mod_lm <- lm(response ~ 0 + (p1 + p2 + p3 + p4)^2, data = sim2) + + # Base prediction in same + expect_equal(predict(mod), predict(mod_lm)) + + # CI is same + expect_equal(predict(mod, interval = "conf"), + predict(mod_lm, interval = "conf")) + + # PI is same + expect_equal(predict(mod, interval = "pred", level = 0.9), + suppressWarnings(predict(mod_lm, interval = "pred", level = 0.9))) + + + # PI with se is same + DI_pred <- predict(mod, interval = "pred", se.fit = TRUE) + lm_pred <- suppressWarnings(predict(mod_lm, interval = "pred", se.fit = TRUE)) + expect_equal(as.numeric(DI_pred$se.fit), lm_pred$se.fit) + expect_equal(DI_pred$fit, lm_pred$fit) + + # CI and PI with newdata work + DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "conf", se.fit = TRUE) + lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "conf", se.fit = TRUE)) + expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit)) + expect_equal(DI_pred$fit, lm_pred$fit) + + DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "pred", se.fit = TRUE) + lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "pred", se.fit = TRUE)) + expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit)) + expect_equal(DI_pred$fit, lm_pred$fit) + + DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "none", se.fit = TRUE) + lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "none", se.fit = TRUE)) + expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit)) + expect_equal(DI_pred$fit, lm_pred$fit) + + # Ensure response type = "terms" works + expect_equal(as.vector(predict(mod_lm, type = "terms")), + as.vector(predict(mod, type = "terms"))) + + DI_pred <- predict(mod, type = "terms", se.fit = TRUE) + lm_pred <- predict(mod_lm, type = "terms", se.fit = TRUE) + expect_equal(as.vector(DI_pred$fit), as.vector(lm_pred$fit)) + expect_equal(as.vector(DI_pred$se.fit), as.vector(lm_pred$se.fit)) + +}) +# Testing contrast function test_that("contrasts function works", { data("Switzerland") @@ -171,11 +316,11 @@ test_that("contrasts function works", { # Mandatory to specify either constrast_vars or contrast expect_error(contrasts_DI(mod), - regexp = "Provide either one of contrast_vars or constrast") + regexp = "Provide either one of `contrast_vars` or `constrast`") # Can't specify both contrast_vars and contrast - expect_error(contrasts_DI(mod, contrast_vars = 0, contrast = 0), - regexp = "Provide only one of contrast_vars or constrast") + expect_warning(contrasts_DI(mod, contrast_vars = 0, contrast = rep(0, 8)), + regexp = "Provide only one of `contrast_vars` or `constrast`") # Ensure contrast vector is of appropriate type expect_error(contrasts_DI(mod, contrast = c("1", "-1", "0", "0")), @@ -195,7 +340,7 @@ test_that("contrasts function works", { # Ensure contrast_vars are specified as a list expect_error(contrasts_DI(mod, contrast_vars = c("density" = c(-0.25, 0.25, 0.25, -0.25))), - regexp = "Contrast variables should be specified as a nested list") + regexp = "Contrast variables should be specified as a nested named list") # Ensure user specifies variables present in model in contrast_vars expect_error(contrasts_DI(mod, contrast_vars = list(p5 = c(0, 1))), @@ -227,11 +372,11 @@ test_that("contrasts function works", { ignore_attr = TRUE) # Using contrast_vars - the_C <- matrix(c(1, 0, 0, 0, 0, 0, 0, 0), nrow = 1) + the_C <- matrix(c(0, 0, 0, 0, 0, 1, 0, 0), nrow = 1) colnames(the_C) <- names(mod$coefficients[1:8]) - expect_equal(contrasts_DI(mod, contrast_vars = list("p1" = c(1))), + expect_equal(contrasts_DI(mod, contrast_vars = list("nitrogen" = list("50v150" = c(1, -1)))), multcomp::glht(mod, linfct = the_C , coef = mod$coef[1:8], vcov = vcov(mod)), ignore_attr = TRUE) -}) \ No newline at end of file +}) diff --git a/vignettes/DImodels.Rmd b/vignettes/DImodels.Rmd index c0b3dd8..8e31305 100644 --- a/vignettes/DImodels.Rmd +++ b/vignettes/DImodels.Rmd @@ -22,7 +22,19 @@ library(DImodels) The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit. -__Main changes in the package from version 1.0 to version 1.1 and newer__ + +__Main changes in the package from version 1.2 to version 1.3__ + +- The `DI` and `autoDI` functions now have an additional parameter called `ID` which enables the user to group the species identity effects (see examples below). +- The `predict` function now has flexibility to calculate confidence and prediction intervals for the predicted values. + +__Main changes in the package from version 1.1 to version 1.2__ + +- There are two new functions added to the package: + - `predict`: Make predictions from a fitted DI model without having to worry about theta, and the interaction terms in the data. + - `contrasts_DI`: Create contrasts for a DI model. + +__Main changes in the package from version 1.0 to version 1.1__ - `DI_data_prepare` is now superseded by `DI_data` (see examples below) @@ -158,6 +170,36 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE) coef(m1_theta) ``` +### Grouping the species identity effects in the model +The species identity effects in a DI model can be grouped by specifying groups for each species using the `ID` argument. +The `ID` argument functions similar to the `FG` argument and accepts a character list of same length as number of species in the model. The identity effects of species belonging in the same group will be grouped together. + +Grouping all identity effects into a single term +```{r, echo = TRUE} +m1_group <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", "ID1", "ID1", + "ID1", "ID1", "ID1", "ID1")) +coef(m1_group) +``` + +Grouping identity effects of specific species +```{r, echo = TRUE} +m1_group2 <- update_DI(object = m1_theta, + ID = c("ID1", "ID1", "ID1", + "ID2", "ID2", "ID2", + "ID3", "ID3", "ID3")) +coef(m1_group2) +``` + +Note: Grouping ID effects will not have an effect on the calculation of the interaction effects, they would still be calculated by using all species. + +Read the documentation of `DI` and `autoDI` for more information and examples using the `ID` parameter. +```{r, eval = FALSE} +?DI +?autoDI +``` + + ## Fitting customised models using the `DI` function There are two ways to fit customised models using `DI`; the first is by using the option `DImodel = ` in the `DI` function and adding the argument `extra_formula = ` to it, and the second is to use the `custom_formula` argument in the `DI` function. If species interaction variables (e.g., the FG interactions or the average pairwise interaction) are included in either `extra_formula` or `custom_formula`, they must first be created and included in the dataset. The function `DI_data` can be used to compute several types of species interaction variables. @@ -210,6 +252,83 @@ m3 <- DI(y = "response", summary(m3) ``` +## Making predictions and testing contrasts for DI models +### Predictions using a DI model + +We can make predictions from a DI model just like any other regression model using the `predict` function. The user does not need to worry about adding any interaction terms or adjusting any columns if theta is not equal to 1. Only the species proportions along with any additional experimental structures is needed and all other terms in the model will be calculated for the user. +```{r, echo = TRUE} +# Fit model +m3 <- DI(y = "response", prop = 4:12, + treat = "treatment", DImodel = "AV", + extra_formula = ~ (AV) : treatment, data = sim3a) + +predict_data <- sim3[c(1, 79, 352), 3:12] +# Only species proportions and treatment is needed +print(predict_data) +# Make prediction +predict(m3, newdata = predict_data) +``` + +### Uncertainity around predictions +```{r} +# The interval and level parameters can be used to calculate the +# uncertainty around the predictions + +# Get confidence interval around prediction +predict(m3, newdata = predict_data, interval = "confidence") + +# Get prediction interval around prediction +predict(m3, newdata = predict_data, interval = "prediction") + +# The function returns a 95% interval by default, +# this can be changed using the level argument +predict(m3, newdata = predict_data, + interval = "prediction", level = 0.9) +``` + +### Contrasts for DI models +The `contrasts_DI` function can be used to compare and formally test for a difference in performance of communities within the same as well as across different experimental structures + +Comparing the performance of the monocultures of different species at treatment A +```{r, echo = TRUE} +contr <- list("p1vsp2" = c(1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + "p3vsp5" = c(0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0), + "p4vsp6" = c(0, 0, 0, 1, 0, -1, 0, 0, 0, 0, 0, 0), + "p7vsp9" = c(0, 0, 0, 0, 0, 0, 1, 0, -1, 0, 0, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +summary(the_C) +``` + +Comparing across the two treatment levels for monoculture of species 1 +```{r, echo = TRUE} +contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0)) +the_C <- contrasts_DI(m3, contrast = contr) +summary(the_C) +``` + +Comparing between two species mixtures +```{r, echo = TRUE} +mixA <- c(0.25, 0, 0.25, 0, 0.25, 0, 0.25, 0, 0, 0, 0, 0) +mixB <- c(0, 0.3333, 0, 0.3333, 0, 0.3333, 0, 0, 0, 0, 0, 0) + +# We have the proportions of the individual species in the mixtures, however +# we still need to calculate the interaction effect for these communities +contr_data <- data.frame(rbind(mixA, mixB)) +colnames(contr_data) <- names(coef(m3)) + +# Adding the interaction effect of the two mixtures +contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV +print(contr_data) + +# We can now subtract the respective values in each column of the two +# mixtures and get our contrast +my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ]) +rownames(my_contrast) <- "mixAvsB" + +the_C <- contrasts_DI(m3, contrast = my_contrast) +summary(the_C) +``` + ## References Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F Isbell, J Levine, A Lüscher, V Picasso, C Roscher, MT Sebastia, M Suter and A Weigelt (2013) An improved model to predict the effects of changing biodiversity levels on ecosystem function. Journal of Ecology, 101, 344-355.