From 047c027c6c5f1018ef15c915838d1bf47cc52682 Mon Sep 17 00:00:00 2001
From: Rishabh Vishwakarma <rishabhvish98@gmail.com>
Date: Tue, 21 Nov 2023 16:06:27 +0000
Subject: [PATCH] DImodels v1.3

---
 DESCRIPTION                     |   3 +-
 R/DI.R                          | 122 ++++----
 R/DI_internal.R                 |  54 ++--
 R/autoDI.R                      |  15 +-
 R/autoDI_internal.R             |  43 +--
 R/dataDI.R                      |  97 +++++-
 R/predictDI.R                   | 536 +++++++++++++++++++-------------
 README.Rmd                      | 130 +++++++-
 README.md                       | 344 ++++++++++++++++----
 man/DI.Rd                       |  36 ++-
 man/autoDI.Rd                   |  23 +-
 man/predict.DI.Rd               |  12 +-
 tests/testthat/test-DI.R        |  51 ++-
 tests/testthat/test-autoDI.R    |   2 +-
 tests/testthat/test-dataDI.R    |   7 +
 tests/testthat/test-predictDI.R | 169 +++++++++-
 vignettes/DImodels.Rmd          | 121 ++++++-
 17 files changed, 1355 insertions(+), 410 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fed0fe2..9d4b69d 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: DImodels
 Type: Package
 Title: Diversity-Interactions (DI) Models
-Version: 1.2.1
+Version: 1.3
 Date: 2023-05-20
 Authors@R: c(person("Rafael", "de Andrade Moral", role = c("aut", "cre"), email = "rafael.deandrademoral@mu.ie"), person("John", "Connolly", role = "aut"), person("Rishabh", "Vishwakarma", role = "ctb"), person("Caroline", "Brophy", role = "aut"))
 Author: Rafael de Andrade Moral [aut, cre], John Connolly [aut], Rishabh Vishwakarma [ctb], Caroline Brophy [aut]
@@ -20,3 +20,4 @@ Description: The 'DImodels' package is suitable for analysing data from biodiver
 License: GPL (>=2)
 NeedsCompilation: no
 Config/testthat/edition: 3
+RoxygenNote: 7.2.3
diff --git a/R/DI.R b/R/DI.R
index 676ebd3..1e3fa02 100644
--- a/R/DI.R
+++ b/R/DI.R
@@ -1,5 +1,5 @@
-DI <- function(y, prop, DImodel, custom_formula, data,
-               block, density, treat, FG, extra_formula,
+DI <- function(y, prop, DImodel, custom_formula, data, 
+               block, density, treat, ID, FG, extra_formula,
                estimate_theta = FALSE, theta = 1) {
   DIcall <- match.call()
   if(theta < 0){
@@ -84,6 +84,7 @@ DI <- function(y, prop, DImodel, custom_formula, data,
   if(DImodel == "FG" & is.null(FG)) {
     stop("The argument FG must be specified alongside DImodel = 'FG'.\n") 
   }
+  
   if(!missing(custom_formula)) {
     if(DImodel != "CUSTOM") {
       warning("fitting custom DI model using supplied custom_formula instead of model ", DImodel,
@@ -93,16 +94,25 @@ DI <- function(y, prop, DImodel, custom_formula, data,
     model_fit <- DI_CUSTOM(custom_formula = custom_formula, data = data,
                            family = family, estimate_theta = estimate_theta, total = total)
   } else {
-    # preparing new data object
-    data_obj <- DI_data_prepare(y = y, block = block, density = density, prop = prop, treat = treat, FG = FG, data = data, theta = theta)
-    newdata <- data_obj$newdata
-    nSpecies <- data_obj$nSpecies
+    # Ensure there are enough species to fit different DI models
+    nSpecies <- length(prop)
     if(nSpecies <= 2 & DImodel == "E") {
       stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
     }
     if(nSpecies <= 2 & DImodel == "AV") {
       stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
     }
+    if(nSpecies <= 2 & DImodel == "FG") {
+      stop("you must have > 2 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
+    }
+    if(nSpecies <= 3 & DImodel == "ADD") {
+      stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
+    }
+    
+    # preparing new data object
+    data_obj <- DI_data_prepare(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, FG = FG, data = data, theta = theta)
+    newdata <- data_obj$newdata
+    nSpecies <- data_obj$nSpecies
     if(data_obj$P_int_flag & DImodel == "ADD") {
       stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
     }
@@ -117,11 +127,11 @@ DI <- function(y, prop, DImodel, custom_formula, data,
     ## fitting the model
     if(DImodel == "FG") {
       model_fit <- model_fun(y = data_obj$y, block = data_obj$block, density = data_obj$density, prop = data_obj$prop, FG = data_obj$FG,
-                             treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula,
+                             ID = data_obj$ID, treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula,
                              estimate_theta = estimate_theta, nSpecies = nSpecies, total = total, FGnames = FGnames)
     } else {
       model_fit <- model_fun(y = data_obj$y, block = data_obj$block, density = data_obj$density, prop = data_obj$prop, FG = data_obj$FG,
-                             treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula,
+                             ID = data_obj$ID, treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula,
                              estimate_theta = estimate_theta, nSpecies = nSpecies, total = total)
     }
   }
@@ -158,7 +168,7 @@ DI_CUSTOM <- function(custom_formula, data, family, estimate_theta, total) {
   return(modlist)
 }
 
-DI_STR <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_STR <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -199,7 +209,7 @@ DI_STR <- function(y, block, density, prop, treat, FG, data, family, estimate_th
               "theta" = mod_STR))
 }
 
-DI_STR_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_STR_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -233,14 +243,14 @@ DI_STR_treat <- function(y, block, density, prop, treat, FG, data, family, estim
     }
   }
   mod_STR_treat <- DI_check_and_fit(fmla = fmla_STR_treat, y = y,
-                                     block = block, density = density, treat = treat,
-                                     family = family, data = data)
+                                    block = block, density = density, treat = treat,
+                                    family = family, data = data)
   mod_STR_treat$DIinternalcall <- DIinternalcall
   return(list("model" = mod_STR_treat,
               "theta" = mod_STR_treat))
 }
 
-DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_ID <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -262,10 +272,10 @@ DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   }
   if(block == "block_zero") {
     fmla_ID <- as.formula(paste(y, "~", "0+",
-                                paste(prop, collapse = "+"),
+                                paste(ID, collapse = "+"),
                                 "+", extra_terms))
   } else {
-    fmla_ID <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", block,
+    fmla_ID <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", block,
                                 "+", extra_terms))
   }
   # check design matrix and fit model
@@ -277,7 +287,7 @@ DI_ID <- function(y, block, density, prop, treat, FG, data, family, estimate_the
               "theta" = mod_ID))
 }
 
-DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_ID_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -298,10 +308,10 @@ DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estima
     y <- paste("cbind(", y, ",", total, "-", y, ")") 
   }
   if(block == "block_zero") {
-    fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", treat,
+    fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", treat,
                                       "+", extra_terms))
   } else {
-    fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+", block, "+", treat,
+    fmla_ID_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+", block, "+", treat,
                                       "+", extra_terms))
   }
   # check design matrix and fit model
@@ -313,7 +323,7 @@ DI_ID_treat <- function(y, block, density, prop, treat, FG, data, family, estima
               "theta" = mod_ID_treat))
 }
 
-DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_AV <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -336,11 +346,11 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   if(!any(names(data) == "AV")) stop("'AV' variable not found in dataset.")
   if(block == "block_zero") {
     fmla_AV <- as.formula(paste(y, "~", "0+", 
-                                paste(prop, collapse = "+"), "+AV",
+                                paste(ID, collapse = "+"), "+AV",
                                 "+", extra_terms)) 
   } else {
     fmla_AV <- as.formula(paste(y, "~", "0+",
-                                paste(prop, collapse = "+"), "+AV+", block,
+                                paste(ID, collapse = "+"), "+AV+", block,
                                 "+", extra_terms))
   }
   # check design matrix and fit model
@@ -350,7 +360,7 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   modlist <- list("model" = mod_AV)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_AV, DImodel = "AV",
+    modlist$theta <- DI_theta(obj = mod_AV, DImodel = "AV", prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -359,7 +369,7 @@ DI_AV <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   return(modlist)
 }
 
-DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_AV_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -381,10 +391,10 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   }
   if(!any(names(data) == "AV")) stop("'AV' variable not found in dataset.")
   if(block == "block_zero") {
-    fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+AV+", treat,
+    fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+AV+", treat,
                                       "+", extra_terms)) 
   } else {
-    fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+AV+", block, "+", treat,
+    fmla_AV_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+AV+", block, "+", treat,
                                       "+", extra_terms))
   }
   # check design matrix and fit model
@@ -394,7 +404,7 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   modlist <- list("model" = mod_AV_treat)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_AV_treat, DImodel = "AV",
+    modlist$theta <- DI_theta(obj = mod_AV_treat, DImodel = "AV", prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -403,7 +413,7 @@ DI_AV_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   return(modlist)
 }
 
-DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_E <- function(y, block, density, prop, treat, data, FG, ID, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -425,10 +435,10 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet
   }
   if(!any(names(data) == "E")) stop("'E' variable not found in dataset.")
   if(block == "block_zero") {
-    fmla_E <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E",
+    fmla_E <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E",
                                "+", extra_terms)) 
   } else {
-    fmla_E <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", block,
+    fmla_E <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", block,
                                "+", extra_terms))
   }
   # check design matrix and fit model
@@ -438,7 +448,7 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet
   modlist <- list("model" = mod_E)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_E, DImodel = "E",
+    modlist$theta <- DI_theta(obj = mod_E, DImodel = "E", prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -447,7 +457,7 @@ DI_E <- function(y, block, density, prop, treat, data, FG, family, estimate_thet
   return(modlist)
 }
 
-DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_E_treat <- function(y, block, density, prop, treat, data, FG, ID, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -469,10 +479,10 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat
   }
   if(!any(names(data) == "E")) stop("'E' variable not found in dataset.")
   if(block == "block_zero") {
-    fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", treat,
+    fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", treat,
                                      "+", extra_terms)) 
   } else {
-    fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+E+", block, "+", treat,
+    fmla_E_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+E+", block, "+", treat,
                                      "+", extra_terms))
   }
   # check design matrix and fit model
@@ -482,7 +492,7 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat
   modlist <- list("model" = mod_E_treat)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_E_treat, DImodel = "E",
+    modlist$theta <- DI_theta(obj = mod_E_treat, DImodel = "E", prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall 
     # RV change
@@ -491,7 +501,7 @@ DI_E_treat <- function(y, block, density, prop, treat, data, FG, family, estimat
   return(modlist)
 }
 
-DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) {
+DI_FG <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -514,10 +524,10 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   if(is.null(FG)) stop("FG matrix not found")
   #FG_ <- FG
   if(block == "block_zero") {
-    fmla_FG <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_",
+    fmla_FG <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_",
                                 "+", extra_terms))
   } else {
-    fmla_FG <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", block,
+    fmla_FG <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", block,
                                 "+", extra_terms))
   }
   # check design matrix and fit model
@@ -527,7 +537,7 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   modlist <- list("model" = mod_FG)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_FG, DImodel = "FG", FGnames = FGnames,
+    modlist$theta <- DI_theta(obj = mod_FG, DImodel = "FG", prop = prop, FGnames = FGnames,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -536,7 +546,7 @@ DI_FG <- function(y, block, density, prop, treat, FG, data, family, estimate_the
   return(modlist)
 }
 
-DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) {
+DI_FG_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, FGnames, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -559,10 +569,10 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   if(is.null(FG)) stop("FG matrix not found")
   #FG_ <- FG
   if(block == "block_zero") {
-    fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", treat,
+    fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", treat,
                                       "+", extra_terms))
   } else {
-    fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(prop, collapse = "+"), "+FG_+", block, "+", treat,
+    fmla_FG_treat <- as.formula(paste(y, "~", "0+", paste(ID, collapse = "+"), "+FG_+", block, "+", treat,
                                       "+", extra_terms))
   }
   # check design matrix and fit model
@@ -572,7 +582,7 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   modlist <- list("model" = mod_FG_treat)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_FG_treat, DImodel = "FG", FGnames = FGnames,
+    modlist$theta <- DI_theta(obj = mod_FG_treat, DImodel = "FG", prop = prop, FGnames = FGnames,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -581,7 +591,7 @@ DI_FG_treat <- function(y, block, density, prop, treat, FG, data, family, estima
   return(modlist)
 }
 
-DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_ADD <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -607,12 +617,12 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th
     stop("P_int variables not found in dataset.")
   if(block == "block_zero") {
     fmla_ADD <- as.formula(paste(y, "~", "0+", 
-                                    paste(prop, collapse = "+"), "+",
+                                    paste(ID, collapse = "+"), "+",
                                     paste0(prop, "_add", collapse = "+"),
                                  "+", extra_terms))
   } else {
     fmla_ADD <- as.formula(paste(y, "~", "0+",
-                                    paste(prop, collapse = "+"), "+",
+                                    paste(ID, collapse = "+"), "+",
                                     paste0(prop, "_add", collapse = "+"), "+", block,
                                  "+", extra_terms))
   }
@@ -623,7 +633,7 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th
   modlist <- list("model" = mod_ADD)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_ADD, DImodel = "ADD",
+    modlist$theta <- DI_theta(obj = mod_ADD, DImodel = "ADD", prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -632,7 +642,7 @@ DI_ADD <- function(y, block, density, prop, treat, FG, data, family, estimate_th
   return(modlist)
 }
 
-DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_ADD_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   if(density != "density_zero") {
@@ -658,12 +668,12 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim
     stop("P_int variables not found in dataset.")
   if(block == "block_zero") {
     fmla_ADD_treat <- as.formula(paste(y, "~", "0+",
-                                     paste(prop, collapse = "+"), "+",
+                                     paste(ID, collapse = "+"), "+",
                                      paste0(prop, "_add", collapse = "+"), "+", treat,
                                      "+", extra_terms))
   } else {
     fmla_ADD_treat <- as.formula(paste(y, "~", "0+", 
-                                     paste(prop, collapse = "+"), "+",
+                                     paste(ID, collapse = "+"), "+",
                                      paste0(prop, "_add", collapse = "+"), "+", block, "+", treat,
                                      "+", extra_terms))
   }
@@ -674,7 +684,7 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim
   modlist <- list("model" = mod_ADD_treat)
   modlist$model$DIinternalcall <- DIinternalcall
   if(estimate_theta) {
-    modlist$theta <- DI_theta(obj = mod_ADD_treat, DImodel = "ADD",
+    modlist$theta <- DI_theta(obj = mod_ADD_treat, DImodel = "ADD",  prop = prop,
                               nSpecies = nSpecies, family = family)
     modlist$theta$DIinternalcall <- DIinternalcall
     # RV change
@@ -683,7 +693,7 @@ DI_ADD_treat <- function(y, block, density, prop, treat, FG, data, family, estim
   return(modlist)
 }
 
-DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_FULL <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   # Getting the FULL variables
@@ -710,12 +720,12 @@ DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_t
   }
   if(block == "block_zero") {
     fmla_FULL <- as.formula(paste(y, "~", "0+", 
-                                  paste(prop, collapse = "+"), "+",
+                                  paste(ID, collapse = "+"), "+",
                                   paste0(full_names, collapse = "+"),
                                   "+", extra_terms))
   } else {
     fmla_FULL <- as.formula(paste(y, "~", "0+",
-                                  paste(prop, collapse = "+"), "+",
+                                  paste(ID, collapse = "+"), "+",
                                   paste0(full_names, collapse = "+"), "+", block,
                                   "+", extra_terms))
   }
@@ -735,7 +745,7 @@ DI_FULL <- function(y, block, density, prop, treat, FG, data, family, estimate_t
   return(modlist)
 }
 
-DI_FULL_treat <- function(y, block, density, prop, treat, FG, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
+DI_FULL_treat <- function(y, block, density, prop, treat, FG, ID, data, family, estimate_theta, nSpecies, total, extra_formula = 0) {
   DIinternalcall <- match.call()
   
   # Getting the FULL variables
@@ -761,12 +771,12 @@ DI_FULL_treat <- function(y, block, density, prop, treat, FG, data, family, esti
   }
   if(block == "block_zero") {
     fmla_FULL_treat <- as.formula(paste(y, "~", "0+",
-                                        paste(prop, collapse = "+"), "+",
+                                        paste(ID, collapse = "+"), "+",
                                         paste0(full_names, collapse = "+"), "+", treat,
                                         "+", extra_terms))
   } else {
     fmla_FULL_treat <- as.formula(paste(y, "~", "0+", 
-                                        paste(prop, collapse = "+"), "+",
+                                        paste(ID, collapse = "+"), "+",
                                         paste0(full_names, collapse = "+"), "+", block, "+", treat,
                                         "+", extra_terms))
   }
diff --git a/R/DI_internal.R b/R/DI_internal.R
index fb01882..d60074a 100644
--- a/R/DI_internal.R
+++ b/R/DI_internal.R
@@ -44,14 +44,14 @@ DI_check_and_fit <- function(fmla, y, block, density, treat, family, data, FG) {
   return(mod)
 }
 
-proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGnames) {
+proflik_theta <- function(theta, obj, family, int_terms, prop, DImodel, nSpecies, FGnames) {
   mm <- model.matrix(obj)
   
   if(DImodel %in% c("E","AV")) {
     data_theta_E_AV <- obj$data
     data_theta <- data.frame(mm)
-    data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta
-    new_E_AV <- DI_data_E_AV_internal(prop = 1:nSpecies, data = data_theta)
+    #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta
+    new_E_AV <- DI_data_E_AV(prop = prop, data = data_theta_E_AV, theta = theta)
     data_theta_E_AV$E <- new_E_AV$E
     data_theta_E_AV$AV <- new_E_AV$AV
     fitted_model_theta <- glm(formula(obj), family = family, data = data_theta_E_AV)
@@ -64,10 +64,14 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna
     sigma_hat <- sqrt(sum(fitted_model_theta$residuals^2)/(fitted_model_theta$df.residual) * (n - p)/n)
     llik <- sum(dnorm(fitted_model_theta$y, mu_hat, sigma_hat, log = TRUE))
   } else if(DImodel == "FG") {
-    data_theta_FG <- obj$data
-    data_theta <- data.frame(mm)
-    data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta
-    new_FG <- DI_data_FG_internal(prop = 1:nSpecies, FG = FGnames, data = data_theta)
+    #data_theta_FG <- obj$data
+    data_theta_FG <- data.frame(mm, check.names = FALSE)
+    colnames(data_theta_FG) <- gsub("`", "", colnames(data_theta_FG))
+    # Add original props to calculate interactions
+    data_theta_FG <- cbind(data_theta_FG, obj$data[, prop])
+    
+    #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta
+    new_FG <- DI_data_FG(prop = prop, FG = FGnames, data = data_theta_FG[, prop], theta = theta)
     FG_ <- new_FG$FG
     ## if we have column names starting with FG_ already
     ## in data_theta_FG, then substitute columns else it's all good
@@ -84,6 +88,10 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna
     }
     old_formula <- formula(obj)
     new_formula <- paste0(old_formula[2], " ~ ", old_formula[3])
+    
+    data_theta_FG$y <- obj$y
+    names(data_theta_FG)[length(names(data_theta_FG))] <- paste(old_formula[2])
+    
     fitted_model_theta <- glm(as.formula(new_formula), family = family, data = data_theta_FG)
     #mu_hat <- fitted(fitted_model_theta)
     #sigma_hat <- sqrt(sum(fitted_model_theta$residuals^2)/(fitted_model_theta$df.residual-1))
@@ -95,8 +103,10 @@ proflik_theta <- function(theta, obj, family, int_terms, DImodel, nSpecies, FGna
     llik <- sum(dnorm(fitted_model_theta$y, mu_hat, sigma_hat, log = TRUE))
   } else if(DImodel == "ADD") {
     #data_theta_ADD <- obj$data
+    # Drop existing _add columns to avoid conflicts
     data_theta <- data.frame(mm, check.names = FALSE)
-    new_ADD <- DI_data_ADD_theta(prop = 1:nSpecies, data = data_theta, theta = theta)
+    data_theta <- cbind(data_theta, obj$data[, prop])
+    new_ADD <- DI_data_ADD_theta(prop = prop, data = data_theta, theta = theta)
     ADD_cols_in_the_data <- int_terms #grep("_add", colnames(data_theta))
     if(length(ADD_cols_in_the_data) > 0) {
       j <- 1
@@ -172,7 +182,8 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) {
   #options(warn = -1)
   upper_boundary <- 1.5
   theta_info <- get_theta_info(upper_boundary = upper_boundary, DImodel = DImodel,
-                               obj = obj, family = family, int_terms = int_terms,
+                               obj = obj, prop = prop, 
+                               family = family, int_terms = int_terms, 
                                nSpecies = nSpecies, FGnames = FGnames)
   #options(warn = 0)
   theta_hat <- theta_info$theta_hat
@@ -184,16 +195,18 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) {
   if(DImodel %in% c("E","AV")) {
     data_theta_E_AV <- obj$data
     data_theta <- data.frame(mm)
-    data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat
-    new_E_AV <- DI_data_E_AV_internal(prop = 1:nSpecies, data = data_theta)
+    #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat
+    new_E_AV <- DI_data_E_AV(prop = prop, data = data_theta_E_AV, theta = theta_hat)
     data_theta_E_AV$E <- new_E_AV$E
     data_theta_E_AV$AV <- new_E_AV$AV
     mod_theta <- glm(formula(obj), family = family, data = data_theta_E_AV)
   } else if(DImodel == "FG") {
-    data_theta_FG <- obj$data
-    data_theta <- data.frame(mm)
-    data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat
-    new_FG <- DI_data_FG_internal(prop = 1:nSpecies, FG = FGnames, data = data_theta)
+    #data_theta_FG <- obj$data
+    data_theta_FG <- data.frame(mm, check.names = FALSE)
+    colnames(data_theta_FG) <- gsub("`", "", colnames(data_theta_FG))
+    data_theta_FG <- cbind(data_theta_FG, obj$data[, prop])
+    #data_theta[1:nSpecies] <- data_theta[1:nSpecies]^theta_hat
+    new_FG <- DI_data_FG(prop = prop, FG = FGnames, theta = theta_hat, data = data_theta_FG)
     FG_ <- new_FG$FG
     ## if we have column names starting with FG_ already
     ## in data_theta_FG, then substitute columns, else it's all good
@@ -207,11 +220,14 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) {
     }
     old_formula <- formula(obj)
     new_formula <- paste0(old_formula[2], " ~ ", old_formula[3])
+    data_theta_FG$y <- obj$y
+    names(data_theta_FG)[length(names(data_theta_FG))] <- paste(old_formula[2])
     mod_theta <- glm(as.formula(new_formula), family = family, data = data_theta_FG)
   } else if(DImodel == "ADD") {
     #data_theta_ADD <- obj$data
     data_theta <- data.frame(mm, check.names = FALSE)
-    new_ADD <- DI_data_ADD_theta(prop = 1:nSpecies, data = data_theta, theta = theta_hat)
+    data_theta <- cbind(data_theta, obj$data[, prop])
+    new_ADD <- DI_data_ADD_theta(prop = prop, data = data_theta, theta = theta_hat)
     ADD_cols_in_the_data <- int_terms #grep("_add", colnames(data_theta))
     if(length(ADD_cols_in_the_data) > 0) {
       j <- 1
@@ -250,15 +266,15 @@ DI_theta <- function(obj, DImodel, FGnames, prop, nSpecies, family) {
   return(mod_theta)
 }
 
-get_theta_info <- function(upper_boundary, DImodel, obj, family, int_terms,
+get_theta_info <- function(upper_boundary, DImodel, obj, prop, family, int_terms,
                            nSpecies, FGnames) {
   optimum <- optimize(proflik_theta, interval = c(0.00001, upper_boundary), 
-                      maximum = TRUE, DImodel = DImodel, obj = obj, family = family, 
+                      maximum = TRUE, DImodel = DImodel, obj = obj, prop = prop, family = family, 
                       int_terms = int_terms, nSpecies = nSpecies, FGnames = FGnames)
   theta_hat <- optimum$maximum
   theta_grid <- seq(0.00001, upper_boundary + 1, length = 100)
   proflik_theta_vec <- Vectorize(proflik_theta, "theta")
-  profile_loglik <- proflik_theta_vec(theta = theta_grid, obj = obj, 
+  profile_loglik <- proflik_theta_vec(theta = theta_grid, obj = obj, prop = prop,
                                       family = family, int_terms = int_terms, DImodel = DImodel, 
                                       nSpecies = nSpecies, FGnames = FGnames)
   
diff --git a/R/autoDI.R b/R/autoDI.R
index 3aafb2f..ddd3137 100644
--- a/R/autoDI.R
+++ b/R/autoDI.R
@@ -7,7 +7,7 @@
 # selection = "Ftest" to perform F (or X2) tests to select best model,
 #             "AIC" to use select model with smallest AIC
 
-autoDI <- function(y, prop, data, block, density, treat, FG = NULL, 
+autoDI <- function(y, prop, data, block, density, treat, ID, FG = NULL, 
                    selection = c("Ftest","AIC","AICc","BIC","BICc"), 
                    step0 = FALSE, step4 = TRUE) {
   if(missing(y)) stop("You must supply a response variable name or column index through the argument 'y'.\n")
@@ -57,6 +57,15 @@ autoDI <- function(y, prop, data, block, density, treat, FG = NULL,
   if(missing(FG)){
     FG <- NULL
   }
+
+  if(missing(ID)){
+    if(is.numeric(prop)){
+      ID <- paste0(colnames(data)[prop], "_ID")
+    }
+    else {
+      ID <- paste0(prop, "_ID")
+    }
+  }
   
   # Step 0
   if(step0) {
@@ -64,13 +73,13 @@ autoDI <- function(y, prop, data, block, density, treat, FG = NULL,
   }
   
   # Step 1
-  step1_model <- autoDI_step1(y = y, block = block, density = density, prop = prop, treat = treat, family = family, data = data, selection = selection)
+  step1_model <- autoDI_step1(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, family = family, data = data, selection = selection)
   
   selected_model <- step1_model$model
   theta_flag <- step1_model$theta_flag
   
   # step 2
-  step2_model <- autoDI_step2(y = y, block = block, density = density, prop = prop, treat = treat, family = family, FG = FG, data = data, theta_flag = theta_flag, selection = selection, selected_model = selected_model)
+  step2_model <- autoDI_step2(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, family = family, FG = FG, data = data, theta_flag = theta_flag, selection = selection, selected_model = selected_model)
   
   # step 3
   step3_model <- autoDI_step3(selected_model = step2_model, selection = selection, family = family)
diff --git a/R/autoDI_internal.R b/R/autoDI_internal.R
index 3d435a9..82660c1 100644
--- a/R/autoDI_internal.R
+++ b/R/autoDI_internal.R
@@ -331,30 +331,30 @@ autoDI_step0 <- function(y, block, density, treat, family, data) {
   #options(scipen = 0)
 }
 
-autoDI_step1 <- function(y, block, density, prop, treat, family, data, selection) {
+autoDI_step1 <- function(y, block, density, prop, treat, ID, family, data, selection) {
   
   model_tag <- ifelse(length(prop) == 2, "FULL", "AV")
   
   if(is.na(treat)) {
       model_name <- paste0(model_tag, "_model")
       fit_theta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop,
-                                       DImodel = model_tag, data = data, estimate_theta = TRUE)))
+                                                        ID = ID, DImodel = model_tag, data = data, estimate_theta = TRUE)))
       fit_notheta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop,
-                                         DImodel = model_tag, data = data, estimate_theta = FALSE)))
+                                                          ID = ID, DImodel = model_tag, data = data, estimate_theta = FALSE)))
       fit_theta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop,
-                               DImodel = model_tag, data = data, estimate_theta = TRUE)
+                               ID = ID, DImodel = model_tag, data = data, estimate_theta = TRUE)
       fit_notheta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop,
-                                 DImodel = model_tag, data = data, estimate_theta = FALSE)
+                                 ID = ID, DImodel = model_tag, data = data, estimate_theta = FALSE)
     } else {
       model_name <- paste0(model_tag, "_model_treat")
       fit_theta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop,
-                                       treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE)))
+                                                        ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE)))
       fit_notheta <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, prop = prop,
-                                         treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE)))
+                                                          ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE)))
       fit_theta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop,
-                               treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE)
+                               ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = TRUE)
       fit_notheta$DIcall <- call("DI", y = y, block = block, density = density, prop = prop,
-                                 treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE)
+                                 ID = ID, treat = treat, DImodel = model_tag, data = data, estimate_theta = FALSE)
     }
   
   message("\n", strrep("-", getOption("width")))
@@ -392,7 +392,7 @@ autoDI_step1 <- function(y, block, density, prop, treat, family, data, selection
 }
 
 autoDI_step2 <- function(y, block, density, 
-                         prop, treat, FG, 
+                         prop, treat, ID, FG, 
                          family, data, theta_flag,
                          selection, selected_model){
   
@@ -420,10 +420,10 @@ autoDI_step2 <- function(y, block, density,
   }
   
   ID_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                 prop = prop, treat = treat, 
+                 prop = prop, treat = treat, ID = ID,
                  data = data, DImodel = 'ID')))
   STR_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                  prop = prop, treat = treat, 
+                  prop = prop, treat = treat, ID = ID,
                   data = data, DImodel = 'STR')))
   model_list <- list('STR_model' = STR_model, 'ID_model' = ID_model)
   
@@ -432,28 +432,28 @@ autoDI_step2 <- function(y, block, density,
     theta_est <- coefficients(selected_model)['theta']
     if(AV_flag){
       AV_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                     prop = prop, treat = treat, theta = theta_est, 
+                     prop = prop, treat = treat, ID = ID, theta = theta_est, 
                      data = data, DImodel = 'AV')))
       model_list[[length(model_list)+1]] <- AV_model
       names(model_list)[length(model_list)] <- 'AV_model'
     }
     if(FG_flag){
       FG_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                     prop = prop, treat = treat, theta = theta_est,  
+                     prop = prop, treat = treat, ID = ID, theta = theta_est,  
                      data = data, FG = FG, DImodel = 'FG')))
       model_list[[length(model_list)+1]] <- FG_model
       names(model_list)[length(model_list)] <- 'FG_model'
     }
     if(ADD_flag){
       ADD_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                      prop = prop, treat = treat, theta = theta_est, 
+                      prop = prop, treat = treat, ID = ID, theta = theta_est, 
                       data = data, DImodel = 'ADD')))
       model_list[[length(model_list)+1]] <- ADD_model
       names(model_list)[length(model_list)] <- 'ADD_model'
     }
     if(FULL_flag){
       FULL_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                       prop = prop, treat = treat, theta = theta_est, 
+                       prop = prop, treat = treat, ID = ID, theta = theta_est, 
                        data = data, DImodel = 'FULL')))
       model_list[[length(model_list)+1]] <- FULL_model
       names(model_list)[length(model_list)] <- 'FULL_model'
@@ -467,28 +467,28 @@ autoDI_step2 <- function(y, block, density,
   } else {
     if(AV_flag){
       AV_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                     prop = prop, treat = treat, estimate_theta = F,
+                     prop = prop, treat = treat, ID = ID, estimate_theta = F,
                      data = data, DImodel = 'AV')))
       model_list[[length(model_list)+1]] <- AV_model
       names(model_list)[length(model_list)] <- 'AV_model'
     }
     if(FG_flag){
       FG_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                     prop = prop, treat = treat, estimate_theta = F, 
+                     prop = prop, treat = treat, ID = ID, estimate_theta = F, 
                      data = data, FG = FG, DImodel = 'FG')))
       model_list[[length(model_list)+1]] <- FG_model
       names(model_list)[length(model_list)] <- 'FG_model'
     }
     if(ADD_flag){
       ADD_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                      prop = prop, treat = treat, estimate_theta = F, 
+                      prop = prop, treat = treat, ID = ID, estimate_theta = F, 
                       data = data, DImodel = 'ADD')))
       model_list[[length(model_list)+1]] <- ADD_model
       names(model_list)[length(model_list)] <- 'ADD_model'
     } 
     if(FULL_flag){
       FULL_model <- suppressWarnings(suppressMessages(DI(y = y, block = block, density = density, 
-                       prop = prop, treat = treat, estimate_theta = F, 
+                       prop = prop, treat = treat, ID = ID, estimate_theta = F, 
                        data = data, DImodel = 'FULL')))
       model_list[[length(model_list)+1]] <- FULL_model
       names(model_list)[length(model_list)] <- 'FULL_model'
@@ -537,13 +537,14 @@ autoDI_step2 <- function(y, block, density,
   estimate_theta <- theta_flag
   DImodel <-  fit_final$DIcall$DImodel
   the_final_model <- suppressWarnings(suppressMessages(DI(y = y, prop = prop, block = block,
-                                         FG = FG, density = density, treat = treat,
+                                         FG = FG, ID = ID, density = density, treat = treat,
                                          estimate_theta = estimate_theta,
                                          DImodel = DImodel, data = data)))
   the_final_model$DIcall$prop <- prop
   the_final_model$DIcall$DImodel <- DImodel
   the_final_model$DIcall$estimate_theta <- estimate_theta
   the_final_model$DIcall$FG <- FG
+  the_final_model$DIcall$ID <- ID
   the_final_model$DIcall$treat <- treat
   the_final_model$DIcall$block <- block
   the_final_model$DIcall$density <- density
diff --git a/R/dataDI.R b/R/dataDI.R
index 9b9bd88..9464774 100644
--- a/R/dataDI.R
+++ b/R/dataDI.R
@@ -1,4 +1,4 @@
-DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, theta = 1) {
+DI_data_prepare <- function(y, block, density, prop, treat, ID, FG = NULL, data, theta = 1) {
   if(any(is.na(data))) {
     stop("The dataset contains missing values. Please remove them prior to analysis.") 
   }
@@ -56,6 +56,9 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the
   
   ## checking if the Pi's sum to 1
   prop_check <- DI_prop_check(Pind = Pind, data = data_check)
+  if(prop_check == "OOB") {
+    stop("One or more rows have species proportions with values less than 0 or greater than 1. This must be corrected prior to analysis.\n")
+  }
   if(prop_check == "error") {
     stop("One or more rows have species proportions that do not sum to 1. This must be corrected prior to analysis.\n")
   }
@@ -84,16 +87,31 @@ DI_data_prepare <- function(y, block, density, prop, treat, FG = NULL, data, the
   }
   newdata <- data.frame(newdata, ADD$ADD)
   ## calculating FG variables
-  if(!is.null(FG)) FG <- DI_data_FG(prop = prop, FG = FG, data = data, theta = theta)$FG
+  if(!is.null(FG)) {
+    FG <- DI_data_FG(prop = prop, FG = FG, data = data, theta = theta)$FG
+    FG_names <- colnames(FG)
+  } else {
+    FG_names <- NULL
+  }
   
   # Calculating the FULL variables
   FULL <- DI_data_fullpairwise(prop = prop, data = data[, prop], theta = theta)
   # Add variabes to data
   newdata <- cbind(newdata, FULL)
   
+  ###### RV change########
+  # Grouping ID effects
+  if(missing(ID)){
+    ID <- paste0(prop, "_ID")
+  } 
+  ID_name_check(ID = ID, prop = prop, FG = FG_names)
+  grouped_ID <- group_IDs(data = data, prop = prop, ID = ID)
+  newdata <- cbind(newdata, grouped_ID)
+  #######################
+  
   ## return object
   return(list("newdata" = newdata, "y" = y, "block" = block, density = density,
-              "prop" = prop, "treat" = treat, "FG" = FG,
+              "prop" = prop, "ID" = unique(ID), "treat" = treat, "FG" = FG,
               "P_int_flag" = P_int_flag, "even_flag" = even_flag, "nSpecies" = length(prop)))
 }
 
@@ -103,7 +121,7 @@ get_P_indices <- function(prop, data) {
     prop <- names(data[, prop]) # species proportions P_i (names)
   } else {
     vec_grep <- Vectorize(grep, "pattern")
-    Pind <- as.numeric(vec_grep(paste("\\<", prop, "\\>", sep = ""),
+    Pind <- as.numeric(vec_grep(paste0("^", prop, "$"),#paste("\\<", prop, "\\>", sep = ""),
                                 names(data)))
   }
   return(list(Pind = Pind, prop = prop))
@@ -308,6 +326,10 @@ DI_data_fullpairwise <- function(prop, data, theta = 1) {
 
 DI_prop_check <- function(Pind, data) {
   props <- data[,Pind]
+  # prop values are out of bounds, i.e. > 1 or < 0
+  if(any(props > 1 | props < 0)){
+    return("OOB") 
+  }
   pi_sums <- apply(props, 1, sum)
   if(any(pi_sums > 1.0001) | any(pi_sums < .9999)) {
     return("error")
@@ -361,6 +383,73 @@ add_name_check <- function(data){
     stop('Certain column names cause internal conflicts when calculating additive interactions. Please rename any columns containing the string \'_add\'.')
   }
 }
+
+ID_name_check <- function(ID, prop, FG){
+  cond1 <- length(grep(":", ID)) > 0
+  cond2 <- any(ID == "_")
+  cond3 <- any(ID == "i")
+  cond4 <- any(ID == "n")
+  cond5 <- any(ID == "f")
+  cond6 <- any(ID == "g")
+  cond7 <- any(ID == "_i")
+  cond8 <- any(ID == "in")
+  cond9 <- any(ID == "nf")
+  cond10 <- any(ID == "fg")
+  cond11 <- any(ID == "g_")
+  cond12 <- any(ID == "_in")
+  cond13 <- any(ID == "inf")
+  cond14 <- any(ID == "nfg")
+  cond15 <- any(ID == "fg_")
+  cond16 <- any(ID == "_inf")
+  cond17 <- any(ID == "infg")
+  cond18 <- any(ID == "nfg_")
+  cond19 <- any(ID == "_infg")
+  cond20 <- any(ID == "infg_")
+  cond21 <- any(ID == "_infg_")
+  cond22 <- length(grep(pattern = "^add", x = ID, ignore.case = TRUE)) > 0
+  cond23 <- length(grep(pattern = "^full", x = ID, ignore.case = TRUE)) > 0
+  cond24 <- length(grep(pattern = "^fg", x = ID, ignore.case = TRUE)) > 0
+  cond25 <- any(ID == "E")
+  cond26 <- any(ID == "AV")
+  if(cond1 | cond2 | cond3 | cond4 | cond5 | cond6 | cond7 |
+     cond8 | cond9 | cond10 | cond11 | cond12 | cond13 | cond14 |
+     cond15 | cond16 | cond17 | cond18 | cond19 | cond20 | cond21 | 
+     cond22 | cond23 | cond24| cond25 | cond26) {
+    stop("Please give your IDs a different name.",
+         " Names should not include colons (':'), or any single or multiple",
+         " character combination of the expressions '_infg_', 'ADD', 'FG', 'FULL', 'E', and 'AV'.",
+         " These expressions are reserved for internal computations.")
+  }
+  if(length(ID)!=length(prop)){
+    stop("'ID' should be a vector of same length as prop vector specifying the grouping structure for the ID effects")
+  }
+  if(any(ID %in% FG)){
+    stop("'ID' cannot have any names common with names of functional groups specified by 'FG'. Change the name of groups in either 'ID' or 'FG'")
+  }
+}
+
+group_IDs <- function(data, prop, ID){
+  # Species proportions to sum 
+  props <- data[, prop] 
+  
+  # Unique IDs
+  uIDs <- unique(ID) 
+  
+  # Number of unique ID terms
+  nIDs <- length(uIDs) 
+  
+  sapply(uIDs, function(x){
+    # Index of prop to sum
+    idx <- which(ID == x) 
+    if(length(idx) == 1){
+      # No need to sum if only one element in group
+      result <- props[, idx]
+    } else {
+      # Sum of species proportions
+      result <-  rowSums(props[, idx])
+    }
+  })
+}
 #########################################
 
 DI_data <- function(prop, FG, data, theta = 1, what = c("E","AV","FG","ADD","FULL")) {
diff --git a/R/predictDI.R b/R/predictDI.R
index 8607e06..e0d3f15 100644
--- a/R/predictDI.R
+++ b/R/predictDI.R
@@ -1,278 +1,365 @@
 
-predict.DI <- function (object, newdata, se.fit = FALSE, type = c("link", 
-                                                                  "response"), ...) 
+predict.DI <- function (object, newdata, se.fit = FALSE, 
+                        interval = c("none", "confidence", "prediction"), 
+                        level = 0.95, weights = 1,
+                        type = c("link", "response", "terms"), ...) 
 {
+  interval <- match.arg(interval)
+  
   # Make predictions for raw data if no newdata specified
   if (missing(newdata)) {
     if(missing(type)) {
       type <- "link"
     }
-    return(predict.glm(object, se.fit = se.fit, type = type, ...))
-  }
-  
-  # Getting species columns and DImodel for adding interactions
-  type <- match.arg(type)
-  DImodel_tag <- object$DIcall$DImodel
-  if (is.null(DImodel_tag)){
-    DImodel_tag <- 'CUSTOM'
-  }
-  
-  # If custom_formula was used just pass the object to predict.glm to get predictions
-  if (DImodel_tag == 'CUSTOM'){
-    if(missing(type)) {
-      type <- "link"
-    }
-    return(predict.glm(object, newdata = newdata, se.fit = se.fit, type = type, ...))
-  }
-  
-  original_data <- object$original_data
-  model_data <- eval(object$model)
-  prop_cols <- eval(object$DIcall$prop)
-  
-  if (is.numeric(prop_cols)){
-    prop <- names(original_data[, prop_cols])
+    ret_obj <- predict.glm(object, 
+                           se.fit = ifelse(interval != "none", TRUE, se.fit),
+                           type = type, ...)
   } else {
-    prop <- prop_cols
-  }
-  
-  
-  if (!is.null(prop) & !all(prop %in% colnames(newdata))){
-    prop_in_data <- prop[prop %in% colnames(newdata)]
-    prop_missing <- prop[!prop %in% prop_in_data]
-    if(all(is_near(rowSums(newdata[, prop_in_data]), 1))){
-      missing_prop_cols <- matrix(0, ncol = length(prop_missing), nrow = nrow(newdata), dimnames = list(NULL, prop_missing))
-      newdata <- cbind(newdata, missing_prop_cols)
-      warning(paste0('Species ', paste0(prop_missing, collapse = ','), ' were not present in newdata. These species proportions are assumed to be 0.'))
+    # Getting species columns and DImodel for adding interactions
+    type <- match.arg(type)
+    DImodel_tag <- object$DIcall$DImodel
+    if (is.null(DImodel_tag)){
+      DImodel_tag <- 'CUSTOM'
+    }
+    
+    # If custom_formula was used just pass the object to predict.glm to get predictions
+    if (DImodel_tag == 'CUSTOM'){
+      if(missing(type)) {
+        type <- "link"
+      }
+      return(predict.glm(object, newdata = newdata, se.fit = se.fit, type = type, ...))
+    }
+    
+    original_data <- object$original_data
+    model_data <- eval(object$model)
+    prop_cols <- eval(object$DIcall$prop)
+    ID_cols <- eval(object$DIcall$ID)
+    
+    if (is.numeric(prop_cols)){
+      prop <- names(original_data[, prop_cols])
     } else {
+      prop <- prop_cols
+    }
+    
+    
+    if (!is.null(prop) & !all(prop %in% colnames(newdata))){
+      prop_in_data <- prop[prop %in% colnames(newdata)]
+      prop_missing <- prop[!prop %in% prop_in_data]
+      if(all(is_near(rowSums(newdata[, prop_in_data]), 1))){
+        missing_prop_cols <- matrix(0, ncol = length(prop_missing), nrow = nrow(newdata), dimnames = list(NULL, prop_missing))
+        newdata <- cbind(newdata, missing_prop_cols)
+        warning(paste0('Species ', paste0(prop_missing, collapse = ','), ' were not present in newdata. These species proportions are assumed to be 0.'))
+      } else {
+        stop('The species proportions present in newdata don\'t sum to 1.')
+      }
+    }
+    
+    if (!is.null(prop) & !all(is_near(rowSums(newdata[, prop]), 1))){
       stop('The species proportions present in newdata don\'t sum to 1.')
     }
-  }
-  
-  if (!is.null(prop) & !all(is_near(rowSums(newdata[, prop]), 1))){
-    stop('The species proportions present in newdata don\'t sum to 1.')
-  }
-  # DI_data doesn't work if dataframe has a single row
-  # Adding a dummy row which will be deleted later
-  only_one_row <- nrow(newdata) == 1
-  if (only_one_row) {
-    newdata <- rbind(newdata, rep(0, ncol(newdata)))
-  }
-  
-  # Adding interactions
-  theta_flag <- object$coefficients['theta']
-  betas <- coef(object)
-  if (!DImodel_tag %in% c("ID", "STR")) {
+    # DI_data doesn't work if dataframe has a single row
+    # Adding a dummy row which will be deleted later
+    only_one_row <- nrow(newdata) == 1
+    if (only_one_row) {
+      newdata <- rbind(newdata, newdata)
+    }
+    
+    # Adding interactions
+    theta_flag <- object$coefficients['theta']
+    betas <- coef(object)
     if (!is.na(theta_flag)){
-        theta_value <- coef(object)["theta"]
-        betas <- betas[-length(betas)]
+      theta_value <- coef(object)["theta"]
+      betas <- betas[-length(betas)]
     } 
     else {
-        theta_value <- 1
-    }
-    extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), 
-                               data = newdata, theta = theta_value, what = DImodel_tag)
-    if (DImodel_tag == "E") {
-      updated_newdata <- data.frame(newdata, E = extra_variables)
-    }
-    if (DImodel_tag == "AV") {
-      updated_newdata <- data.frame(newdata, AV = extra_variables)
-    }
-    if (DImodel_tag == "ADD") {
-      updated_newdata <- data.frame(newdata, extra_variables)
+      theta_value <- 1
     }
-    if (DImodel_tag == "FG") {
-      # colnames(extra_variables) <- paste0("FG_", 
-      #                                     colnames(extra_variables))
+    
+    if (!DImodel_tag %in% c("ID", "STR")) {
       
-      # FG model wasn't working for some reason, so had to assign it this way
-      newdata[, 'FG_'] <- extra_variables
+      extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), 
+                                 data = newdata, theta = theta_value, what = DImodel_tag)
+      if (DImodel_tag == "E") {
+        updated_newdata <- data.frame(newdata, E = extra_variables)
+      }
+      if (DImodel_tag == "AV") {
+        updated_newdata <- data.frame(newdata, AV = extra_variables)
+      }
+      if (DImodel_tag == "ADD") {
+        updated_newdata <- data.frame(newdata, extra_variables)
+      }
+      if (DImodel_tag == "FG") {
+        # colnames(extra_variables) <- paste0("FG_", 
+        #                                     colnames(extra_variables))
+        
+        # FG model wasn't working for some reason, so had to assign it this way
+        newdata[, 'FG_'] <- extra_variables
+        updated_newdata <- newdata
+      }
+      if (DImodel_tag == "FULL") {
+        updated_newdata <- data.frame(newdata, extra_variables, 
+                                      check.names = FALSE)
+      }
+    } else {
       updated_newdata <- newdata
     }
-    if (DImodel_tag == "FULL") {
-      updated_newdata <- data.frame(newdata, extra_variables, 
-                                    check.names = FALSE)
+    
+    # Grouping ID terms 
+    # If not grouping was specified
+    if(is.null(ID_cols)){
+      ID_cols <- paste0(prop, "_ID")
     }
-  } else {
-    updated_newdata <- newdata
-  }
-  
-  # Removing the dummy row added
-  if (only_one_row) {
-    updated_newdata <- updated_newdata[1,]
-  }
-  
-  # Checking for experimental structures
-  treat <- eval(object$DIcall$treat)
-  density <- eval(object$DIcall$density)
-  block <- eval(object$DIcall$block)
-  
-  structures <- list('treatment' = treat,
-                     'density' = density,
-                     'block' = block)
-  
-  for (covariate in structures){
-    if (!is.null(covariate)  && !is.na(covariate)){
-      # If covariate was supplied as numeric in function call, getting its value
-      if (is.numeric(covariate)){
-        covariate <- colnames(original_data)[covariate]
-      }
-      
-      if (is.numeric(original_data[, covariate])){
-        if ( !(covariate %in% colnames(updated_newdata))){
-          warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
-                         '\' = ' , 0))
-          updated_newdata[, covariate] <- 0
-        }
-      } else {
-        # Levels of factor covariate in original data
-        covariate_levels <- as.factor(unique(original_data[, covariate]))
-        # If covariate isn't present in newdata, estimating for base level
-        if ( !(covariate %in% colnames(updated_newdata))){
-          warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
-                         '\' = ' , levels(covariate_levels)[1]))
-          updated_newdata[, covariate] <- levels(covariate_levels)[1]
-        }
-        
-        # If levels of covariate in newdata not matching ones in original data, stop prediction
-        if (! (all(unique(updated_newdata[, covariate]) %in% covariate_levels, na.rm = TRUE))){
-          stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.'))
+    
+    grouped_IDs <- group_IDs(data = updated_newdata, prop = prop, ID = ID_cols)
+    updated_newdata <- cbind(updated_newdata, grouped_IDs)
+    
+    # Removing the dummy row added
+    if (only_one_row) {
+      updated_newdata <- updated_newdata[1,]
+    }
+    
+    # Checking for experimental structures
+    treat <- eval(object$DIcall$treat)
+    density <- eval(object$DIcall$density)
+    block <- eval(object$DIcall$block)
+    
+    structures <- list('treatment' = treat,
+                       'density' = density,
+                       'block' = block)
+    
+    for (covariate in structures){
+      if (!is.null(covariate)  && !is.na(covariate)){
+        # If covariate was supplied as numeric in function call, getting its value
+        if (is.numeric(covariate)){
+          covariate <- colnames(original_data)[covariate]
         }
         
-        # If covariate is supplied as character or numeric, converting to factor
-        if (!is.factor(updated_newdata[, covariate])){
-          updated_newdata[, covariate] <- factor(updated_newdata[, covariate],
-                                                 levels = levels(covariate_levels))
+        if (is.numeric(original_data[, covariate])){
+          if ( !(covariate %in% colnames(updated_newdata))){
+            warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
+                           '\' = ' , 0))
+            updated_newdata[, covariate] <- 0
+          }
+        } else {
+          # Levels of factor covariate in original data
+          covariate_levels <- as.factor(unique(original_data[, covariate]))
+          # If covariate isn't present in newdata, estimating for base level
+          if ( !(covariate %in% colnames(updated_newdata))){
+            warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
+                           '\' = ' , levels(covariate_levels)[1]))
+            updated_newdata[, covariate] <- levels(covariate_levels)[1]
+          }
+          
+          # If levels of covariate in newdata not matching ones in original data, stop prediction
+          if (! (all(unique(updated_newdata[, covariate]) %in% covariate_levels, na.rm = TRUE))){
+            stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.'))
+          }
+          
+          # If covariate is supplied as character or numeric, converting to factor
+          if (!is.factor(updated_newdata[, covariate])){
+            updated_newdata[, covariate] <- factor(updated_newdata[, covariate],
+                                                   levels = levels(covariate_levels))
+          }
         }
       }
     }
-  }
-  
-  
-  # Handling extra formula
-  extra_formula <- eval(object$DIcall$extra_formula)
-  
-  if (! is.null(extra_formula)){
     
-    extra_data <- model.frame(terms(extra_formula), updated_newdata)
     
-    # Matching factors in extra_formula to ones in original_data
-    og_factors <- original_data[, sapply(original_data, function(x){is.factor(x) | is.character(x)})]
-    common_factors <- intersect(colnames(extra_data), colnames(og_factors))
+    # Handling extra formula
+    extra_formula <- eval(object$DIcall$extra_formula)
     
-    if (length(common_factors)!=0){
+    if (! is.null(extra_formula)){
       
-      # Levels of all factors in extra_formula
-      xlevels <- lapply(common_factors, function(x){levels(original_data[,x])})
-      names(xlevels) <- common_factors
-
-      for (i in common_factors){
+      extra_data <- model.frame(terms(extra_formula), updated_newdata)
+      
+      # Matching factors in extra_formula to ones in original_data
+      og_factors <- original_data[, sapply(original_data, function(x){is.factor(x) | is.character(x)})]
+      common_factors <- intersect(colnames(extra_data), colnames(og_factors))
+      
+      if (length(common_factors)!=0){
         
-        # If levels of factors in extra_formula in newdata not matching ones in original data, stop prediction
-        if (! (all(unique(updated_newdata[, i]) %in% xlevels[[i]], na.rm = TRUE))){
-          stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.'))
-        }
+        # Levels of all factors in extra_formula
+        xlevels <- lapply(common_factors, function(x){levels(original_data[,x])})
+        names(xlevels) <- common_factors
         
-        # If factors in extra_formula is supplied as character or numeric, converting to factor
-        if (!is.factor(updated_newdata[, i])){
-          updated_newdata[, i] <- factor(updated_newdata[,i], levels = xlevels[[i]])
+        for (i in common_factors){
+          
+          # If levels of factors in extra_formula in newdata not matching ones in original data, stop prediction
+          if (! (all(unique(updated_newdata[, i]) %in% xlevels[[i]], na.rm = TRUE))){
+            stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.'))
+          }
+          
+          # If factors in extra_formula is supplied as character or numeric, converting to factor
+          if (!is.factor(updated_newdata[, i])){
+            updated_newdata[, i] <- factor(updated_newdata[,i], levels = xlevels[[i]])
+          }
         }
       }
+      
+      # Having certain functions in extra_formula like poly, ns, bs, etc.
+      # cause problems in estimating model.matrix for newdata
+      
+      # So my solution is to simply refit the model with glm when 
+      # such functions are used and then make the prediction
+      
+      
+      fmla <-  eval(object$DIcheck_formula)
+      
+      extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), 
+                                 data = original_data, theta = theta_value, what = DImodel_tag)
+      
+      if (DImodel_tag == "E") {
+        original_data_updated <- data.frame(original_data, E = extra_variables)
+      }
+      if (DImodel_tag == "AV") {
+        original_data_updated <- data.frame(original_data, AV = extra_variables)
+      }
+      if (DImodel_tag == "ADD") {
+        original_data_updated <- data.frame(original_data, extra_variables)
+      }
+      if (DImodel_tag == "FG") {
+        original_data[, 'FG_'] <- extra_variables
+        original_data_updated <- original_data
+      }
+      if (DImodel_tag == "FULL") {
+        original_data_updated <- data.frame(original_data, extra_variables, 
+                                            check.names = FALSE)
+      } 
+      if (DImodel_tag == 'ID' | DImodel_tag == 'STR'){
+        original_data_updated <- original_data
+      }
+      
+      # Grouping ID terms 
+      # If no grouping was specified
+      if(is.null(ID_cols)){
+        ID_cols <- paste0(prop, "_ID")
+      }
+      
+      grouped_IDs <- group_IDs(data = original_data_updated, prop = prop, ID = ID_cols)
+      original_data_updated <- cbind(original_data_updated, grouped_IDs)
+      
+      glm_fit <- lm(fmla, data = original_data_updated)
     }
     
-    # Having certain functions in extra_formula like poly, ns, bs, etc.
-    # cause problems in estimating model.matrix for newdata
-    
-    # So my solution is to simply refit the model with glm when 
-    # such functions are used and then make the prediction
+    # Calculating response
+    # Remove backticks from coefficient names for name-matching
+    names(betas) <- gsub(pattern = '`', replacement = '' , x = names(betas))
     
-
-    fmla <-  eval(object$DIcheck_formula)
-    
-    extra_variables <- DI_data(prop = prop, FG = eval(object$DIcall$FG), 
-                               data = original_data, theta = theta_value, what = DImodel_tag)
-    
-    if (DImodel_tag == "E") {
-      original_data_updated <- data.frame(original_data, E = extra_variables)
-    }
-    if (DImodel_tag == "AV") {
-      original_data_updated <- data.frame(original_data, AV = extra_variables)
-    }
-    if (DImodel_tag == "ADD") {
-      original_data_updated <- data.frame(original_data, extra_variables)
+    # glm fmla object from DI_check_and_fit
+    if (DImodel_tag == 'CUSTOM'){
+      fmla <- eval(object$DIcall$custom_formula)
+    } else {
+      fmla <-  eval(object$DIcheck_formula)
     }
-    if (DImodel_tag == "FG") {
-      original_data[, 'FG_'] <- extra_variables
-      original_data_updated <- original_data
+    
+    if (! is.null(extra_formula)){
+      Terms <- delete.response(terms(glm_fit))
+    } else {
+      Terms <- delete.response(terms(formula(fmla)))
     }
-    if (DImodel_tag == "FULL") {
-      original_data_updated <- data.frame(original_data, extra_variables, 
-                                    check.names = FALSE)
-    } 
-    if (DImodel_tag == 'ID' | DImodel_tag == 'STR'){
-      original_data_updated <- original_data
+    # Model matrix for predictions of newdata
+    X_old <- as.data.frame(model.matrix(Terms, data = updated_newdata))
+    names(X_old) <- gsub('`','', names(X_old))
+    # print(X_old)
+    # glm formula adds NA for non-estimable levels of factors
+    # Removing the ones with NA to get only those present in DImodel
+    common <- intersect(names(betas), names(X_old))
+    X_new <- as.data.frame(X_old[, common])
+    # 
+    # # Predictions
+    # y_hat <- as.numeric(X_new %*% betas)
+    # 
+    # if (type == "response") {
+    #   inv_link <- object$family$linkinv
+    #   y_hat <- inv_link(y_hat)
+    # }
+    # if (se.fit) {
+    #   standard_errors <- as.numeric(sqrt(diag(X_new %*% vcov(object) %*% 
+    #                                             t(X_new))))
+    #   dispersion <- summary(object)$dispersion
+    #   residual.scale <- as.vector(sqrt(dispersion))
+    #   ret_obj <- list(fit = y_hat, se.fit = standard_errors, residual.scale = residual.scale)
+    # }
+    # else {
+    #   ret_obj <- y_hat
+    # }
+    
+    if (! is.null(extra_formula)){
+      # Terms <- delete.response(terms(glm_fit))
+      ret_obj <- suppressWarnings(predict.lm(glm_fit, newdata = updated_newdata,
+                                             se.fit = ifelse(interval != "none", TRUE, se.fit), 
+                                             type = ifelse(type == "link", "response", type), ...))
+    } else {
+      # Terms <- delete.response(terms(formula(fmla)))
+      ret_obj <- suppressWarnings(predict.glm(object, newdata = X_new, 
+                                              se.fit = ifelse(interval != "none", TRUE, se.fit),
+                                              type = type, ...))
     }
-    glm_fit <- glm(fmla, data = original_data_updated, family = object$family)
   }
   
-  # Calculating response
-  # Remove backticks from coefficient names for name-matching
-  names(betas) <- gsub(pattern = '`', replacement = '' , x = names(betas))
-  
-  # glm fmla object from DI_check_and_fit
-  if (DImodel_tag == 'CUSTOM'){
-    fmla <- eval(object$DIcall$custom_formula)
-  } else {
-    fmla <-  eval(object$DIcheck_formula)
-  }
-  
-  if (! is.null(extra_formula)){
-    Terms <- delete.response(terms(glm_fit))
-  } else {
-    Terms <- delete.response(terms(formula(fmla)))
+  # Confidence and prediction intervals
+  if(object$family$family == "gaussian"){
+    if(interval != "none"){
+      predictor <- ret_obj$fit
+      ip <- ret_obj$se.fit^2
+      # Calculating CI/PI
+      w <- object$weights
+      r <- object$residuals
+      rss <- sum(if (is.null(w)) r^2 else r^2 * w)
+      df <- object$df.residual
+      res.var <- rss/df
+      pred.var <- res.var/weights
+      
+      tfrac <- qt((1 - level)/2, df)
+      hwid <- tfrac * switch(interval, confidence = sqrt(ip), 
+                             prediction = sqrt(ip + pred.var))
+      if (type != "terms") {
+        predictor <- cbind(predictor, predictor + hwid %o% 
+                             c(1, -1))
+        colnames(predictor) <- c("fit", "lwr", "upr")
+        ret_obj$fit <- predictor
+      }
+      # Don't give SE if user didn't ask for it
+      if(! se.fit){
+        ret_obj$se.fit <- NULL
+        ret_obj$residual.scale <- NULL
+        ret_obj <- predictor
+      }
+    }
   }
-  # Model matrix for predictions of newdata
-  X_old <- as.data.frame(model.matrix(Terms, data = updated_newdata))
-  names(X_old) <- gsub('`','', names(X_old))
-  #print(X_old)
-  # glm formula adds NA for non-estimable levles of factors
-  # Removing the ones with NA to get only those present in DImodel 
-  common <- intersect(names(betas), names(X_old))
-  X_new <- as.matrix(X_old[, common])
   
-  # Predictions
-  y_hat <- as.numeric(X_new %*% betas)
-
-  if (type == "response") {
-    inv_link <- object$family$linkinv
-    y_hat <- inv_link(y_hat)
-  }
-  if (se.fit) {
-    standard_errors <- as.numeric(sqrt(diag(X_new %*% vcov(object) %*% 
-                                              t(X_new))))
-    dispersion <- summary(object)$dispersion
-    residual.scale <- as.vector(sqrt(dispersion))
-    ret_obj <- list(fit = y_hat, se.fit = standard_errors, residual.scale = residual.scale)
-  }
-  else {
-    ret_obj <- y_hat
-  }
   return(ret_obj)
 }
 
 
 contrasts_DI <- function(object, contrast, contrast_vars, ...){
   if (missing(object) | class(object)[1]!= 'DI'){
-    stop('Please provied a DImodels model object')
+    stop("Please provied a DImodels model object")
   }
   
   if (missing(contrast_vars) & missing(contrast)){
-    stop('Provide either one of contrast_vars or constrast')  
+    stop("Provide either one of `contrast_vars` or `constrast`")  
   }
   
   if (!missing(contrast_vars) & !missing(contrast)){
-    stop('Provide only one of contrast_vars or constrast')  
+    warning("Provide only one of `contrast_vars` or `constrast`. `contrast_vars` will be ignored.") 
+    contrast_vars <- NULL
   }
   
+  og_data <- object$original_data
+  # prop_cols <- eval(object$DIcall$prop)
+  # prop <- colnames(og_data)[prop_cols]
+  # ID_cols <- eval(object$DIcall$ID)
+  # 
+  # # Grouping ID terms 
+  # # If not grouping was specified
+  # if(is.null(ID_cols)){
+  #   ID_cols <- paste0(prop, "_ID")
+  # }
+  # 
+  # grouped_IDs <- group_IDs(data = og_data, prop = prop, ID = ID_cols)
+  # og_data <- cbind(og_data, grouped_IDs)
+  
   betas <- coef(object)
   theta_flag <- object$coefficients['theta']
   if (!is.na(theta_flag)){
@@ -280,18 +367,21 @@ contrasts_DI <- function(object, contrast, contrast_vars, ...){
     betas <- betas[-length(betas)]
   }
   
-  if (!missing(contrast_vars)){
+  if (!missing(contrast_vars) && !is.null(contrast_vars)){
     
     if(!is.list(contrast_vars)){
-      stop('Contrast variables should be specified as a nested list')
+      stop('Contrast variables should be specified as a nested named list')
     }
     
-    og_data <- object$original_data
     the_C <- matrix(0, ncol = length(betas))
     colnames(the_C) <- names(betas)
     
     for (var in names(contrast_vars)){
-      if (!(var %in% colnames(og_data) | var %in% colnames(betas))){
+      # if(var %in% prop) {
+      #   var <- paste0(var, "_ID")
+      # }
+      
+      if (!(var %in% colnames(og_data))){
         stop(paste0(var, ' not present in model'))
       }
       
diff --git a/README.Rmd b/README.Rmd
index 4e4f213..44fe864 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -22,9 +22,22 @@ knitr::opts_chunk$set(
 [![Codecov test coverage](https://codecov.io/gh/rafamoral/DImodels/branch/main/graph/badge.svg)](https://app.codecov.io/gh/rafamoral/DImodels?branch=main)
 <!-- badges: end -->
 
+
 The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species  $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit.   
 
-__Main changes in the package from version 1.0 to version 1.1 and newer__
+
+__Main changes in the package from version 1.2 to version 1.3__
+
+- The `DI` and `autoDI` functions now have an additional parameter called `ID` which enables the user to group the species identity effects (see examples below).
+- The `predict` function now has flexibility to calculate confidence and prediction intervals for the predicted values.
+
+__Main changes in the package from version 1.1 to version 1.2__
+
+- There are two new functions added to the package:
+     - `predict`: Make predictions from a fitted DI model without having to worry about theta, and the interaction terms in the data.
+     - `contrasts_DI`: Create contrasts for a DI model.
+     
+__Main changes in the package from version 1.0 to version 1.1__
 
 - `DI_data_prepare` is now superseded by `DI_data` (see examples below)
 
@@ -37,13 +50,6 @@ install.packages("DImodels")
 library("DImodels")
 ```
 
-You can install the development version of DImodels from [GitHub](https://github.com/) with:
-
-``` r
-# install.packages("devtools")
-devtools::install_github("rafamoral/DImodels")
-```
-
 ## Accessing an introduction to Diversity-Introductions models
 
 It is recommended that users unfamiliar with Diversity-Interactions (DI) models read the introduction to `DImodels`, before using the package. Run the following code to access the documentation. 
@@ -167,6 +173,36 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE)
 coef(m1_theta)
 ```
 
+### Grouping the species identity effects in the model
+The species identity effects in a DI model can be grouped by specifying groups for each species using the `ID` argument.
+The `ID` argument functions similar to the `FG` argument and accepts a character list of same length as number of species in the model. The identity effects of species belonging in the same group will be grouped together.
+
+Grouping all identity effects into a single term
+```{r, echo = TRUE}
+m1_group <- update_DI(object = m1_theta, 
+                      ID = c("ID1", "ID1", "ID1", "ID1", "ID1",
+                             "ID1", "ID1", "ID1", "ID1"))
+coef(m1_group)
+```
+
+Grouping identity effects of specific species
+```{r, echo = TRUE}
+m1_group2 <- update_DI(object = m1_theta, 
+                       ID = c("ID1", "ID1", "ID1", 
+                              "ID2", "ID2", "ID2", 
+                              "ID3", "ID3", "ID3"))
+coef(m1_group2)
+```
+
+Note: Grouping ID effects will not have an effect on the calculation of the interaction effects, they would still be calculated by using all species.
+
+Read the documentation of `DI` and `autoDI` for more information and examples using the `ID` parameter.
+```{r, eval = FALSE}
+?DI
+?autoDI
+```
+
+
 ## Fitting customised models using the `DI` function
 
 There are two ways to fit customised models using `DI`; the first is by using the option `DImodel = ` in the `DI` function and adding the argument `extra_formula = ` to it, and the second is to use the `custom_formula` argument in the `DI` function. If species interaction variables (e.g., the FG interactions or the average pairwise interaction) are included in either `extra_formula` or `custom_formula`, they must first be created and included in the dataset. The function `DI_data` can be used to compute several types of species interaction variables. 
@@ -219,6 +255,84 @@ m3 <- DI(y = "response",
 summary(m3)
 ```
 
+## Making predictions and testing contrasts for DI models
+### Predictions using a DI model
+
+We can make predictions from a DI model just like any other regression model using the `predict` function. The user does not need to worry about adding any interaction terms or adjusting any columns if theta is not equal to 1. Only the species proportions along with any additional experimental structures is needed and all other terms in the model will be calculated for the user.  
+```{r, echo = TRUE}
+# Fit model
+m3 <- DI(y = "response", prop = 4:12, 
+         treat = "treatment", DImodel = "AV", 
+         extra_formula = ~ (AV) : treatment, data = sim3a)
+
+predict_data <- sim3[c(1, 79, 352), 3:12]
+# Only species proportions and treatment is needed
+print(predict_data)
+# Make prediction
+predict(m3, newdata = predict_data)
+```
+
+### Uncertainity around predictions
+```{r}
+# The interval and level parameters can be used to calculate the 
+# uncertainty around the predictions
+
+# Get confidence interval around prediction
+predict(m3, newdata = predict_data, interval = "confidence")
+
+# Get prediction interval around prediction
+predict(m3, newdata = predict_data, interval = "prediction")
+
+# The function returns a 95% interval by default, 
+# this can be changed using the level argument
+predict(m3, newdata = predict_data, 
+        interval = "prediction", level = 0.9)
+```
+
+
+### Contrasts for DI models
+The `contrasts_DI` function can be used to compare and formally test for a difference in performance of communities within the same as well as across different experimental structures
+
+Comparing the performance of the monocultures of different species at treatment A
+```{r, echo = TRUE}
+contr <- list("p1vsp2" = c(1, -1, 0, 0,  0,  0, 0, 0,  0, 0, 0, 0),
+              "p3vsp5" = c(0,  0, 1, 0, -1,  0, 0, 0,  0, 0, 0, 0),
+              "p4vsp6" = c(0,  0, 0, 1,  0, -1, 0, 0,  0, 0, 0, 0),
+              "p7vsp9" = c(0,  0, 0, 0,  0,  0, 1, 0, -1, 0, 0, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+summary(the_C)
+```
+
+Comparing across the two treatment levels for monoculture of species 1
+```{r, echo = TRUE}
+contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+summary(the_C)
+```
+
+Comparing between two species mixtures
+```{r, echo = TRUE}
+mixA <- c(0.25, 0,      0.25, 0,      0.25, 0,      0.25, 0, 0, 0, 0, 0)
+mixB <- c(0,    0.3333, 0,    0.3333, 0,    0.3333, 0,    0, 0, 0, 0, 0)
+
+# We have the proportions of the individual species in the mixtures, however
+# we still need to calculate the interaction effect for these communities
+contr_data <- data.frame(rbind(mixA, mixB))
+colnames(contr_data) <- names(coef(m3))
+
+# Adding the interaction effect of the two mixtures
+contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV
+print(contr_data)
+
+# We can now subtract the respective values in each column of the two 
+# mixtures and get our contrast
+my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ])
+rownames(my_contrast) <- "mixAvsB"
+
+the_C <- contrasts_DI(m3, contrast = my_contrast)
+summary(the_C)
+```
+
 
 ## References
 Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F Isbell, J Levine, A Lüscher, V Picasso, C Roscher, MT Sebastia, M Suter and A Weigelt (2013) An improved model to predict the effects of changing biodiversity levels on ecosystem function. Journal of Ecology, 101, 344-355. 
diff --git a/README.md b/README.md
index 184f9a7..47414a0 100644
--- a/README.md
+++ b/README.md
@@ -24,8 +24,22 @@ species $p_1$, $p_2$, …, $p_S$ from a point in time prior to the
 recording of the response. The proportions sum to 1 for each
 experimental unit.
 
-**Main changes in the package from version 1.0 to version 1.1 and
-newer**
+**Main changes in the package from version 1.2 to version 1.3**
+
+- The `DI` and `autoDI` functions now have an additional parameter
+  called `ID` which enables the user to group the species identity
+  effects (see examples below).
+- The `predict` function now has flexibility to calculate confidence and
+  prediction intervals for the predicted values.
+
+**Main changes in the package from version 1.1 to version 1.2**
+
+- There are two new functions added to the package:
+  - `predict`: Make predictions from a fitted DI model without having to
+    worry about theta, and the interaction terms in the data.
+  - `contrasts_DI`: Create contrasts for a DI model.
+
+**Main changes in the package from version 1.0 to version 1.1**
 
 - `DI_data_prepare` is now superseded by `DI_data` (see examples below)
 
@@ -39,14 +53,6 @@ install.packages("DImodels")
 library("DImodels")
 ```
 
-You can install the development version of DImodels from
-[GitHub](https://github.com/) with:
-
-``` r
-# install.packages("devtools")
-devtools::install_github("rafamoral/DImodels")
-```
-
 ## Accessing an introduction to Diversity-Introductions models
 
 It is recommended that users unfamiliar with Diversity-Interactions (DI)
@@ -292,15 +298,15 @@ summary(auto1)
 #> 
 #> Coefficients:
 #>                Estimate Std. Error t value Pr(>|t|)    
-#> p1               9.7497     0.3666  26.595  < 2e-16 ***
-#> p2               8.5380     0.3672  23.253  < 2e-16 ***
-#> p3               8.2329     0.3666  22.459  < 2e-16 ***
-#> p4               6.3644     0.3665  17.368  < 2e-16 ***
-#> p5              10.8468     0.3669  29.561  < 2e-16 ***
-#> p6               5.9621     0.4515  13.205  < 2e-16 ***
-#> p7               5.4252     0.4516  12.015  < 2e-16 ***
-#> p8               7.3204     0.4515  16.213  < 2e-16 ***
-#> p9               8.2154     0.4515  18.196  < 2e-16 ***
+#> p1_ID            9.7497     0.3666  26.595  < 2e-16 ***
+#> p2_ID            8.5380     0.3672  23.253  < 2e-16 ***
+#> p3_ID            8.2329     0.3666  22.459  < 2e-16 ***
+#> p4_ID            6.3644     0.3665  17.368  < 2e-16 ***
+#> p5_ID           10.8468     0.3669  29.561  < 2e-16 ***
+#> p6_ID            5.9621     0.4515  13.205  < 2e-16 ***
+#> p7_ID            5.4252     0.4516  12.015  < 2e-16 ***
+#> p8_ID            7.3204     0.4515  16.213  < 2e-16 ***
+#> p9_ID            8.2154     0.4515  18.196  < 2e-16 ***
 #> FG_bfg_FG1_FG2   3.4395     0.8635   3.983 8.09e-05 ***
 #> FG_bfg_FG1_FG3  11.5915     0.8654  13.395  < 2e-16 ***
 #> FG_bfg_FG2_FG3   2.8711     1.2627   2.274  0.02351 *  
@@ -359,15 +365,15 @@ summary(m1)
 #> 
 #> Coefficients:
 #>                Estimate Std. Error t value Pr(>|t|)    
-#> p1               9.7497     0.3666  26.595  < 2e-16 ***
-#> p2               8.5380     0.3672  23.253  < 2e-16 ***
-#> p3               8.2329     0.3666  22.459  < 2e-16 ***
-#> p4               6.3644     0.3665  17.368  < 2e-16 ***
-#> p5              10.8468     0.3669  29.561  < 2e-16 ***
-#> p6               5.9621     0.4515  13.205  < 2e-16 ***
-#> p7               5.4252     0.4516  12.015  < 2e-16 ***
-#> p8               7.3204     0.4515  16.213  < 2e-16 ***
-#> p9               8.2154     0.4515  18.196  < 2e-16 ***
+#> p1_ID            9.7497     0.3666  26.595  < 2e-16 ***
+#> p2_ID            8.5380     0.3672  23.253  < 2e-16 ***
+#> p3_ID            8.2329     0.3666  22.459  < 2e-16 ***
+#> p4_ID            6.3644     0.3665  17.368  < 2e-16 ***
+#> p5_ID           10.8468     0.3669  29.561  < 2e-16 ***
+#> p6_ID            5.9621     0.4515  13.205  < 2e-16 ***
+#> p7_ID            5.4252     0.4516  12.015  < 2e-16 ***
+#> p8_ID            7.3204     0.4515  16.213  < 2e-16 ***
+#> p9_ID            8.2154     0.4515  18.196  < 2e-16 ***
 #> FG_bfg_FG1_FG2   3.4395     0.8635   3.983 8.09e-05 ***
 #> FG_bfg_FG1_FG3  11.5915     0.8654  13.395  < 2e-16 ***
 #> FG_bfg_FG2_FG3   2.8711     1.2627   2.274  0.02351 *  
@@ -395,9 +401,9 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE)
 #> Fitted model: Functional group effects 'FG' DImodel
 #> Theta estimate: 0.9681
 coef(m1_theta)
-#>             p1             p2             p3             p4             p5 
+#>          p1_ID          p2_ID          p3_ID          p4_ID          p5_ID 
 #>      9.8128865      8.6069092      8.2968619      6.4287580     10.9110563 
-#>             p6             p7             p8             p9 FG_bfg_FG1_FG2 
+#>          p6_ID          p7_ID          p8_ID          p9_ID FG_bfg_FG1_FG2 
 #>      6.0189395      5.4846833      7.4038925      8.2992262      2.9840924 
 #> FG_bfg_FG1_FG3 FG_bfg_FG2_FG3     FG_wfg_FG1     FG_wfg_FG2     FG_wfg_FG3 
 #>     10.6019235      2.3514998      2.3737831      0.3789464      1.8470612 
@@ -405,6 +411,61 @@ coef(m1_theta)
 #>      3.1017864      0.9681005
 ```
 
+### Grouping the species identity effects in the model
+
+The species identity effects in a DI model can be grouped by specifying
+groups for each species using the `ID` argument. The `ID` argument
+functions similar to the `FG` argument and accepts a character list of
+same length as number of species in the model. The identity effects of
+species belonging in the same group will be grouped together.
+
+Grouping all identity effects into a single term
+
+``` r
+m1_group <- update_DI(object = m1_theta, 
+                      ID = c("ID1", "ID1", "ID1", "ID1", "ID1",
+                             "ID1", "ID1", "ID1", "ID1"))
+#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis.
+#> Fitted model: Functional group effects 'FG' DImodel
+#> Theta estimate: 0.9919
+coef(m1_group)
+#>            ID1 FG_bfg_FG1_FG2 FG_bfg_FG1_FG3 FG_bfg_FG2_FG3     FG_wfg_FG1 
+#>      7.8667702      1.1475018     12.9438529     -1.2235215      5.6141823 
+#>     FG_wfg_FG2     FG_wfg_FG3     treatmentA          theta 
+#>     -5.5214662      1.0205019      3.1017864      0.9919097
+```
+
+Grouping identity effects of specific species
+
+``` r
+m1_group2 <- update_DI(object = m1_theta, 
+                       ID = c("ID1", "ID1", "ID1", 
+                              "ID2", "ID2", "ID2", 
+                              "ID3", "ID3", "ID3"))
+#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis.
+#> Fitted model: Functional group effects 'FG' DImodel
+#> Theta estimate: 0.989
+coef(m1_group2)
+#>            ID1            ID2            ID3 FG_bfg_FG1_FG2 FG_bfg_FG1_FG3 
+#>      8.5288216      7.9537767      7.1357104      0.9665077     13.3434768 
+#> FG_bfg_FG2_FG3     FG_wfg_FG1     FG_wfg_FG2     FG_wfg_FG3     treatmentA 
+#>      0.4940952      4.1543637     -4.4683501      3.4674196      3.1017864 
+#>          theta 
+#>      0.9889999
+```
+
+Note: Grouping ID effects will not have an effect on the calculation of
+the interaction effects, they would still be calculated by using all
+species.
+
+Read the documentation of `DI` and `autoDI` for more information and
+examples using the `ID` parameter.
+
+``` r
+?DI
+?autoDI
+```
+
 ## Fitting customised models using the `DI` function
 
 There are two ways to fit customised models using `DI`; the first is by
@@ -437,15 +498,15 @@ summary(m2)
 #> 
 #> Coefficients:
 #>                  Estimate Std. Error t value Pr(>|t|)    
-#> p1               9.391824   0.540485  17.377  < 2e-16 ***
-#> p2               8.492825   0.540879  15.702  < 2e-16 ***
-#> p3               8.406038   0.540471  15.553  < 2e-16 ***
-#> p4               6.015296   0.540391  11.131  < 2e-16 ***
-#> p5              10.802270   0.378776  28.519  < 2e-16 ***
-#> p6               5.917565   0.461482  12.823  < 2e-16 ***
-#> p7               5.380703   0.461535  11.658  < 2e-16 ***
-#> p8               7.275881   0.461506  15.766  < 2e-16 ***
-#> p9               8.170907   0.461471  17.706  < 2e-16 ***
+#> p1_ID           10.018491   0.466552  21.473  < 2e-16 ***
+#> p2_ID            8.494038   0.467009  18.188  < 2e-16 ***
+#> p3_ID            7.970716   0.466536  17.085  < 2e-16 ***
+#> p4_ID            6.624476   0.466443  14.202  < 2e-16 ***
+#> p5_ID           10.802270   0.378776  28.519  < 2e-16 ***
+#> p6_ID            5.917565   0.461482  12.823  < 2e-16 ***
+#> p7_ID            5.380703   0.461535  11.658  < 2e-16 ***
+#> p8_ID            7.275881   0.461506  15.766  < 2e-16 ***
+#> p9_ID            8.170907   0.461471  17.706  < 2e-16 ***
 #> FG_bfg_FG1_FG2   3.439508   0.865279   3.975 8.38e-05 ***
 #> FG_bfg_FG1_FG3  11.591458   0.867140  13.367  < 2e-16 ***
 #> FG_bfg_FG2_FG3   2.871063   1.265295   2.269  0.02381 *  
@@ -453,10 +514,10 @@ summary(m2)
 #> FG_wfg_FG2       0.679285   2.360195   0.288  0.77365    
 #> FG_wfg_FG3       2.416774   2.333420   1.036  0.30097    
 #> treatmentA       3.190868   0.216493  14.739  < 2e-16 ***
-#> `p1:treatmentB`  0.626667   0.668369   0.938  0.34902    
-#> `p2:treatmentB`  0.001213   0.668369   0.002  0.99855    
-#> `p3:treatmentB` -0.435322   0.668369  -0.651  0.51522    
-#> `p4:treatmentB`  0.609180   0.668369   0.911  0.36262    
+#> `treatmentA:p1` -0.626667   0.668369  -0.938  0.34902    
+#> `treatmentA:p2` -0.001213   0.668369  -0.002  0.99855    
+#> `treatmentA:p3`  0.435322   0.668369   0.651  0.51522    
+#> `treatmentA:p4` -0.609180   0.668369  -0.911  0.36262    
 #> ---
 #> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
 #> 
@@ -501,15 +562,15 @@ summary(m3)
 #> 
 #> Coefficients:
 #>                          Estimate Std. Error t value Pr(>|t|)    
-#> p1                        9.68668    0.40000  24.217  < 2e-16 ***
-#> p2                        8.47495    0.40053  21.159  < 2e-16 ***
-#> p3                        8.16990    0.39998  20.426  < 2e-16 ***
-#> p4                        6.30140    0.39987  15.759  < 2e-16 ***
-#> p5                       10.78379    0.40031  26.938  < 2e-16 ***
-#> p6                        5.89908    0.47958  12.301  < 2e-16 ***
-#> p7                        5.36222    0.47963  11.180  < 2e-16 ***
-#> p8                        7.25740    0.47960  15.132  < 2e-16 ***
-#> p9                        8.15243    0.47957  17.000  < 2e-16 ***
+#> p1_ID                     9.68668    0.40000  24.217  < 2e-16 ***
+#> p2_ID                     8.47495    0.40053  21.159  < 2e-16 ***
+#> p3_ID                     8.16990    0.39998  20.426  < 2e-16 ***
+#> p4_ID                     6.30140    0.39987  15.759  < 2e-16 ***
+#> p5_ID                    10.78379    0.40031  26.938  < 2e-16 ***
+#> p6_ID                     5.89908    0.47958  12.301  < 2e-16 ***
+#> p7_ID                     5.36222    0.47963  11.180  < 2e-16 ***
+#> p8_ID                     7.25740    0.47960  15.132  < 2e-16 ***
+#> p9_ID                     8.15243    0.47957  17.000  < 2e-16 ***
 #> FG_bfg_FG1_FG2            4.00191    1.12383   3.561 0.000415 ***
 #> FG_bfg_FG1_FG3           11.77389    1.12973  10.422  < 2e-16 ***
 #> FG_bfg_FG2_FG3            3.83681    1.64287   2.335 0.020027 *  
@@ -588,6 +649,185 @@ summary(m3)
 #> Number of Fisher Scoring iterations: 2
 ```
 
+## Making predictions and testing contrasts for DI models
+
+### Predictions using a DI model
+
+We can make predictions from a DI model just like any other regression
+model using the `predict` function. The user does not need to worry
+about adding any interaction terms or adjusting any columns if theta is
+not equal to 1. Only the species proportions along with any additional
+experimental structures is needed and all other terms in the model will
+be calculated for the user.
+
+``` r
+# Fit model
+m3 <- DI(y = "response", prop = 4:12, 
+         treat = "treatment", DImodel = "AV", 
+         extra_formula = ~ (AV) : treatment, data = sim3a)
+#> Warning in DI_data_prepare(y = y, block = block, density = density, prop = prop, : One or more rows have species proportions that sum to approximately 1, but not exactly 1. This is typically a rounding issue, and has been corrected internally prior to analysis.
+#> Fitted model: Average interactions 'AV' DImodel
+
+predict_data <- sim3[c(1, 79, 352), 3:12]
+# Only species proportions and treatment is needed
+print(predict_data)
+#>     treatment p1 p2        p3 p4        p5        p6        p7        p8
+#> 1           A  0  0 0.0000000  0 0.0000000 0.0000000 0.0000000 0.0000000
+#> 79          A  0  0 0.0000000  0 0.5000000 0.0000000 0.0000000 0.5000000
+#> 352         B  0  0 0.1666667  0 0.1666667 0.1666667 0.1666667 0.1666667
+#>            p9
+#> 1   1.0000000
+#> 79  0.0000000
+#> 352 0.1666667
+# Make prediction
+predict(m3, newdata = predict_data)
+#>        1       79      352 
+#> 12.83789 14.27503 10.00291
+```
+
+### Uncertainity around predictions
+
+``` r
+# The interval and level parameters can be used to calculate the 
+# uncertainty around the predictions
+
+# Get confidence interval around prediction
+predict(m3, newdata = predict_data, interval = "confidence")
+#>          fit       lwr      upr
+#> 1   12.83789 12.028716 13.64707
+#> 79  14.27503 13.817612 14.73246
+#> 352 10.00291  9.694552 10.31126
+
+# Get prediction interval around prediction
+predict(m3, newdata = predict_data, interval = "prediction")
+#>          fit       lwr      upr
+#> 1   12.83789 10.124779 15.55100
+#> 79  14.27503 11.645310 16.90476
+#> 352 10.00291  7.394976 12.61083
+
+# The function returns a 95% interval by default, 
+# this can be changed using the level argument
+predict(m3, newdata = predict_data, 
+        interval = "prediction", level = 0.9)
+#>          fit       lwr      upr
+#> 1   12.83789 10.562595 15.11319
+#> 79  14.27503 12.069670 16.48040
+#> 352 10.00291  7.815819 12.18999
+```
+
+### Contrasts for DI models
+
+The `contrasts_DI` function can be used to compare and formally test for
+a difference in performance of communities within the same as well as
+across different experimental structures
+
+Comparing the performance of the monocultures of different species at
+treatment A
+
+``` r
+contr <- list("p1vsp2" = c(1, -1, 0, 0,  0,  0, 0, 0,  0, 0, 0, 0),
+              "p3vsp5" = c(0,  0, 1, 0, -1,  0, 0, 0,  0, 0, 0, 0),
+              "p4vsp6" = c(0,  0, 0, 1,  0, -1, 0, 0,  0, 0, 0, 0),
+              "p7vsp9" = c(0,  0, 0, 0,  0,  0, 1, 0, -1, 0, 0, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+#> Generated contrast matrix:
+#>        p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV treatmentA
+#> p1vsp2     1    -1     0     0     0     0     0     0     0  0          0
+#> p3vsp5     0     0     1     0    -1     0     0     0     0  0          0
+#> p4vsp6     0     0     0     1     0    -1     0     0     0  0          0
+#> p7vsp9     0     0     0     0     0     0     1     0    -1  0          0
+#>        `AV:treatmentB`
+#> p1vsp2               0
+#> p3vsp5               0
+#> p4vsp6               0
+#> p7vsp9               0
+summary(the_C)
+#> 
+#>   Simultaneous Tests for General Linear Hypotheses
+#> 
+#> Fit: glm(formula = new_fmla, family = family, data = new_data)
+#> 
+#> Linear Hypotheses:
+#>             Estimate Std. Error z value Pr(>|z|)    
+#> p1vsp2 == 0    1.473      0.477   3.088  0.00803 ** 
+#> p3vsp5 == 0   -2.652      0.477  -5.560 1.08e-07 ***
+#> p4vsp6 == 0    1.462      0.477   3.064  0.00870 ** 
+#> p7vsp9 == 0   -5.521      0.477 -11.573  < 2e-16 ***
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> (Adjusted p values reported -- single-step method)
+```
+
+Comparing across the two treatment levels for monoculture of species 1
+
+``` r
+contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+#> Generated contrast matrix:
+#>           p1_ID p2_ID p3_ID p4_ID p5_ID p6_ID p7_ID p8_ID p9_ID AV treatmentA
+#> treatAvsB     1     0     0     0     0     0     0     0     0  0          1
+#>           `AV:treatmentB`
+#> treatAvsB               0
+summary(the_C)
+#> 
+#>   Simultaneous Tests for General Linear Hypotheses
+#> 
+#> Fit: glm(formula = new_fmla, family = family, data = new_data)
+#> 
+#> Linear Hypotheses:
+#>                Estimate Std. Error z value Pr(>|z|)    
+#> treatAvsB == 0  12.8993     0.4116   31.34   <2e-16 ***
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> (Adjusted p values reported -- single-step method)
+```
+
+Comparing between two species mixtures
+
+``` r
+mixA <- c(0.25, 0,      0.25, 0,      0.25, 0,      0.25, 0, 0, 0, 0, 0)
+mixB <- c(0,    0.3333, 0,    0.3333, 0,    0.3333, 0,    0, 0, 0, 0, 0)
+
+# We have the proportions of the individual species in the mixtures, however
+# we still need to calculate the interaction effect for these communities
+contr_data <- data.frame(rbind(mixA, mixB))
+colnames(contr_data) <- names(coef(m3))
+
+# Adding the interaction effect of the two mixtures
+contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV
+print(contr_data)
+#>      p1_ID  p2_ID p3_ID  p4_ID p5_ID  p6_ID p7_ID p8_ID p9_ID        AV
+#> mixA  0.25 0.0000  0.25 0.0000  0.25 0.0000  0.25     0     0 0.3750000
+#> mixB  0.00 0.3333  0.00 0.3333  0.00 0.3333  0.00     0     0 0.3332667
+#>      treatmentA `AV:treatmentB`
+#> mixA          0               0
+#> mixB          0               0
+
+# We can now subtract the respective values in each column of the two 
+# mixtures and get our contrast
+my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ])
+rownames(my_contrast) <- "mixAvsB"
+
+the_C <- contrasts_DI(m3, contrast = my_contrast)
+#> Generated contrast matrix:
+#>         p1_ID   p2_ID p3_ID   p4_ID p5_ID   p6_ID p7_ID p8_ID p9_ID         AV
+#> mixAvsB  0.25 -0.3333  0.25 -0.3333  0.25 -0.3333  0.25     0     0 0.04173333
+#>         treatmentA `AV:treatmentB`
+#> mixAvsB          0               0
+summary(the_C)
+#> 
+#>   Simultaneous Tests for General Linear Hypotheses
+#> 
+#> Fit: glm(formula = new_fmla, family = family, data = new_data)
+#> 
+#> Linear Hypotheses:
+#>              Estimate Std. Error z value Pr(>|z|)    
+#> mixAvsB == 0   2.0379     0.2599   7.841 4.44e-15 ***
+#> ---
+#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
+#> (Adjusted p values reported -- single-step method)
+```
+
 ## References
 
 Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F
diff --git a/man/DI.Rd b/man/DI.Rd
index 3669e64..6029393 100644
--- a/man/DI.Rd
+++ b/man/DI.Rd
@@ -31,7 +31,7 @@ This function will fit a wide range of Diversity-Interactions (DI) models, one a
 
 \usage{
 DI(y, prop, DImodel, custom_formula, data,
-   block, density, treat, FG, extra_formula,
+   block, density, treat, ID, FG, extra_formula,
    estimate_theta = FALSE, theta = 1)
 }
 
@@ -72,6 +72,18 @@ When used in conjunction with \code{DImodel}, the treatment will be included in
 
 \item The \code{treat} argument is defunct when using the \code{custom_formula} argument, and any treatment must be included directly in the \code{custom_formula} argument.
 }
+}
+  \item{ID}{
+  %%     ~~Describe \code{ID} here~~
+This argument takes a text list (of length \emph{s}) dsecirbing groupings for the identity effects of the species. For example, if there are four species and you wish to group the identity effects all four species into a single term: \code{ID} could be \code{ID = c("ID1","ID1","ID1","ID1")}, where "ID1" is the name of the ID group. Similarly if the we wish to have two identity effect groups where identity effect of species 1 and 3, and species 2 and 4 are grouped together: \code{ID} could be \code{ID = c("ID1","ID2","ID1","ID2")}, where "ID1" and "ID2" are the names of the ID groups. These ideas expand to any number of species and any number or combination of groups. Finally, the ID groups do not have to be named "ID1" and "ID2", the user can specify any name for the groups.
+
+  \itemize{
+   \item If the \code{ID} argument is not specified, each species will be assumed to have a separate identity effect. 
+   
+   \item Specify an grouping for the ID does not affect the interaction terms. The interactions are still calculated using the individual species proportions.
+
+  \item The \code{ID} argument is defunct when using the \code{custom_formula} argument, since species identity effects must be included directly in the \code{custom_formula} argument. 
+}
 }
   \item{FG}{
   %%     ~~Describe \code{FG} here~~
@@ -211,14 +223,32 @@ data(Switzerland)
 Switzerlandsums <- rowSums(Switzerland[4:7])
 summary(Switzerlandsums)
 
+## Fit the a simple AV DImodel with theta = 1
+mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), 
+          DImodel = "AV", data = Switzerland)
+summary(mod)
+
+## Fit the same model but group the 4 species identity effect into 2 groups
+mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), 
+          ID = c("ID1", "ID1", "ID2", "ID2"), DImodel = "AV", 
+          data = Switzerland)
+summary(mod)
+
+## Combine the four identity effects into a single term and estimate theta
+mod <- DI(y = "yield", prop = c("p1", "p2", "p3", "p4"), 
+          ID = c("ID1", "ID1", "ID1", "ID1"), DImodel = "AV", 
+          estimate_theta = TRUE, data = Switzerland)
+summary(mod)
+
 ## Fit the FG DImodel, with factors density and treatment, and with theta = 1
 m1 <- DI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", 
          FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland)
 summary(m1)
 
-## Fit the FG DImodel, with factors density and treatment, and theta estimated
+## Fit the FG DImodel, with factors density and treatment, and a fixed value of theta not equal to 1
 m2 <- DI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", 
-         FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland, estimate_theta = TRUE)
+         FG = c("G","G","L","L"), DImodel = "FG", data = Switzerland, 
+         theta = 0.5)
 summary(m2)
 
 ## Test if the identity effects interact with nitrogen (and main effect of nitrogen excluded)
diff --git a/man/autoDI.Rd b/man/autoDI.Rd
index 8722c6c..ca65b3f 100644
--- a/man/autoDI.Rd
+++ b/man/autoDI.Rd
@@ -11,7 +11,7 @@ This function provides an automated way to fit a (limited) range of Diversity-In
 
 }
 \usage{
-autoDI(y, prop, data, block, density, treat, FG = NULL, 
+autoDI(y, prop, data, block, density, treat, ID, FG = NULL, 
        selection = c("Ftest","AIC","AICc","BIC","BICc"), 
        step0 = FALSE, step4 = TRUE)
 }
@@ -36,6 +36,18 @@ A vector of \emph{s} column names identifying the species proportions in each ro
   \item{treat}{
 %%     ~~Describe \code{prop} here~~
 The name of a column in the dataset containing the value of a treatment factor or covariate. The treatment name must be included in quotes, for example, \code{treat = "nitrogen"}. (Only one treatment or covariate is permitted in \code{autoDI}, but see \code{DI} for options involving more than one treatment.) If the treatment is a factor, the variable must already be specified as a factor prior to using \code{autoDI}. 
+}
+  \item{ID}{
+  %%     ~~Describe \code{ID} here~~
+This argument takes a text list (of length \emph{s}) dsecirbing groupings for the identity effects of the species. For example, if there are four species and you wish to group the identity effects all four species into a single term: \code{ID} could be \code{ID = c("ID1","ID1","ID1","ID1")}, where "ID1" is the name of the ID group. Similarly if the we wish to have two identity effect groups where identity effect of species 1 and 3, and species 2 and 4 are grouped together: \code{ID} could be \code{ID = c("ID1","ID2","ID1","ID2")}, where "ID1" and "ID2" are the names of the ID groups. These ideas expand to any number of species and any number or combination of groups. Finally, the ID groups do not have to be named "ID1" and "ID2", the user can specify any name for the groups.
+
+  \itemize{
+   \item If the \code{ID} argument is not specified, each species will be assumed to have a separate identity effect. 
+   
+   \item Specify an grouping for the ID does not affect the interaction terms. The interactions are still calculated using the individual species proportions.
+
+  \item The \code{ID} argument is defunct when using the \code{custom_formula} argument, since species identity effects must be included directly in the \code{custom_formula} argument. 
+}
 }
   \item{FG}{
 %%     ~~Describe \code{FG} here~~
@@ -185,6 +197,15 @@ The \code{\link{Switzerland}} dataset examples. }
                   selection = "Ftest")
   summary(auto1)
   
+## Running autoDI after grouping identity effects using the "ID" argument
+  auto2 <- autoDI(y = "yield", density = "density", 
+                  prop = c("p1","p2","p3","p4"), 
+                  treat = "nitrogen", ID = c("ID1", "ID1", "ID1", "ID1"),
+                  FG = c("G", "G", "L", "L"), data = Switzerland, 
+                  selection = "Ftest")
+  summary(auto2)
+
+  
 \donttest{
 ## Using column numbers to indicate which columns contain the proportions and including Step 0
   auto2 <- autoDI(y = "yield", density = "density", prop = 4:7, treat = "nitrogen", 
diff --git a/man/predict.DI.Rd b/man/predict.DI.Rd
index cb19fb5..e53339e 100644
--- a/man/predict.DI.Rd
+++ b/man/predict.DI.Rd
@@ -8,14 +8,20 @@ Predict Method for Diversity-Interactions (DI) Models
 Generates predictions for a fitted DI models object and, optionally, the associated standard errors for those predictions.
 }
 \usage{
-\method{predict}{DI}(object, newdata, se.fit = FALSE, type = c("link", "response"), ...)
+\method{predict}{DI}(object, newdata, se.fit = FALSE,
+interval = c("none", "confidence", "prediction"), 
+level = 0.95, weights = 1,
+type = c("link", "response", "terms"), ...)
 }
 %- maybe also 'usage' for other objects documented here.
 \arguments{
   \item{object}{a \code{\link{DI}} or \code{\link{autoDI}} model object.}
   \item{newdata}{optionally, a data frame in which to look for variables with which to predict. If omitted, predictions will be made for data used to fit the model.}
-  \item{se.fit}{logical switch indicating whether to calculate the associated standard errors}
-  \item{type}{the type of prediction required. The default is on the scale of the linear predictors; the alternative "response" is on the scale of the response variable.}
+  \item{se.fit}{A logical switch indicating whether to calculate the associated standard errors.}
+  \item{interval}{The type of interval to be calculated. Accepts one of "none", "confidence" or "prediction", with "none"" being the default.}
+  \item{level}{The level (1 - \eqn{\alpha}) at which to calculate the interval. Defaults to 0.95, giving a 95\% interval.}
+  \item{weights}{The variance weights for calculating the prediction intervals. By default all values get the same weight of 1. Can also specify a numeric vector of same length as number of rows in \code{newdata}.}
+  \item{type}{the type of prediction required. One of "link", "response" or "terms". The default "link" is on the scale of the linear predictors while "response" is on the scale of the response variable. The "terms" option returns a matrix giving the fitted values of each term in the model formula on the linear predictor scale.}
   \item{...}{further arguments passed to or from other methods. For eg: \code{dispersion} or \code{na.action} arguments from \code{\link[stats]{predict.glm}} function.}
 }
 \details{
diff --git a/tests/testthat/test-DI.R b/tests/testthat/test-DI.R
index fbe4fe9..335a654 100644
--- a/tests/testthat/test-DI.R
+++ b/tests/testthat/test-DI.R
@@ -36,14 +36,14 @@ test_that("DI works with \"E\" model", {
                density = "density", DImodel = "E",
                extra_formula = ~p1:nitrogen, data = Switzerland)
   expect_equal(mod_E$coef,
-               c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642),
+               c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.4003438816546424),
                ignore_attr = TRUE)
 
   mod_E <- DI(y = "yield", prop = 4:7, block = "nitrogen",
                density = "density", DImodel = "E",
                extra_formula = ~p1:nitrogen, data = Switzerland)
   expect_equal(mod_E$coef,
-               c(8.19821278836538, 8.52407309971615, 15.4175280362485, 11.9330267505285, 5.155459, -0.640652903945622, 0.137755987617647, 0.400343881654642),
+               c(8.5985566700200202, 8.52407309971615, 15.4175280362485392, 11.9330267505285228, 5.1554594166571261, -0.6406529039456219, 0.13775598761764632, -0.4003438816546424),
                ignore_attr = TRUE)
 
   mod_E <- DI(y = "yield", prop = 4:7, estimate_theta = T,
@@ -181,6 +181,53 @@ test_that("FG model works with theta", {
   
 })
 
+## Ensure specific models can't be fit for models with less than four species
+test_that("Correct number of species are present", {
+  data("sim1")
+  
+  sim1$p5 <- 1 - sim1$p1
+  sim1$p6 <- 1 - sim1$p1 - sim1$p2
+  
+  expect_error(DI(data = sim1, y = "response", 
+                  prop = c("p1", "p5"), 
+                  DImodel= "E"),
+               "you must have > 2 species to fit model E")
+
+  expect_error(DI(data = sim1, y = "response", 
+                  prop = c("p1", "p5"), 
+                  DImodel= "AV"),
+               "you must have > 2 species to fit model AV")
+  
+  expect_error(DI(data = sim1, y = "response", 
+                  prop = c("p1", "p5"), 
+                  DImodel= "FG",
+                  FG = c("G", "G")),
+               "you must have > 2 species to fit model FG")
+  
+  expect_error(DI(data = sim1, y = "response", 
+                  prop = c("p1", "p2", "p6"), 
+                  DImodel= "ADD"),
+               "you must have > 3 species to fit model ADD")
+
+})
+
+## Richness vs DI works
+test_that("Richness vs DI works", {
+  data("Switzerland")
+  
+  ## compare the richness model with DI alternatives
+  t1 <- richness_vs_DI(y = "yield", prop = 4:7, data = Switzerland)
+  m1 <- DI(y = "yield", prop = 4:7, data = Switzerland,
+           DImodel = "AV", estimate_theta = TRUE)
+  expect_equal(t1$coefficients, m1$coefficients)
+    
+  ## include the density effects in the linear predictors of the four models
+  t2 <- richness_vs_DI(y = "yield", prop = 4:7, data = Switzerland, extra_formula = ~ density)
+  m2 <- DI(y = "yield", prop = 4:7, data = Switzerland, DImodel = "AV",
+           extra_formula = ~ density, estimate_theta = TRUE)
+  expect_equal(t2$coefficients, m2$coefficients)
+})
+
 ## S3 methods for class DI (extract, AIC, BIC)
 test_that("S3 methods work", {
   data("Switzerland")
diff --git a/tests/testthat/test-autoDI.R b/tests/testthat/test-autoDI.R
index 5e0afbe..0eaabea 100644
--- a/tests/testthat/test-autoDI.R
+++ b/tests/testthat/test-autoDI.R
@@ -114,4 +114,4 @@ test_that("Special conditions", {
   # Drop ADD model if selection F-test and FG is specified
   expect_message(autoDI(FG = c("G","G","H"), data = sim0, y = "response", prop = 3:5),
                  regexp = "Since 'Ftest' was specified as selection criterion and functional groups were specified, dropping the ADD model as it is not nested within the FG model")
-})
\ No newline at end of file
+})
diff --git a/tests/testthat/test-dataDI.R b/tests/testthat/test-dataDI.R
index e16dfa8..d0c86d6 100644
--- a/tests/testthat/test-dataDI.R
+++ b/tests/testthat/test-dataDI.R
@@ -120,9 +120,16 @@ test_that("DI_data_prepare works", {
   expect_error(DI_data_prepare(data = dummy, prop = 1:4, y = 5),
                regexp = "One or more rows have species proportions that do not sum to 1. This must be corrected prior to analysis")
   
+  # Ensure there are no negative proportions
+  dummy <- rbind(Switzerland[, 4:8], c(-0.1, 0.9, 0.1, 0.1, 0))
+  expect_error(DI_data_prepare(data = dummy, prop = 1:4, y = 5),
+               regexp = "One or more rows have species proportions with values less than 0 or greater than 1. This must be corrected prior to analysis")
+  
+  
   # Warning if any species proportions sum to 0
   dummy <- rbind(Switzerland[, 4:8], c(0, 0, 0, 0, 0))
   expect_warning(DI_data_prepare(data = dummy, prop = 1:4, y = 5),
                regexp = "One or more rows in your dataset have ALL proportions equal to zero")
   
+  
 })
diff --git a/tests/testthat/test-predictDI.R b/tests/testthat/test-predictDI.R
index 0c4117d..41d3a36 100644
--- a/tests/testthat/test-predictDI.R
+++ b/tests/testthat/test-predictDI.R
@@ -12,16 +12,20 @@ test_that("predict function works", {
   expect_equal(predict(mod), mod$fitted.values)
   
   # If newdata is specified
+  exp_pred <- c(13.2703349718639, 13.3043419781524, 17.4404149400718, 15.3497141686398)
+  names(exp_pred) <- as.character(1:4)
   expect_equal(predict(mod, newdata = Switzerland[1:4, ]),
-               c(13.2703349718639, 13.3043419781524, 17.4404149400718, 15.3497141686398))
+               exp_pred)
   
   # Ensure SE is accurate
+  exp_se <- c(0.453997370504706, 0.453997370504706, 0.453997370504706, 0.453997370504706)
+  names(exp_se) <- as.character(1:4)
   expect_equal(predict(mod, newdata = Switzerland[1:4, ], se.fit = TRUE)$se.fit,
-               c(0.457828639867846, 0.457828639867846, 0.457828639867846, 0.457828639867846))
+               exp_se)
   
   # If only one row is specified
   expect_equal(suppressWarnings(predict(mod, newdata = Switzerland[4, ])),
-               c(15.3497141686398))
+               c("4" = 15.3497141686398))
   
   # If not all factor variables present in model are present in newdata warning will be thrown
   expect_warning(predict(mod, newdata = Switzerland[1:4, -c(2)]),
@@ -60,11 +64,13 @@ test_that("predict function works", {
                   DImodel = "FULL",
                   data = Switzerland)
   expect_equal(predict(mod_basic, newdata = Switzerland[1:4, ]),
-               c(12.8725915311115, 12.8424977926434, 17.0259686206005, 14.5222165084828))
+               c("1" = 12.8725915311115, "2" = 12.8424977926434, 
+                 "3" = 17.0259686206005, "4" = 14.5222165084828))
   
   # Predict using type = "response"
   expect_equal(predict(mod_basic, newdata = Switzerland[1:4, ], type = "response"),
-               c(12.8725915311115, 12.8424977926434, 17.0259686206005, 14.5222165084828))
+               c("1" = 12.8725915311115, "2" = 12.8424977926434, 
+                 "3" = 17.0259686206005, "4" = 14.5222165084828))
   
   # Predict for when additional treatment is numeric
   swiss_num_treat <- Switzerland
@@ -155,7 +161,146 @@ test_that("Prediction works for all interaction structures", {
                ignore_attr = TRUE)
 })
 
+test_that("Prediction works with grouped ID effects", {
+  # Ensuring predictions work for all interaction structures
+  data("Switzerland")
+  swiss_num_treat <- Switzerland
+  swiss_num_treat$nitrogen <- as.numeric(swiss_num_treat$nitrogen)
+  
+  # E model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "E",
+                ID = c("I1","I1", "I1", "I1"),
+                theta = 0.5,
+                extra_formula = ~ p1:nitrogen, 
+                data = swiss_num_treat)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = swiss_num_treat[1:3, ]),
+               c(13.5089908, 15.7958394, 15.7958394),
+               ignore_attr = TRUE)
+  
+  # AV model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "AV",
+                ID = c("I1","I2", "I2", "I1"),
+                estimate_theta = TRUE,
+                extra_formula = ~ p1:nitrogen, 
+                data = swiss_num_treat)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]),
+               c(13.2287655, 15.5557315, 15.5557315, 15.4019903),
+               ignore_attr = TRUE)
+  
+  # ADD model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "ADD",
+                ID = c("I1","I2", "I3", "I4"),
+                estimate_theta = TRUE,
+                extra_formula = ~ I1:nitrogen, 
+                data = swiss_num_treat)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]),
+               c(13.485657, 13.449836, 17.641787, 15.021399),
+               ignore_attr = TRUE)
+  
+  # FG model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "FG",
+                FG = c("G", "G", "H", "H"),
+                ID = c("I1","I1", "I2", "I2"),
+                estimate_theta = TRUE,
+                extra_formula = ~ p1:nitrogen, 
+                data = Switzerland)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = Switzerland[1:6, ]),
+               c(13.5117704, 13.5316355, 16.3710272, 16.3710272, 16.7358524, 14.2027248),
+               ignore_attr = TRUE)
+  
+  # FULL model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "FULL",
+                ID = c("I3","I3", "I3", "I3"),
+                estimate_theta = TRUE,
+                extra_formula = ~ p1:nitrogen, 
+                data = swiss_num_treat)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = swiss_num_treat[1:4, ]),
+               c(13.5538097, 14.3899227, 17.4391819, 15.1266995),
+               ignore_attr = TRUE)
+  
+  # ID model
+  mod_int <- DI(y = "yield", prop = 4:7, treat = "nitrogen",
+                density = "density", DImodel = "ID",
+                ID = c("I3","I3", "I3", "I3"),
+                estimate_theta = TRUE, extra_formula = ~ p1:nitrogen, 
+                data = swiss_num_treat)
+  
+  # If not all factor variables present in model are present in newdata warning will be thrown
+  expect_equal(predict(mod_int, newdata = swiss_num_treat[1, ]),
+               c(12.3940615),
+               ignore_attr = TRUE)
+})
+
+# Testing CI and PI work
+
+test_that("contrasts function works", {
+  data("sim2")
+  
+  mod <- DI(y = "response", DImodel = "FULL",
+            data = sim2, prop = 3:6)
+  mod_lm <- lm(response ~ 0 + (p1 + p2 + p3 + p4)^2, data = sim2)
+  
+  # Base prediction in same
+  expect_equal(predict(mod), predict(mod_lm))
+  
+  # CI is same
+  expect_equal(predict(mod, interval = "conf"),
+               predict(mod_lm, interval = "conf"))
+  
+  # PI is same
+  expect_equal(predict(mod, interval = "pred", level = 0.9),
+               suppressWarnings(predict(mod_lm, interval = "pred", level = 0.9)))
+  
+  
+  # PI with se is same
+  DI_pred <- predict(mod, interval = "pred", se.fit = TRUE)
+  lm_pred <- suppressWarnings(predict(mod_lm, interval = "pred", se.fit = TRUE))
+  expect_equal(as.numeric(DI_pred$se.fit), lm_pred$se.fit)
+  expect_equal(DI_pred$fit, lm_pred$fit)
+  
+  # CI and PI with newdata work
+  DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "conf", se.fit = TRUE)
+  lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "conf", se.fit = TRUE))
+  expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit))
+  expect_equal(DI_pred$fit, lm_pred$fit)
+  
+  DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "pred", se.fit = TRUE)
+  lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "pred", se.fit = TRUE))
+  expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit))
+  expect_equal(DI_pred$fit, lm_pred$fit)
+  
+  DI_pred <- predict(mod, newdata = sim2[1:5, ], interval = "none", se.fit = TRUE)
+  lm_pred <- suppressWarnings(predict(mod_lm, newdata = sim2[1:5, ], interval = "none", se.fit = TRUE))
+  expect_equal(as.numeric(DI_pred$se.fit), as.numeric(lm_pred$se.fit))
+  expect_equal(DI_pred$fit, lm_pred$fit)
+  
+  # Ensure response type = "terms" works
+  expect_equal(as.vector(predict(mod_lm, type = "terms")),
+               as.vector(predict(mod, type = "terms")))
+  
+  DI_pred <- predict(mod, type = "terms", se.fit = TRUE)
+  lm_pred <- predict(mod_lm, type = "terms", se.fit = TRUE)
+  expect_equal(as.vector(DI_pred$fit), as.vector(lm_pred$fit))
+  expect_equal(as.vector(DI_pred$se.fit), as.vector(lm_pred$se.fit))
+
+})
 
+# Testing contrast function
 test_that("contrasts function works", {
   data("Switzerland")
   
@@ -171,11 +316,11 @@ test_that("contrasts function works", {
   
   # Mandatory to specify either constrast_vars or contrast
   expect_error(contrasts_DI(mod), 
-               regexp = "Provide either one of contrast_vars or constrast")
+               regexp = "Provide either one of `contrast_vars` or `constrast`")
   
   # Can't specify both contrast_vars and contrast
-  expect_error(contrasts_DI(mod, contrast_vars = 0, contrast = 0),
-               regexp = "Provide only one of contrast_vars or constrast")
+  expect_warning(contrasts_DI(mod, contrast_vars = 0, contrast = rep(0, 8)),
+                 regexp = "Provide only one of `contrast_vars` or `constrast`")
   
   # Ensure contrast vector is of appropriate type
   expect_error(contrasts_DI(mod, contrast = c("1", "-1", "0", "0")),
@@ -195,7 +340,7 @@ test_that("contrasts function works", {
   
   # Ensure contrast_vars are specified as a list
   expect_error(contrasts_DI(mod, contrast_vars = c("density" = c(-0.25, 0.25, 0.25, -0.25))),
-               regexp = "Contrast variables should be specified as a nested list")
+               regexp = "Contrast variables should be specified as a nested named list")
   
   # Ensure user specifies variables present in model in contrast_vars
   expect_error(contrasts_DI(mod, contrast_vars = list(p5 = c(0, 1))),
@@ -227,11 +372,11 @@ test_that("contrasts function works", {
                ignore_attr = TRUE)
   
   # Using contrast_vars
-  the_C <- matrix(c(1, 0, 0, 0, 0, 0, 0, 0), nrow = 1)
+  the_C <- matrix(c(0, 0, 0, 0, 0, 1, 0, 0), nrow = 1)
   colnames(the_C) <- names(mod$coefficients[1:8])
-  expect_equal(contrasts_DI(mod, contrast_vars = list("p1" = c(1))),
+  expect_equal(contrasts_DI(mod, contrast_vars = list("nitrogen" = list("50v150"  = c(1, -1)))),
                multcomp::glht(mod, linfct = the_C , 
                               coef = mod$coef[1:8], vcov = vcov(mod)),
                ignore_attr = TRUE)
   
-})
\ No newline at end of file
+})
diff --git a/vignettes/DImodels.Rmd b/vignettes/DImodels.Rmd
index c0b3dd8..8e31305 100644
--- a/vignettes/DImodels.Rmd
+++ b/vignettes/DImodels.Rmd
@@ -22,7 +22,19 @@ library(DImodels)
 
 The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species  $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit.   
 
-__Main changes in the package from version 1.0 to version 1.1 and newer__
+
+__Main changes in the package from version 1.2 to version 1.3__
+
+- The `DI` and `autoDI` functions now have an additional parameter called `ID` which enables the user to group the species identity effects (see examples below).
+- The `predict` function now has flexibility to calculate confidence and prediction intervals for the predicted values.
+
+__Main changes in the package from version 1.1 to version 1.2__
+
+- There are two new functions added to the package:
+     - `predict`: Make predictions from a fitted DI model without having to worry about theta, and the interaction terms in the data.
+     - `contrasts_DI`: Create contrasts for a DI model.
+     
+__Main changes in the package from version 1.0 to version 1.1__
 
 - `DI_data_prepare` is now superseded by `DI_data` (see examples below)
 
@@ -158,6 +170,36 @@ m1_theta <- update_DI(object = m1, estimate_theta = TRUE)
 coef(m1_theta)
 ```
 
+### Grouping the species identity effects in the model
+The species identity effects in a DI model can be grouped by specifying groups for each species using the `ID` argument.
+The `ID` argument functions similar to the `FG` argument and accepts a character list of same length as number of species in the model. The identity effects of species belonging in the same group will be grouped together.
+
+Grouping all identity effects into a single term
+```{r, echo = TRUE}
+m1_group <- update_DI(object = m1_theta, 
+                      ID = c("ID1", "ID1", "ID1", "ID1", "ID1",
+                             "ID1", "ID1", "ID1", "ID1"))
+coef(m1_group)
+```
+
+Grouping identity effects of specific species
+```{r, echo = TRUE}
+m1_group2 <- update_DI(object = m1_theta, 
+                       ID = c("ID1", "ID1", "ID1", 
+                              "ID2", "ID2", "ID2", 
+                              "ID3", "ID3", "ID3"))
+coef(m1_group2)
+```
+
+Note: Grouping ID effects will not have an effect on the calculation of the interaction effects, they would still be calculated by using all species.
+
+Read the documentation of `DI` and `autoDI` for more information and examples using the `ID` parameter.
+```{r, eval = FALSE}
+?DI
+?autoDI
+```
+
+
 ## Fitting customised models using the `DI` function
 
 There are two ways to fit customised models using `DI`; the first is by using the option `DImodel = ` in the `DI` function and adding the argument `extra_formula = ` to it, and the second is to use the `custom_formula` argument in the `DI` function. If species interaction variables (e.g., the FG interactions or the average pairwise interaction) are included in either `extra_formula` or `custom_formula`, they must first be created and included in the dataset. The function `DI_data` can be used to compute several types of species interaction variables. 
@@ -210,6 +252,83 @@ m3 <- DI(y = "response",
 summary(m3)
 ```
 
+## Making predictions and testing contrasts for DI models
+### Predictions using a DI model
+
+We can make predictions from a DI model just like any other regression model using the `predict` function. The user does not need to worry about adding any interaction terms or adjusting any columns if theta is not equal to 1. Only the species proportions along with any additional experimental structures is needed and all other terms in the model will be calculated for the user.  
+```{r, echo = TRUE}
+# Fit model
+m3 <- DI(y = "response", prop = 4:12, 
+         treat = "treatment", DImodel = "AV", 
+         extra_formula = ~ (AV) : treatment, data = sim3a)
+
+predict_data <- sim3[c(1, 79, 352), 3:12]
+# Only species proportions and treatment is needed
+print(predict_data)
+# Make prediction
+predict(m3, newdata = predict_data)
+```
+
+### Uncertainity around predictions
+```{r}
+# The interval and level parameters can be used to calculate the 
+# uncertainty around the predictions
+
+# Get confidence interval around prediction
+predict(m3, newdata = predict_data, interval = "confidence")
+
+# Get prediction interval around prediction
+predict(m3, newdata = predict_data, interval = "prediction")
+
+# The function returns a 95% interval by default, 
+# this can be changed using the level argument
+predict(m3, newdata = predict_data, 
+        interval = "prediction", level = 0.9)
+```
+
+### Contrasts for DI models
+The `contrasts_DI` function can be used to compare and formally test for a difference in performance of communities within the same as well as across different experimental structures
+
+Comparing the performance of the monocultures of different species at treatment A
+```{r, echo = TRUE}
+contr <- list("p1vsp2" = c(1, -1, 0, 0,  0,  0, 0, 0,  0, 0, 0, 0),
+              "p3vsp5" = c(0,  0, 1, 0, -1,  0, 0, 0,  0, 0, 0, 0),
+              "p4vsp6" = c(0,  0, 0, 1,  0, -1, 0, 0,  0, 0, 0, 0),
+              "p7vsp9" = c(0,  0, 0, 0,  0,  0, 1, 0, -1, 0, 0, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+summary(the_C)
+```
+
+Comparing across the two treatment levels for monoculture of species 1
+```{r, echo = TRUE}
+contr <- list("treatAvsB" = c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0))
+the_C <- contrasts_DI(m3, contrast = contr)
+summary(the_C)
+```
+
+Comparing between two species mixtures
+```{r, echo = TRUE}
+mixA <- c(0.25, 0,      0.25, 0,      0.25, 0,      0.25, 0, 0, 0, 0, 0)
+mixB <- c(0,    0.3333, 0,    0.3333, 0,    0.3333, 0,    0, 0, 0, 0, 0)
+
+# We have the proportions of the individual species in the mixtures, however
+# we still need to calculate the interaction effect for these communities
+contr_data <- data.frame(rbind(mixA, mixB))
+colnames(contr_data) <- names(coef(m3))
+
+# Adding the interaction effect of the two mixtures
+contr_data$AV <- DI_data_E_AV(prop = 1:9, data = contr_data)$AV
+print(contr_data)
+
+# We can now subtract the respective values in each column of the two 
+# mixtures and get our contrast
+my_contrast <- as.matrix(contr_data[1, ] - contr_data[2, ])
+rownames(my_contrast) <- "mixAvsB"
+
+the_C <- contrasts_DI(m3, contrast = my_contrast)
+summary(the_C)
+```
+
 
 ## References
 Connolly J, T Bell, T Bolger, C Brophy, T Carnus, JA Finn, L Kirwan, F Isbell, J Levine, A Lüscher, V Picasso, C Roscher, MT Sebastia, M Suter and A Weigelt (2013) An improved model to predict the effects of changing biodiversity levels on ecosystem function. Journal of Ecology, 101, 344-355.