From 8dc1c1f5dcdbcd44338c1cdbac5a221a29a6b6df Mon Sep 17 00:00:00 2001
From: Rishabh Vishwakarma <rishabhvish98@gmail.com>
Date: Wed, 24 Jan 2024 16:37:02 +0000
Subject: [PATCH] DImodels v1.3.1

---
 DESCRIPTION                     |  7 ++-
 NAMESPACE                       |  3 ++
 R/DI.R                          | 71 ++++++++++++++++++++++++------
 R/autoDI.R                      | 25 ++++++-----
 R/dataDI.R                      |  6 +--
 R/predictDI.R                   | 78 ++++++++++++++++++++++++++++++---
 README.Rmd                      |  6 +++
 README.md                       | 29 ++++--------
 man/describe_model.Rd           | 58 ++++++++++++++++++++++++
 man/sim4.Rd                     |  6 +--
 tests/testthat/test-DI.R        | 43 +++++++++++++++++-
 tests/testthat/test-predictDI.R | 36 ++++++++++++++-
 vignettes/DImodels.Rmd          |  5 +++
 13 files changed, 310 insertions(+), 63 deletions(-)
 create mode 100644 man/describe_model.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 9d4b69d..87de142 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,14 +1,13 @@
 Package: DImodels
 Type: Package
 Title: Diversity-Interactions (DI) Models
-Version: 1.3
-Date: 2023-05-20
+Version: 1.3.1
+Date: 2023-11-22
 Authors@R: c(person("Rafael", "de Andrade Moral", role = c("aut", "cre"), email = "rafael.deandrademoral@mu.ie"), person("John", "Connolly", role = "aut"), person("Rishabh", "Vishwakarma", role = "ctb"), person("Caroline", "Brophy", role = "aut"))
 Author: Rafael de Andrade Moral [aut, cre], John Connolly [aut], Rishabh Vishwakarma [ctb], Caroline Brophy [aut]
 Maintainer: Rafael de Andrade Moral <rafael.deandrademoral@mu.ie>
-Imports: methods, graphics, stats, utils, hnp, rootSolve, multcomp
+Imports: methods, graphics, stats, utils, ggplot2, hnp, rootSolve, multcomp
 Suggests: 
-    ggplot2,
     knitr,
     rmarkdown,
     markdown,
diff --git a/NAMESPACE b/NAMESPACE
index e0fb30a..f1c0700 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -4,9 +4,11 @@ import(stats)
 import(utils)
 import(hnp)
 import(rootSolve)
+importFrom(ggplot2, "fortify")
 
 
 export(autoDI)
+export(describe_model)
 export(DI_data, DI_data_prepare, DI_data_E_AV, DI_data_ADD, DI_data_FG, DI_data_fullpairwise)
 export(DI)
 export(theta_CI)
@@ -17,6 +19,7 @@ export(contrasts_DI)
 export(richness_vs_DI)
 
 S3method(predict, DI)
+S3method(fortify, DI)
 
 S3method(extract, DI)
 S3method(extract, autoDI)
diff --git a/R/DI.R b/R/DI.R
index 1e3fa02..ccfe226 100644
--- a/R/DI.R
+++ b/R/DI.R
@@ -11,6 +11,10 @@ DI <- function(y, prop, DImodel, custom_formula, data,
   
   # ensuring model tag is a string
   if(!missing(DImodel)) {
+    if(!DImodel %in% c("STR", "ID", "AV", "E", "FG", "ADD", "FULL")){
+      stop(paste0("`DImodel` should be one of ", dQuote("STR"),", ", dQuote("ID"), ", ", dQuote("AV"), ", ",
+                  dQuote("E"), ", ", dQuote("FG"), ", ", dQuote("ADD"), ", ", "or ", dQuote("FULL")))
+    }
     find_input <- try(DImodel, silent = TRUE)
     if(inherits(find_input, "try-error")) {
       DImodel <- paste0(substitute(DImodel))
@@ -41,21 +45,22 @@ DI <- function(y, prop, DImodel, custom_formula, data,
   family <- "gaussian"
   total <- NULL
   # family lock
-  if(!(family %in% c("gaussian","normal")))
-    stop("As of version ", packageVersion("DImodels"),
-         " DI models are implemented for family = 'gaussian' (= 'normal') only")
+  # if(!(family %in% c("gaussian","normal")))
+  #   stop("As of version ", packageVersion("DImodels"),
+  #        " DI models are implemented for family = 'gaussian' (= 'normal') only")
   # if model is not specified => it is a CUSTOM model
   if(missing(DImodel)) DImodel <- "CUSTOM"
   # default family set as normal
   if(missing(family) || family == "normal") family <- "gaussian"
+  # Commented for now as not used
   # checks for binomial
-  if(family %in% c("binomial","quasibinomial")) {
-    if(missing(total)) {
-      if(any(!(data[,y] %in% c(0,1)))) {
-        stop("total must be informed for non-binary discrete proportion data")
-      } else total <- 1
-    } else total <- data[,total]
-  } else total <- NULL
+  # if(family %in% c("binomial","quasibinomial")) {
+  #   if(missing(total)) {
+  #     if(any(!(data[,y] %in% c(0,1)))) {
+  #       stop("total must be informed for non-binary discrete proportion data")
+  #     } else total <- 1
+  #   } else total <- data[,total]
+  # } else total <- NULL
   
   # checks for extra_formula
   if(!missing(extra_formula) & !missing(custom_formula)) {
@@ -75,7 +80,8 @@ DI <- function(y, prop, DImodel, custom_formula, data,
   }
   if(treat_flag & length(grep("treat", DImodel)) == 1)
     stop("you must supply the treat argument to fit model ", DImodel)
-  
+  # ID is null if missing
+  if(missing(ID)) ID <- NULL
   # save FG names
   if(!missing(FG)) FGnames <- FG
   # NULL flag for FG variable
@@ -113,9 +119,9 @@ DI <- function(y, prop, DImodel, custom_formula, data,
     data_obj <- DI_data_prepare(y = y, block = block, density = density, prop = prop, treat = treat, ID = ID, FG = FG, data = data, theta = theta)
     newdata <- data_obj$newdata
     nSpecies <- data_obj$nSpecies
-    if(data_obj$P_int_flag & DImodel == "ADD") {
-      stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
-    }
+    # if(data_obj$P_int_flag & DImodel == "ADD") {
+    #   stop("you must have > 3 species to fit model ", DImodel, " (", namesub_DI(DImodel), ")")
+    # }
     ## fitting the DI model
   
     ## adding the DI_ prefix
@@ -134,6 +140,7 @@ DI <- function(y, prop, DImodel, custom_formula, data,
                              ID = data_obj$ID, treat = data_obj$treat, data = newdata, family = family, extra_formula = extra_formula,
                              estimate_theta = estimate_theta, nSpecies = nSpecies, total = total)
     }
+    
   }
   if(!estimate_theta) {
     the_DI_model <- model_fit$model
@@ -152,6 +159,14 @@ DI <- function(y, prop, DImodel, custom_formula, data,
   }
   # RV change: Adding original data to model object
   the_DI_model$original_data <- data
+  # Add attributes to help with post-processing of model
+  attr(the_DI_model, "DImodel") <- DImodel
+  attr(the_DI_model, "y") <- if(DImodel == "CUSTOM") NULL else data_obj$y
+  attr(the_DI_model, "prop") <- if(DImodel == "CUSTOM") NULL else data_obj$prop
+  attr(the_DI_model, "FG") <- if(DImodel == "CUSTOM") NULL else FG
+  attr(the_DI_model, "ID") <- if(DImodel == "CUSTOM") NULL else data_obj$ID_map
+  attr(the_DI_model, "theta_flag") <- if(DImodel == "CUSTOM") NULL else estimate_theta
+  attr(the_DI_model, "theta_val") <- if(DImodel == "CUSTOM") NULL else the_DI_model$coef["theta"]
   #the_DI_model$aic <- AIC2(the_DI_model)
   class(the_DI_model) <- c("DI", "glm", "lm")
   return(the_DI_model)
@@ -796,6 +811,34 @@ DI_FULL_treat <- function(y, block, density, prop, treat, FG, ID, data, family,
   return(modlist)
 }
 
+describe_model <- function(model){
+  if(!inherits(model, "DI")){
+    stop("`model` should be regression object of class <DI> fit using the `DI()` or `autoDI()` functions.")
+  }
+  
+  DImodel <- attr(model, "DImodel")
+  
+  if(DImodel == "CUSTOM"){
+     description <- c("This is a custom DI model.")
+  } else if(DImodel == "STR"){
+    model$model
+    description <- paste0("This model doesn't have any species identity or interaction effects and only consists of experimental structures.")
+  } else {
+    choices <- c("ID" = " there being no interaction terms in the model.",
+                 "AV" = " the Average interaction structure.",
+                 "E" = " the Evenness interaction structure.",
+                 "FG" = paste0(" the Functional group interaction structure with FG values of (", paste0(attr(model, "FG"), collapse = ", "), ")."),
+                 "ADD" = " the Additive species interaction structure.",
+                 "FULL" = " the Full pairwise interaction structure.")
+    int_str <- choices[names(choices) == DImodel]
+    ID_text <- if (all(paste0(attr(model, "prop"), "_ID") == (attr(model, "ID")))) "" else paste0("The species identity effects are grouped as (", paste0(attr(model, "ID"), collapse = ", "),").")
+    theta_text <- if(!is.na(attr(model, "theta_val"))) paste0(" The interaction terms have an associated theta (exponent on the interactions) value of '", round(attr(model, "theta_val"), 2), "'.") else ""
+    description <- paste0("This model has ", length(attr(model, "prop")), " species with", int_str, theta_text,
+                          ID_text)
+  }
+  return(description)
+}
+
 #################################
 ## S3 methods for DI class ##
 #################################
diff --git a/R/autoDI.R b/R/autoDI.R
index ddd3137..e77d5fd 100644
--- a/R/autoDI.R
+++ b/R/autoDI.R
@@ -24,9 +24,9 @@ autoDI <- function(y, prop, data, block, density, treat, ID, FG = NULL,
   family <- "gaussian"
   total <- NULL
   # family lock
-  if(!(family %in% c("gaussian","normal")))
-    stop("As of version ", packageVersion("DImodels"),
-         " DI models are implemented for family = 'gaussian' (= 'normal') only")
+  # if(!(family %in% c("gaussian","normal")))
+  #   stop("As of version ", packageVersion("DImodels"),
+  #        " DI models are implemented for family = 'gaussian' (= 'normal') only")
   
   # set theta flag
   estimate_theta <- TRUE
@@ -35,16 +35,17 @@ autoDI <- function(y, prop, data, block, density, treat, ID, FG = NULL,
   # default family set as normal
   if(missing(family) || family == "normal") family <- "gaussian"
   # checks for binomial
-  if(family %in% c("binomial","quasibinomial")) {
-    if(missing(total)) {
-      if(any(!(data[,y] %in% c(0,1)))) {
-        stop("total must be informed for non-binary discrete proportion data")
-      } else total <- 1
-    } else total <- data[,total]
-  } else total <- NULL
+  # Commented for now since not used 
+  # if(family %in% c("binomial","quasibinomial")) {
+  #   if(missing(total)) {
+  #     if(any(!(data[,y] %in% c(0,1)))) {
+  #       stop("total must be informed for non-binary discrete proportion data")
+  #     } else total <- 1
+  #   } else total <- data[,total]
+  # } else total <- NULL
   # checks for quasi and information criteria
-  if(family %in% c("quasipoisson","quasibinomial") & selection %in% c("AIC","AICc","BIC","BICc"))
-    stop("cannot compute information criteria for quasi models, use selection = 'Ftest'")
+  # if(family %in% c("quasipoisson","quasibinomial") & selection %in% c("AIC","AICc","BIC","BICc"))
+  #   stop("cannot compute information criteria for quasi models, use selection = 'Ftest'")
   # flags if block/density and/or treat are missing
   if(missing(block)) block <- NA
   if(missing(density)) density <- NA
diff --git a/R/dataDI.R b/R/dataDI.R
index 9464774..a3bb267 100644
--- a/R/dataDI.R
+++ b/R/dataDI.R
@@ -1,4 +1,4 @@
-DI_data_prepare <- function(y, block, density, prop, treat, ID, FG = NULL, data, theta = 1) {
+DI_data_prepare <- function(y, block, density, prop, treat, ID = NULL, FG = NULL, data, theta = 1) {
   if(any(is.na(data))) {
     stop("The dataset contains missing values. Please remove them prior to analysis.") 
   }
@@ -101,7 +101,7 @@ DI_data_prepare <- function(y, block, density, prop, treat, ID, FG = NULL, data,
   
   ###### RV change########
   # Grouping ID effects
-  if(missing(ID)){
+  if(is.null(ID)){
     ID <- paste0(prop, "_ID")
   } 
   ID_name_check(ID = ID, prop = prop, FG = FG_names)
@@ -111,7 +111,7 @@ DI_data_prepare <- function(y, block, density, prop, treat, ID, FG = NULL, data,
   
   ## return object
   return(list("newdata" = newdata, "y" = y, "block" = block, density = density,
-              "prop" = prop, "ID" = unique(ID), "treat" = treat, "FG" = FG,
+              "prop" = prop, "ID" = unique(ID), ID_map = ID, "treat" = treat, "FG" = FG,
               "P_int_flag" = P_int_flag, "even_flag" = even_flag, "nSpecies" = length(prop)))
 }
 
diff --git a/R/predictDI.R b/R/predictDI.R
index e0d3f15..d3e123a 100644
--- a/R/predictDI.R
+++ b/R/predictDI.R
@@ -136,9 +136,9 @@ predict.DI <- function (object, newdata, se.fit = FALSE,
         
         if (is.numeric(original_data[, covariate])){
           if ( !(covariate %in% colnames(updated_newdata))){
-            warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
-                           '\' = ' , 0))
-            updated_newdata[, covariate] <- 0
+            warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating the prediction for the median value of (', median(original_data[, covariate]),') of \'', covariate,
+                           '\' from the training data.'))
+            updated_newdata[, covariate] <- median(original_data[, covariate])
           }
         } else {
           # Levels of factor covariate in original data
@@ -152,7 +152,7 @@ predict.DI <- function (object, newdata, se.fit = FALSE,
           
           # If levels of covariate in newdata not matching ones in original data, stop prediction
           if (! (all(unique(updated_newdata[, covariate]) %in% covariate_levels, na.rm = TRUE))){
-            stop(paste0('Values for ', covariate,' given were not present in raw data used for fitting. Predictions can\'t be made for these values.'))
+            stop(paste0('Values for ', covariate,' given were not present in training data used for fitting. Predictions can\'t be made for these values.'))
           }
           
           # If covariate is supplied as character or numeric, converting to factor
@@ -169,9 +169,39 @@ predict.DI <- function (object, newdata, se.fit = FALSE,
     extra_formula <- eval(object$DIcall$extra_formula)
     
     if (! is.null(extra_formula)){
+      # If any column from extra_formula is missing in updated_newdata
+      e <- try(model.frame(terms(extra_formula), updated_newdata), silent = TRUE)
+      if(inherits(e, "try-error")){
+        extra_vars <- model.frame(terms(extra_formula), original_data)
+        for (covariate in colnames(extra_vars)){
+          if(!covariate %in% colnames(updated_newdata)){
+            if(is.numeric(extra_vars[, covariate])){
+              warning(paste0(names(extra_vars[, covariate]), ' not supplied in newdata. Calculating the prediction for the median value (', median(extra_vars[, covariate]),') of \'', covariate,
+                             '\' from the training data.'))
+              updated_newdata[, covariate] <- median(extra_vars[, covariate])
+            } else {
+              # Levels of factor covariate in original data
+              covariate_levels <- as.factor(unique(extra_vars[, covariate]))
+              # If covariate isn't present in newdata, estimating for base level
+              if ( !(covariate %in% colnames(updated_newdata))){
+                warning(paste0(names(structures[structures == covariate]), ' not supplied in newdata. Calculating for \'', covariate,
+                               '\' = ' , levels(covariate_levels)[1]))
+                updated_newdata[, covariate] <- levels(covariate_levels)[1]
+              }
+              
+              # If covariate is supplied as character or numeric, converting to factor
+              if (!is.factor(updated_newdata[, covariate])){
+                updated_newdata[, covariate] <- factor(updated_newdata[, covariate],
+                                                       levels = levels(covariate_levels))
+              }
+            }
+          }
+        }
+      }
       
       extra_data <- model.frame(terms(extra_formula), updated_newdata)
       
+      
       # Matching factors in extra_formula to ones in original_data
       og_factors <- original_data[, sapply(original_data, function(x){is.factor(x) | is.character(x)})]
       common_factors <- intersect(colnames(extra_data), colnames(og_factors))
@@ -284,14 +314,29 @@ predict.DI <- function (object, newdata, se.fit = FALSE,
     #   ret_obj <- y_hat
     # }
     
+    # FG model was failing to give predictions this fixes it
+    if(DImodel_tag == "FG" && is.null(extra_formula)){
+      if(only_one_row){
+        X_new <- rbind(X_new, X_new)
+        X_new$FG_ <- extra_variables
+        X_new <- X_new[1, ]
+      } else {
+        X_new$FG_ <- extra_variables  
+      }
+    }
+    
+    # Prediction gets messy if we have extra_formula 
+    # So manaully making prediction using predict.lm
     if (! is.null(extra_formula)){
       # Terms <- delete.response(terms(glm_fit))
+      # predict.lm because this is what is called by predict.glm internally
       ret_obj <- suppressWarnings(predict.lm(glm_fit, newdata = updated_newdata,
                                              se.fit = ifelse(interval != "none", TRUE, se.fit), 
                                              type = ifelse(type == "link", "response", type), ...))
     } else {
       # Terms <- delete.response(terms(formula(fmla)))
-      ret_obj <- suppressWarnings(predict.glm(object, newdata = X_new, 
+      
+      ret_obj <- suppressWarnings(predict.glm(object, newdata = if(DImodel_tag == "STR") updated_newdata else X_new, 
                                               se.fit = ifelse(interval != "none", TRUE, se.fit),
                                               type = type, ...))
     }
@@ -328,6 +373,11 @@ predict.DI <- function (object, newdata, se.fit = FALSE,
     }
   }
   
+  # Sometimes the prediction could be rank-deficient and lm adds this attribute
+  # which could be scary. So drop it
+  if(!is.null(attr(ret_obj, "non-estim"))){
+    attr(ret_obj, "non-estim") <- NULL
+  }
   return(ret_obj)
 }
 
@@ -445,4 +495,22 @@ contrasts_DI <- function(object, contrast, contrast_vars, ...){
 
 is_near <- function (x, y, tol = .Machine$double.eps^0.5) {
   abs(x - y) < tol
+}
+
+# Fortify method for model diagnostics
+fortify.DI <- function(model, data = model$model, ...){
+  # Add proportions to data
+  prop_idx <- eval(model$DIcall$prop)
+  prop <- model$data[, prop_idx]
+  data <- cbind(data, prop)
+    
+  # Add other statistics
+  infl <- stats::influence(model, do.coef = FALSE)
+  data$.hat <- infl$hat
+  data$.sigma <- infl$sigma
+  data$.cooksd <- stats::cooks.distance(model, infl)
+  data$.fitted <- stats::predict(model)
+  data$.resid <- stats::resid(model)
+  data$.stdresid <- stats::rstandard(model, infl)
+  return(data)
 }
\ No newline at end of file
diff --git a/README.Rmd b/README.Rmd
index 44fe864..85c482b 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -26,6 +26,12 @@ knitr::opts_chunk$set(
 The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species  $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit.   
 
 
+__Main changes in the package from version 1.3 to version 1.3.1__
+
+- A `forify` function method has been added to supplement the data fitted to a linear model with model fit statistics.
+- A `describe_model` function is added which can be used to get a short text summary of any DI model.
+- Meta-data about a DI model can be accessed via the `attributes` function. 
+
 __Main changes in the package from version 1.2 to version 1.3__
 
 - The `DI` and `autoDI` functions now have an additional parameter called `ID` which enables the user to group the species identity effects (see examples below).
diff --git a/README.md b/README.md
index 47414a0..a61d99a 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,15 @@ species $p_1$, $p_2$, …, $p_S$ from a point in time prior to the
 recording of the response. The proportions sum to 1 for each
 experimental unit.
 
+**Main changes in the package from version 1.3 to version 1.3.1**
+
+- A `forify` function method has been added to supplement the data
+  fitted to a linear model with model fit statistics.
+- A `describe_model` function is added which can be used to get a short
+  text summary of any DI model.
+- Meta-data about a DI model can be accessed via the `attributes`
+  function.
+
 **Main changes in the package from version 1.2 to version 1.3**
 
 - The `DI` and `autoDI` functions now have an additional parameter
@@ -292,10 +301,6 @@ summary(auto1)
 #> Call:
 #> glm(formula = new_fmla, family = family, data = new_data)
 #> 
-#> Deviance Residuals: 
-#>     Min       1Q   Median       3Q      Max  
-#> -3.8425  -0.8141   0.0509   0.8048   3.5657  
-#> 
 #> Coefficients:
 #>                Estimate Std. Error t value Pr(>|t|)    
 #> p1_ID            9.7497     0.3666  26.595  < 2e-16 ***
@@ -359,10 +364,6 @@ summary(m1)
 #> Call:
 #> glm(formula = new_fmla, family = family, data = new_data)
 #> 
-#> Deviance Residuals: 
-#>     Min       1Q   Median       3Q      Max  
-#> -3.8425  -0.8141   0.0509   0.8048   3.5657  
-#> 
 #> Coefficients:
 #>                Estimate Std. Error t value Pr(>|t|)    
 #> p1_ID            9.7497     0.3666  26.595  < 2e-16 ***
@@ -492,10 +493,6 @@ summary(m2)
 #> Call:
 #> glm(formula = new_fmla, family = family, data = new_data)
 #> 
-#> Deviance Residuals: 
-#>     Min       1Q   Median       3Q      Max  
-#> -3.6892  -0.7859   0.0436   0.7781   3.6227  
-#> 
 #> Coefficients:
 #>                  Estimate Std. Error t value Pr(>|t|)    
 #> p1_ID           10.018491   0.466552  21.473  < 2e-16 ***
@@ -556,10 +553,6 @@ summary(m3)
 #> Call:
 #> glm(formula = new_fmla, family = family, data = new_data)
 #> 
-#> Deviance Residuals: 
-#>     Min       1Q   Median       3Q      Max  
-#> -3.8251  -0.8208   0.0554   0.7982   3.4218  
-#> 
 #> Coefficients:
 #>                          Estimate Std. Error t value Pr(>|t|)    
 #> p1_ID                     9.68668    0.40000  24.217  < 2e-16 ***
@@ -618,10 +611,6 @@ summary(m3)
 #> Call:
 #> glm(formula = custom_formula, family = family, data = data)
 #> 
-#> Deviance Residuals: 
-#>     Min       1Q   Median       3Q      Max  
-#> -4.0272  -0.7831   0.0404   0.7570   3.7016  
-#> 
 #> Coefficients:
 #>             Estimate Std. Error t value Pr(>|t|)    
 #> p1           10.3417     0.3138  32.957  < 2e-16 ***
diff --git a/man/describe_model.Rd b/man/describe_model.Rd
new file mode 100644
index 0000000..8959420
--- /dev/null
+++ b/man/describe_model.Rd
@@ -0,0 +1,58 @@
+\name{describe_model}
+\alias{describe_model}
+\encoding{UTF-8}
+%- Also NEED an '\alias' for EACH other topic documented here.
+\title{
+%%  ~~function to do ... ~~
+Describe DI models
+}
+\description{
+%%  ~~ A concise (1-5 lines) description of what the function does. ~~
+This function accepts a DImodel (i.e., regression models fit using the \code{\link{DI}} or \code{\link{autoDI}} functions) object and returns a short text summary describing the model. 
+}
+\usage{
+describe_model(model)
+}
+%- maybe also 'usage' for other objects documented here.
+\arguments{
+  \item{model}{A \code{\link{DI}} or \code{\link{autoDI}} regression model object.}
+}
+
+\value{
+A short text describing the supplied DImodel object.
+}
+\references{
+%% ~put references to the literature/web site here ~
+Kirwan L, J Connolly, JA Finn, C Brophy, A \enc{Lüscher}{}, D Nyfeler and MT Sebastia (2009) Diversity-interaction modelling - estimating contributions of species identities and interactions to ecosystem function. Ecology, 90, 2032-2038. 
+}
+\author{
+Rafael A. Moral, John Connolly, Rishabh Vishwakarma and Caroline Brophy
+}
+
+%% ~Make other sections like Warning with \section{Warning }{....} ~
+
+\seealso{
+\code{\link{DI}}
+\code{\link{autoDI}}
+%% ~~objects to See Also as \code{\link{help}}, ~~~
+}
+\examples{
+
+## Load the  data
+   data(sim2)
+## Fit model
+   mod_FG <- DI(y = "response", FG = c("G", "G", "L", "L"), 
+                prop = 3:6, data = sim2, DImodel = "FG")
+## Describe model
+   describe_model(mod_FG)
+
+
+   mod_FULL <- DI(y = "response", estimate_theta = TRUE, 
+                  prop = 3:6, data = sim2, DImodel = "FULL")
+   describe_model(mod_FULL)
+
+   mod_AV <- DI(y = "response", ID = c("ID1", "ID1", "ID2", "ID2"),
+                estimate_theta = TRUE, 
+                prop = 3:6, data = sim2, DImodel = "AV")
+   describe_model(mod_AV)
+}
diff --git a/man/sim4.Rd b/man/sim4.Rd
index a72de78..81970ca 100644
--- a/man/sim4.Rd
+++ b/man/sim4.Rd
@@ -141,15 +141,15 @@ Kirwan L, J Connolly, JA Finn, C Brophy, A \enc{Lüscher}{}, D Nyfeler and MT Se
 
 ## Fit the functional group model using DI and the FG tag
   m1 <- DI(y = "response", prop = 3:8, treat = "treatment", 
-           FG = c("FG1","FG1","FG2","FG2","FG3","FG3"), DImodel = FG, data = sim4)
+           FG = c("FG1","FG1","FG2","FG2","FG3","FG3"), DImodel = "FG", data = sim4)
   summary(m1)
 
 ## Fit the additive species model using DI and the ADD tag
-  m2 <- DI(y = "response", prop = 3:8, treat = "treatment", DImodel = ADD, data = sim4)
+  m2 <- DI(y = "response", prop = 3:8, treat = "treatment", DImodel = "ADD", data = sim4)
   summary(m2)
 
 ## Fit the full pairwise model using DI and the FULL tag
-  m3 <- DI(y = "response", prop = 3:8, treat = "treatment", DImodel = FULL, data = sim4)
+  m3 <- DI(y = "response", prop = 3:8, treat = "treatment", DImodel = "FULL", data = sim4)
   summary(m3)
   plot(m3)
   
diff --git a/tests/testthat/test-DI.R b/tests/testthat/test-DI.R
index 335a654..111d91d 100644
--- a/tests/testthat/test-DI.R
+++ b/tests/testthat/test-DI.R
@@ -274,6 +274,10 @@ test_that("DI fails with appropriate message", {
   # To suppress rounding error warning
   sim0$p3 <- 1- sim0$p1 - sim0$p2
   
+  # DImodel should be proper
+  expect_error(DI(prop = 3:5, data = sim0, DImodel = "AV1"),
+               regexp = "should be one of")
+  
   # y can't be missing
   expect_error(DI(prop = 3:5, data = sim0),
                regexp = "You must supply a response variable name or column index through the argument 'y'")
@@ -315,7 +319,7 @@ test_that("DI fails with appropriate message", {
   
   ## Warnings
   # Don't specify theta and estimate_theta, estimate_theta takes precedence 
-  expect_warning(DI(y = "yield", prop = 4:7, DImodel = AV,
+  expect_warning(DI(y = "yield", prop = 4:7, DImodel = "AV",
                   data = Switzerland, theta = 0.5, estimate_theta = TRUE),
                 regexp = "By specifying estimate_theta as TRUE, DI is overriding the specified theta value")
   
@@ -367,3 +371,40 @@ test_that("Interior functions in DI_internal work", {
                c(NA, 37.8141225527972, 11.5427716510587))
 })
 
+# Test describe_model
+test_that("describe_model works", {
+  data(sim2)
+  ## Fit model
+  mod_FG <- DI(y = "response", FG = c("G", "G", "L", "L"), 
+               prop = 3:6, data = sim2, DImodel = "FG")
+  ## Describe model
+  expect_equal(describe_model(mod_FG),
+               "This model has 4 species with the Functional group interaction structure with FG values of (G, G, L, L).")
+  
+  
+  mod_FULL <- DI(y = "response", estimate_theta = TRUE, 
+                 prop = 3:6, data = sim2, DImodel = "FULL")
+  expect_equal(describe_model(mod_FULL),
+               "This model has 4 species with the Full pairwise interaction structure. The interaction terms have an associated theta (exponent on the interactions) value of '0.45'.")
+  
+  mod_AV <- DI(y = "response", ID = c("ID1", "ID1", "ID2", "ID2"),
+               estimate_theta = TRUE, 
+               prop = 3:6, data = sim2, DImodel = "AV")
+  expect_equal(describe_model(mod_AV),
+               "This model has 4 species with the Average interaction structure. The interaction terms have an associated theta (exponent on the interactions) value of '0.45'.The species identity effects are grouped as (ID1, ID1, ID2, ID2).")
+  
+  mod_STR <- DI(y = "response", FG = c("G", "G", "L", "L"), 
+               prop = 3:6, data = sim2, DImodel = "STR")
+  ## Describe model
+  expect_equal(describe_model(mod_STR),
+               "This model doesn't have any species identity or interaction effects and only consists of experimental structures.")
+  
+  mod_CUST <- DI(custom_formula = response ~ block, data = sim2)
+  ## Describe model
+  expect_equal(describe_model(mod_CUST),
+               "This is a custom DI model.")
+  
+  # Proper error is thrown
+  expect_error(describe_model(lm(1 ~ 1)),
+               regexp = "`model` should be regression object of class <DI>")
+})
diff --git a/tests/testthat/test-predictDI.R b/tests/testthat/test-predictDI.R
index 41d3a36..e1de1c5 100644
--- a/tests/testthat/test-predictDI.R
+++ b/tests/testthat/test-predictDI.R
@@ -84,7 +84,7 @@ test_that("predict function works", {
   
   # If not numeric variables present in model are present in newdata warning will be thrown
   expect_warning(predict(mod_num, newdata = swiss_num_treat[1:4, -c(2)]),
-                 regexp = "not supplied in newdata. Calculating for")
+                 regexp = "not supplied in newdata. Calculating the prediction")
 })
 
 test_that("Prediction works for all interaction structures", {
@@ -246,6 +246,28 @@ test_that("Prediction works with grouped ID effects", {
                ignore_attr = TRUE)
 })
 
+# Test that predict function works with extra_formula and m
+# missing variables
+test_that("Prediction works with grouped ID effects", {
+  # Ensuring predictions work for all interaction structures
+  data("Switzerland")
+  mod_FG <- DI(y = "yield", DImodel = "FG", FG = c("G", "G", "H", "H"),
+               data = Switzerland, prop = 4:7,
+               extra_formula = ~nitrogen*density)
+  expect_equal(suppressWarnings(predict(mod_FG, newdata = Switzerland[1, 4:7])),
+               c(11.939679),
+               ignore_attr = TRUE)
+  
+  swiss_num_treat <- Switzerland
+  swiss_num_treat$nitrogen <- as.numeric(swiss_num_treat$nitrogen)
+  mod_FG <- DI(y = "yield", DImodel = "FG", FG = c("G", "G", "H", "H"),
+               data = swiss_num_treat, prop = 4:7,
+               extra_formula = ~nitrogen*density)
+  expect_equal(suppressWarnings(predict(mod_FG, newdata = swiss_num_treat[1:2, 4:7])),
+               c(13.036867, 13.070874),
+               ignore_attr = TRUE)
+})
+  
 # Testing CI and PI work
 
 test_that("contrasts function works", {
@@ -380,3 +402,15 @@ test_that("contrasts function works", {
                ignore_attr = TRUE)
   
 })
+
+# test fortify
+# Test describe_model
+test_that("fortify.DI works", {
+  data(sim2)
+  ## Fit model
+  mod_FG <- DI(y = "response", FG = c("G", "G", "L", "L"), 
+               prop = 3:6, data = sim2, DImodel = "FG")
+  ## Describe model
+  expect_equal(fortify(mod_FG),
+               cbind(ggplot2:::fortify.lm(mod_FG), sim2[, 3:6])[, c(1:6, 13:16, 7:12)])
+})
\ No newline at end of file
diff --git a/vignettes/DImodels.Rmd b/vignettes/DImodels.Rmd
index 8e31305..83a4749 100644
--- a/vignettes/DImodels.Rmd
+++ b/vignettes/DImodels.Rmd
@@ -22,6 +22,11 @@ library(DImodels)
 
 The `DImodels` package is designed to make fitting Diversity-Interactions models easier. Diversity-Interactions (DI) models (Kirwan et al 2009) are a set of tools for analysing and interpreting data from experiments that explore the effects of species diversity (from a pool of *S* species) on community-level responses. Data suitable for DI models will include (at least) for each experimental unit: a response recorded at a point in time, and a set of proportions of *S* species  $p_1$, $p_2$, ..., $p_S$ from a point in time prior to the recording of the response. The proportions sum to 1 for each experimental unit.   
 
+__Main changes in the package from version 1.3 to version 1.3.1__
+
+- A `forify` function method has been added to supplement the data fitted to a linear model with model fit statistics.
+- A `describe_model` function is added which can be used to get a short text summary of any DI model.
+- Meta-data about a DI model can be accessed via the `attributes` function. 
 
 __Main changes in the package from version 1.2 to version 1.3__