R/optim-fxns.R

#' Non-linear fits of different models to the decomposition trajectory
#' of one cohort (as in typical litterbag studies) data.
#' Models range from very simple (and easy to fit with limited data)
#' to more complex.
#' 
#' @title Fit different models to single cohort decomposition data
#' 
#' @param time time since decomposition began, that is, ti-t0
#' 
#' @param  mass.remaining proportional mass loss, that is, mi/m0
#' 
#' @param model there are five models currently implemented (see below)
#' 
#' @param iters Number of random starts for the fitting.  Use higher numbers for models with larger numbers of parameters and for models that inherently tend to be less identifiable.
#' 
#' @param upper,lower \bold{Optional} user specified values for the upper and lower bounds used by
#' \code{optim} in the fitting procedure. Use with care, only minimal sanity checking is currently
#'  implemented.
#' 
#' @param ...  Additional arguments passed to \code{\link{optim}}
#' 
#' @return returns a litfit object with the following elements: 
#'  \itemize{
#' \item{\bold{optimFit:} a list generated by the \code{\link{optim}} function}
#' \item{\bold{logLik:} the log-likelihood of the model}
#' \item{\bold{time:} vector of time (same as input)}
#' \item{\bold{mass:} vector os mass remaining (same as input)}
#' \item{\bold{predicted:} predicted values from the model for each of the points within time}
#' \item{\bold{model:} name of the model }
#' \item{\bold{nparams:} number of fit parameters in the model}
#' \item{\bold{AIC:} AIC of the model fit}
#' \item{\bold{AICc:} AICc of the model fit}
#' \item{\bold{BIC:} BIC of the model fit}
#' \item{and some other potentially useful things}
#' }
#' 
#' @import stats
#' @export
#' 
#' @details the model likelihood is maximized using methods available in \code{\link[stats]{optim}}.  Optimization methods to be used within optim can be specified through the control object (i.e., control$method).  The default method is L-BFGS-B with bounds specific to each model. Each model 
#' \itemize{
## ` \item{\bold{neg.exp} The classic negative exponential model--one parameter
## (Olsen 1963)}
#' \item{\bold{weibull} The Weibull residence time model--two parameters (Frechet 1927)}
#' \item{\bold{discrete.parallel} Two pools in parallel with a term for the fraction of initial mass in each pool--three parameter (Manzoni et al. 2012)}
#' \item{\bold{discrete.series} A three parameter model in which there is the possibility of two sequential pools (Manzoni et al. 2012)}
#' \item{\bold{cont.quality} (Ågren and Bosatta 1996, see also Manzoni et al. 2012)}
#' }
#' \emph{Warning}: difficulty in finding the optimal solution is determined by an interaction between the nature and complexity of the 
#' likelihood space (which is both data- and model-dependent) as well as the optimization methods.  There is can never be a guarantee that the optimal solution is found, but using many random starting points will increase these odds.  It should be noted that there is significant variation among models in identifiability, with some models inherently less identifiable likely due to a tendency to form for flat ridges in likelihood space.  The confidence in the fit should be very low in these cases (see Cornwell and Weedon 2013).   A number of random starting points are used in optimization and are given through the iters.  The function checks whether the the top 10 optimizations have converged on the same likelihood, and if they have not this function will return a warning.  
#'
#' @author Will Cornwell and James Weedon
#' 
#' @references \itemize{ \item{Ågren, G. and Bosatta, E. (1996) Quality: a bridge between theory and experiment in soil organic matter studies. Oikos, 76, 522–528.}
#' \item{Cornwell, W. K., and J. T. Weedon. (2013).  Decomposition trajectories of diverse litter types: a model selection analysis. Methods in Ecology and Evolution.}
#' \item{Frechet, M. (1927) Sur la loi de probabilite de lecart maximum. Ann de la Soc polonaise de Math, 6, 93–116.}
#' \item{Manzoni, S., Pineiro, G., Jackson, R. B., Jobbagy, E. G., Kim, J. H., & Porporato, A. (2012). Analytical models of soil and litter decomposition: Solutions for mass loss and time-dependent decay rates. Soil Biology and Biochemistry, 50, 66-76.}
#' \item{Olson, J.S. (1963) Energy storage and the balance of producers and decomposers in ecological systems. Ecology, 44, 322–331.}}
#' 
#' @seealso \code{\link{optim}},
#' \code{\link{steady_state}},
#' \code{\link{plot.litfit}}
#' 
#' @examples
#' data(pineneedles)
#' fit<-fit_litter(time=pineneedles$Year,mass.remaining=pineneedles$Mass.remaining,
#' model='neg.exp',iters=1000)
fit_litter <- function(time, mass.remaining, model = c("neg.exp", "weibull", "discrete.parallel", 
    "discrete.series", "cont.quality", "neg.exp.limit"), iters = 500, 
    upper = NULL, lower = NULL, ...) {

    if (length(time) != length(mass.remaining)) {
        stop("Time vector must have the same length and correspond to the mass remaining vector")
    }
    if (min(mass.remaining) > 1 | max(mass.remaining) > 2) {
        stop("Check mass remaining vector; must be in proportional mass remaining.")
    }
    
    if (min(time) < 0) {
        stop("Check time vector; negative values detected.")
    }
    
    # test that if user supplied bounds, there are enough of them
    
    if (!is.null(upper)) {
        if (length(upper) != length(eval(formals(get(model))$upper))) {
            stop(paste("Incorrect number of upper bounds supplied, should be:", length(eval(formals(get(model))$upper))))
        }
        
    }
    if (!is.null(lower)) {
        if (length(lower) != length(eval(formals(get(model))$lower))) {
            stop(paste("Incorrect number of lower bounds supplied, should be:", length(eval(formals(get(model))$lower))))
        }
    }

    fit <- multioptimFit(time, mass.remaining, model, iters = iters, upper = upper, 
        lower = lower, ...)
    if (is.null(fit)) 
        return(NULL)
    predicted_vals <- do.call(model, c(list(time), as.list(fit$par)))
    LL <- calculateLL(predicted_vals, mass.remaining, sqrt(fit$value/length(time)))
    nps <- length(fit$par)
    model.AIC <- calculateAIC(LL, nps)
    model.AICc <- calculateAICc(LL, nps, length(mass.remaining))
    model.BIC <- calculateBIC(LL, nps, length(mass.remaining))
    fit.out <- list(optimFit = fit, logLik = LL, fitAIC = model.AIC, fitAICc = model.AICc, 
        fitBIC = model.BIC, time = time, mass = mass.remaining, predicted = predicted_vals, 
        model = model, nparams = nps)
    class(fit.out) <- "litfit"
    
    # create objects for testing if best fits are on boundary values
    test.lower <- if(is.null(lower)) eval(formals(get(model))$lower) else lower
    test.upper <- if(is.null(upper)) eval(formals(get(model))$upper) else upper
    
    if (any(fit.out$optimFit$par == test.lower | fit.out$optimFit$par == test.upper)) {
        warning("one or more parameters fit on the boundary, check fit closely")
    }
    return(fit.out)
}