increase robustness to not-converging glms
paulstaab committed Dec 7, 2015
1 parent c611c96 commit 66f0604
Showing 8 changed files with 29 additions and 12 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,5 +1,5 @@
Package: jaatha
Version: 3.0.0.9004
Version: 3.0.0.9005
Date: 2015-12-03
License: GPL (>= 3)
Title: Simulation-Based Maximum Likelihood Parameter Estimation
9 changes: 6 additions & 3 deletions NEWS.md
@@ -11,7 +11,10 @@ jaatha 3.1.0 (in development)
`parallel::mclapply` is now set to `FALSE` (#107).
* Adds the `block_width` argument to `jaatha` (#108).
* Also exports `create_jaatha_model.function` and
`create_jaatha_model.coalmodel` as functions.
`create_jaatha_model.coalmodel` as functions (#109).
* Increases robustness to non-converging GLMs. If a GLM fails to converge, only
the current repetition is now aborted instead of the complete analysis
(#110).



@@ -47,8 +50,8 @@ jaatha 2.6.0
============

* Switched unittests from RUnit to testthat
* Added an Rcpp function to parse trees from ms output for seqgen. This should
make seqgen less platform dependent (it still requires a Unix platform).
* Added an Rcpp function to parse trees from ms output for seq-gen. This should
make seq-gen less platform dependent (it still requires a Unix platform).
* Add a subset parameter to Jaatha.confidenceIntervals that allows distributing
the calculation across multiple machines.
* Bug fix: in 2.5.1, the ms output file was not deleted when using seq-gen. This can cause
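For context on the NEWS entry above: fitted glm objects in R carry a logical converged flag, which is what the changes below test. A minimal, self-contained sketch (hypothetical data and formula, not taken from the package) of detecting a fit that did not converge and skipping it instead of stopping:

# Hypothetical example: force early termination so the fit may not converge
set.seed(1)
d <- data.frame(x = rnorm(20), y = rpois(20, lambda = 2))
fit <- suppressWarnings(glm(y ~ x, family = poisson(), data = d,
                            control = glm.control(maxit = 2)))
if (!fit$converged) {
  warning("GLM did not converge; skipping this repetition")
  result <- NULL   # analogous to returning NULL rather than calling stop()
} else {
  result <- coef(fit)
}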
7 changes: 6 additions & 1 deletion R/initialization.R
@@ -27,9 +27,13 @@ do_initial_search <- function(model, data, reps, sim, cores, sim_cache) {
blocks_per_par <- determine_bpp(par_number, reps)
blocks <- create_initial_blocks(model$get_par_ranges(), blocks_per_par)

# Get an estimate infor each block
# Get an estimate for each block
estimates <- lapply(blocks, estimate_local_ml, model, data,
sim, cores, sim_cache)
estimates <- estimates[!vapply(estimates, is.null, logical(1))]
if (length(estimates) < reps) {
stop("To many GLMs in initial search failed to converge.")
}

# Return the parameters for the best estimates
best_indexes <- order(vapply(estimates, function(x) x$value, numeric(1)),
@@ -80,6 +84,7 @@ do_zoom_in_search <- function(model, data, reps, sim, cores, sim_cache) {
block <- create_block(cbind(middle - block_width * .5,
middle + block_width * .5), cut = TRUE)
middle <- estimate_local_ml(block, model, data, sim, cores, sim_cache)$par
if (is.null(middle)) return(block$get_middle())
}
middle
}, numeric(model$get_par_number())))
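The added check in do_initial_search drops repetitions whose estimate is NULL before ranking them. A standalone sketch of that idiom with made-up values (the list structure mirrors, but is not, the package's estimates):

# Made-up estimates: the second repetition's GLM failed and returned NULL
estimates <- list(list(value = -10.2, par = c(1.0, 2.0)),
                  NULL,
                  list(value = -9.7, par = c(1.1, 1.9)))
estimates <- estimates[!vapply(estimates, is.null, logical(1))]
best <- order(vapply(estimates, function(x) x$value, numeric(1)),
              decreasing = TRUE)[1]
estimates[[best]]$par   # parameters of the best remaining estimate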
2 changes: 2 additions & 0 deletions R/jaatha.R
@@ -104,6 +104,7 @@ jaatha <- function(model, data,
cut = TRUE)

local_ml <- estimate_local_ml(block, model, data, sim, cores, sim_cache)
if (is.null(local_ml)) break
log$log_estimate(rep, step, local_ml)
estimate <- local_ml$par

@@ -122,6 +123,7 @@
# get precise llh values for best estimates
log$log_llh_correction()
best_values <- log$get_best_estimates(5)
if (nrow(best_values) == 0) stop("No valid estimates.")
for (i in 1:nrow(best_values)) {
llh <- estimate_llh(model, data, as.numeric(best_values[i, -(1:3)]),
100, cores, TRUE)
2 changes: 1 addition & 1 deletion R/jaatha_log.R
@@ -77,7 +77,7 @@ jaatha_log_class <- R6Class("jaatha_log",
else estimates_list <- private$estimates
best_est <- do.call(rbind, lapply(estimates_list, function(estimates) {
best_llh <- order(estimates$llh, decreasing = TRUE)[1:n]
best_llh <- best_llh[!is.na(best_llh)]
best_llh <- best_llh[!is.na(estimates$llh[best_llh])]
estimates[best_llh, ]
}))
best_est[order(best_est$llh, decreasing = TRUE), ]
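The one-line fix in get_best_estimates changes what gets filtered: previously NA positions produced by over-long indexing were dropped, whereas now rows whose llh itself is NA are dropped. A small sketch with invented values showing the corrected behaviour:

# Invented log-likelihoods; one estimate has no valid llh
estimates <- data.frame(llh = c(-5, NA, -3, -8), step = 1:4)
n <- 4
best_llh <- order(estimates$llh, decreasing = TRUE)[1:n]
best_llh <- best_llh[!is.na(estimates$llh[best_llh])]   # drop rows with NA llh
estimates[best_llh, ]   # rows ordered -3, -5, -8; the NA row is gone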
11 changes: 7 additions & 4 deletions R/likelihood.R
@@ -69,7 +69,7 @@ optimize_llh <- function(block, model, data, glms, sim) {


estimate_local_ml <- function(block, model, data, sim, cores, sim_cache) {
for (j in 1:5) {
for (j in 1:6) {
sim_data <- model$simulate(pars = block$sample_pars(sim, TRUE),
data = data, cores = cores)

@@ -85,10 +85,13 @@ estimate_local_ml <- function(block, model, data, sim, cores, sim_cache) {
converged <- vapply(glms, function(x) {
all(vapply(x, function(y) y$converged, logical(1)))
}, logical(1))
if (all(converged)) {
break
if (all(converged)) break

if (j == 3) sim_cache$clear()
if (j == 6) {
warning("A GLM failed to converge. One repetition was aborted.")
return(NULL)
}
if (j == 5) stop("A GLM did not converge. Check your model")
}

optimize_llh(block, model, data, glms, length(sim_data))
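The revised loop in estimate_local_ml now retries up to six times, clears the simulation cache after the third failed attempt, and finally returns NULL with a warning instead of stopping. A self-contained sketch of that control flow, with a hypothetical fit_ok() standing in for the real simulation and GLM fitting:

fit_ok <- function() runif(1) > 0.7   # stand-in: "converges" ~30% of the time
estimate_once <- function(max_tries = 6) {
  for (j in seq_len(max_tries)) {
    if (fit_ok()) return(list(converged_on_attempt = j))
    if (j == 3) message("clearing cached simulations and retrying")
    if (j == max_tries) {
      warning("A GLM failed to converge. One repetition was aborted.")
      return(NULL)
    }
  }
}
set.seed(42)
estimate_once()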
6 changes: 5 additions & 1 deletion R/simulation_cache.R
@@ -40,7 +40,11 @@ sim_cache_class <- R6Class("sim_cache",
}, logical(1))
private$sim_data[in_block]
},
get_size = function() private$size
get_size = function() private$size,
clear = function() {
private$size <- 0
private$sim_data <- list()
}
)
)

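The clear() method added to the R6 cache simply resets its private fields. A trimmed-down sketch of the same pattern (not the package's full sim_cache class), assuming the R6 package is installed:

library(R6)
demo_cache <- R6Class("demo_cache",
  public = list(
    add = function(x) {
      private$sim_data <- c(private$sim_data, list(x))
      private$size <- private$size + 1
      invisible(self)
    },
    get_size = function() private$size,
    clear = function() {
      private$size <- 0
      private$sim_data <- list()
    }
  ),
  private = list(size = 0, sim_data = list())
)
cache <- demo_cache$new()
cache$add(1:3)$add(4:6)
cache$get_size()   # 2
cache$clear()
cache$get_size()   # 0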
2 changes: 1 addition & 1 deletion tests/testthat/test-jaatha-function.R
@@ -26,7 +26,7 @@ test_that("it supports a one parameter model", {

expect_is(results, "list")
expect_true(is.finite(results$loglikelihood))
expect_true(all(results$param > 1))
expect_true(all(results$estimate > 1))
expect_identical(results$args$model, model)
expect_identical(results$args$data, data)
expect_equal(results$args$repetitions, 1)
