From 47e02c884d63533313d88ed4cc62b4a7f03d5d77 Mon Sep 17 00:00:00 2001
From: Paul Staab <paulstaab@users.noreply.github.com>
Date: Sun, 1 May 2016 09:31:08 +0200
Subject: [PATCH] Export `final_sim` and `zoom_in_steps` options (#134)

* add `zoom_in_steps` and `final_sim` opts

* add instructions for mem and disk usage
---
 DESCRIPTION                    |  2 +-
 NEWS.md                        |  3 ++-
 R/initialization.R             |  4 ++--
 R/jaatha.R                     | 27 ++++++++++++++++++++-------
 man/get_start_pos.Rd           |  6 +++++-
 man/jaatha.Rd                  | 12 +++++++++++-
 vignettes/jaatha-evolution.Rmd | 10 +++++++++-
 7 files changed, 50 insertions(+), 14 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 704a967..b9a3a49 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,5 +1,5 @@
 Package: jaatha
-Version: 3.1.1.9004
+Version: 3.1.1.9005
 License: GPL (>= 3)
 Title: Simulation-Based Maximum Likelihood Parameter Estimation
 Authors@R: c(
diff --git a/NEWS.md b/NEWS.md
index c0db117..6c15d3d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -7,7 +7,8 @@ jaatha 3.2.0 (in development)
   the standard JSFS coarsening from an unpolarized spectrum (#130).
 * Now tries to continue the estimation even when a few simulations return 
   errors (#131).
-* Skip tests if `testthat` is not available (#132).
+* Skips tests if `testthat` is not available (#132).
+* Adds new options `final_sim` and `zoom_in_steps` to the main `jaatha` function.
 
 
 
diff --git a/R/initialization.R b/R/initialization.R
index 2dd58a3..f360497 100644
--- a/R/initialization.R
+++ b/R/initialization.R
@@ -14,12 +14,12 @@
 #' @author Paul Staab
 #' @keywords internal
 get_start_pos <- function(model, data, reps, sim, init_method, cores, 
-                          sim_cache, block_width) {
+                          sim_cache, block_width, zoom_in_steps = 3) {
   
   start_pos <- NULL
   if (init_method[1] == "zoom-in") {
     start_pos <- do_zoom_in_search(model, data, reps, sim, 
-                                   cores, sim_cache, block_width)
+                                   cores, sim_cache, block_width, zoom_in_steps)
   } else if (init_method[1] == "initial-search") {
     start_pos <- do_initial_search(model, data, reps, sim, cores, sim_cache)
   } else if (init_method[1] == "middle") {
diff --git a/R/jaatha.R b/R/jaatha.R
index ca3b21c..8b60740 100644
--- a/R/jaatha.R
+++ b/R/jaatha.R
@@ -9,7 +9,7 @@ NULL
 #'   See \code{\link{create_jaatha_model}}.
 #' @param data The data used for the estimation.
 #'   See \code{\link{create_jaatha_data}}.
-#' @param repetitions The number of independend optimizations that will be
+#' @param repetitions The number of independent optimizations that will be
 #'   conducted. You should use a value greater than one here, to minimize
 #'   the chance that the algorithms is stuck in a local maximum.
 #' @param sim The number of simulations conducted for each step.
@@ -18,7 +18,7 @@ NULL
 #' @param init_method Determines how the starting position of each repetition
 #'   is chosen. See below for a description of the different options. 
 #' @param cores The number of CPU cores that will be used for the simulations.
-#'   The relies on the \pkg{parallel} package, and consequenlty only one
+#'   This relies on the \pkg{parallel} package, and consequently only one
 #'   core is supported on Windows.
 #' @param sim_cache_limit The maximal number of simulations results that will be 
 #'   cached. Cached results may be reused in following estimation steps if 
@@ -31,6 +31,14 @@ NULL
 #'   help in case jaatha fails to converge, while you can try decreasing it if 
 #'   the estimates of the likelihoods differ from the corrected values in the 
 #'   'Correcting likelihoods for best estimates' phase.
+#' @param final_sim The number of simulations conducted for calculating 
+#'   precise likelihoods for the best estimates found in the optimization
+#'   procedure. This number of simulations is conducted for the best
+#'   five estimates from each repetition. Using the default value is usually
+#'   fine.
+#' @param zoom_in_steps The number of steps conducted in the \code{zoom-in}
+#'   initialization method. Has no effect if a different initialization method
+#'   is used. Using the default value is usually fine.
 #' @return A list contain the results. The list has the following entries:
 #' \describe{
 #'    \item{estimate}{The (approximated) maximum likelihood estimate}
@@ -48,7 +56,7 @@ NULL
 #'   starting positions. The option \code{zoom-in} starts with a block that
 #'   is equal to the complete parameter space, estimate parameters in there,
 #'   and then iteratively creates a smaller block around the estimates. Finally,
-#'   \code{random} chooses random starting postions and
+#'   \code{random} chooses random starting positions and
 #'   \code{middle} will just start all repetitions at the middle of the 
 #'   parameter space.
 #'   
@@ -57,13 +65,15 @@ NULL
 jaatha <- function(model, data, 
                    repetitions = 3, 
                    sim = model$get_par_number() * 25, 
-                   max_steps = 100, 
+                   max_steps = 100,
                    init_method = c("zoom-in", "initial-search", 
                                    "random", "middle"),
                    cores = 1,
                    verbose = TRUE,
                    sim_cache_limit = 10000,
-                   block_width = 0.1) {
+                   block_width = 0.1,
+                   final_sim = 100,
+                   zoom_in_steps = 3) {
   
   # Check parameters
   assert_that(is_jaatha_model(model))
@@ -73,6 +83,8 @@ jaatha <- function(model, data,
   assert_that(is.count(cores))
   assert_that(is.numeric(block_width) && length(block_width) == 1)
   assert_that(block_width > 0 && block_width < 1)
+  assert_that(is.count(final_sim))
+  assert_that(is.count(zoom_in_steps))
   
   # Setup
   log <- create_jaatha_log(model, data, repetitions, max_steps, verbose)
@@ -82,7 +94,8 @@ jaatha <- function(model, data,
   # Get start positions
   log$log_initialization(init_method[1])
   start_pos <- get_start_pos(model, data, repetitions, sim, init_method, cores,
-                             sim_cache = sim_cache, block_width = block_width)
+                             sim_cache = sim_cache, block_width = block_width,
+                             zoom_in_steps)
   
   for (rep in 1:repetitions) {
     estimate <- start_pos[rep, ]
@@ -125,7 +138,7 @@ jaatha <- function(model, data,
   if (nrow(best_values) == 0) stop("No valid estimates.")
   for (i in 1:nrow(best_values)) {
     llh <- estimate_llh(model, data, as.numeric(best_values[i, -(1:3)]), #nolint 
-                        100, cores, TRUE)
+                        final_sim, cores, TRUE)
     log$log_estimate("final", i, llh, best_values[i, 3])
   }
   
diff --git a/man/get_start_pos.Rd b/man/get_start_pos.Rd
index b141531..b832bbb 100644
--- a/man/get_start_pos.Rd
+++ b/man/get_start_pos.Rd
@@ -5,7 +5,7 @@
 \title{Determine good starting postions}
 \usage{
 get_start_pos(model, data, reps, sim, init_method, cores, sim_cache,
-  block_width)
+  block_width, zoom_in_steps = 3)
 }
 \arguments{
 \item{model}{The model used for the estimation. 
@@ -32,6 +32,10 @@ local GLM. The default value is usually fine. Increasing this value may
 help in case jaatha fails to converge, while you can try decreasing it if 
 the estimates of the likelihoods differ from the corrected values in the 
 'Correcting likelihoods for best estimates' phase.}
+
+\item{zoom_in_steps}{The number of steps conducted in the \code{zoom-in}
+initialization method. Has no effect if a different initialization method
+is used. Using the default value is usually fine.}
 }
 \value{
 The starting positions, as a matrix. Each row corresponds
diff --git a/man/jaatha.Rd b/man/jaatha.Rd
index e2fb8d2..9216c7d 100644
--- a/man/jaatha.Rd
+++ b/man/jaatha.Rd
@@ -7,7 +7,7 @@
 jaatha(model, data, repetitions = 3, sim = model$get_par_number() * 25,
   max_steps = 100, init_method = c("zoom-in", "initial-search", "random",
   "middle"), cores = 1, verbose = TRUE, sim_cache_limit = 10000,
-  block_width = 0.1)
+  block_width = 0.1, final_sim = 100, zoom_in_steps = 3)
 }
 \arguments{
 \item{model}{The model used for the estimation. 
@@ -45,6 +45,16 @@ local GLM. The default value is usually fine. Increasing this value may
 help in case jaatha fails to converge, while you can try decreasing it if 
 the estimates of the likelihoods differ from the corrected values in the 
 'Correcting likelihoods for best estimates' phase.}
+
+\item{final_sim}{The number of simulations conducted for calculating 
+precise likelihoods for the best estimates found in the optimization
+procedure. This number of simulations is conducted for the best
+five estimates from each repetition. Using the default value is usually
+fine.}
+
+\item{zoom_in_steps}{The number of steps conducted in the \code{zoom-in}
+initialization method. Has no effect if a different initialization method
+is used. Using the default value is usually fine.}
 }
 \value{
 A list contain the results. The list has the following entries:
diff --git a/vignettes/jaatha-evolution.Rmd b/vignettes/jaatha-evolution.Rmd
index ea5a202..3a2fc63 100644
--- a/vignettes/jaatha-evolution.Rmd
+++ b/vignettes/jaatha-evolution.Rmd
@@ -58,5 +58,13 @@ on to `create_jaatha_data`.
 Running Jaatha
 --------------
 From here on, you can estimate parameters using the `jaatha` as described in the
-introduction vignette.
+introduction vignette. 
 
+If you are using a simulator that is writing temporary
+files to disk (e.g. `ms`, `msms` and `seq-gen`), please make sure that there
+is sufficient free space on your `tempdir()` to store the output of `sim` 
+simulations per core that you use (arguments `sim` and `cores` in the `jaatha`
+function). Also, please make sure that your machine does not run out of memory.
+Both will lead to failures during the estimation process. Reducing the number
+of cores reduces both the required memory and disk space at the cost of a 
+longer runtime.