diff --git a/.gitignore b/.gitignore
index 557faab14..f7069779d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
renv
renv.lock
inst/doc
+man/figures/README*
diff --git a/DESCRIPTION b/DESCRIPTION
index 43b199c3e..0032f19ef 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -42,6 +42,7 @@ VignetteBuilder:
RdMacros:
mlr3misc
Remotes:
+ mlr-org/mlr3misc,
rvalavi/blockCV
Encoding: UTF-8
LazyData: true
@@ -56,6 +57,7 @@ Collate:
'ResamplingSpCVCoords.R'
'ResamplingSpCVEnv.R'
'ResamplingRepeatedSpCVCoords.R'
+ 'ResamplingRepeatedSpCVEnv.R'
'TaskClassifST.R'
'TaskRegrST.R'
'helper.R'
diff --git a/NAMESPACE b/NAMESPACE
index 0351b5b34..a66b05410 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,11 +1,13 @@
# Generated by roxygen2: do not edit by hand
S3method(autoplot,ResamplingRepeatedSpCVCoords)
+S3method(autoplot,ResamplingRepeatedSpCVEnv)
S3method(autoplot,ResamplingSpCVBlock)
S3method(autoplot,ResamplingSpCVBuffer)
S3method(autoplot,ResamplingSpCVCoords)
S3method(autoplot,ResamplingSpCVEnv)
export(ResamplingRepeatedSpCVCoords)
+export(ResamplingRepeatedSpCVEnv)
export(ResamplingSpCVBlock)
export(ResamplingSpCVBuffer)
export(ResamplingSpCVCoords)
diff --git a/R/ResamplingRepeatedSpCVCoords.R b/R/ResamplingRepeatedSpCVCoords.R
index 896fd9864..7876100aa 100644
--- a/R/ResamplingRepeatedSpCVCoords.R
+++ b/R/ResamplingRepeatedSpCVCoords.R
@@ -52,11 +52,17 @@ ResamplingRepeatedSpCVCoords = R6Class("ResamplingRepeatedSpCVCoords",
},
+    #' @description Translates iteration numbers to fold number.
+    #' @param iters `integer()`\cr
+    #' Iteration number.
     folds = function(iters) {
       iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
-      ((iters - 1L) %% as.integer(self$param_set$values$repeats)) + 1L
+      ((iters - 1L) %% as.integer(self$param_set$values$folds)) + 1L
     },
+ #' @description Translates iteration numbers to repetition number.
+ #' @param iters `integer()`\cr
+ #' Iteration number.
repeats = function(iters) {
iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
((iters - 1L) %/% as.integer(self$param_set$values$folds)) + 1L
@@ -83,6 +89,10 @@ ResamplingRepeatedSpCVCoords = R6Class("ResamplingRepeatedSpCVCoords",
),
active = list(
+
+ #' @field iters `integer(1)`\cr
+ #' Returns the number of resampling iterations, depending on the
+ #' values stored in the `param_set`.
iters = function() {
pv = self$param_set$values
as.integer(pv$repeats) * as.integer(pv$folds)
diff --git a/R/ResamplingRepeatedSpCVEnv.R b/R/ResamplingRepeatedSpCVEnv.R
new file mode 100644
index 000000000..d81c3c007
--- /dev/null
+++ b/R/ResamplingRepeatedSpCVEnv.R
@@ -0,0 +1,153 @@
+#' @title Repeated Environmental Block Cross Validation Resampling
+#'
+#' @import mlr3
+#'
+#' @description Environmental Block Cross Validation. This strategy uses k-means
+#' clustering to specify blocks of similar environmental conditions. Only
+#' numeric features can be used. The `features` used for building blocks can
+#' be specified in the `param_set`. By default, all numeric features are used.
+#'
+#' @references
+#' \cite{mlr3spatiotempcv}{valavi2018}
+#'
+#' @export
+#' @examples
+#' library(mlr3)
+#' task = tsk("ecuador")
+#'
+#' # Instantiate Resampling
+#' rrcv = rsmp("repeated-spcv-env")
+#' rrcv$param_set$values = list(folds = 4, repeats = 2)
+#' rrcv$instantiate(task)
+#'
+#' # Individual sets:
+#' rrcv$train_set(1)
+#' rrcv$test_set(1)
+#' intersect(rrcv$train_set(1), rrcv$test_set(1))
+#'
+#' # Internal storage:
+#' rrcv$instance
+ResamplingRepeatedSpCVEnv = R6Class("ResamplingRepeatedSpCVEnv",
+  inherit = mlr3::Resampling,
+
+  public = list(
+    #' @description
+    #' Create an "environmental block" repeated resampling instance.
+    #' @param id `character(1)`\cr
+    #' Identifier for the resampling strategy.
+    initialize = function(id = "repeated-spcv-env") {
+      ps = ParamSet$new(params = list(
+        ParamInt$new("repeats", lower = 1),
+        ParamInt$new("folds", lower = 1L, tags = "required")
+      ))
+      # `repeats` needs a value as well: `iters` and `.sample()` both read it.
+      ps$values = list(folds = 10L, repeats = 1L)
+      super$initialize(
+        id = id,
+        param_set = ps,
+        man = "mlr3spatiotempcv::mlr_resamplings_repeated_spcv_env"
+      )
+
+    },
+
+    #' @description Translates iteration numbers to fold number.
+    #' @param iters `integer()`\cr
+    #' Iteration number.
+    folds = function(iters) {
+      iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
+      # Folds cycle fastest, matching `.get_train()` / `.get_test()`.
+      ((iters - 1L) %% as.integer(self$param_set$values$folds)) + 1L
+    },
+
+    #' @description Translates iteration numbers to repetition number.
+    #' @param iters `integer()`\cr
+    #' Iteration number.
+    repeats = function(iters) {
+      iters = assert_integerish(iters, any.missing = FALSE, coerce = TRUE)
+      ((iters - 1L) %/% as.integer(self$param_set$values$folds)) + 1L
+    },
+
+    #' @description
+    #' Materializes fixed training and test splits for a given task.
+    #' @param task [Task]\cr
+    #' A task to instantiate.
+    instantiate = function(task) {
+
+      assert_task(task)
+      pv = self$param_set$values
+
+      # Cluster on the requested features; without a selection, start from
+      # every feature of the task (non-numeric ones are rejected below).
+      if (is.null(pv$features)) {
+        pv$features = task$feature_names
+      }
+
+      # Remove non-numeric features, target and coordinates
+      columns = task$col_info[!id %in%
+        c(task$target_names, "x", "y")][type == "numeric"]
+
+      # Check for selected features that are not in task
+      diff = setdiff(pv$features, columns[, id])
+      if (length(diff) > 0) {
+        stopf("'spcv-env' requires numeric features for clustering.
+          Feature '%s' is either non-numeric or does not exist in the data.",
+          diff, wrap = TRUE)
+      }
+      columns = columns[id %in% pv$features]
+      columns = columns[, id]
+
+      data = task$data()[, columns, with = FALSE]
+
+      instance = private$.sample(task$row_ids, data)
+
+      self$instance = instance
+      self$task_hash = task$hash
+      invisible(self)
+    }
+  ),
+
+  active = list(
+
+    #' @field iters `integer(1)`\cr
+    #' Returns the number of resampling iterations, depending on the
+    #' values stored in the `param_set`.
+    iters = function() {
+      pv = self$param_set$values
+      as.integer(pv$repeats) * as.integer(pv$folds)
+    }
+  ),
+
+  private = list(
+    .sample = function(ids, coords) {
+      pv = self$param_set$values
+      folds = as.integer(pv$folds)
+
+      # A separate k-means run per repetition; a row's cluster id is its fold.
+      map_dtr(seq_len(pv$repeats), function(i) {
+        data.table(row_id = ids, rep = i,
+          fold = kmeans(coords, centers = folds)$cluster
+        )
+      })
+    },
+
+    .get_train = function(i) {
+      # Training rows: every fold of this repetition except the held-out one.
+      i = as.integer(i) - 1L
+      folds = as.integer(self$param_set$values$folds)
+      rep = i %/% folds + 1L
+      fold = i %% folds + 1L
+      ii = data.table(rep = rep, fold = seq_len(folds)[-fold])
+      self$instance[ii, "row_id", on = names(ii), nomatch = 0L][[1L]]
+    },
+
+    .get_test = function(i) {
+      # Test rows: the single held-out fold of this repetition.
+      i = as.integer(i) - 1L
+      folds = as.integer(self$param_set$values$folds)
+      rep = i %/% folds + 1L
+      fold = i %% folds + 1L
+      ii = data.table(rep = rep, fold = fold)
+      self$instance[ii, "row_id", on = names(ii), nomatch = 0L][[1L]]
+    }
+  )
+)
diff --git a/R/ResamplingSpCVEnv.R b/R/ResamplingSpCVEnv.R
index 247ad40cf..67bc2f504 100644
--- a/R/ResamplingSpCVEnv.R
+++ b/R/ResamplingSpCVEnv.R
@@ -2,15 +2,14 @@
#'
#' @import mlr3
#'
-#' @description Environmental Block Cross Validation. This strategy uses k-means
-#' clustering to specify blocks of smilar environmental conditions. Only numeric
-#' features can be used. The `features` used for building blocks can be
-#' specified in the `param_set`. By default, all numeric features are used.
+#' @description
+#' Environmental Block Cross Validation. This strategy uses k-means clustering
+#' to specify blocks of similar environmental conditions. Only numeric features
+#' can be used. The `features` used for building blocks can be specified in the
+#' `param_set`. By default, all numeric features are used.
#'
-#' @references Valavi R, Elith J, Lahoz-Monfort JJ, Guillera-Arroita G. blockCV:
-#' An r package for generating spatially or environmentally separated folds for
-#' k-fold cross-validation of species distribution models. Methods Ecol Evol.
-#' 2019; 10:225–232. https://doi.org/10.1111/2041-210X.13107
+#' @references
+#' \cite{mlr3spatiotempcv}{valavi2018}
#'
#' @export
#' @examples
@@ -80,8 +79,9 @@ ResamplingSpCVEnv = R6Class("ResamplingSpCVEnv", inherit = mlr3::Resampling,
# Check for selected features that are not in task
diff = setdiff(pv$features, columns[, id])
if (length(diff) > 0) {
- stop(sprintf("'spcv-env' requires numeric features for clustering. Feature '%s' is either non-numeric or does not exist in the data",
- diff))
+ stopf("'spcv-env' requires numeric features for clustering.
+ Feature '%s' is either non-numeric or does not exist in the data.",
+ diff, wrap = TRUE)
}
columns = columns[id %in% pv$features]
columns = columns[, id]
diff --git a/R/autoplot.R b/R/autoplot.R
index d8e938b49..0b616e633 100644
--- a/R/autoplot.R
+++ b/R/autoplot.R
@@ -199,6 +199,39 @@ autoplot.ResamplingRepeatedSpCVCoords = function(
grid = grid)
}
+#' @title Plot for Repeated Spatial Resampling
+#'
+#' @rdname autoplot_spatial_resampling
+#' @export
+#' @examples
+#' #####
+#' # RepeatedSpCVEnv
+#' #####
+#' \donttest{
+#' task = tsk("ecuador")
+#' resampling = rsmp("repeated-spcv-env", folds = 10, repeats = 2)
+#' resampling$instantiate(task)
+#' autoplot(resampling, task)
+#' autoplot(resampling, task, 1)
+#' autoplot(resampling, task, fold_id = 2, repeats_id = 2)
+#' autoplot(resampling, task, c(1, 2, 3, 4))
+#' }
+autoplot.ResamplingRepeatedSpCVEnv = function(
+  object,
+  task,
+  fold_id = NULL,
+  repeats_id = 1,
+  grid = TRUE,
+  train_color = "#0072B5",
+  test_color = "#E18727",
+  ...) {
+  # Delegates to `autoplot_spatial()`. NOTE(review): `train_color`,
+  # `test_color` and `...` are accepted here but not forwarded.
+  autoplot_spatial(
+    resampling = object, task = task,
+    fold_id = fold_id, repeats_id = repeats_id, grid = grid)
+}
+
autoplot_spatial = function(
resampling = NULL,
task = NULL,
diff --git a/R/zzz.R b/R/zzz.R
index 08090e27e..3af92de3a 100644
--- a/R/zzz.R
+++ b/R/zzz.R
@@ -43,7 +43,9 @@ register_mlr3 = function() {
mlr_resamplings$add("spcv-buffer", ResamplingSpCVBuffer)
mlr_resamplings$add("spcv-coords", ResamplingSpCVCoords)
mlr_resamplings$add("spcv-env", ResamplingSpCVEnv)
+
mlr_resamplings$add("repeated-spcv-coords", ResamplingRepeatedSpCVCoords)
+ mlr_resamplings$add("repeated-spcv-env", ResamplingRepeatedSpCVEnv)
}
}
diff --git a/README.Rmd b/README.Rmd
index fc6288f6d..ab28fb5a8 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -30,7 +30,8 @@ Currently, the following ones are implemented:
| Spatial CV | [sperrorest](https://github.com/giscience-fsu/sperrorest) | Brenning 2012 | `ResamplingSpCVCoords` | `rsmp("spcv-coords")` |
| Environmental Blocking | [blockCV](https://github.com/rvalavi/blockCV) | Valavi 2019 | `ResamplingSpCVEnv` | `rsmp("spcv-env")` |
| --- | --- | --- | --- | --- |
-| Repeated Spatial CV | [sperrorest](https://github.com/giscience-fsu/sperrorest) | Brenning 2012 | `RepeatedResamplingSpCVCoords` | `rsmp("repeated-spcv-coords")` |
+| Repeated Spatial CV | [sperrorest](https://github.com/giscience-fsu/sperrorest) | Brenning 2012 | `ResamplingRepeatedSpCVCoords` | `rsmp("repeated-spcv-coords")` |
+| Repeated Env Blocking | [blockCV](https://github.com/rvalavi/blockCV) | Valavi 2019 | `ResamplingRepeatedSpCVEnv` | `rsmp("repeated-spcv-env")` |
## Spatial tasks
diff --git a/README.md b/README.md
index 7bd80aff9..d01d9e1e0 100644
--- a/README.md
+++ b/README.md
@@ -32,6 +32,7 @@ Currently, the following ones are implemented:
| Environmental Blocking | [blockCV](https://github.com/rvalavi/blockCV) | Valavi 2019 | `ResamplingSpCVEnv` | `rsmp("spcv-env")` |
| — | — | — | — | — |
| Repeated Spatial CV | [sperrorest](https://github.com/giscience-fsu/sperrorest) | Brenning 2012 | `RepeatedResamplingSpCVCoords` | `rsmp("repeated-spcv-coords")` |
+| Repeated Env Blocking | [blockCV](https://github.com/rvalavi/blockCV) | Valavi 2019 | `ResamplingRepeatedSpCVEnv` | `rsmp("repeated-spcv-env")` |
## Spatial tasks
diff --git a/man/ResamplingRepeatedSpCVCoords.Rd b/man/ResamplingRepeatedSpCVCoords.Rd
index db0b920f5..f5aa1ca88 100644
--- a/man/ResamplingRepeatedSpCVCoords.Rd
+++ b/man/ResamplingRepeatedSpCVCoords.Rd
@@ -35,6 +35,15 @@ rrcv$instance # table
\section{Super class}{
\code{\link[mlr3:Resampling]{mlr3::Resampling}} -> \code{ResamplingRepeatedSpCVCoords}
}
+\section{Active bindings}{
+\if{html}{\out{
}}
+\describe{
+\item{\code{iters}}{\code{integer(1)}\cr
+Returns the number of resampling iterations, depending on the
+values stored in the \code{param_set}.}
+}
+\if{html}{\out{
}}
+}
\section{Methods}{
\subsection{Public methods}{
\itemize{
@@ -75,18 +84,36 @@ Identifier for the resampling strategy.}
\if{html}{\out{}}
\if{html}{\out{}}
\subsection{Method \code{folds()}}{
+Translates iteration numbers to fold number.
\subsection{Usage}{
\if{html}{\out{
}}
+}
}
\if{html}{\out{}}
\if{html}{\out{}}
diff --git a/man/ResamplingRepeatedSpCVEnv.Rd b/man/ResamplingRepeatedSpCVEnv.Rd
new file mode 100644
index 000000000..e736bb0e0
--- /dev/null
+++ b/man/ResamplingRepeatedSpCVEnv.Rd
@@ -0,0 +1,148 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ResamplingRepeatedSpCVEnv.R
+\name{ResamplingRepeatedSpCVEnv}
+\alias{ResamplingRepeatedSpCVEnv}
+\title{Repeated Environmental Block Cross Validation Resampling}
+\description{
+Environmental Block Cross Validation. This strategy uses k-means
+clustering to specify blocks of similar environmental conditions. Only
+numeric features can be used. The \code{features} used for building blocks can
+be specified in the \code{param_set}. By default, all numeric features are used.
+}
+\examples{
+library(mlr3)
+task = tsk("ecuador")
+
+# Instantiate Resampling
+rrcv = rsmp("repeated-spcv-env")
+rrcv$param_set$values = list(folds = 4, repeats = 2)
+rrcv$instantiate(task)
+
+# Individual sets:
+rrcv$train_set(1)
+rrcv$test_set(1)
+intersect(rrcv$train_set(1), rrcv$test_set(1))
+
+# Internal storage:
+rrcv$instance
+}
+\references{
+\cite{mlr3spatiotempcv}{valavi2018}
+}
+\section{Super class}{
+\code{\link[mlr3:Resampling]{mlr3::Resampling}} -> \code{ResamplingRepeatedSpCVEnv}
+}
+\section{Active bindings}{
+\if{html}{\out{
}}
+\describe{
+\item{\code{iters}}{\code{integer(1)}\cr
+Returns the number of resampling iterations, depending on the
+values stored in the \code{param_set}.}
+}
+\if{html}{\out{
}}
+}
+}
+\if{html}{\out{}}
+\if{html}{\out{}}
+\subsection{Method \code{instantiate()}}{
+Materializes fixed training and test splits for a given task.
+\subsection{Usage}{
+\if{html}{\out{
}}
+\describe{
+\item{\code{task}}{\link{Task}\cr
+A task to instantiate.}
+}
+\if{html}{\out{
}}
+}
+}
+\if{html}{\out{}}
+\if{html}{\out{}}
+\subsection{Method \code{clone()}}{
+The objects of this class are cloneable with this method.
+\subsection{Usage}{
+\if{html}{\out{