Skip to content

Commit

Permalink
Merge pull request #2 from BenjaminLouis/include
Browse files Browse the repository at this point in the history
Include
  • Loading branch information
BenjaminLouis committed Oct 4, 2018
2 parents c4f197d + 700f96e commit 2dd3f51
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 17 deletions.
32 changes: 21 additions & 11 deletions R/clhs-data.frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
clhs.data.frame <- function(
x, # data.frame
size, # Number of samples you want
include = NULL, # row index of data that must be in the final sample
cost = NULL, # Number or name of the attribute used as a cost
iter = 10000, # Number of max iterations
temp = 1, # initial temperature
Expand Down Expand Up @@ -62,6 +63,11 @@ clhs.data.frame <- function(
# Remove cost attribute from attribute table
x <- x[, -1*i_cost, drop = FALSE]

# Si include, cost is 0
if (!is.null(include)) {
cost[include, ] <- 0
}

# Flags
cost_mode <- TRUE
track_mode <- FALSE # cost is taken into account, therefore computed
Expand All @@ -76,7 +82,7 @@ clhs.data.frame <- function(
data_factor <- x[, i_factor, drop = FALSE]
# Creating a list storing the levels of each factor
factor_levels <- apply(data_factor, 2, function(x) {
ifelse(is.factor(x), res <- levels(x), res <-levels(factor(x)))
ifelse(is.factor(x), res <- levels(x), res <- levels(factor(x)))
res}
)
} else {
Expand All @@ -102,9 +108,13 @@ clhs.data.frame <- function(
if (n_factor == 0) data_factor_sampled <- data.frame()
else factor_obj <- alply(data_factor, 2, function(x) table(x)/n_data)

# Mandatory data in the sample
sampled_size <- size - length(include)
not_included <- setdiff(1:n_data, include)

# initialise, pick randomly
n_remainings <- n_data - size # number of individuals remaining unsampled
i_sampled <- sample(1:n_data, size = size, replace = FALSE) # individuals randomly chosen
i_sampled <- c(sample(not_included, size = sampled_size, replace = FALSE), include) # individuals randomly chosen
i_unsampled <- setdiff(1:n_data, i_sampled) # individuals remaining unsampled
data_continuous_sampled <- data_continuous[i_sampled, , drop = FALSE] # sampled continuous data

Expand Down Expand Up @@ -145,11 +155,11 @@ clhs.data.frame <- function(

if (runif(1) < 0.5) {
# pick a random sampled point and random unsampled point and swap them
idx_removed <- sample(1:length(i_sampled), size = 1, replace = FALSE)
spl_removed <- i_sampled[idx_removed]
idx_removed <- sample(1:length(setdiff(i_sampled, include)), size = 1, replace = FALSE)
spl_removed <- setdiff(i_sampled, include)[idx_removed]
idx_added <- sample(1:length(i_unsampled), size = 1, replace = FALSE)
i_sampled <- i_sampled[-idx_removed]
i_sampled <- c(i_sampled, i_unsampled[idx_added])
i_sampled <- setdiff(i_sampled, include)[-idx_removed]
i_sampled <- c(i_sampled, i_unsampled[idx_added], include)
i_unsampled <- i_unsampled[-idx_added]
i_unsampled <- c(i_unsampled, spl_removed)

Expand All @@ -163,16 +173,16 @@ clhs.data.frame <- function(
}
else {
# remove the worse sampled & resample
worse <- max(delta_obj_continuous)
i_worse <- which(delta_obj_continuous == worse)
worse <- max(delta_obj_continuous[!i_sampled %in% include])
i_worse <- which(delta_obj_continuous[!i_sampled %in% include] == worse)
# If there's more than one worse candidate, we pick one at random
if (length(i_worse) > 1) i_worse <- sample(i_worse, size = 1)

# swap with reservoir
spl_removed <- i_sampled[i_worse] # will be removed from the sampled set.
spl_removed <- setdiff(i_sampled, include)[i_worse] # will be removed from the sampled set.
idx_added <- sample(1:n_remainings, size = 1, replace = FALSE) # new candidate that will take their place
i_sampled <- i_sampled[-i_worse]
i_sampled <- c(i_sampled, i_unsampled[idx_added])
i_sampled <- setdiff(i_sampled, include)[-i_worse]
i_sampled <- c(i_sampled, i_unsampled[idx_added], include)
i_unsampled <- i_unsampled[-idx_added]
i_unsampled <- c(i_unsampled, spl_removed)

Expand Down
6 changes: 5 additions & 1 deletion R/clhs.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#' @param x A \code{data.frame}, \code{SpatialPointsDataFrame} or \code{Raster}
#' object.
#' @param size A non-negative integer giving the number of samples to pick.
#' @param include Vector of row indexes of data from \code{x} that must be
#' included in the final sample. For the cost-constrained cLHS method, cost of
#' these mandatory samples is set to 0. If NULL (default), all data are randomly
#' choosen according to the classic cLHS method.
#' @param cost A character giving the name or an integer giving the index of
#' the attribute in \code{x} that gives a cost that can be use to constrain the
#' cLHS sampling. If NULL (default), the cost-constrained implementation is not
Expand Down Expand Up @@ -110,4 +114,4 @@
#'
#' @include clhs-data.frame.R
#' @export
clhs <- function(x, size, cost, iter, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track) UseMethod("clhs")
clhs <- function(x, size, include, cost, iter, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track) UseMethod("clhs")
14 changes: 9 additions & 5 deletions man/clhs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 2dd3f51

Please sign in to comment.