Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add minimum distance option to clhs #24

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ Imports: utils,
sp,
sf,
raster,
reshape2,
data.table,
plyr,
cluster,
Rcpp,
Expand All @@ -35,7 +35,7 @@ Suggests: knitr,
rmarkdown,
testthat
VignetteBuilder: knitr
RoxygenNote: 7.1.1
RoxygenNote: 7.2.3
Collate:
'RcppExports.R'
'clhs-internal.R'
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import(Rcpp)
import(RcppArmadillo)
import(ggplot2)
importFrom(cluster,daisy)
importFrom(data.table,melt)
importFrom(graphics,hist)
importFrom(grid,grid.layout)
importFrom(grid,grid.newpage)
Expand All @@ -27,7 +28,6 @@ importFrom(raster,nlayers)
importFrom(raster,raster)
importFrom(raster,rasterToPoints)
importFrom(raster,stack)
importFrom(reshape2,melt)
importFrom(sf,st_coordinates)
importFrom(sf,st_geometry_type)
importFrom(sf,st_set_geometry)
Expand Down
4 changes: 2 additions & 2 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#' @return list with sampled data, indices, objective values, cost value, and final continuous weights for each sample
NULL

CppLHS <- function(xA, cost, strata, include, idx, factors, i_fact, nsample, cost_mode, iter, wCont, wFact, wCorr, etaMat, temperature, tdecrease, length_cycle) {
.Call('_clhs_CppLHS', PACKAGE = 'clhs', xA, cost, strata, include, idx, factors, i_fact, nsample, cost_mode, iter, wCont, wFact, wCorr, etaMat, temperature, tdecrease, length_cycle)
CppLHS <- function(xA, cost, strata, latlon, include, latlon_inc, idx, factors, continuous, i_fact, nsample, min_dist, cost_mode, iter, wCont, wFact, wCorr, wDist, etaMat, temperature, tdecrease, length_cycle) {
.Call('_clhs_CppLHS', PACKAGE = 'clhs', xA, cost, strata, latlon, include, latlon_inc, idx, factors, continuous, i_fact, nsample, min_dist, cost_mode, iter, wCont, wFact, wCorr, wDist, etaMat, temperature, tdecrease, length_cycle)
}

36 changes: 30 additions & 6 deletions R/clhs-data.frame.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ clhs.data.frame <- function(
cost = NULL, # Number or name of the attribute used as a cost
iter = 10000, # Number of max iterations
use.cpp = TRUE, # use C++ code for metropolis-hasting loop?
latlon = NULL,
min.dist = NULL,
temp = 1, # initial temperature
tdecrease = 0.95, # temperature decrease rate
weights = list(numeric = 1, factor = 1, correlation = 1), # weight for continuous data , weight for correlation among data, weight for object data
weights = list(numeric = 1, factor = 1, correlation = 1, distance = 0.1), # weight for continuous data , weight for correlation among data, weight for object data
eta = 1,
obj.limit = -Inf, # Stopping criterion
length.cycle = 10, # Number of cycles done at each constant temperature value
Expand Down Expand Up @@ -67,7 +69,7 @@ clhs.data.frame <- function(
for(i in 1:ncol(data_factor)){
data_factor[,i] <- as.numeric(data_factor[,i])
}
data <- as.matrix(cbind(data_factor,data_continuous))
data <- as.matrix(cbind(data_factor,data_continuous))##here's the bug
factIdx <- 1:n_factor
ncont <- ncol(data_continuous)
} else {
Expand Down Expand Up @@ -96,24 +98,46 @@ clhs.data.frame <- function(
}
)

if(is.null(latlon)){
latlon <- matrix(c(0,0),nrow = 1, ncol = 2)
min.dist = 0
}else{
latlon <- as.matrix(latlon)
}

if(is.null(include)){
dat <- data
inc <- dat[0,]
ssize <- size
spat_inc <- matrix(c(0,0),nrow = 1, ncol = 2)
spat <- latlon
}else{
if(nrow(latlon) > 1){
spat <- latlon[-include,]
spat_inc <- latlon[include,,drop = FALSE]
}
idx <- 1:nrow(data)
idx_new <- idx[-include]
dat <- data[-include,]
inc <- data[include,,drop = FALSE] ##keep as matrix if just one row
ssize <- size - length(include)
can.include <- 1:nrow(dat)
}

areContinuous = TRUE
if(length(continuous_strata) == 0){
continuous_strata = matrix(c(0,0),nrow = 1, ncol = 2)
areContinuous = FALSE
}
can.include <- can.include-1 ##convert to zero based for C
res <- CppLHS(xA = dat, cost = costVec, strata = continuous_strata, include = inc, idx = can.include,
factors = areFactors, i_fact = factIdx-1, nsample = ssize, cost_mode = costFlag, iter = iter,
wCont = weights$numeric, wFact = weights$factor, wCorr = weights$correlation, etaMat = eMat,
res <- CppLHS(xA = dat, cost = costVec, strata = continuous_strata, latlon = spat, include = inc, latlon_inc = spat_inc, idx = can.include,
factors = areFactors, continuous = areContinuous, i_fact = factIdx-1, nsample = ssize, min_dist = min.dist, cost_mode = costFlag, iter = iter,
wCont = weights$numeric, wFact = weights$factor, wCorr = weights$correlation, wDist = weights$distance, etaMat = eMat,
temperature = temp, tdecrease = tdecrease, length_cycle = length.cycle)
res$index_samples <- res$index_samples + 1 ##fix indexing difference
if(!is.null(include)){
res$index_samples <- c(res$index_samples,include)
samples2 <- idx_new[res$index_samples]
res$index_samples <- c(samples2,include) ##this is the problem
}
res$sampled_data <- x[res$index_samples,]

Expand Down
5 changes: 4 additions & 1 deletion R/clhs.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
#' @param use.cpp TRUE or FALSE. If set to TRUE, annealing process uses C++ code.
#' This is ~ 150 times faster than the R version, but is less stable and currently
#' doesn't accept track or obj.limit parameters. Default to TRUE.
#' @param latlon A dataframe or matrix with two columns containing the spatial coordinates, if minimum distance between points is required. Default to NULL.
#' @param min.dist Numeric value of minimum distance between sample points.
#' @param temp The initial temperature at which the simulated annealing
#' begins. Defaults to 1.
#' @param tdecrease A number between 0 and 1, giving the rate at which
Expand Down Expand Up @@ -134,4 +136,5 @@
#'
#' @include clhs-data.frame.R
#' @export
clhs <- function(x, size, must.include, can.include, cost, iter, use.cpp, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track, use.coords, ...) UseMethod("clhs")
clhs <- function(x, size, must.include, can.include, cost, iter, use.cpp, latlon, min.dist, temp, tdecrease, weights, eta, obj.limit, length.cycle, simple, progress, track, use.coords, ...) UseMethod("clhs")

2 changes: 1 addition & 1 deletion R/plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#'
#'
#' @import ggplot2
#' @importFrom reshape2 melt
#' @importFrom data.table melt
#' @importFrom plyr dlply ddply
#' @importFrom grid pushViewport viewport grid.newpage grid.layout
#' @importFrom utils packageVersion
Expand Down
16 changes: 5 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
[![R build status](https://github.com/pierreroudier/clhs/workflows/R-CMD-check/badge.svg)](https://github.com/pierreroudier/clhs/actions)
[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/clhs)](https://cran.r-project.org/package=clhs)
[![Total_Downloads](http://cranlogs.r-pkg.org/badges/grand-total/clhs)](https://cran.r-project.org/package=clhs)
# clhs development

# clhs

A faster (C++) implementation of the conditioned Latin Hypercube Sampling method

## Scope
This repo is a fork of the main clhs package, used for development (especially of the C++ functions). The original C++ version is on CRAN, but there are multiple updates to the development version, including bug fixes and specification of minimum distance between points.

## Installation

The C++ method is not yet on CRAN.
The CRAN package currently contains a bug if the C++ version is used along with `must.include`.

You can install it using the `devtools` package to install `clhs`:
You can install the development version using the `devtools` package:

`devtools::install_github("pierreroudier/clhs")`
`devtools::install_github("kdaust/clhs")`
6 changes: 6 additions & 0 deletions man/clhs.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.