Skip to content

Commit

Permalink
Fixes in easy cases the spatial distribution of samples
Browse files Browse the repository at this point in the history
  • Loading branch information
llrs committed Dec 6, 2023
1 parent eeb3d73 commit faafe91
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 30 deletions.
30 changes: 23 additions & 7 deletions R/indexing.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,22 +33,38 @@ create_subset <- function(size_data, size_subset = NULL, n = NULL, name = "SubSe
}

# The workhorse function without any check
# size_batches is a vector with the number of elements in each batch.
create_index <- function(size_data, size_batches, n, name = "SubSet") {
# The size of each batch
stopifnot("Batches match the length" = length(size_batches) == n)
i <- distribute_samples(size_data, size_batches)
names(i) <- id2batch_names(name, n)
i
}

# Shuffle sample within index to improve positioning
create_index4index <- function(index, n, name) {
i <- vector("list", length = n)
names(i) <- id2batch_names(name, n)
for (id in seq_along(index)){
s <- sample(index[[id]])
names(i)[s]
create_index4index <- function(index, size_subset, n, name) {
index_out <- vector("list", n)
names(index_out) <- id2batch_names(name, n)
for (batch in seq_along(index)) {
pos <- index[[batch]]
# Pick a batch from the new index to place the previous position
# Which hasn't been picked within the batch
i_lengths <- lengths(index_out)

# Pick from the ones that are almost filled: to ensure that the batches are fully used.
i_lengths <- i_lengths[order(size_subset - i_lengths, decreasing = TRUE)]
batch_w_space <- i_lengths < size_subset
possible_positions <- which(batch_w_space)[seq_len(length(pos))]

# Sample the positions of the new index for each current position in the index
index_i <- sample(possible_positions, length(pos))
for (position in seq_along(pos)) {
index_out[[index_i[position]]] <- c(index_out[[index_i[position]]],
pos[position])
}
}
i
index_out
}

id2batch_names <- function(name, n) {
Expand Down
22 changes: 7 additions & 15 deletions R/spatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,13 @@ spatial <- function(index, pheno, omit = NULL, remove_positions = NULL, rows = L

# Use index to duplicate samples in case the index comes from replicates.
pheno_o <- pheno_o[unlist(index), ]
old_rows <- round(as.numeric(rownames(pheno_o)))
rownames(pheno_o) <- NULL
new_rows <- as.numeric(rownames(pheno_o))
batches <- length(position)
# size_batches <- internal_batches(size_data, size_subset, batches)
i0 <- 0L
while (iterations > 0L) {
i <- create_index4index(index, batches, position)
# i <- create_index(size_data, size_batches, batches, name = position)
i0 <- i0 + 1L
message("Try ", i0, " iterations ", iterations)
if (!any(table(spatial = batch_names(i), batch = batch_names(index)) > 1L)) {
iterations <- iterations - 1
} else {
next
}
size_subset <- optimum_batches(sum(lengths(index)), batches)
for (j in seq_len(iterations)) {

i <- create_index4index(index, size_subset, name = position, n = batches)

meanDiff <- .check_index(i, pheno_o, num, eval_n, original_pheno)
# Minimize the value
optimize <- sum(rowMeans(abs(meanDiff)))
Expand All @@ -93,5 +84,6 @@ spatial <- function(index, pheno, omit = NULL, remove_positions = NULL, rows = L
val <- i
}
}
translate_index(val, old_rows, new_rows)

val
}
33 changes: 25 additions & 8 deletions tests/testthat/test-indexing.R
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@

test_that("sizes_batches works", {
out <- sizes_batches(size_data = 237, size_subset = 60, batches = 4)
expect_equal(out, c(60, 59, 59, 59))

expect_error(sizes_batches(size_data = 237, size_subset = 59, batches = 4),
"batches or size_subset is too small to fit all the samples.")
})
test_that("sizes_batches works", {
out <- sizes_batches(size_data = 237, size_subset = 60, batches = 4)
expect_equal(out, c(60, 59, 59, 59))

expect_error(sizes_batches(size_data = 237, size_subset = 59, batches = 4),
"batches or size_subset is too small to fit all the samples.")
})

test_that("create_index4index works", {
i1 <- create_index(15, rep.int(5, 3), 3)
i2 <- create_index4index(i1, size_subset = 3, n = 5, name = "spatial")
bn1 <- batch_names(i1)
bn2 <- batch_names(i2)
expect_true(all(table(bn1, bn2) == 1))
})


test_that("translate_index works", {
index <- create_index(45, rep.int(9, 5), 5)
old_rows <- seq_len(47)[-c(2, 7)]
new_rows <- seq_len(45)
ti <- translate_index(index, old_rows, new_rows)
expect_true(all(c(2, 7) %in% unlist(index, FALSE, FALSE)))
expect_false(all(c(2, 7) %in% unlist(ti, FALSE, FALSE)))
})

0 comments on commit faafe91

Please sign in to comment.