Skip to content

Commit

Permalink
Fix the index from index problem!
Browse files Browse the repository at this point in the history
Related to #51.

Might need to check code
  • Loading branch information
llrs committed May 7, 2024
1 parent 956bb21 commit a4f284e
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 51 deletions.
52 changes: 9 additions & 43 deletions R/indexing.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,53 +43,19 @@ create_index <- function(size_data, size_batches, n, name = "SubSet") {
}

# Shuffle sample within index to improve positioning
create_index4index <- function(index, size_subset, n, name) {
index_out <- vector("list", n)
names(index_out) <- id2batch_names(name, n)
create_index4index <- function(index, name) {

# Find if there are duplicates: aka controls
li <- table(unlist(index))
dups <- as.numeric(names(li)[li != 1])

duplicated_positions <- length(dups) > 1L
m <- matrix(data = NA, nrow = length(name), ncol = length(index),
dimnames = list(name, names(index)))

for (batch in seq_along(index)) {
pos <- index[[batch]]

# Store which are already filled in this batch
index_f <- NULL

# Put each position to the right new batch
for (position in pos) {
# Exclude position already filled in the batch
# Pick a batch from the new index to place the previous position
# Which hasn't been picked within the batch
i_lengths <- lengths(index_out)
names(i_lengths) <- NULL
# Pick from the ones that can be filled
pp <- which(i_lengths < size_subset)
pp <- setdiff(pp, index_f)

# If this sample is duplicated, exclude that position too
if (duplicated_positions) {
dups_out <- vapply(index_out, function(x){any(x %in% dups)}, logical(1L))
pp <- setdiff(pp, which(dups_out))
}

# Somehow sometimes there is an error with sample!
if (length(pp) < 1) {
warning("Some sample seem to be missing")
next
}
i_index <- sample(pp, 1)
index_f <- c(index_f, i_index)

# Add the position to the index at the right batch
index_out[[i_index]] <- c(index_out[[i_index]], position)
}
positions <- sample(index[[batch]])
rows <- sample(seq_along(positions))
m[rows, batch] <- positions
}

index_out
# Transform to a list format omitting the empty values
index_out <- apply(m, 1, function(x){x[!is.na(x)]}, simplify = FALSE)
index_out[lengths(index_out) != 0]
}

id2batch_names <- function(name, n) {
Expand Down
3 changes: 1 addition & 2 deletions R/spatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,9 @@ spatial <- function(index, pheno, omit = NULL, remove_positions = NULL, rows = L
eval_n <- evaluations(num)

n_positions <- length(position)
size_subset <- optimum_batches(sum(lengths(index)), n_positions)
for (j in seq_len(iterations)) {

i <- create_index4index(i2, size_subset, name = position, n = n_positions)
i <- create_index4index(i2, name = position)
meanDiff <- .check_index(i, pheno_o, num, eval_n, original_pheno)
# Minimize the value
optimize <- sum(rowMeans(abs(meanDiff)))
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-indexing.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ test_that("sizes_batches works", {

test_that("create_index4index works", {
i1 <- create_index(15, rep.int(5, 3), 3)
i2 <- create_index4index(i1, size_subset = 3, n = 5, name = "spatial")
i2 <- create_index4index(i1, paste0("spatial", 1:5))
bn1 <- batch_names(i1)
bn2 <- batch_names(i2)
expect_true(all(table(bn1, bn2) == 1))
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-inspect.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ test_that("inspect with translate_index", {
index2 <- spatial(index1, survey[-nas, columns], iterations = 25)
i1 <- inspect(index1, survey[-nas, columns])
i2 <- inspect(index2, i1, index_name = "spatial")
expect_true(all(table(i2$batch, i2$spatial)<= 1))
expect_true(all(table(i2$batch, i2$spatial) <= 1))
})

test_that("Warning on duplidate names", {
Expand Down
8 changes: 4 additions & 4 deletions tests/testthat/test-spatial.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@ test_that("spatial works", {
index2 <- spatial(index, survey[-nas, c("Sex", "Smoke", "Age")],
rows = LETTERS[1:9],
columns = 1:12, iterations = 25)
expect_length(index2, 9*12)
expect_equal(names(index2), position_name(rows = LETTERS[1:9], 1:12)$name)
expect_lte(length(index2), 9*12)
expect_true(all(names(index2) %in% position_name(rows = LETTERS[1:9], 1:12)$name))
expect_false(any(table(batch_names(index), batch_names(index2)) > 1))
})

Expand All @@ -19,9 +19,9 @@ test_that("spatial works with replicates", {
index <- replicates(survey[-nas, c("Sex", "Smoke", "Age")], size_subset = 50,
iterations = 25, controls = 15)
index2 <- spatial(index, survey[-nas, c("Sex", "Smoke", "Age")], iterations = 25)
expect_length(index2, 50)
expect_lte(length(index2), 50)
expect_false(any(is.na(unlist(index2))))
expect_equal(names(index2), position_name(rows = LETTERS[1:5], 1:10)$name)
expect_true(all(names(index2) %in% position_name(rows = LETTERS[1:5], 1:10)$name))
expect_true(all(sort(unlist(index2)) == seq_len(325)))
})

Expand Down

0 comments on commit a4f284e

Please sign in to comment.