Skip to content

Commit

Permalink
Identify columns with the same name, and either delete them (if they …
Browse files Browse the repository at this point in the history
…have the same content) or rename them and copy their attributes (+ respective unit tests)
  • Loading branch information
sjentsch committed Nov 18, 2023
1 parent 83c4a6a commit fb79c34
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
19 changes: 18 additions & 1 deletion R/merge_cols_omv.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ merge_cols_omv <- function(dtaInp = NULL, fleOut = "", typMrg = c("outer", "inne
# store attributes and remove empty lines from the data sets
attCol <- list()
for (i in seq_along(dtaFrm)) {
attCol <- c(attCol, sapply(dtaFrm[[i]][, setdiff(names(dtaFrm[[i]]), names(attCol))], attributes))
attCol <- c(attCol, sapply(dtaFrm[[i]][, setdiff(names(dtaFrm[[i]]), names(attCol))], attributes, simplify = FALSE))
dtaFrm[[i]] <- dtaFrm[[i]][!apply(is.na(dtaFrm[[i]]), 1, all), ]
}
attDF <- attributes(dtaFrm[[1]])
Expand All @@ -108,6 +108,23 @@ merge_cols_omv <- function(dtaInp = NULL, fleOut = "", typMrg = c("outer", "inne
tmpMrg <- dtaFrm[[1]]
for (i in setdiff(seq_along(dtaFrm), 1)) {
tmpMrg <- do.call(merge, c(list(x = tmpMrg, y = dtaFrm[[i]], by.x = varBy[[1]], by.y = varBy[[i]]), crrArg[!grepl("^x$|^y$|^by.x$|^by.y$", names(crrArg))]))
# if there are duplicate columns (i.e., columns with the same name in two of the input data sets), unify them
for (unfClm in setdiff(names(attCol), names(tmpMrg))) {
dplClm <- grep(paste0(unfClm, "\\."), names(tmpMrg))
if (length(dplClm) == 0) next
names(tmpMrg)[dplClm[1]] <- unfClm
rmvClm <- NULL
for (i in seq(2, length(dplClm))) {
if (identical(tmpMrg[, dplClm[1]], tmpMrg[, dplClm[i]])) {
rmvClm <- c(rmvClm, -dplClm[i])
} else {
addClm <- sprintf("%s_%d", unfClm, sum(grepl(paste0(unfClm, "_"), names(tmpMrg))) + 2)
names(tmpMrg)[dplClm[i]] <- addClm
attCol[[addClm]] <- attCol[[unfClm]]
}
}
if (length(rmvClm) > 0) tmpMrg <- tmpMrg[, rmvClm]
}
}
dtaFrm <- tmpMrg

Expand Down
31 changes: 31 additions & 0 deletions tests/testthat/test-merge_cols_omv.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,37 @@ test_that("merge_cols_omv works", {
dtaFrm <- merge_cols_omv(dtaInp = nmeInp[-2], typMrg = "inner", varBy = "ID", varSrt = c("gender_3", "age_3"))
expect_s3_class(dtaFrm, "data.frame")
expect_equal(dim(dtaFrm), c(245, 57))
unlink(nmeInp)

nmeInp <- vector(mode = "character", length = 3)
dtaTmp <- jmvReadWrite::bfi_sample2
for (i in seq_along(nmeInp)) {
nmeInp[i] <- tempfile(fileext = ".rds")
saveRDS(dtaTmp, nmeInp[i])
}
dtaFrm <- merge_cols_omv(dtaInp = nmeInp, typMrg = "outer", varBy = "ID", varSrt = c("ID"))
expect_s3_class(dtaFrm, "data.frame")
expect_equal(dim(dtaFrm), c(250, 29))
expect_true(all(dtaFrm == dtaTmp[order(dtaTmp[, "ID"]), ]))
unlink(nmeInp)

nmeInp <- vector(mode = "character", length = 5)
dtaTmp <- jmvReadWrite::bfi_sample2
for (i in seq_along(nmeInp)) {
nmeInp[i] <- tempfile(fileext = ".rds")
strAtt <- attributes(dtaTmp[, "age"])
dtaTmp[, "age"] <- dtaTmp[sample(nrow(dtaTmp)), "age"]
attributes(dtaTmp[, "age"]) <- strAtt
saveRDS(dtaTmp, nmeInp[i])
}
dtaFrm <- merge_cols_omv(dtaInp = nmeInp, typMrg = "outer", varBy = "ID", varSrt = c("ID"))
dplClm <- gsub("age_1", "age", paste0("age_", seq(5)))
expect_s3_class(dtaFrm, "data.frame")
expect_equal(dim(dtaFrm), c(250, 33))
expect_true(all(dplClm %in% names(dtaFrm)))
expect_true(all(apply(sapply(dtaFrm[, dplClm], sort), 1, diff) == 0))
expect_true(all(diff(colMeans(dtaFrm[, dplClm])) == 0))
expect_true(all(sapply(dtaFrm[, dplClm], attributes) == "Age of the respondent (years)"))

# test cases for code coverage ============================================================================================================================
expect_error(merge_cols_omv(fleInp = nmeInp, typMrg = "outer", varBy = "ID"), regexp = "Please use the argument dtaInp instead of fleInp\\.")
Expand Down

0 comments on commit fb79c34

Please sign in to comment.