Skip to content

Commit

Permalink
Preparation for CRAN-submission (v0.3.8)
Browse files Browse the repository at this point in the history
  • Loading branch information
sjentsch committed Jul 14, 2023
1 parent 6fffda2 commit 5263a2f
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 12 deletions.
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* adjustments to make `jmvReadWrite` more suited for using it together with the jamovi-module `jTransform` (https://github.com/sjentsch/jTransform)
* implement reading either data frames or file names for merge_cols_omv and merge_rows_omv (incl. phasing out fleInp as parameter for the helper functions: it now throws an error to prevent using it)
* initial handling of weights in `read_omv` and `write_omv`
* correction / bug fix for `long2wide_omv` (the original bug led to incorrect transformations when transforming complex data sets)
* improvements and corrections for `long2wide_omv` (added aggregation of multiple occurrences, using either the mean or the first occurrence, and fixed a bug that led to incorrect naming of variables when transforming complex data sets)
* improved unit tests (implementation of regular expressions for expect_error and expect_warning, bug-fixes and additional coverage)
* improvements to setting attributes (`setAtt`, e.g., from the metadata to the data frame and vice versa)
* improved handling of factors with numerical values (measureType Nominal or Ordinal and dataType Integer)
Expand Down
17 changes: 9 additions & 8 deletions R/long2wide_omv.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#' @param varTgt Names of one or more variables to be transformed / reshaped (other variables are excluded, if empty(c()) all variables except varTme, varID and varExc are included; default: c())
#' @param varSep Separator character when concatenating the fixed and time-varying part of the variable name ("VAR1_1", "VAR1_2"; default: "_")
#' @param varOrd How variables / columns are organized: for "times" (default) the steps of the time varying variable are adjacent, for "vars" the steps of the original columns in the long dataset
#' @param varAgg How multiple occurrences of particular combinations of time varying variables are aggregated: either "mean" (calculate the mean over occurrences), or "first" (take the first occurrence)
#' @param varSrt Variable(s) that are used to sort the data frame (see Details; if empty, the order returned from reshape is kept; default: c())
#' @param usePkg Name of the package: "foreign" or "haven" that shall be used to read SPSS, Stata and SAS files; "foreign" is the default (it comes with base R), but "haven" is newer and more comprehensive
#' @param selSet Name of the data set that is to be selected from the workspace (only applies when reading .RData-files)
Expand Down Expand Up @@ -105,7 +106,7 @@ long2wide_omv <- function(dtaInp = NULL, fleOut = "", varID = "ID", varTme = c()
# [b] store the original variable labels, the original time-varying / target variable,
# and an empty vector for storing labels
lstLbl <- list(orgLbl = sapply(dtaFrm, attr, "jmv-desc"), orgTgt = varTgt)

# [c] there might be several occurrences for each combination of varID and varTme; aggregate them
dtaFrm <- aggDta(dtaFrm = dtaFrm, varAgg = varAgg, varID = varID, varTme = varTme, varExc = varExc, varTgt = varTgt)

Expand Down Expand Up @@ -148,16 +149,16 @@ aggDta <- function(dtaFrm = NULL, varAgg = "", varID = c(), varTme = c(), varExc
# if there exists only one occurrence of each possible combination of the variables in varID and
# varTme, the data don't need to be aggregated, just return the data frame with the relevant
# columns selected
if (!any(aggregate(dtaFrm[, varTgt[1]], by = dtaFrm[, c(varID, varTme)], FUN = length)[["x"]] > 1)) {
if (!any(stats::aggregate(dtaFrm[, varTgt[1]], by = dtaFrm[, c(varID, varTme)], FUN = length)[["x"]] > 1)) {
dtaFrm[, c(varID, varTme, varExc, varTgt)]
# otherwise (with more than one occurrence), values are aggregated at each possible combination of the
# variables in varID and varTme
} else if (varAgg == "first") {
# [1] if "first" is chosen as aggregation function, the first occurrence at each step is returned
aggregate(x = dtaFrm[, c(varTgt, varExc), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = "[[", 1)
stats::aggregate(x = dtaFrm[, c(varTgt, varExc), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = "[[", 1)
} else if (varAgg == "mean") {
# [2] if "mean" is chosen as aggregation function, it becomes (a little) more complicated
# [a] the target variables (for which the mean is calculated) should be numeric
# [a] the target variables (for which the mean is calculated) should be numeric
if (!all(sapply(dtaFrm[, varTgt], is.numeric))) {
stop(paste("In order to calculate the mean when aggregating the data, all target variables (varTgt) need to be numeric. Use varAgg = \"first\" instead",
"(to use the first occuring value) or convert the target variables to numeric."))
Expand All @@ -170,12 +171,12 @@ aggDta <- function(dtaFrm = NULL, varAgg = "", varID = c(), varTme = c(), varExc
# participant [ID]); finally the results from the two aggregate-functions are merged again
# to return the complete data set
if (length(varExc) > 0) {
merge(aggregate(x = dtaFrm[, c(varTgt), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = mean),
aggregate(x = dtaFrm[, c(varExc), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = "[[", 1))
merge(stats::aggregate(x = dtaFrm[, c(varTgt), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = mean),
stats::aggregate(x = dtaFrm[, c(varExc), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = "[[", 1))
# [c] if there is no “excluded” variable, the mean is calculated for the target variables
# at each possible combination of the variables varID and varTme
} else {
aggregate(x = dtaFrm[, c(varTgt), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = mean)
stats::aggregate(x = dtaFrm[, c(varTgt), drop = FALSE], by = dtaFrm[, c(varID, varTme), drop = FALSE], FUN = mean)
}
}
}
Expand All @@ -191,7 +192,7 @@ rstLbl <- function(dtaFrm = NULL, lstLbl = list(), varTgt = c(), varTme = c(), v
attr(dtaFrm[[varTgt[i]]], "jmv-desc") <-
sprintf("%s (%s)", lstLbl$orgLbl[[crrNme]], paste0(apply(rbind(varTme, splTgt[[i]][-1]), 2, paste0, collapse = ": "), collapse = ", "))
}
}
}
}
}

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -198,9 +198,9 @@ list.files(".", "Trial.omv")
#> [1] "Trial.omv"
file.info("Trial.omv")
#> size isdir mode mtime ctime
#> Trial.omv 1573 FALSE 664 2023-07-13 17:04:11 2023-07-13 17:04:11
#> Trial.omv 1573 FALSE 664 2023-07-14 21:45:22 2023-07-14 21:45:22
#> atime uid gid uname grname
#> Trial.omv 2023-07-13 17:04:11 1000 1000 sjentsch sjentsch
#> Trial.omv 2023-07-14 21:45:22 1000 1000 sjentsch sjentsch
unlink("Trial.omv")
```

Expand Down
3 changes: 3 additions & 0 deletions man/long2wide_omv.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion tests/testthat/test-long2wide_omv.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ test_that("long2wide_omv works", {
expect_equal(unname(as.matrix(df4Chk[3:8])), cbind(matrix(avgTmp[, "rspCrr"], ncol = 3), matrix(avgTmp[, "rspTme"], ncol = 3)))
expect_equal(unname(unlist(sapply(df4Chk, attr, "jmv-desc"))), c(unname(unlist(lblTmp[1:2])), sprintf("%s (cond: %s)",
rep(unname(unlist(lblTmp[6:7])), each = 3), rep(c("cong", "incong", "neutral"), times = 2))))

dtaTmp$rspCrr <- as.factor(dtaTmp$rspCrr)
expect_error(long2wide_omv(dtaInp = dtaTmp, varID = "ID", varTme = "cond", varTgt = c("rspCrr", "rspTme"), varExc = "sex"),
regexp = "^In order to calculate the mean when aggregating the data, all target variables \\(varTgt\\) need to be numeric\\.")
Expand Down

0 comments on commit 5263a2f

Please sign in to comment.