Skip to content

Commit

Permalink
Merge pull request #48 from reconhub/fix-default-warning
Browse files Browse the repository at this point in the history
Fix default warning
  • Loading branch information
zkamvar committed Mar 1, 2019
2 parents 8d527ae + 4cb86f0 commit 6d10845
Show file tree
Hide file tree
Showing 13 changed files with 192 additions and 18 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: linelist
Title: Tools to Import and Tidy Case Linelist Data
Version: 0.8.0.9000
Version: 0.8.1.9000
Authors@R: c(person("Thibaut", "Jombart", email = "thibautjombart@gmail.com", role = c("aut", "cre")),
person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("aut")))
Description: A collection of wrappers for importing case linelist data from usual formats, and tools for cleaning data, detecting dates, and storing meta-information on the content of the resulting \code{data.frame}.
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# linelist 0.8.1.9000

* `clean_spelling()` will no longer throw a warning if there is no value for
.default to replace.
* `clean_variable_spelling()`, `clean_variables()`, and `clean_data()` gain the
`warn` and `warn_spelling` arguments which will capture all errors and
warnings issued from `clean_spelling()` for each variable.
See https://github.com/reconhub/linelist/pull/48 for details).

# linelist 0.8.0.9000

* `compare_data()` allows users to compare _structural changes_ to data frames
Expand Down
6 changes: 4 additions & 2 deletions R/clean_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,9 @@


clean_data <- function(x, sep = "_", force_Date = TRUE, guess_dates = TRUE,
error_tolerance = 0.5, wordlists = NULL, spelling_vars = 3,
sort_by = NULL, protect = FALSE, ...) {
error_tolerance = 0.5, wordlists = NULL,
spelling_vars = 3, sort_by = NULL, warn_spelling = FALSE,
protect = FALSE, ...) {

xname <- deparse(substitute(x))
if (!is.data.frame(x)) {
Expand All @@ -100,6 +101,7 @@ clean_data <- function(x, sep = "_", force_Date = TRUE, guess_dates = TRUE,
wordlists = wordlists,
spelling_vars = spelling_vars,
sort_by = sort_by,
warn_spelling = warn_spelling,
classes = classes)

# Cleaning and guessing dates ------------------------------------------------
Expand Down
22 changes: 20 additions & 2 deletions R/clean_spelling.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
#' @param quiet a `logical` indicating if warnings should be issued if no
#' replacement is made; if `FALSE`, these warnings will be disabled
#'
#' @param warn_default a `logical`. When a `.default` keyword is set and
#' `warn_default = TRUE`, a warning will be issued listing the variables
#' that were changed to the default value. This can be used to update your
#' wordlist.
#'
#'
#' @details
#'
Expand Down Expand Up @@ -57,8 +62,13 @@
#' # You can also set a default value
#' corrections_with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
#' corrections_with_default
#'
#' # a warning will be issued about the data that were converted
#' clean_spelling(my_data, corrections_with_default)
#'
#' # use the warn_default = FALSE, if you are absolutely sure you don't want it.
#' clean_spelling(my_data, corrections_with_default, warn_default = FALSE)
#'
#' # The function will give you a warning if the wordlist does not
#' # match the data
#' clean_spelling(letters, corrections)
Expand All @@ -76,7 +86,7 @@
#' @importFrom rlang "!!!"

clean_spelling <- function(x = character(), wordlist = data.frame(),
quiet = FALSE) {
quiet = FALSE, warn_default = TRUE) {

if (length(x) == 0 || !is.atomic(x)) {
stop("x must be coerceable to a character")
Expand Down Expand Up @@ -176,7 +186,15 @@ clean_spelling <- function(x = character(), wordlist = data.frame(),

# Replace any untranslated variables if .default is defined -----------------
if (length(default) > 0) {
x <- forcats::fct_other(x, keep = c(names(dict), names(nas)), other = names(default))
default_vars <- levels(x)[!levels(x) %in% c(names(dict), names(nas))]
if (warn_default && length(default_vars) > 0) {
was <- if (length(default_vars) > 1) "were" else "was"
msg <- "'%s' %s changed to the default value ('%s')"
warning(sprintf(msg, paste(default_vars, collapse = "', '"), was, names(default)))
}
suppressWarnings({
x <- forcats::fct_other(x, keep = c(names(dict), names(nas)), other = names(default))
})
}

# Make sure order is preserved if it's a factor -----------------------------
Expand Down
31 changes: 26 additions & 5 deletions R/clean_variable_spelling.R
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
#' each data frame. If the incoming variables are factors, this determines how
#' the resulting factors will be sorted.
#'
#' @param warn if `TRUE`, warnings and errors from [clean_spelling()] will be
#' shown as a single warning. Defaults to `FALSE`, which shows nothing.
#'
#' @inheritParams clean_variable_labels
#'
#'
Expand Down Expand Up @@ -131,7 +134,7 @@
#' head(res)
#' as.list(head(res))

clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelling_vars = 3, sort_by = NULL, classes = NULL) {
clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelling_vars = 3, sort_by = NULL, classes = NULL, warn = FALSE) {

if (length(x) == 0 || !is.data.frame(x)) {
stop("x must be a data frame")
Expand All @@ -154,11 +157,11 @@ clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelli
# There is a spelling_varsing column ----------------------------------------
if (!is.null(spelling_vars) && length(spelling_vars) == 1) {
is_number <- is.numeric(spelling_vars) && # spelling_vars is a number
as.integer(spelling_vars) == spelling_vars && # ... and an integer
spelling_vars <= ncol(wordlists) # ... and is within the bounds
as.integer(spelling_vars) == spelling_vars && # ... and an integer
spelling_vars <= ncol(wordlists) # ... and is within the bounds

is_name <- is.character(spelling_vars) && # spelling_vars is a name
any(names(wordlists) == spelling_vars) # ... in the wordlists
any(names(wordlists) == spelling_vars) # ... in the wordlists
if (is_number || is_name) {
wordlists <- split(wordlists, wordlists[[spelling_vars]])
} else {
Expand Down Expand Up @@ -223,11 +226,29 @@ clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelli
to_iterate <- unique(c(to_iterate, unprotected))
}
}

# Prepare warning/error labels ---------------------------------------------
warns <- vector(mode = "list", length = length(to_iterate)) -> errs
iter_print <- gsub(" ", "_", format(to_iterate))
names(iter_print) <- to_iterate

# Loop over the variables and clean spelling --------------------------------
for (i in to_iterate) {
d <- if (ddf) wordlists else wordlists[[i]]
d <- if (is.null(d)) global_words else d
try(x[[i]] <- clean_spelling(x[[i]], d, quiet = TRUE))
# Evaluate and collect any warnings/errors that pop up
w <- withWarnings(clean_spelling(x[[i]], d, quiet = FALSE))
x[[i]] <- if(is.null(w$val)) x[[i]] else w$val
if (warn) {
warns[[i]] <- collect_ya_errs(w$warnings, iter_print[i])
errs[[i]] <- collect_ya_errs(w$errors, iter_print[i])
}
}

# Process warnings and errors and give a warning if there were any
if (warn) {
wemsg <- process_werrors(warns, errs)
if (!is.null(wemsg)) warning(wemsg)
}

x
Expand Down
10 changes: 9 additions & 1 deletion R/clean_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,17 @@
#' @param protect a logical or numeric vector defining the columns to protect
#' from any manipulation. Note: columns in `protect` will override any columns
#' in either `force_Date` or `guess_dates`.
#'
#' @param warn_spelling if `TRUE`, errors and warnings from [clean_spelling()]
#' will be aggregated and presented for each column that issues them. The
#' default value is `FALSE`, which means that all errors and warnings will be
#' ignored.
#'
#' @export
#' @author Zhian N. Kamvar
#' @seealso [clean_variable_labels()] to standardise text,
#' [clean_variable_spelling()] to correct spelling with a wordlist.
#'
#' @inheritParams clean_variable_spelling
#'
#' @examples
Expand Down Expand Up @@ -45,7 +51,8 @@
#' spelling_vars = "variable"
#' )
clean_variables <- function(x, sep = "_", wordlists = NULL, spelling_vars = 3,
sort_by = NULL, protect = FALSE, classes = NULL) {
sort_by = NULL, protect = FALSE, classes = NULL,
warn_spelling = FALSE) {

xname <- deparse(substitute(x))
if (!is.data.frame(x)) {
Expand All @@ -71,6 +78,7 @@ clean_variables <- function(x, sep = "_", wordlists = NULL, spelling_vars = 3,
wordlists = wordlists,
spelling_vars = spelling_vars,
sort_by = sort_by,
warn = warn_spelling,
classes = classes)
}

Expand Down
45 changes: 45 additions & 0 deletions R/warnings_errors.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@

withWarnings <- function(expr) {
myWarnings <- NULL
myErrors <- NULL
wHandler <- function(w) {
myWarnings <<- c(myWarnings, list(w))
invokeRestart("muffleWarning")
}
eHandler <- function(e) {
myErrors <<- c(myErrors, list(e))
NULL
}
val <- withCallingHandlers(tryCatch(expr, error = eHandler), warning = wHandler)
list(value = val, warnings = myWarnings, errors = myErrors)
}

collect_ya_errs <- function(e, fmt) {
if (is.null(e)) return(NULL)
warn <- vapply(e, "[[", character(1), "message")
warn <- paste0(" ", warn, collapse = "\n ....")
paste(sprintf(" %s__:\n ....%s", fmt, warn), collapse = "\n")
}

process_werrors <- function(warns, errs) {
warns <- warns[lengths(warns) > 0]
errs <- errs[lengths(errs) > 0]
warned <- length(warns) > 0
errored <- length(errs) > 0
if (warned || errored) {

wrn <- "" -> err
if (warned) {
wngs <- do.call("paste", c(warns, sep = "\n"))
wrn <- sprintf("The following warnings were found...\n%s", wngs)
}
if (errored) {
errs <- do.call("paste", c(errs, sep = "\n"))
err <- sprintf("The following errors were found:\n%s", errs)
}
res <- sprintf("%s\n%s", wrn, err)
if (res == "\n") NULL else res
} else {
NULL
}
}
7 changes: 6 additions & 1 deletion man/clean_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/clean_spelling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/clean_variable_spelling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/clean_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 18 additions & 3 deletions tests/testthat/test-clean_spelling.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,25 @@ test_that("clean_spelling will throw a warning if there are duplicated keys", {

test_that("clean_spelling will take in a default value", {

with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
with_default <- rbind(corrections, c(bad = ".default", good = "check me"))
cleaned_default <- cleaned_data
cleaned_default[!cleaned_default %in% with_default$good] <- "unknown"
expect_identical(clean_spelling(my_data, with_default), cleaned_default)
cleaned_default[!cleaned_default %in% with_default$good] <- "check me"
d_warn <- "'a', 'b', 'c', 'd', 'e', 'fumar' were changed to the default value \\('check me'\\)"
expect_warning({
my_cleaned <- clean_spelling(my_data, with_default)
}, d_warn)
expect_identical(my_cleaned, cleaned_default)

})


test_that("nothing to default will not throw a warning", {


with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
cleaned_default <- cleaned_data[6:7]
expect_failure(expect_warning(clean_spelling(my_data[6:7], with_default)))


})

Expand Down

0 comments on commit 6d10845

Please sign in to comment.