Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix default warning #48

Merged
merged 9 commits into from
Mar 1, 2019
Merged
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: linelist
Title: Tools to Import and Tidy Case Linelist Data
Version: 0.8.0.9000
Version: 0.8.1.9000
Authors@R: c(person("Thibaut", "Jombart", email = "thibautjombart@gmail.com", role = c("aut", "cre")),
person("Zhian N.", "Kamvar", email = "zkamvar@gmail.com", role = c("aut")))
Description: A collection of wrappers for importing case linelist data from usual formats, and tools for cleaning data, detecting dates, and storing meta-information on the content of the resulting \code{data.frame}.
Expand Down
9 changes: 9 additions & 0 deletions NEWS.md
@@ -1,3 +1,12 @@
# linelist 0.8.1.9000

* `clean_spelling()` will no longer throw a warning if there is no value for
.default to replace.
* `clean_variable_spelling()`, `clean_variables()`, and `clean_data()` gain the
`warn` and `warn_spelling` arguments which will capture all errors and
warnings issued from `clean_spelling()` for each variable.
See https://github.com/reconhub/linelist/pull/48 for details).

# linelist 0.8.0.9000

* `compare_data()` allows users to compare _structural changes_ to data frames
Expand Down
6 changes: 4 additions & 2 deletions R/clean_data.R
Expand Up @@ -74,8 +74,9 @@


clean_data <- function(x, sep = "_", force_Date = TRUE, guess_dates = TRUE,
error_tolerance = 0.5, wordlists = NULL, spelling_vars = 3,
sort_by = NULL, protect = FALSE, ...) {
error_tolerance = 0.5, wordlists = NULL,
spelling_vars = 3, sort_by = NULL, warn_spelling = FALSE,
protect = FALSE, ...) {

xname <- deparse(substitute(x))
if (!is.data.frame(x)) {
Expand All @@ -100,6 +101,7 @@ clean_data <- function(x, sep = "_", force_Date = TRUE, guess_dates = TRUE,
wordlists = wordlists,
spelling_vars = spelling_vars,
sort_by = sort_by,
warn_spelling = warn_spelling,
classes = classes)

# Cleaning and guessing dates ------------------------------------------------
Expand Down
22 changes: 20 additions & 2 deletions R/clean_spelling.R
Expand Up @@ -14,6 +14,11 @@
#' @param quiet a `logical` indicating if warnings should be issued if no
#' replacement is made; if `FALSE`, these warnings will be disabled
#'
#' @param warn_default a `logical`. When a `.default` keyword is set and
#' `warn_default = TRUE`, a warning will be issued listing the variables
#' that were changed to the default value. This can be used to update your
#' wordlist.
#'
#'
#' @details
#'
Expand Down Expand Up @@ -57,8 +62,13 @@
#' # You can also set a default value
#' corrections_with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
#' corrections_with_default
#'
#' # a warning will be issued about the data that were converted
#' clean_spelling(my_data, corrections_with_default)
#'
#' # use the warn_default = FALSE, if you are absolutely sure you don't want it.
#' clean_spelling(my_data, corrections_with_default, warn_default = FALSE)
#'
#' # The function will give you a warning if the wordlist does not
#' # match the data
#' clean_spelling(letters, corrections)
Expand All @@ -76,7 +86,7 @@
#' @importFrom rlang "!!!"

clean_spelling <- function(x = character(), wordlist = data.frame(),
quiet = FALSE) {
quiet = FALSE, warn_default = TRUE) {

if (length(x) == 0 || !is.atomic(x)) {
stop("x must be coerceable to a character")
Expand Down Expand Up @@ -176,7 +186,15 @@ clean_spelling <- function(x = character(), wordlist = data.frame(),

# Replace any untranslated variables if .default is defined -----------------
if (length(default) > 0) {
x <- forcats::fct_other(x, keep = c(names(dict), names(nas)), other = names(default))
default_vars <- levels(x)[!levels(x) %in% c(names(dict), names(nas))]
if (warn_default && length(default_vars) > 0) {
was <- if (length(default_vars) > 1) "were" else "was"
msg <- "'%s' %s changed to the default value ('%s')"
warning(sprintf(msg, paste(default_vars, collapse = "', '"), was, names(default)))
}
suppressWarnings({
x <- forcats::fct_other(x, keep = c(names(dict), names(nas)), other = names(default))
})
}

# Make sure order is preserved if it's a factor -----------------------------
Expand Down
31 changes: 26 additions & 5 deletions R/clean_variable_spelling.R
Expand Up @@ -20,6 +20,9 @@
#' each data frame. If the incoming variables are factors, this determines how
#' the resulting factors will be sorted.
#'
#' @param warn if `TRUE`, warnings and errors from [clean_spelling()] will be
#' shown as a single warning. Defaults to `FALSE`, which shows nothing.
#'
#' @inheritParams clean_variable_labels
#'
#'
Expand Down Expand Up @@ -131,7 +134,7 @@
#' head(res)
#' as.list(head(res))

clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelling_vars = 3, sort_by = NULL, classes = NULL) {
clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelling_vars = 3, sort_by = NULL, classes = NULL, warn = FALSE) {

if (length(x) == 0 || !is.data.frame(x)) {
stop("x must be a data frame")
Expand All @@ -154,11 +157,11 @@ clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelli
# There is a spelling_varsing column ----------------------------------------
if (!is.null(spelling_vars) && length(spelling_vars) == 1) {
is_number <- is.numeric(spelling_vars) && # spelling_vars is a number
as.integer(spelling_vars) == spelling_vars && # ... and an integer
spelling_vars <= ncol(wordlists) # ... and is within the bounds
as.integer(spelling_vars) == spelling_vars && # ... and an integer
spelling_vars <= ncol(wordlists) # ... and is within the bounds

is_name <- is.character(spelling_vars) && # spelling_vars is a name
any(names(wordlists) == spelling_vars) # ... in the wordlists
any(names(wordlists) == spelling_vars) # ... in the wordlists
if (is_number || is_name) {
wordlists <- split(wordlists, wordlists[[spelling_vars]])
} else {
Expand Down Expand Up @@ -223,11 +226,29 @@ clean_variable_spelling <- function(x = data.frame(), wordlists = list(), spelli
to_iterate <- unique(c(to_iterate, unprotected))
}
}

# Prepare warning/error labels ---------------------------------------------
warns <- vector(mode = "list", length = length(to_iterate)) -> errs
iter_print <- gsub(" ", "_", format(to_iterate))
names(iter_print) <- to_iterate

# Loop over the variables and clean spelling --------------------------------
for (i in to_iterate) {
d <- if (ddf) wordlists else wordlists[[i]]
d <- if (is.null(d)) global_words else d
try(x[[i]] <- clean_spelling(x[[i]], d, quiet = TRUE))
# Evaluate and collect any warnings/errors that pop up
w <- withWarnings(clean_spelling(x[[i]], d, quiet = FALSE))
x[[i]] <- if(is.null(w$val)) x[[i]] else w$val
if (warn) {
warns[[i]] <- collect_ya_errs(w$warnings, iter_print[i])
errs[[i]] <- collect_ya_errs(w$errors, iter_print[i])
}
}

# Process warnings and errors and give a warning if there were any
if (warn) {
wemsg <- process_werrors(warns, errs)
if (!is.null(wemsg)) warning(wemsg)
}

x
Expand Down
10 changes: 9 additions & 1 deletion R/clean_variables.R
Expand Up @@ -8,11 +8,17 @@
#' @param protect a logical or numeric vector defining the columns to protect
#' from any manipulation. Note: columns in `protect` will override any columns
#' in either `force_Date` or `guess_dates`.
#'
#' @param warn_spelling if `TRUE`, errors and warnings from [clean_spelling()]
#' will be aggregated and presented for each column that issues them. The
#' default value is `FALSE`, which means that all errors and warnings will be
#' ignored.
#'
#' @export
#' @author Zhian N. Kamvar
#' @seealso [clean_variable_labels()] to standardise text,
#' [clean_variable_spelling()] to correct spelling with a wordlist.
#'
#' @inheritParams clean_variable_spelling
#'
#' @examples
Expand Down Expand Up @@ -45,7 +51,8 @@
#' spelling_vars = "variable"
#' )
clean_variables <- function(x, sep = "_", wordlists = NULL, spelling_vars = 3,
sort_by = NULL, protect = FALSE, classes = NULL) {
sort_by = NULL, protect = FALSE, classes = NULL,
warn_spelling = FALSE) {

xname <- deparse(substitute(x))
if (!is.data.frame(x)) {
Expand All @@ -71,6 +78,7 @@ clean_variables <- function(x, sep = "_", wordlists = NULL, spelling_vars = 3,
wordlists = wordlists,
spelling_vars = spelling_vars,
sort_by = sort_by,
warn = warn_spelling,
classes = classes)
}

Expand Down
45 changes: 45 additions & 0 deletions R/warnings_errors.R
@@ -0,0 +1,45 @@

withWarnings <- function(expr) {
myWarnings <- NULL
myErrors <- NULL
wHandler <- function(w) {
myWarnings <<- c(myWarnings, list(w))
invokeRestart("muffleWarning")
}
eHandler <- function(e) {
myErrors <<- c(myErrors, list(e))
NULL
}
val <- withCallingHandlers(tryCatch(expr, error = eHandler), warning = wHandler)
list(value = val, warnings = myWarnings, errors = myErrors)
}

collect_ya_errs <- function(e, fmt) {
if (is.null(e)) return(NULL)
warn <- vapply(e, "[[", character(1), "message")
warn <- paste0(" ", warn, collapse = "\n ....")
paste(sprintf(" %s__:\n ....%s", fmt, warn), collapse = "\n")
}

process_werrors <- function(warns, errs) {
warns <- warns[lengths(warns) > 0]
errs <- errs[lengths(errs) > 0]
warned <- length(warns) > 0
errored <- length(errs) > 0
if (warned || errored) {

wrn <- "" -> err
if (warned) {
wngs <- do.call("paste", c(warns, sep = "\n"))
wrn <- sprintf("The following warnings were found...\n%s", wngs)
}
if (errored) {
errs <- do.call("paste", c(errs, sep = "\n"))
err <- sprintf("The following errors were found:\n%s", errs)
}
res <- sprintf("%s\n%s", wrn, err)
if (res == "\n") NULL else res
} else {
NULL
}
}
7 changes: 6 additions & 1 deletion man/clean_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 11 additions & 1 deletion man/clean_spelling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/clean_variable_spelling.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion man/clean_variables.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

21 changes: 18 additions & 3 deletions tests/testthat/test-clean_spelling.R
Expand Up @@ -71,10 +71,25 @@ test_that("clean_spelling will throw a warning if there are duplicated keys", {

test_that("clean_spelling will take in a default value", {

with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
with_default <- rbind(corrections, c(bad = ".default", good = "check me"))
cleaned_default <- cleaned_data
cleaned_default[!cleaned_default %in% with_default$good] <- "unknown"
expect_identical(clean_spelling(my_data, with_default), cleaned_default)
cleaned_default[!cleaned_default %in% with_default$good] <- "check me"
d_warn <- "'a', 'b', 'c', 'd', 'e', 'fumar' were changed to the default value \\('check me'\\)"
expect_warning({
my_cleaned <- clean_spelling(my_data, with_default)
}, d_warn)
expect_identical(my_cleaned, cleaned_default)

})


test_that("nothing to default will not throw a warning", {


with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
cleaned_default <- cleaned_data[6:7]
expect_failure(expect_warning(clean_spelling(my_data[6:7], with_default)))


})

Expand Down