From f52ab74a20a8d09afd5f94a411be085ddfe49de8 Mon Sep 17 00:00:00 2001 From: Julia Silge Date: Sun, 1 Apr 2018 21:06:56 -0600 Subject: [PATCH] Code style updates from lintr --- .travis.yml | 2 ++ R/mallet_tidiers.R | 5 ++--- R/sparse_casters.R | 3 +-- R/sparse_tidiers.R | 3 --- R/stm_tidiers.R | 3 ++- R/stop_words.R | 1 - R/unnest_tokens.R | 11 +++++++---- tests/testthat/test-corpus-tidiers.R | 8 ++++---- tests/testthat/test-lda-tidiers.R | 2 +- tests/testthat/test-stm-tidiers.R | 2 +- tests/testthat/test-tf-idf.R | 2 -- tests/testthat/test-unnest-tokens.R | 11 ++++++----- 12 files changed, 26 insertions(+), 27 deletions(-) diff --git a/.travis.yml b/.travis.yml index 88799c3a..296cb993 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,8 @@ addons: r_packages: - covr - data.table # so data.table tests will get checked + - lintr # for code linting after_success: - Rscript -e 'covr::codecov(coverage = covr::package_coverage(line_exclusions = list("R/compat_lazyeval.R")))' + - Rscript -e 'lintr::lint_package()' diff --git a/R/mallet_tidiers.R b/R/mallet_tidiers.R index cf255014..b131e76d 100644 --- a/R/mallet_tidiers.R +++ b/R/mallet_tidiers.R @@ -95,11 +95,11 @@ tidy.jobjRef <- function(x, matrix = c("beta", "gamma"), log = FALSE, ret <- dplyr::tbl_df(reshape2::melt(m)) if (matrix == "beta") { - # per-term-per-topic + # per term per topic colnames(ret) <- c("topic", "term", "beta") ret$term <- x$getVocabulary()[ret$term] } else { - # per-document-per-topic + # per document per topic colnames(ret) <- c("document", "topic", "gamma") ret$document <- x$getDocumentNames()[ret$document] } @@ -145,4 +145,3 @@ augment.jobjRef <- function(x, data, ...) { data$.topic[keep] <- apply(products[keep, ], 1, which.max) data } - diff --git a/R/sparse_casters.R b/R/sparse_casters.R index f93f2ba2..13f4cca1 100644 --- a/R/sparse_casters.R +++ b/R/sparse_casters.R @@ -81,7 +81,7 @@ cast_sparse_ <- function(data, row, column, value) { cast_sparse(data, !! row, !! column) } else { value_col <- compat_lazy(value, caller_env()) - cast_sparse(data, !! row, !! column, !! value) + cast_sparse(data, !! row, !! column, !! value_col) } } @@ -178,4 +178,3 @@ cast_dfm_ <- function(data, document, term, value, ...) { value <- compat_lazy(value, caller_env()) cast_dfm(data, !! document, !! term, !! value) } - diff --git a/R/sparse_tidiers.R b/R/sparse_tidiers.R index 5b8344d5..70551e6f 100644 --- a/R/sparse_tidiers.R +++ b/R/sparse_tidiers.R @@ -65,7 +65,6 @@ tidy.simple_triplet_matrix <- function(x, tidy_triplet(x, triplets, row_names, col_names) } - #' Utility function to tidy a simple triplet matrix #' #' @param x Object with rownames and colnames @@ -90,7 +89,5 @@ tidy_triplet <- function(x, triplets, row_names = NULL, col_names = NULL) { ret } - #' @export broom::tidy - diff --git a/R/stm_tidiers.R b/R/stm_tidiers.R index 0939de34..7339836a 100644 --- a/R/stm_tidiers.R +++ b/R/stm_tidiers.R @@ -78,7 +78,8 @@ #' @name stm_tidiers #' #' @export -tidy.STM <- function(x, matrix = c("beta", "gamma", "theta"), log = FALSE, document_names = NULL, ...) { +tidy.STM <- function(x, matrix = c("beta", "gamma", "theta"), log = FALSE, + document_names = NULL, ...) { matrix <- match.arg(matrix) if (matrix == "beta") { mat <- x$beta diff --git a/R/stop_words.R b/R/stop_words.R index 2fb252f7..90afb7bb 100644 --- a/R/stop_words.R +++ b/R/stop_words.R @@ -48,4 +48,3 @@ get_stopwords <- function(language = "en", source = "snowball") { data_frame(word = stopwords::stopwords(language = language, source = source), lexicon = source) } - diff --git a/R/unnest_tokens.R b/R/unnest_tokens.R index 576b98a6..5e41b166 100644 --- a/R/unnest_tokens.R +++ b/R/unnest_tokens.R @@ -142,11 +142,13 @@ unnest_tokens.data.frame <- function(tbl, output, input, token = "words", format = format) } else { if (is.null(collapse) && token %in% c("ngrams", "skip_ngrams", "sentences", - "lines", "paragraphs", "regex", "character_shingles")) { + "lines", "paragraphs", "regex", + "character_shingles")) { collapse <- TRUE } tf <- get(paste0("tokenize_", token)) - if (token %in% c("characters", "words", "ngrams", "skip_ngrams", "tweets", "ptb")) { + if (token %in% c("characters", "words", "ngrams", "skip_ngrams", + "tweets", "ptb")) { tokenfunc <- function(col, ...) tf(col, lowercase = FALSE, ...) } else { tokenfunc <- tf @@ -156,7 +158,7 @@ unnest_tokens.data.frame <- function(tbl, output, input, token = "words", if (!is.null(collapse) && collapse) { if (any(!purrr::map_lgl(tbl, is.atomic))) { stop("If collapse = TRUE (such as for unnesting by sentence or paragraph), ", - "unnest_tokens needs all columns of input to be atomic vectors (not lists)") + "unnest_tokens needs all input columns to be atomic vectors (not lists)") } group_vars <- setdiff(names(tbl), input) @@ -218,7 +220,8 @@ unnest_tokens_ <- function(tbl, output, input, token = "words", #' @export unnest_tokens_.data.frame <- function(tbl, output, input, token = "words", - format = c("text", "man", "latex", "html", "xml"), + format = c("text", "man", "latex", + "html", "xml"), to_lower = TRUE, drop = TRUE, collapse = NULL, ...) { output <- compat_lazy(output, caller_env()) diff --git a/tests/testthat/test-corpus-tidiers.R b/tests/testthat/test-corpus-tidiers.R index 3f5e8eda..4e437414 100644 --- a/tests/testthat/test-corpus-tidiers.R +++ b/tests/testthat/test-corpus-tidiers.R @@ -29,7 +29,7 @@ test_that("Can tidy corpus from quanteda package", { } }) -test_that("Can tidy corpus from quanteda package using accessor functions", { +test_that("Can tidy corpus from quanteda using accessor functions", { if (requireNamespace("quanteda", quietly = TRUE)) { x <- quanteda::data_corpus_inaugural @@ -45,19 +45,19 @@ test_that("Can tidy corpus from quanteda package using accessor functions", { } }) -test_that("Can glance a corpus from quanteda package using accessor functions", { +test_that("Can glance a corpus from quanteda using accessor functions", { if (requireNamespace("quanteda", quietly = TRUE)) { x <- quanteda::data_corpus_inaugural ## old method - glanceOLD <- function(x, ...) { + glance_old <- function(x, ...) { md <- purrr::compact(x$metadata) # turn vectors into list columns md <- purrr::map_if(md, ~length(.) > 1, list) as_data_frame(md) } - ret_old <- glanceOLD(x) + ret_old <- glance_old(x) ## new method ret_new <- glance(x) diff --git a/tests/testthat/test-lda-tidiers.R b/tests/testthat/test-lda-tidiers.R index 33552079..bd2299a1 100644 --- a/tests/testthat/test-lda-tidiers.R +++ b/tests/testthat/test-lda-tidiers.R @@ -2,7 +2,7 @@ context("LDA tidiers") suppressPackageStartupMessages(library(dplyr)) -if(require("topicmodels", quietly = TRUE)) { +if (require("topicmodels", quietly = TRUE)) { data(AssociatedPress) ap <- AssociatedPress[1:100, ] diff --git a/tests/testthat/test-stm-tidiers.R b/tests/testthat/test-stm-tidiers.R index 5f552483..c3c8c6c1 100644 --- a/tests/testthat/test-stm-tidiers.R +++ b/tests/testthat/test-stm-tidiers.R @@ -2,7 +2,7 @@ context("stm tidiers") suppressPackageStartupMessages(library(dplyr)) -if(require("stm", quietly = TRUE)) { +if (require("stm", quietly = TRUE)) { dat <- data_frame(document = c("row1", "row1", "row2", "row2", "row2"), term = c("col1", "col2", "col1", "col3", "col4"), diff --git a/tests/testthat/test-tf-idf.R b/tests/testthat/test-tf-idf.R index 58d4ce39..7152b246 100644 --- a/tests/testthat/test-tf-idf.R +++ b/tests/testthat/test-tf-idf.R @@ -30,7 +30,6 @@ test_that("Can calculate TF-IDF", { group_by(document) %>% bind_tf_idf(word, document, frequency) - #expect_equal(result, ungroup(result2)) expect_equal(length(groups(result2)), 1) expect_equal(as.character(groups(result2)[[1]]), "document") }) @@ -76,7 +75,6 @@ test_that("tf-idf with tidyeval works", { group_by(document) %>% bind_tf_idf(!! termvar, !! documentvar, !! countvar) - #expect_equal(result, ungroup(result2)) expect_equal(length(groups(result2)), 1) expect_equal(as.character(groups(result2)[[1]]), "document") }) diff --git a/tests/testthat/test-unnest-tokens.R b/tests/testthat/test-unnest-tokens.R index 1d4efb0a..92e793e7 100644 --- a/tests/testthat/test-unnest-tokens.R +++ b/tests/testthat/test-unnest-tokens.R @@ -20,7 +20,7 @@ test_that("tokenizing by character shingles works", { expect_equal(d$char_ngram[1], "tidy") }) -test_that("tokenizing by character shingles works with an option to include whitespaces and punctuation", { +test_that("tokenizing by character shingles can include whitespace/punctuation", { d <- data_frame(txt = "tidytext is the best!") d <- d %>% unnest_tokens(char_ngram, txt, token = "character_shingles", strip_non_alphanum = FALSE) @@ -273,7 +273,7 @@ test_that("Trying to tokenize a data.table works", { expect_equal(output$author[1], "Watsky") }) -test_that("Trying to tokenize a data.table work when the input has only one column", { +test_that("Can tokenize a data.table work when the input has only one column", { skip_if_not_installed("data.table") text <- data.table::data.table(txt = "You gotta bring yourself your flowers now in showbiz") output <- unnest_tokens(text, word, txt) @@ -335,10 +335,11 @@ test_that("Can't tokenize with list columns with collapse = TRUE", { line = 1L:2L, list_col = list(1L:3L, c("a", "b"))) - expect_error(unnest_tokens(df, word, txt, token = "sentences"), "to be atomic vectors") + expect_error(unnest_tokens(df, word, txt, token = "sentences"), + "to be atomic vectors") - # Can tokenize by sentence without collapsing, though it sort of defeats the purpose + # Can tokenize by sentence without collapsing + # though it sort of defeats the purpose ret <- unnest_tokens(df, word, txt, token = "sentences", collapse = FALSE) expect_equal(nrow(ret), 2) }) -