From f52ab74a20a8d09afd5f94a411be085ddfe49de8 Mon Sep 17 00:00:00 2001
From: Julia Silge <julia.silge@gmail.com>
Date: Sun, 1 Apr 2018 21:06:56 -0600
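Subject: [PATCH] Code style updates from lintr

Besides whitespace and line-length cleanups, this patch also:
- makes cast_sparse_() unquote the converted `value_col` rather than the
  raw `value` argument,
- rewords the unnest_tokens() error message for non-atomic input columns,
- installs lintr on Travis and runs lintr::lint_package() after success.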

---
 .travis.yml                          |  2 ++
 R/mallet_tidiers.R                   |  5 ++---
 R/sparse_casters.R                   |  3 +--
 R/sparse_tidiers.R                   |  3 ---
 R/stm_tidiers.R                      |  3 ++-
 R/stop_words.R                       |  1 -
 R/unnest_tokens.R                    | 11 +++++++----
 tests/testthat/test-corpus-tidiers.R |  8 ++++----
 tests/testthat/test-lda-tidiers.R    |  2 +-
 tests/testthat/test-stm-tidiers.R    |  2 +-
 tests/testthat/test-tf-idf.R         |  2 --
 tests/testthat/test-unnest-tokens.R  | 11 ++++++-----
 12 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 88799c3a..296cb993 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -18,6 +18,8 @@ addons:
 r_packages:
   - covr
   - data.table # so data.table tests will get checked
+  - lintr      # for code linting
 
 after_success:
   - Rscript -e 'covr::codecov(coverage = covr::package_coverage(line_exclusions = list("R/compat_lazyeval.R")))'
+  - Rscript -e 'lintr::lint_package()'
diff --git a/R/mallet_tidiers.R b/R/mallet_tidiers.R
index cf255014..b131e76d 100644
--- a/R/mallet_tidiers.R
+++ b/R/mallet_tidiers.R
@@ -95,11 +95,11 @@ tidy.jobjRef <- function(x, matrix = c("beta", "gamma"), log = FALSE,
   ret <- dplyr::tbl_df(reshape2::melt(m))
 
   if (matrix == "beta") {
-    # per-term-per-topic
+    # per term per topic
     colnames(ret) <- c("topic", "term", "beta")
     ret$term <- x$getVocabulary()[ret$term]
   } else {
-    # per-document-per-topic
+    # per document per topic
     colnames(ret) <- c("document", "topic", "gamma")
     ret$document <- x$getDocumentNames()[ret$document]
   }
@@ -145,4 +145,3 @@ augment.jobjRef <- function(x, data, ...) {
   data$.topic[keep] <- apply(products[keep, ], 1, which.max)
   data
 }
-
diff --git a/R/sparse_casters.R b/R/sparse_casters.R
index f93f2ba2..13f4cca1 100644
--- a/R/sparse_casters.R
+++ b/R/sparse_casters.R
@@ -81,7 +81,7 @@ cast_sparse_ <- function(data, row, column, value) {
     cast_sparse(data, !! row, !! column)
   } else {
     value_col <- compat_lazy(value, caller_env())
-    cast_sparse(data, !! row, !! column, !! value)
+    cast_sparse(data, !! row, !! column, !! value_col)
   }
 }
 
@@ -178,4 +178,3 @@ cast_dfm_ <- function(data, document, term, value, ...) {
   value <- compat_lazy(value, caller_env())
   cast_dfm(data, !! document, !! term, !! value)
 }
-
diff --git a/R/sparse_tidiers.R b/R/sparse_tidiers.R
index 5b8344d5..70551e6f 100644
--- a/R/sparse_tidiers.R
+++ b/R/sparse_tidiers.R
@@ -65,7 +65,6 @@ tidy.simple_triplet_matrix <- function(x,
   tidy_triplet(x, triplets, row_names, col_names)
 }
 
-
 #' Utility function to tidy a simple triplet matrix
 #'
 #' @param x Object with rownames and colnames
@@ -90,7 +89,5 @@ tidy_triplet <- function(x, triplets, row_names = NULL, col_names = NULL) {
   ret
 }
 
-
 #' @export
 broom::tidy
-
diff --git a/R/stm_tidiers.R b/R/stm_tidiers.R
index 0939de34..7339836a 100644
--- a/R/stm_tidiers.R
+++ b/R/stm_tidiers.R
@@ -78,7 +78,8 @@
 #' @name stm_tidiers
 #'
 #' @export
-tidy.STM <- function(x, matrix = c("beta", "gamma", "theta"), log = FALSE, document_names = NULL, ...) {
+tidy.STM <- function(x, matrix = c("beta", "gamma", "theta"), log = FALSE,
+                     document_names = NULL, ...) {
   matrix <- match.arg(matrix)
   if (matrix == "beta") {
     mat <- x$beta
diff --git a/R/stop_words.R b/R/stop_words.R
index 2fb252f7..90afb7bb 100644
--- a/R/stop_words.R
+++ b/R/stop_words.R
@@ -48,4 +48,3 @@ get_stopwords <- function(language = "en", source = "snowball") {
   data_frame(word = stopwords::stopwords(language = language, source = source),
              lexicon = source)
 }
-
diff --git a/R/unnest_tokens.R b/R/unnest_tokens.R
index 576b98a6..5e41b166 100644
--- a/R/unnest_tokens.R
+++ b/R/unnest_tokens.R
@@ -142,11 +142,13 @@ unnest_tokens.data.frame <- function(tbl, output, input, token = "words",
                                                              format = format)
   } else {
     if (is.null(collapse) && token %in% c("ngrams", "skip_ngrams", "sentences",
-                                          "lines", "paragraphs", "regex", "character_shingles")) {
+                                          "lines", "paragraphs", "regex",
+                                          "character_shingles")) {
       collapse <- TRUE
     }
     tf <- get(paste0("tokenize_", token))
-    if (token %in% c("characters", "words", "ngrams", "skip_ngrams", "tweets", "ptb")) {
+    if (token %in% c("characters", "words", "ngrams", "skip_ngrams",
+                     "tweets", "ptb")) {
       tokenfunc <- function(col, ...) tf(col, lowercase = FALSE, ...)
     } else {
       tokenfunc <- tf
@@ -156,7 +158,7 @@ unnest_tokens.data.frame <- function(tbl, output, input, token = "words",
   if (!is.null(collapse) && collapse) {
     if (any(!purrr::map_lgl(tbl, is.atomic))) {
       stop("If collapse = TRUE (such as for unnesting by sentence or paragraph), ",
-           "unnest_tokens needs all columns of input to be atomic vectors (not lists)")
+           "unnest_tokens needs all input columns to be atomic vectors (not lists)")
     }
 
     group_vars <- setdiff(names(tbl), input)
@@ -218,7 +220,8 @@ unnest_tokens_ <- function(tbl, output, input, token = "words",
 
 #' @export
 unnest_tokens_.data.frame <- function(tbl, output, input, token = "words",
-                                      format = c("text", "man", "latex", "html", "xml"),
+                                      format = c("text", "man", "latex",
+                                                 "html", "xml"),
                                       to_lower = TRUE, drop = TRUE,
                                       collapse = NULL, ...) {
   output <- compat_lazy(output, caller_env())
diff --git a/tests/testthat/test-corpus-tidiers.R b/tests/testthat/test-corpus-tidiers.R
index 3f5e8eda..4e437414 100644
--- a/tests/testthat/test-corpus-tidiers.R
+++ b/tests/testthat/test-corpus-tidiers.R
@@ -29,7 +29,7 @@ test_that("Can tidy corpus from quanteda package", {
   }
 })
 
-test_that("Can tidy corpus from quanteda package using accessor functions", {
+test_that("Can tidy corpus from quanteda using accessor functions", {
   if (requireNamespace("quanteda", quietly = TRUE)) {
 
     x <- quanteda::data_corpus_inaugural
@@ -45,19 +45,19 @@ test_that("Can tidy corpus from quanteda package using accessor functions", {
   }
 })
 
-test_that("Can glance a corpus from quanteda package using accessor functions", {
+test_that("Can glance a corpus from quanteda using accessor functions", {
   if (requireNamespace("quanteda", quietly = TRUE)) {
 
     x <- quanteda::data_corpus_inaugural
 
     ## old method
-    glanceOLD <- function(x, ...) {
+    glance_old <- function(x, ...) {
       md <- purrr::compact(x$metadata)
       # turn vectors into list columns
       md <- purrr::map_if(md, ~length(.) > 1, list)
       as_data_frame(md)
     }
-    ret_old <- glanceOLD(x)
+    ret_old <- glance_old(x)
 
     ## new method
     ret_new <- glance(x)
diff --git a/tests/testthat/test-lda-tidiers.R b/tests/testthat/test-lda-tidiers.R
index 33552079..bd2299a1 100644
--- a/tests/testthat/test-lda-tidiers.R
+++ b/tests/testthat/test-lda-tidiers.R
@@ -2,7 +2,7 @@ context("LDA tidiers")
 
 suppressPackageStartupMessages(library(dplyr))
 
-if(require("topicmodels", quietly = TRUE)) {
+if (require("topicmodels", quietly = TRUE)) {
 
   data(AssociatedPress)
   ap <- AssociatedPress[1:100, ]
diff --git a/tests/testthat/test-stm-tidiers.R b/tests/testthat/test-stm-tidiers.R
index 5f552483..c3c8c6c1 100644
--- a/tests/testthat/test-stm-tidiers.R
+++ b/tests/testthat/test-stm-tidiers.R
@@ -2,7 +2,7 @@ context("stm tidiers")
 
 suppressPackageStartupMessages(library(dplyr))
 
-if(require("stm", quietly = TRUE)) {
+if (require("stm", quietly = TRUE)) {
 
   dat <- data_frame(document = c("row1", "row1", "row2", "row2", "row2"),
                     term = c("col1", "col2", "col1", "col3", "col4"),
diff --git a/tests/testthat/test-tf-idf.R b/tests/testthat/test-tf-idf.R
index 58d4ce39..7152b246 100644
--- a/tests/testthat/test-tf-idf.R
+++ b/tests/testthat/test-tf-idf.R
@@ -30,7 +30,6 @@ test_that("Can calculate TF-IDF", {
     group_by(document) %>%
     bind_tf_idf(word, document, frequency)
 
-  #expect_equal(result, ungroup(result2))
   expect_equal(length(groups(result2)), 1)
   expect_equal(as.character(groups(result2)[[1]]), "document")
 })
@@ -76,7 +75,6 @@ test_that("tf-idf with tidyeval works", {
     group_by(document) %>%
     bind_tf_idf(!! termvar, !! documentvar, !! countvar)
 
-  #expect_equal(result, ungroup(result2))
   expect_equal(length(groups(result2)), 1)
   expect_equal(as.character(groups(result2)[[1]]), "document")
 })
diff --git a/tests/testthat/test-unnest-tokens.R b/tests/testthat/test-unnest-tokens.R
index 1d4efb0a..92e793e7 100644
--- a/tests/testthat/test-unnest-tokens.R
+++ b/tests/testthat/test-unnest-tokens.R
@@ -20,7 +20,7 @@ test_that("tokenizing by character shingles works", {
   expect_equal(d$char_ngram[1], "tidy")
 })
 
-test_that("tokenizing by character shingles works with an option to include whitespaces and punctuation", {
+test_that("tokenizing by character shingles can include whitespace/punctuation", {
   d <- data_frame(txt = "tidytext is the best!")
   d <- d %>% unnest_tokens(char_ngram, txt, token = "character_shingles",
                            strip_non_alphanum = FALSE)
@@ -273,7 +273,7 @@ test_that("Trying to tokenize a data.table works", {
   expect_equal(output$author[1], "Watsky")
 })
 
-test_that("Trying to tokenize a data.table work when the input has only one column", {
+test_that("Can tokenize a data.table when the input has only one column", {
   skip_if_not_installed("data.table")
   text <- data.table::data.table(txt = "You gotta bring yourself your flowers now in showbiz")
   output <- unnest_tokens(text, word, txt)
@@ -335,10 +335,11 @@ test_that("Can't tokenize with list columns with collapse = TRUE", {
                    line = 1L:2L,
                    list_col = list(1L:3L, c("a", "b")))
 
-  expect_error(unnest_tokens(df, word, txt, token = "sentences"), "to be atomic vectors")
+  expect_error(unnest_tokens(df, word, txt, token = "sentences"),
+               "to be atomic vectors")
 
-  # Can tokenize by sentence without collapsing, though it sort of defeats the purpose
+  # Can tokenize by sentence without collapsing,
+  # though it sort of defeats the purpose
   ret <- unnest_tokens(df, word, txt, token = "sentences", collapse = FALSE)
   expect_equal(nrow(ret), 2)
 })
-