ropensci · jimhester · Sep 7, 2017
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -14,10 +14,16 @@ Encoding: UTF-8
 LazyData: true
 URL: https://github.com/ropensci/spelling#readme
 BugReports: https://github.com/ropensci/spelling/issues
-Imports:
-  commonmark,
-  xml2, 
-  hunspell,
-  knitr
+Imports: 
+    commonmark,
+    xml2,
+    hunspell,
+    knitr,
+    roxygen2,
+    Rcpp
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.0.1
+LinkingTo: 
+    Rcpp
+Remotes:
+  klutometis/roxygen
diff --git a/NAMESPACE b/NAMESPACE
@@ -8,3 +8,5 @@ export(spell_check_setup)
 export(spell_check_test)
 export(spell_check_text)
 export(update_wordlist)
+importFrom(Rcpp,sourceCpp)
+useDynLib(spelling, .registration = TRUE)
diff --git a/R/RcppExports.R b/R/RcppExports.R
@@ -0,0 +1,7 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+find_word_positions <- function(lines, words) {  # generated R-side wrapper
+    .Call(`_spelling_find_word_positions`, lines, words)  # hand both arguments to the native routine
+}
+
diff --git a/R/check-files.R b/R/check-files.R
@@ -89,3 +89,83 @@ spell_check_file_plain <- function(path, format, dict){
   text <- vapply(words, paste, character(1), collapse = " ")
   spell_check_plain(text, dict = dict)
 }
+
+# Spell check the roxygen comments of a single R source file.
+#
+# The file is parsed with roxygen2, the free text of every text-bearing tag is
+# collected, Rd markup such as \code{...} is blanked out, and the rest is
+# passed to hunspell. Every misspelled word is then located in the roxygen
+# comment lines of the file.
+#
+# @param path Path of the R source file to check.
+# @param dict Dictionary forwarded to hunspell::hunspell().
+# @param global_options Options forwarded to roxygen2::parse_file().
+# @return A character vector named by misspelled word. Each element holds the
+#   comma-separated line numbers (within path) of the roxygen comment lines
+#   containing that word, or "NA" if the word could not be located. An empty
+#   character vector is returned when nothing is misspelled.
+#' @useDynLib spelling, .registration = TRUE
+#' @importFrom Rcpp sourceCpp
+spell_check_file_roxygen <- function(path, dict, global_options = list()) {
+  parsed <- roxygen2::parse_file(file = path, global_options = global_options)
+
+  lines <- readLines(path)
+  is_roxygen <- grep("^[[:space:]]*#+'", lines)
+  roxygen_lines <- lines[is_roxygen]
+
+  # roxygen tags that contain text
+  text_tags <- c("concept", "describeIn", "description", "details", "field",
+    "note", "param", "return", "section", "slot", "title")
+
+  # Rd macros whose argument is blanked out, tag list derived from RdTextFilter
+  # https://github.com/wch/r-source/blob/89ec1150299f7be62b839d5d5eb46bd9a63653bd/src/library/tools/R/Rdtools.R#L113-L126
+  rd_tags <- c("S3method", "S4method", "command", "code", "docType", "email",
+    "encoding", "file", "keyword", "link", "linkS4class", "method", "pkg",
+    "var")
+  rd_tag_re <- paste0("\\\\(", paste0(rd_tags, collapse = "|"), ")[^}]+}")
+
+  # Some roxygen tags (such as param) have a name and a description, we only
+  # want to spell check the latter.
+  extract_text <- function(x) {
+    if (is.list(x) && "description" %in% names(x)) {
+      return(x[["description"]])
+    }
+    x
+  }
+
+  # Misspelled words found in the text tags of one parsed roxygen block.
+  block_bad_words <- function(tags) {
+    text <- unlist(lapply(tags[names(tags) %in% text_tags], extract_text))
+    if (length(text) == 0) {
+      return(character())
+    }
+    unlist(hunspell::hunspell(blank_matches(text, rd_tag_re), dict = dict))
+  }
+
+  # unlist() of nothing is NULL rather than character(), so stop here instead
+  # of handing NULL to the compiled lookup.
+  bad_words <- unlist(lapply(parsed, block_bad_words))
+  if (length(bad_words) == 0) {
+    return(character())
+  }
+
+  # The lookup scans every roxygen line of the file, so it is done once per
+  # file on the de-duplicated words instead of once per block, which reported
+  # a word repeatedly when several blocks misspelled it.
+  res <- find_word_positions(roxygen_lines, unique(sort(bad_words)))
+
+  # Fix line numbers for real file.
+  res$line <- is_roxygen[res$line]
+
+  vapply(split(res$line, res$word), paste, character(1), collapse = ", ")
+}
+
+# Replace every match of the regular expression re in the character vector
+# str by a run of spaces with the same number of characters and return the
+# modified vector.
+blank_matches <- function(str, re) {
+  m <- gregexpr(re, str)
+  blanks <- function(n) strrep(" ", n)
+  regmatches(str, m) <- Map(blanks, lapply(regmatches(str, m), nchar))
+  str
+}
diff --git a/R/spell-check.R b/R/spell-check.R
@@ -40,15 +40,19 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, lang = "en_GB", use
   rd_files <- list.files(file.path(pkg$path, "man"), "\\.rd$", ignore.case = TRUE, full.names = TRUE)
   rd_lines <- lapply(sort(rd_files), spell_check_file_rd, dict = dict)
 
+  # Check Roxygen comments
+  r_files <- list.files(file.path(pkg$path, "R"), "\\.R$", ignore.case = TRUE, full.names = TRUE)
+  r_lines <- lapply(sort(r_files), spell_check_file_roxygen, dict = dict, global_options = roxygen2::load_options(pkg$path))
+
   # Check 'DESCRIPTION' fields
   pkg_fields <- c("title", "description")
   pkg_lines <- lapply(pkg_fields, function(x){
     spell_check_file_text(textConnection(pkg[[x]]), dict = dict)
   })
 
   # Combine
-  all_sources <- c(rd_files, pkg_fields)
-  all_lines <- c(rd_lines, pkg_lines)
+  all_sources <- c(r_files, rd_files, pkg_fields)
+  all_lines <- c(r_lines, rd_lines, pkg_lines)
 
   if(isTRUE(vignettes)){
     # Markdown vignettes

diff --git a/man/spell_check_files.Rd b/man/spell_check_files.Rd
diff --git a/man/spell_check_package.Rd b/man/spell_check_package.Rd
diff --git a/man/wordlist.Rd b/man/wordlist.Rd
diff --git a/src/.gitignore b/src/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.so
+*.dll
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
@@ -0,0 +1,29 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// find_word_positions: .Call shim around the function defined in src/find_word_positions.cpp
+Rcpp::DataFrame find_word_positions(CharacterVector lines, CharacterVector words);  // forward declaration of the implementation
+RcppExport SEXP _spelling_find_word_positions(SEXP linesSEXP, SEXP wordsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< CharacterVector >::type lines(linesSEXP);  // SEXP -> CharacterVector
+    Rcpp::traits::input_parameter< CharacterVector >::type words(wordsSEXP);  // SEXP -> CharacterVector
+    rcpp_result_gen = Rcpp::wrap(find_word_positions(lines, words));  // run the implementation, wrap the DataFrame for R
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {  // table handed to R_registerRoutines in R_init_spelling
+    {"_spelling_find_word_positions", (DL_FUNC) &_spelling_find_word_positions, 2},  // name, function pointer, number of arguments
+    {NULL, NULL, 0}  // all-NULL entry closing the table
+};
+
+RcppExport void R_init_spelling(DllInfo *dll) {  // NOTE(review): presumably run by R when the shared library is loaded - confirm
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);  // CallEntries is the only routine table supplied
+    R_useDynamicSymbols(dll, FALSE);  // switch dynamic symbol lookup off for this library
+}
diff --git a/src/find_word_positions.cpp b/src/find_word_positions.cpp
@@ -0,0 +1,57 @@
+#include <Rcpp.h>
+#include <cctype>
+#include <cstring>
+using namespace Rcpp;
+
+// Locate whole-word occurrences of every element of `words` in `lines`.
+//
+// The search is a case-sensitive byte comparison (strstr). A hit only counts
+// when the bytes directly before and after it are not alphanumeric according
+// to isalnum, or the hit touches the start or end of the line.
+//
+// Returns a data frame with one row per hit:
+//   word  - the element of `words` that was found
+//   line  - 1-based index of the element of `lines` containing the hit
+//   start - 1-based byte offset of the hit within that line
+// A word without any hit contributes a single row whose line and start are NA.
+// [[Rcpp::export]]
+Rcpp::DataFrame find_word_positions(CharacterVector lines,
+                                    CharacterVector words) {
+  std::vector<const char*> found_words;
+  std::vector<int> found_lines;
+  std::vector<int> found_starts;
+
+  for (int i = 0; i < words.size(); ++i) {
+    const char* word = words.at(i);
+    size_t len = std::strlen(word);
+    bool found = false;
+    // strstr finds an empty word at every position, which would walk the
+    // pointer past the end of the line; treat an empty word as never found.
+    for (int j = 0; len > 0 && j < lines.size(); ++j) {
+      const char* line = lines.at(j);
+      // Step one byte past each hit: the next search then starts at or before
+      // the terminating NUL, never beyond it.
+      for (const char* p = line; (p = std::strstr(p, word)) != NULL; ++p) {
+        // isalnum is only defined for unsigned char values, so the possibly
+        // negative bytes of multi-byte characters are cast before the test.
+        bool starts_word =
+            p == line || !std::isalnum(static_cast<unsigned char>(p[-1]));
+        bool ends_word = !std::isalnum(static_cast<unsigned char>(p[len]));
+        if (starts_word && ends_word) {
+          found = true;
+          found_words.push_back(word);
+          found_lines.push_back(j + 1);
+          found_starts.push_back((int)(p - line) + 1);
+        }
+      }
+    }
+    if (!found) {
+      found_words.push_back(word);
+      found_lines.push_back(NA_INTEGER);
+      found_starts.push_back(NA_INTEGER);
+    }
+  }
+  return DataFrame::create(_["word"] = found_words, _["line"] = found_lines,
+                           _["start"] = found_starts,
+                           Rcpp::_["stringsAsFactors"] = false);
+}