diff --git a/DESCRIPTION b/DESCRIPTION
index c351f98..4fb82c5 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -14,10 +14,16 @@ Encoding: UTF-8
 LazyData: true
 URL: https://github.com/ropensci/spelling#readme
 BugReports: https://github.com/ropensci/spelling/issues
-Imports:
-    commonmark,
-    xml2,
-    hunspell,
-    knitr
+Imports:
+    commonmark,
+    xml2,
+    hunspell,
+    knitr,
+    roxygen2,
+    Rcpp
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 6.0.1
+LinkingTo:
+    Rcpp
+Remotes:
+    klutometis/roxygen
diff --git a/NAMESPACE b/NAMESPACE
index 78d0a37..f8f1409 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -8,3 +8,5 @@ export(spell_check_setup)
 export(spell_check_test)
 export(spell_check_text)
 export(update_wordlist)
+importFrom(Rcpp,sourceCpp)
+useDynLib(spelling, .registration = TRUE)
diff --git a/R/RcppExports.R b/R/RcppExports.R
new file mode 100644
index 0000000..66ca933
--- /dev/null
+++ b/R/RcppExports.R
@@ -0,0 +1,7 @@
+# Generated by using Rcpp::compileAttributes() -> do not edit by hand
+# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+find_word_positions <- function(lines, words) {
+    .Call(`_spelling_find_word_positions`, lines, words)
+}
+
diff --git a/R/check-files.R b/R/check-files.R
index 3cf3322..f87215e 100644
--- a/R/check-files.R
+++ b/R/check-files.R
@@ -89,3 +89,54 @@ spell_check_file_plain <- function(path, format, dict){
   text <- vapply(words, paste, character(1), collapse = " ")
   spell_check_plain(text, dict = dict)
 }
+
+#' @useDynLib spelling, .registration = TRUE
+#' @importFrom Rcpp sourceCpp
+spell_check_file_roxygen <- function(path, dict, global_options = list()) {
+
+  parsed <- roxygen2::parse_file(file = path, global_options = global_options)
+
+  lines <- readLines(path)
+  is_roxygen <- grep("^[[:space:]]*#+'", lines)
+  roxygen_lines <- lines[is_roxygen]
+
+  # Some roxygen tags (such as param) have both a name and a description; we
+  # only want to spell check the latter.
+  extract_text <- function(x) {
+    if (is.list(x) && exists("description", x)) {
+      return(x[["description"]])
+    }
+    x
+  }
+
+  # Names of the roxygen tags whose text is extracted and spell checked.
+  text_tags <- c("concept", "describeIn", "description", "details", "field", "note", "param", "return", "section", "slot", "title")
+  parse_block <- function(tags) {
+    text <- unlist(lapply(tags[names(tags) %in% text_tags], extract_text))
+    if (length(text) == 0) {
+      return(data.frame(word = character(), line = integer(), start = integer(), stringsAsFactors = FALSE))
+    }
+
+    # Overwrite these Rd tags with spaces of equal width; tag list derived from RdTextFilter:
+    # https://github.com/wch/r-source/blob/89ec1150299f7be62b839d5d5eb46bd9a63653bd/src/library/tools/R/Rdtools.R#L113-L126
+    rd_tags <- c("S3method", "S4method", "command", "code", "docType", "email", "encoding", "file", "keyword", "link", "linkS4class", "method", "pkg", "var")
+    re <- paste0("\\\\(", paste0(collapse = "|", rd_tags), ")[^}]+}")
+    text <- blank_matches(text, re)
+    bad_words <- hunspell::hunspell(text, dict = dict)
+    res <- find_word_positions(roxygen_lines, unique(sort(unlist(bad_words))))
+
+    # res$line indexes roxygen_lines; map it back to line numbers in the real file.
+    res$line <- is_roxygen[res$line]
+
+    vapply(split(res$line, res$word), paste, character(1), collapse = ", ")
+  }
+
+  unlist(lapply(parsed, parse_block))
+}
+
+blank_matches <- function(str, re) {
+  m <- gregexpr(re, str)
+  blanks <- function(n) strrep(" ", n)
+  regmatches(str, m) <- Map(blanks, lapply(regmatches(str, m), nchar))
+  str
+}
diff --git a/R/spell-check.R b/R/spell-check.R
index da69236..4f9184f 100644
--- a/R/spell-check.R
+++ b/R/spell-check.R
@@ -40,6 +40,10 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, lang = "en_GB", use
   rd_files <- list.files(file.path(pkg$path, "man"), "\\.rd$", ignore.case = TRUE, full.names = TRUE)
   rd_lines <- lapply(sort(rd_files), spell_check_file_rd, dict = dict)
 
+  # Check Roxygen comments
+  r_files <- list.files(file.path(pkg$path, "R"), "\\.R$", ignore.case = TRUE, full.names = TRUE)
+  r_lines <- lapply(sort(r_files), spell_check_file_roxygen, dict = dict, global_options = roxygen2::load_options(pkg$path))
+
   # Check 'DESCRIPTION' fields
   pkg_fields <- c("title", "description")
   pkg_lines <- lapply(pkg_fields, function(x){
@@ -47,8 +51,8 @@ spell_check_package <- function(pkg = ".", vignettes = TRUE, lang = "en_GB", use
   })
 
   # Combine
-  all_sources <- c(rd_files, pkg_fields)
-  all_lines <- c(rd_lines, pkg_lines)
+  all_sources <- c(r_files, rd_files, pkg_fields)
+  all_lines <- c(r_lines, rd_lines, pkg_lines)
 
   if(isTRUE(vignettes)){
     # Markdown vignettes
diff --git a/man/spell_check_files.Rd b/man/spell_check_files.Rd
index 0b589ad..8ae8581 100644
--- a/man/spell_check_files.Rd
+++ b/man/spell_check_files.Rd
@@ -32,3 +32,4 @@ spell_check_files(files)
 Other spelling: \code{\link{spell_check_package}},
   \code{\link{wordlist}}
 }
+\concept{spelling}
diff --git a/man/spell_check_package.Rd b/man/spell_check_package.Rd
index 3be27d5..afe8512 100644
--- a/man/spell_check_package.Rd
+++ b/man/spell_check_package.Rd
@@ -45,3 +45,4 @@ require installation of a custom dictionary, see \link[hunspell:hunspell]{hunspe
 Other 
spelling: \code{\link{spell_check_files}},
   \code{\link{wordlist}}
 }
+\concept{spelling}
diff --git a/man/wordlist.Rd b/man/wordlist.Rd
index df2e94e..35db462 100644
--- a/man/wordlist.Rd
+++ b/man/wordlist.Rd
@@ -37,3 +37,4 @@ they have been removed from the documentation or added to the \code{lang} dictio
 Other spelling: \code{\link{spell_check_files}},
   \code{\link{spell_check_package}}
 }
+\concept{spelling}
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..22034c4
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.so
+*.dll
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
new file mode 100644
index 0000000..b977f7f
--- /dev/null
+++ b/src/RcppExports.cpp
@@ -0,0 +1,29 @@
+// Generated by using Rcpp::compileAttributes() -> do not edit by hand
+// Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393
+
+#include <Rcpp.h>
+
+using namespace Rcpp;
+
+// find_word_positions
+Rcpp::DataFrame find_word_positions(CharacterVector lines, CharacterVector words);
+RcppExport SEXP _spelling_find_word_positions(SEXP linesSEXP, SEXP wordsSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< CharacterVector >::type lines(linesSEXP);
+    Rcpp::traits::input_parameter< CharacterVector >::type words(wordsSEXP);
+    rcpp_result_gen = Rcpp::wrap(find_word_positions(lines, words));
+    return rcpp_result_gen;
+END_RCPP
+}
+
+static const R_CallMethodDef CallEntries[] = {
+    {"_spelling_find_word_positions", (DL_FUNC) &_spelling_find_word_positions, 2},
+    {NULL, NULL, 0}
+};
+
+RcppExport void R_init_spelling(DllInfo *dll) {
+    R_registerRoutines(dll, NULL, CallEntries, NULL, NULL);
+    R_useDynamicSymbols(dll, FALSE);
+}
diff --git a/src/find_word_positions.cpp b/src/find_word_positions.cpp
new file mode 100644
index 0000000..6100a92
--- /dev/null
+++ b/src/find_word_positions.cpp
@@ -0,0 +1,66 @@
+#include <Rcpp.h>
+#include <cctype>
+#include <cstring>
+#include <string>
+#include <vector>
+using namespace Rcpp;
+
+// True when `c` is alphanumeric according to isalnum(). The cast to unsigned
+// char is required: passing a negative char (e.g. a UTF-8 byte on platforms
+// where char is signed) to isalnum() is undefined behaviour.
+static bool is_word_char(char c) {
+  return std::isalnum(static_cast<unsigned char>(c)) != 0;
+}
+
+// Locate whole-word occurrences of each entry of `words` in `lines`.
+//
+// An occurrence counts only when the bytes immediately before and after it are
+// not alphanumeric (start and end of line also count as boundaries).
+//
+// Returns a data frame with one row per occurrence and the columns
+//   word  - the word that was searched for,
+//   line  - 1-based index into `lines`,
+//   start - 1-based byte offset of the occurrence within that line.
+// A word with no occurrence (including the empty string) contributes a single
+// row whose `line` and `start` are NA.
+// [[Rcpp::export]]
+Rcpp::DataFrame find_word_positions(CharacterVector lines,
+                                    CharacterVector words) {
+  std::vector<std::string> found_words;
+  std::vector<int> found_lines;
+  std::vector<int> found_starts;
+
+  for (R_xlen_t i = 0; i < words.size(); ++i) {
+    const char* word = words.at(i);
+    const size_t len = std::strlen(word);
+    bool found = false;
+    // An empty needle matches everywhere and would never advance the scan, so
+    // it is reported as "not found" instead of being searched for.
+    for (R_xlen_t j = 0; len > 0 && j < lines.size(); ++j) {
+      const char* line = lines.at(j);
+      const char* p = line;
+      while ((p = std::strstr(p, word)) != NULL) {
+        const bool starts_word = (p == line) || !is_word_char(p[-1]);
+        if (starts_word && !is_word_char(p[len])) {
+          found = true;
+          found_words.push_back(word);
+          found_lines.push_back(static_cast<int>(j) + 1);
+          found_starts.push_back(static_cast<int>(p - line) + 1);
+          // Resume right after the match; p never moves past the terminating
+          // NUL because the match itself lies inside the string.
+          p += len;
+        } else {
+          ++p;
+        }
+      }
+    }
+    if (!found) {
+      found_words.push_back(word);
+      found_lines.push_back(NA_INTEGER);
+      found_starts.push_back(NA_INTEGER);
+    }
+  }
+  return DataFrame::create(_["word"] = found_words, _["line"] = found_lines,
+                           _["start"] = found_starts,
+                           _["stringsAsFactors"] = false);
+}