implement indentation_linter (#1411)

* implement indentation_linter supersedes #497 * xml2::* * fix tests * fix range start (off by one) * ignore indentation level within string constants * fix lints and false positives, handle nested indentation changes * de-lint package, explicit branch for no lints * adapt indent suppression condition * change indentation end path for $function calls * return non-empty file_lines even if no R source is present * ensure Rmd is detected * .Rmd * add use_hybrid_indent = TRUE * except hanging indent if following expr is a code block * smarter hanging indent detection for use_hybrid_indent * suppress redundant lints * update tests * de-lint * move around NEWS * if condition styling * use TODO for consistency/searchability * address review comments * NEWS wording * add more tests, refactor to file-level because of a systematic problem with expression level * refac use_hybrid_indent -> hanging_indent_style * allow compare_branches.R to use custom parameters if desired * improve docs * de-lint Co-authored-by: Michael Chirico <chiricom@google.com>
r-lib · Oct 17, 2022 · c224853 · c224853
1 parent f5e8ea6
commit c224853
Show file tree

Hide file tree

Showing 22 changed files with 917 additions and 75 deletions.
diff --git a/.dev/compare_branches.R b/.dev/compare_branches.R
@@ -344,6 +344,9 @@ get_linter_from_name <- function(linter_name) {
     #   did not make it into the release?
     if (linter_name == "line_length_linter" && !is.integer(formals(linter_name)$length)) {
       eval(call(linter_name, 80L))
+    } else if (endsWith(linter_name, ")")) {
+      # allow custom parameters
+      eval(parse(text = linter_name))
     } else {
       eval(call(linter_name))
     },

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -107,6 +107,7 @@ Collate:
     'ids_with_token.R'
     'ifelse_censor_linter.R'
     'implicit_integer_linter.R'
+    'indentation_linter.R'
     'infix_spaces_linter.R'
     'inner_combine_linter.R'
     'is_lint_level.R'

diff --git a/NAMESPACE b/NAMESPACE
@@ -67,6 +67,7 @@ export(get_source_expressions)
 export(ids_with_token)
 export(ifelse_censor_linter)
 export(implicit_integer_linter)
+export(indentation_linter)
 export(infix_spaces_linter)
 export(inner_combine_linter)
 export(is_lint_level)

diff --git a/NEWS.md b/NEWS.md
@@ -29,6 +29,8 @@
   the style guide on handling this case awaits clarification: https://github.com/tidyverse/style/issues/191.
   (#1346, @MichaelChirico)
 
+* The new `indentation_linter()` is part of the default linters. See "New linters" for more details.
+
 ## New and improved features
 
 * New `get_r_string()` helper to get the R-equivalent value of a string, especially useful for R-4-style raw strings.
@@ -93,6 +95,9 @@
 
 * `routine_registration_linter()` for identifying native routines that don't use registration (`useDynLib` in the `NAMESPACE`; @MichaelChirico)
 
+* `indentation_linter()` for checking that the indentation conforms to 2-space Tidyverse-style (@AshesITR and @dgkf, #1411).
+
+
 ## Notes
 
 * `lint()` continues to support Rmarkdown documents. For users of custom .Rmd engines, e.g.
@@ -164,6 +169,7 @@
 * `get_source_expressions()` no longer fails on R files that match a knitr pattern (#743, #879, #1406, @AshesITR).
 * Parse error lints now appear with the linter name `"error"` instead of `NA` (#1405, @AshesITR).  
   Also, linting no longer runs if the `source_expressions` contain invalid string data that would cause error messages
+  in other linters. 
   in other linters.
 * Prevent `lint()` from hanging on Rmd files with some syntax errors (#1443, @MichaelChirico).
 * `get_source_expressions()` no longer omits trailing non-code lines from knitr files (#1400, #1415, @AshesITR).  

diff --git a/R/backport_linter.R b/R/backport_linter.R
@@ -81,10 +81,14 @@ backport_linter <- function(r_version = getRversion(), except = character()) {
 }
 
 normalize_r_version <- function(r_version) {
-  if (is.character(r_version) &&
-    re_matches(r_version, rex(start, "release" %or%
+  rx_release_spec <- rex(
+    start,
+    "release" %or%
       list("oldrel", maybe("-", digits)) %or%
-      "devel", end))) {
+      "devel",
+    end
+  )
+  if (is.character(r_version) && re_matches(r_version, rx_release_spec)) {
     # Support devel, release, oldrel, oldrel-1, ...
     if (r_version == "oldrel") {
       r_version <- "oldrel-1"

diff --git a/R/extract.R b/R/extract.R
@@ -132,15 +132,8 @@ replace_prefix <- function(lines, prefix_pattern) {
   m <- gregexpr(prefix_pattern, lines)
   non_na <- !is.na(m)
 
-  blanks <- function(n) {
-    vapply(Map(rep.int, rep.int(" ", length(n)), n, USE.NAMES = FALSE),
-      paste, "",
-      collapse = ""
-    )
-  }
-
-  regmatches(lines[non_na], m[non_na]) <-
-    Map(blanks, lapply(regmatches(lines[non_na], m[non_na]), nchar))
+  prefix_lengths <- lapply(regmatches(lines[non_na], m[non_na]), nchar)
+  regmatches(lines[non_na], m[non_na]) <- lapply(prefix_lengths, strrep, x = " ")
 
   lines
 }
diff --git a/R/get_source_expressions.R b/R/get_source_expressions.R
@@ -110,15 +110,14 @@ get_source_expressions <- function(filename, lines = NULL) {
     }
 
     # add global expression
-    expressions[[length(expressions) + 1L]] <-
-      list(
-        filename = filename,
-        file_lines = source_expression$lines,
-        content = source_expression$lines,
-        full_parsed_content = parsed_content,
-        full_xml_parsed_content = xml_parsed_content,
-        terminal_newline = terminal_newline
-      )
+    expressions[[length(expressions) + 1L]] <- list(
+      filename = filename,
+      file_lines = source_expression$lines,
+      content = source_expression$lines,
+      full_parsed_content = parsed_content,
+      full_xml_parsed_content = xml_parsed_content,
+      terminal_newline = terminal_newline
+    )
   }
 
   list(expressions = expressions, error = e, lines = source_expression$lines)
@@ -355,7 +354,8 @@ lint_parse_error_nonstandard <- function(e, source_expression) {
   if (nrow(line_location) == 0L) {
     if (grepl("attempt to use zero-length variable name", e$message, fixed = TRUE)) {
       # empty symbol: ``, ``(), ''(), ""(), fun(''=42), fun(""=42), fun(a=1,""=42)
-      loc <- re_matches(source_expression$content,
+      loc <- re_matches(
+        source_expression$content,
         rex(
           "``" %or%
             list(or("''", '""'), any_spaces, "(") %or%

diff --git a/R/indentation_linter.R b/R/indentation_linter.R
@@ -0,0 +1,231 @@
+#' Check that indentation is consistent
+#'
+#' @param indent Number of spaces, that a code block should be indented by relative to its parent code block.
+#'   Used for multi-line code blocks (`{ ... }`), function calls (`( ... )`) and extractions (`[ ... ]`, `[[ ... ]]`).
+#'   Defaults to 2.
+#' @param hanging_indent_style Indentation style for multi-line function calls with arguments in their first line.
+#'   Defaults to tidyverse style, i.e. a block indent is used if the function call terminates with `)` on a separate
+#'   line and a hanging indent if not.
+#'   Note that function multi-line function calls without arguments on their first line will always be expected to have
+#'   block-indented arguments.
+#'
+#'   ```r
+#'   # complies to any style
+#'   map(
+#'     x,
+#'     f,
+#'     additional_arg = 42
+#'   )
+#'
+#'   # complies to "tidy" and "never"
+#'   map(x, f,
+#'     additional_arg = 42
+#'   )
+#'
+#'   # complies to "always"
+#'   map(x, f,
+#'       additional_arg = 42
+#'   )
+#'
+#'   # complies to "tidy" and "always"
+#'   map(x, f,
+#'       additional_arg = 42)
+#'
+#'   # complies to "never"
+#'   map(x, f,
+#'     additional_arg = 42)
+#'   ```
+#'
+#' @examples
+#' # will produce lints
+#' lint(
+#'   text = "if (TRUE) {\n1 + 1\n}",
+#'   linters = indentation_linter()
+#' )
+#'
+#' lint(
+#'   text = "if (TRUE) {\n    1 + 1\n}",
+#'   linters = indentation_linter()
+#' )
+#'
+#' lint(
+#'   text = "map(x, f,\n  additional_arg = 42\n)",
+#'   linters = indentation_linter(hanging_indent_style = "always")
+#' )
+#'
+#' lint(
+#'   text = "map(x, f,\n    additional_arg = 42)",
+#'   linters = indentation_linter(hanging_indent_style = "never")
+#' )
+#'
+#' # okay
+#' lint(
+#'   text = "map(x, f,\n  additional_arg = 42\n)",
+#'   linters = indentation_linter()
+#' )
+#'
+#' lint(
+#'   text = "if (TRUE) {\n    1 + 1\n}",
+#'   linters = indentation_linter(indent = 4)
+#' )
+#'
+#' @evalRd rd_tags("indentation_linter")
+#' @seealso [linters] for a complete list of linters available in lintr. \cr
+#' <https://style.tidyverse.org/syntax.html#indenting>
+#'
+#' @export
+indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "always", "never")) {
+  paren_tokens_left <- c("OP-LEFT-BRACE", "OP-LEFT-PAREN", "OP-LEFT-BRACKET", "LBB")
+  paren_tokens_right <- c("OP-RIGHT-BRACE", "OP-RIGHT-PAREN", "OP-RIGHT-BRACKET", "OP-RIGHT-BRACKET")
+  infix_tokens <- setdiff(infix_metadata$xml_tag, c("OP-LEFT-BRACE", "OP-COMMA", paren_tokens_left))
+  no_paren_keywords <- c("ELSE", "REPEAT")
+  keyword_tokens <- c("FUNCTION", "IF", "FOR", "WHILE")
+
+  xp_last_on_line <- "@line1 != following-sibling::*[not(self::COMMENT)][1]/@line1"
+
+  hanging_indent_style <- match.arg(hanging_indent_style)
+
+  if (hanging_indent_style == "tidy") {
+    xp_is_not_hanging <- paste(
+      c(
+        glue::glue(
+          "self::{paren_tokens_left}/following-sibling::{paren_tokens_right}[@line1 > preceding-sibling::*[1]/@line2]"
+        ),
+        glue::glue("self::*[{xp_and(paste0('not(self::', paren_tokens_left, ')'))} and {xp_last_on_line}]")
+      ),
+      collapse = " | "
+    )
+  } else if (hanging_indent_style == "always") {
+    xp_is_not_hanging <- glue::glue("self::*[{xp_last_on_line}]")
+  } # "never" makes no use of xp_is_not_hanging, so no definition is necessary
+
+  xp_block_ends <- paste0(
+    "number(",
+    paste(
+      c(
+        glue::glue("self::{paren_tokens_left}/following-sibling::{paren_tokens_right}/preceding-sibling::*[1]/@line2"),
+        glue::glue("self::*[{xp_and(paste0('not(self::', paren_tokens_left, ')'))}]
+                      /following-sibling::SYMBOL_FUNCTION_CALL/parent::expr/following-sibling::expr[1]/@line2"),
+        glue::glue("self::*[
+                      {xp_and(paste0('not(self::', paren_tokens_left, ')'))} and
+                      not(following-sibling::SYMBOL_FUNCTION_CALL)
+                    ]/following-sibling::*[1]/@line2")
+      ),
+      collapse = " | "
+    ),
+    ")"
+  )
+
+  xp_indent_changes <- paste(
+    c(
+      glue::glue("//{paren_tokens_left}[not(@line1 = following-sibling::expr[
+                    @line2 > @line1 and
+                    ({xp_or(paste0('descendant::', paren_tokens_left, '[', xp_last_on_line, ']'))})
+                  ]/@line1)]"),
+      glue::glue("//{infix_tokens}[{xp_last_on_line}]"),
+      glue::glue("//{no_paren_keywords}[{xp_last_on_line}]"),
+      glue::glue("//{keyword_tokens}/following-sibling::OP-RIGHT-PAREN[
+                    {xp_last_on_line} and
+                    not(following-sibling::expr[1][OP-LEFT-BRACE])
+                  ]")
+    ),
+    collapse = " | "
+  )
+
+  xp_multiline_string <- "//STR_CONST[@line1 < @line2]"
+
+  Linter(function(source_expression) {
+    # must run on file level because a line can contain multiple expressions, losing indentation information, e.g.
+    #
+    #> fun(
+    #    a) # comment
+    #
+    # will have "# comment" as a separate expression
+    if (!is_lint_level(source_expression, "file")) {
+      return(list())
+    }
+
+    xml <- source_expression$full_xml_parsed_content
+    # Indentation increases by 1 for:
+    #  - { } blocks that span multiple lines
+    #  - ( ), [ ], or [[ ]] calls that span multiple lines
+    #     + if a token follows (, a hanging indent is required until )
+    #     + if there is no token following ( on the same line, a block indent is required until )
+    #  - binary operators where the second arguments starts on a new line
+
+    indent_levels <- rex::re_matches(
+      source_expression$file_lines,
+      rex::rex(start, any_spaces), locations = TRUE
+    )[, "end"]
+    expected_indent_levels <- integer(length(indent_levels))
+    is_hanging <- logical(length(indent_levels))
+
+    indent_changes <- xml2::xml_find_all(xml, xp_indent_changes)
+    for (change in indent_changes) {
+      if (hanging_indent_style != "never") {
+        change_starts_hanging <- length(xml2::xml_find_first(change, xp_is_not_hanging)) == 0L
+      } else {
+        change_starts_hanging <- FALSE
+      }
+      change_begin <- as.integer(xml2::xml_attr(change, "line1")) + 1L
+      change_end <- xml2::xml_find_num(change, xp_block_ends)
+      if (change_begin <= change_end) {
+        to_indent <- seq(from = change_begin, to = change_end)
+        if (change_starts_hanging) {
+          expected_indent_levels[to_indent] <- as.integer(xml2::xml_attr(change, "col2"))
+          is_hanging[to_indent] <- TRUE
+        } else {
+          expected_indent_levels[to_indent] <- expected_indent_levels[to_indent] + indent
+          is_hanging[to_indent] <- FALSE
+        }
+      }
+    }
+
+    in_str_const <- logical(length(indent_levels))
+    multiline_strings <- xml2::xml_find_all(xml, xp_multiline_string)
+    for (string in multiline_strings) {
+      is_in_str <- seq(
+        from = as.integer(xml2::xml_attr(string, "line1")) + 1L,
+        to = as.integer(xml2::xml_attr(string, "line2"))
+      )
+      in_str_const[is_in_str] <- TRUE
+    }
+
+    # Only lint non-empty lines if the indentation level doesn't match.
+    bad_lines <- which(indent_levels != expected_indent_levels &
+                         nzchar(trimws(source_expression$file_lines)) &
+                         !in_str_const)
+    if (length(bad_lines)) {
+      # Suppress consecutive lints with the same indentation difference, to not generate an excessive number of lints
+      is_consecutive_lint <- c(FALSE, diff(bad_lines) == 1L)
+      indent_diff <- expected_indent_levels[bad_lines] - indent_levels[bad_lines]
+      is_same_diff <- c(FALSE, diff(indent_diff) == 0L)
+
+      bad_lines <- bad_lines[!(is_consecutive_lint & is_same_diff)]
+
+      lint_messages <- sprintf(
+        "%s should be %d spaces but is %d spaces.",
+        ifelse(is_hanging[bad_lines], "Hanging indent", "Indentation"),
+        expected_indent_levels[bad_lines],
+        indent_levels[bad_lines]
+      )
+      lint_lines <- unname(as.integer(names(source_expression$file_lines)[bad_lines]))
+      lint_ranges <- cbind(
+        pmin(expected_indent_levels[bad_lines] + 1L, indent_levels[bad_lines]),
+        pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines])
+      )
+      Map(
+        Lint,
+        filename = source_expression$filename,
+        line_number = lint_lines,
+        column_number = indent_levels[bad_lines],
+        type = "style",
+        message = lint_messages,
+        line = unname(source_expression$file_lines[bad_lines]),
+        ranges = apply(lint_ranges, 1L, list, simplify = FALSE)
+      )
+    } else {
+      list()
+    }
+  })
+}
diff --git a/R/lint.R b/R/lint.R
@@ -543,7 +543,8 @@ checkstyle_output <- function(lints, filename = "lintr_results.xml") {
           style = "info",
           x$type
         ),
-        message = x$message)
+        message = x$message
+      )
     })
   })
 
@@ -665,8 +666,7 @@ sarif_output <- function(lints, filename = "lintr_results.sarif") {
       rule_index_exists <-
         which(sapply(sarif$runs[[1L]]$tool$driver$rules,
                      function(x) x$id == lint$linter))
-      if (length(rule_index_exists) == 0L ||
-          is.na(rule_index_exists[1L])) {
+      if (length(rule_index_exists) == 0L || is.na(rule_index_exists[1L])) {
         rule_index_exists <- 0L
       }
     }

diff --git a/R/make_linter_from_regex.R b/R/make_linter_from_regex.R
@@ -28,8 +28,10 @@ make_linter_from_regex <- function(regex,
           lapply(
             split(line_matches, seq_len(nrow(line_matches))),
             function(.match) {
-              if (is.na(.match[["start"]]) ||
-                .in_ignorable_position(source_expression, line_number, .match)) {
+              if (
+                is.na(.match[["start"]]) ||
+                  .in_ignorable_position(source_expression, line_number, .match)
+              ) {
                 return()
               }
               start <- .match[["start"]]

diff --git a/R/zzz.R b/R/zzz.R
@@ -20,6 +20,7 @@ default_linters <- modify_defaults(
   cyclocomp_linter(),
   equals_na_linter(),
   function_left_parentheses_linter(),
+  indentation_linter(),
   infix_spaces_linter(),
   line_length_linter(),
   no_tab_linter(),