Skip to content

Commit

Permalink
implement indentation_linter (#1411)
Browse files Browse the repository at this point in the history
* implement indentation_linter

supersedes #497

* xml2::*

* fix tests

* fix range start (off by one)

* ignore indentation level within string constants

* fix lints and false positives, handle nested indentation changes

* de-lint package, explicit branch for no lints

* adapt indent suppression condition

* change indentation end path for $function calls

* return non-empty file_lines even if no R source is present

* ensure Rmd is detected

* .Rmd

* add use_hybrid_indent = TRUE

* except hanging indent if following expr is a code block

* smarter hanging indent detection for use_hybrid_indent

* suppress redundant lints

* update tests

* de-lint

* move around NEWS

* if condition styling

* use TODO for consistency/searchability

* address review comments

* NEWS wording

* add more tests, refactor to file-level because of a systematic problem with expression level

* refac use_hybrid_indent -> hanging_indent_style

* allow compare_branches.R to use custom parameters if desired

* improve docs

* de-lint

Co-authored-by: Michael Chirico <chiricom@google.com>
  • Loading branch information
AshesITR and MichaelChirico committed Oct 17, 2022
1 parent f5e8ea6 commit c224853
Show file tree
Hide file tree
Showing 22 changed files with 917 additions and 75 deletions.
3 changes: 3 additions & 0 deletions .dev/compare_branches.R
Expand Up @@ -344,6 +344,9 @@ get_linter_from_name <- function(linter_name) {
# did not make it into the release?
if (linter_name == "line_length_linter" && !is.integer(formals(linter_name)$length)) {
eval(call(linter_name, 80L))
} else if (endsWith(linter_name, ")")) {
# allow custom parameters
eval(parse(text = linter_name))
} else {
eval(call(linter_name))
},
Expand Down
1 change: 1 addition & 0 deletions DESCRIPTION
Expand Up @@ -107,6 +107,7 @@ Collate:
'ids_with_token.R'
'ifelse_censor_linter.R'
'implicit_integer_linter.R'
'indentation_linter.R'
'infix_spaces_linter.R'
'inner_combine_linter.R'
'is_lint_level.R'
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Expand Up @@ -67,6 +67,7 @@ export(get_source_expressions)
export(ids_with_token)
export(ifelse_censor_linter)
export(implicit_integer_linter)
export(indentation_linter)
export(infix_spaces_linter)
export(inner_combine_linter)
export(is_lint_level)
Expand Down
6 changes: 6 additions & 0 deletions NEWS.md
Expand Up @@ -29,6 +29,8 @@
the style guide on handling this case awaits clarification: https://github.com/tidyverse/style/issues/191.
(#1346, @MichaelChirico)

* The new `indentation_linter()` is part of the default linters. See "New linters" for more details.

## New and improved features

* New `get_r_string()` helper to get the R-equivalent value of a string, especially useful for R-4-style raw strings.
Expand Down Expand Up @@ -93,6 +95,9 @@

* `routine_registration_linter()` for identifying native routines that don't use registration (`useDynLib` in the `NAMESPACE`; @MichaelChirico)

* `indentation_linter()` for checking that the indentation conforms to 2-space Tidyverse-style (@AshesITR and @dgkf, #1411).


## Notes

* `lint()` continues to support Rmarkdown documents. For users of custom .Rmd engines, e.g.
Expand Down Expand Up @@ -164,6 +169,7 @@
* `get_source_expressions()` no longer fails on R files that match a knitr pattern (#743, #879, #1406, @AshesITR).
* Parse error lints now appear with the linter name `"error"` instead of `NA` (#1405, @AshesITR).
Also, linting no longer runs if the `source_expressions` contain invalid string data that would cause error messages
in other linters.
in other linters.
* Prevent `lint()` from hanging on Rmd files with some syntax errors (#1443, @MichaelChirico).
* `get_source_expressions()` no longer omits trailing non-code lines from knitr files (#1400, #1415, @AshesITR).
Expand Down
10 changes: 7 additions & 3 deletions R/backport_linter.R
Expand Up @@ -81,10 +81,14 @@ backport_linter <- function(r_version = getRversion(), except = character()) {
}

normalize_r_version <- function(r_version) {
if (is.character(r_version) &&
re_matches(r_version, rex(start, "release" %or%
rx_release_spec <- rex(
start,
"release" %or%
list("oldrel", maybe("-", digits)) %or%
"devel", end))) {
"devel",
end
)
if (is.character(r_version) && re_matches(r_version, rx_release_spec)) {
# Support devel, release, oldrel, oldrel-1, ...
if (r_version == "oldrel") {
r_version <- "oldrel-1"
Expand Down
11 changes: 2 additions & 9 deletions R/extract.R
Expand Up @@ -132,15 +132,8 @@ replace_prefix <- function(lines, prefix_pattern) {
m <- gregexpr(prefix_pattern, lines)
non_na <- !is.na(m)

blanks <- function(n) {
vapply(Map(rep.int, rep.int(" ", length(n)), n, USE.NAMES = FALSE),
paste, "",
collapse = ""
)
}

regmatches(lines[non_na], m[non_na]) <-
Map(blanks, lapply(regmatches(lines[non_na], m[non_na]), nchar))
prefix_lengths <- lapply(regmatches(lines[non_na], m[non_na]), nchar)
regmatches(lines[non_na], m[non_na]) <- lapply(prefix_lengths, strrep, x = " ")

lines
}
20 changes: 10 additions & 10 deletions R/get_source_expressions.R
Expand Up @@ -110,15 +110,14 @@ get_source_expressions <- function(filename, lines = NULL) {
}

# add global expression
expressions[[length(expressions) + 1L]] <-
list(
filename = filename,
file_lines = source_expression$lines,
content = source_expression$lines,
full_parsed_content = parsed_content,
full_xml_parsed_content = xml_parsed_content,
terminal_newline = terminal_newline
)
expressions[[length(expressions) + 1L]] <- list(
filename = filename,
file_lines = source_expression$lines,
content = source_expression$lines,
full_parsed_content = parsed_content,
full_xml_parsed_content = xml_parsed_content,
terminal_newline = terminal_newline
)
}

list(expressions = expressions, error = e, lines = source_expression$lines)
Expand Down Expand Up @@ -355,7 +354,8 @@ lint_parse_error_nonstandard <- function(e, source_expression) {
if (nrow(line_location) == 0L) {
if (grepl("attempt to use zero-length variable name", e$message, fixed = TRUE)) {
# empty symbol: ``, ``(), ''(), ""(), fun(''=42), fun(""=42), fun(a=1,""=42)
loc <- re_matches(source_expression$content,
loc <- re_matches(
source_expression$content,
rex(
"``" %or%
list(or("''", '""'), any_spaces, "(") %or%
Expand Down
231 changes: 231 additions & 0 deletions R/indentation_linter.R
@@ -0,0 +1,231 @@
#' Check that indentation is consistent
#'
#' @param indent Number of spaces, that a code block should be indented by relative to its parent code block.
#' Used for multi-line code blocks (`{ ... }`), function calls (`( ... )`) and extractions (`[ ... ]`, `[[ ... ]]`).
#' Defaults to 2.
#' @param hanging_indent_style Indentation style for multi-line function calls with arguments in their first line.
#' Defaults to tidyverse style, i.e. a block indent is used if the function call terminates with `)` on a separate
#' line and a hanging indent if not.
#' Note that function multi-line function calls without arguments on their first line will always be expected to have
#' block-indented arguments.
#'
#' ```r
#' # complies to any style
#' map(
#' x,
#' f,
#' additional_arg = 42
#' )
#'
#' # complies to "tidy" and "never"
#' map(x, f,
#' additional_arg = 42
#' )
#'
#' # complies to "always"
#' map(x, f,
#' additional_arg = 42
#' )
#'
#' # complies to "tidy" and "always"
#' map(x, f,
#' additional_arg = 42)
#'
#' # complies to "never"
#' map(x, f,
#' additional_arg = 42)
#' ```
#'
#' @examples
#' # will produce lints
#' lint(
#' text = "if (TRUE) {\n1 + 1\n}",
#' linters = indentation_linter()
#' )
#'
#' lint(
#' text = "if (TRUE) {\n 1 + 1\n}",
#' linters = indentation_linter()
#' )
#'
#' lint(
#' text = "map(x, f,\n additional_arg = 42\n)",
#' linters = indentation_linter(hanging_indent_style = "always")
#' )
#'
#' lint(
#' text = "map(x, f,\n additional_arg = 42)",
#' linters = indentation_linter(hanging_indent_style = "never")
#' )
#'
#' # okay
#' lint(
#' text = "map(x, f,\n additional_arg = 42\n)",
#' linters = indentation_linter()
#' )
#'
#' lint(
#' text = "if (TRUE) {\n 1 + 1\n}",
#' linters = indentation_linter(indent = 4)
#' )
#'
#' @evalRd rd_tags("indentation_linter")
#' @seealso [linters] for a complete list of linters available in lintr. \cr
#' <https://style.tidyverse.org/syntax.html#indenting>
#'
#' @export
indentation_linter <- function(indent = 2L, hanging_indent_style = c("tidy", "always", "never")) {
paren_tokens_left <- c("OP-LEFT-BRACE", "OP-LEFT-PAREN", "OP-LEFT-BRACKET", "LBB")
paren_tokens_right <- c("OP-RIGHT-BRACE", "OP-RIGHT-PAREN", "OP-RIGHT-BRACKET", "OP-RIGHT-BRACKET")
infix_tokens <- setdiff(infix_metadata$xml_tag, c("OP-LEFT-BRACE", "OP-COMMA", paren_tokens_left))
no_paren_keywords <- c("ELSE", "REPEAT")
keyword_tokens <- c("FUNCTION", "IF", "FOR", "WHILE")

xp_last_on_line <- "@line1 != following-sibling::*[not(self::COMMENT)][1]/@line1"

hanging_indent_style <- match.arg(hanging_indent_style)

if (hanging_indent_style == "tidy") {
xp_is_not_hanging <- paste(
c(
glue::glue(
"self::{paren_tokens_left}/following-sibling::{paren_tokens_right}[@line1 > preceding-sibling::*[1]/@line2]"
),
glue::glue("self::*[{xp_and(paste0('not(self::', paren_tokens_left, ')'))} and {xp_last_on_line}]")
),
collapse = " | "
)
} else if (hanging_indent_style == "always") {
xp_is_not_hanging <- glue::glue("self::*[{xp_last_on_line}]")
} # "never" makes no use of xp_is_not_hanging, so no definition is necessary

xp_block_ends <- paste0(
"number(",
paste(
c(
glue::glue("self::{paren_tokens_left}/following-sibling::{paren_tokens_right}/preceding-sibling::*[1]/@line2"),
glue::glue("self::*[{xp_and(paste0('not(self::', paren_tokens_left, ')'))}]
/following-sibling::SYMBOL_FUNCTION_CALL/parent::expr/following-sibling::expr[1]/@line2"),
glue::glue("self::*[
{xp_and(paste0('not(self::', paren_tokens_left, ')'))} and
not(following-sibling::SYMBOL_FUNCTION_CALL)
]/following-sibling::*[1]/@line2")
),
collapse = " | "
),
")"
)

xp_indent_changes <- paste(
c(
glue::glue("//{paren_tokens_left}[not(@line1 = following-sibling::expr[
@line2 > @line1 and
({xp_or(paste0('descendant::', paren_tokens_left, '[', xp_last_on_line, ']'))})
]/@line1)]"),
glue::glue("//{infix_tokens}[{xp_last_on_line}]"),
glue::glue("//{no_paren_keywords}[{xp_last_on_line}]"),
glue::glue("//{keyword_tokens}/following-sibling::OP-RIGHT-PAREN[
{xp_last_on_line} and
not(following-sibling::expr[1][OP-LEFT-BRACE])
]")
),
collapse = " | "
)

xp_multiline_string <- "//STR_CONST[@line1 < @line2]"

Linter(function(source_expression) {
# must run on file level because a line can contain multiple expressions, losing indentation information, e.g.
#
#> fun(
# a) # comment
#
# will have "# comment" as a separate expression
if (!is_lint_level(source_expression, "file")) {
return(list())
}

xml <- source_expression$full_xml_parsed_content
# Indentation increases by 1 for:
# - { } blocks that span multiple lines
# - ( ), [ ], or [[ ]] calls that span multiple lines
# + if a token follows (, a hanging indent is required until )
# + if there is no token following ( on the same line, a block indent is required until )
# - binary operators where the second arguments starts on a new line

indent_levels <- rex::re_matches(
source_expression$file_lines,
rex::rex(start, any_spaces), locations = TRUE
)[, "end"]
expected_indent_levels <- integer(length(indent_levels))
is_hanging <- logical(length(indent_levels))

indent_changes <- xml2::xml_find_all(xml, xp_indent_changes)
for (change in indent_changes) {
if (hanging_indent_style != "never") {
change_starts_hanging <- length(xml2::xml_find_first(change, xp_is_not_hanging)) == 0L
} else {
change_starts_hanging <- FALSE
}
change_begin <- as.integer(xml2::xml_attr(change, "line1")) + 1L
change_end <- xml2::xml_find_num(change, xp_block_ends)
if (change_begin <= change_end) {
to_indent <- seq(from = change_begin, to = change_end)
if (change_starts_hanging) {
expected_indent_levels[to_indent] <- as.integer(xml2::xml_attr(change, "col2"))
is_hanging[to_indent] <- TRUE
} else {
expected_indent_levels[to_indent] <- expected_indent_levels[to_indent] + indent
is_hanging[to_indent] <- FALSE
}
}
}

in_str_const <- logical(length(indent_levels))
multiline_strings <- xml2::xml_find_all(xml, xp_multiline_string)
for (string in multiline_strings) {
is_in_str <- seq(
from = as.integer(xml2::xml_attr(string, "line1")) + 1L,
to = as.integer(xml2::xml_attr(string, "line2"))
)
in_str_const[is_in_str] <- TRUE
}

# Only lint non-empty lines if the indentation level doesn't match.
bad_lines <- which(indent_levels != expected_indent_levels &
nzchar(trimws(source_expression$file_lines)) &
!in_str_const)
if (length(bad_lines)) {
# Suppress consecutive lints with the same indentation difference, to not generate an excessive number of lints
is_consecutive_lint <- c(FALSE, diff(bad_lines) == 1L)
indent_diff <- expected_indent_levels[bad_lines] - indent_levels[bad_lines]
is_same_diff <- c(FALSE, diff(indent_diff) == 0L)

bad_lines <- bad_lines[!(is_consecutive_lint & is_same_diff)]

lint_messages <- sprintf(
"%s should be %d spaces but is %d spaces.",
ifelse(is_hanging[bad_lines], "Hanging indent", "Indentation"),
expected_indent_levels[bad_lines],
indent_levels[bad_lines]
)
lint_lines <- unname(as.integer(names(source_expression$file_lines)[bad_lines]))
lint_ranges <- cbind(
pmin(expected_indent_levels[bad_lines] + 1L, indent_levels[bad_lines]),
pmax(expected_indent_levels[bad_lines], indent_levels[bad_lines])
)
Map(
Lint,
filename = source_expression$filename,
line_number = lint_lines,
column_number = indent_levels[bad_lines],
type = "style",
message = lint_messages,
line = unname(source_expression$file_lines[bad_lines]),
ranges = apply(lint_ranges, 1L, list, simplify = FALSE)
)
} else {
list()
}
})
}
6 changes: 3 additions & 3 deletions R/lint.R
Expand Up @@ -543,7 +543,8 @@ checkstyle_output <- function(lints, filename = "lintr_results.xml") {
style = "info",
x$type
),
message = x$message)
message = x$message
)
})
})

Expand Down Expand Up @@ -665,8 +666,7 @@ sarif_output <- function(lints, filename = "lintr_results.sarif") {
rule_index_exists <-
which(sapply(sarif$runs[[1L]]$tool$driver$rules,
function(x) x$id == lint$linter))
if (length(rule_index_exists) == 0L ||
is.na(rule_index_exists[1L])) {
if (length(rule_index_exists) == 0L || is.na(rule_index_exists[1L])) {
rule_index_exists <- 0L
}
}
Expand Down
6 changes: 4 additions & 2 deletions R/make_linter_from_regex.R
Expand Up @@ -28,8 +28,10 @@ make_linter_from_regex <- function(regex,
lapply(
split(line_matches, seq_len(nrow(line_matches))),
function(.match) {
if (is.na(.match[["start"]]) ||
.in_ignorable_position(source_expression, line_number, .match)) {
if (
is.na(.match[["start"]]) ||
.in_ignorable_position(source_expression, line_number, .match)
) {
return()
}
start <- .match[["start"]]
Expand Down
1 change: 1 addition & 0 deletions R/zzz.R
Expand Up @@ -20,6 +20,7 @@ default_linters <- modify_defaults(
cyclocomp_linter(),
equals_na_linter(),
function_left_parentheses_linter(),
indentation_linter(),
infix_spaces_linter(),
line_length_linter(),
no_tab_linter(),
Expand Down

0 comments on commit c224853

Please sign in to comment.