From 9a5e1b349fd2521ef8eedaf92b16785c895aaefa Mon Sep 17 00:00:00 2001 From: Raphael Gaschignard Date: Mon, 16 Nov 2020 10:31:37 +0900 Subject: [PATCH] Avoid doing a regex search for line lengths unless necessary Before this change, we would check a line for pylint: disable-style pragmas to determine whether to check the line length. The regex check itself is very costly (~5% of pylint's total runtime in one codebase), and is much more costly than the line length check itself. This refactors the pylint check to instead do an approximate line length check on everything, before using the regex to handle exceptional, false positive cases such as a pragma being the cause of the line length overflow. This change, in one sample codebase, lowered the check_lines runtime from 5% of the total runtime to 0.35% of the total runtime. --- CONTRIBUTORS.txt | 2 ++ ChangeLog | 3 +++ pylint/checkers/format.py | 26 +++++++++++--------------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index 702af289a0..12094a99e8 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -427,3 +427,5 @@ contributors: * Takashi Hirashima: contributor * Joffrey Mander: contributor + +* Raphael Gaschignard: contributor diff --git a/ChangeLog b/ChangeLog index fff8ec92f3..dd3940f96c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -70,6 +70,9 @@ Release date: TBA * Fix minor documentation issues +* Improve the performance of the line length check. + + What's New in Pylint 2.6.0? 
=========================== diff --git a/pylint/checkers/format.py b/pylint/checkers/format.py index 12544b8964..1b63072509 100644 --- a/pylint/checkers/format.py +++ b/pylint/checkers/format.py @@ -442,9 +442,7 @@ def _check_keyword_parentheses(self, tokens: List[TokenInfo], start: int) -> Non def _prepare_token_dispatcher(self): dispatch = {} - for tokens, handler in [ - (_KEYWORD_TOKENS, self._check_keyword_parentheses), - ]: + for tokens, handler in [(_KEYWORD_TOKENS, self._check_keyword_parentheses)]: for token in tokens: dispatch[token] = handler return dispatch @@ -735,18 +733,20 @@ def check_lines(self, lines: str, lineno: int) -> None: max_chars = self.config.max_line_length - potential_line_length_warning = False + split_lines = self.specific_splitlines(lines) + + for offset, line in enumerate(split_lines): + self.check_line_ending(line, lineno + offset) # hold onto the initial lineno for later - starting_lineno = lineno - for line in self.specific_splitlines(lines): - self.check_line_ending(line, lineno) + potential_line_length_warning = False + for offset, line in enumerate(split_lines): # this check is purposefully simple and doesn't rstrip # since this is running on every line you're checking it's # advantageous to avoid doing a lot of work if len(line) > max_chars: potential_line_length_warning = True - lineno += 1 + break # if there were no lines passing the max_chars config, we don't bother # running the full line check (as we've met an even more strict condition) @@ -762,13 +762,9 @@ def check_lines(self, lines: str, lineno: int) -> None: # The 'pylint: disable whatever' should not be taken into account for line length count lines = self.remove_pylint_option_from_lines(mobj) - # reset the lineno back to its original value - # (since we iterated over stuff earlier) - lineno = starting_lineno - - for line in self.specific_splitlines(lines): - self.check_line_length(line, lineno) - lineno += 1 + # here we re-run specific_splitlines since we have 
filtered out pylint options above + for offset, line in enumerate(self.specific_splitlines(lines)): + self.check_line_length(line, lineno + offset) def check_indent_level(self, string, expected, line_num): """return the indent level of the string"""