From 982d92d3a6317273452eb504bd536e2d6bd648a4 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Wed, 9 Oct 2019 18:39:28 -0700 Subject: [PATCH 1/2] Re-indent the contents of docstrings when indentation changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Keeping the contents of docstrings completely unchanged when re-indenting (from 2-space intents to 4, for example) can cause incorrect docstring indentation: ``` class MyClass: """Multiline class docstring """ def method(self): """Multiline method docstring """ pass ``` ...becomes: ``` class MyClass: """Multiline class docstring """ def method(self): """Multiline method docstring """ pass ``` This uses the PEP 257 algorithm for determining docstring indentation, and adjusts the contents of docstrings to match their new indentation after `black` is applied. A small normalization is necessary to `assert_equivalent` because the trees are technically no longer precisely equivalent -- some constant strings have changed. When comparing two ASTs, whitespace after newlines within constant strings is thus folded into a single space. Co-authored-by: Luka Zakrajšek --- black.py | 67 ++++++++++++++++++++++++++++- tests/data/docstring.py | 93 +++++++++++++++++++++++++++++++++++++++++ tests/test_black.py | 8 ++++ 3 files changed, 167 insertions(+), 1 deletion(-) create mode 100644 tests/data/docstring.py diff --git a/black.py b/black.py index f283ffc99b3..cee99161d47 100644 --- a/black.py +++ b/black.py @@ -1805,6 +1805,18 @@ def visit_STANDALONE_COMMENT(self, leaf: Leaf) -> Iterator[Line]: yield from self.line() yield from self.visit_default(leaf) + def visit_STRING(self, leaf: Leaf) -> Iterator[Line]: + # Check if it's a docstring + if prev_siblings_are( + leaf.parent, [None, token.NEWLINE, token.INDENT, syms.simple_stmt] + ) and is_multiline_string(leaf): + prefix = " " * self.current_line.depth + docstring = fix_docstring(leaf.value[3:-3], prefix) + leaf.value = leaf.value[0:3] + docstring + leaf.value[-3:] + normalize_string_quotes(leaf) + + yield from self.visit_default(leaf) + def __attrs_post_init__(self) -> None: """You are in a twisty little maze of passages.""" v = self.visit_stmt @@ -2086,6 +2098,22 @@ def preceding_leaf(node: Optional[LN]) -> Optional[Leaf]: return None +def prev_siblings_are(node: Optional[LN], tokens: List[Optional[NodeType]]) -> bool: + """Return if the `node` and its previous siblings match types against the provided + list of tokens; the provided `node`has its type matched against the last element in + the list. `None` can be used as the first element to declare that the start of the + list is anchored at the start of its parent's children.""" + if not tokens: + return True + if tokens[-1] is None: + return node is None + if not node: + return False + if node.type != tokens[-1]: + return False + return prev_siblings_are(node.prev_sibling, tokens[:-1]) + + def child_towards(ancestor: Node, descendant: LN) -> Optional[LN]: """Return the child of `ancestor` that contains `descendant`.""" node: Optional[LN] = descendant @@ -3668,7 +3696,17 @@ def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[st yield from _v(value, depth + 2) else: - yield f"{' ' * (depth+2)}{value!r}, # {value.__class__.__name__}" + # Constant strings may be indented across newlines, if they are + # docstrings; fold spaces after newlines when comparing + if ( + isinstance(node, ast.Constant) + and field == "value" + and isinstance(value, str) + ): + normalized = re.sub(r"\n[ \t]+", "\n ", value) + else: + normalized = value + yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}" yield f"{' ' * depth}) # /{node.__class__.__name__}" @@ -4054,5 +4092,32 @@ def patched_main() -> None: main() +def fix_docstring(docstring: str, prefix: str) -> str: + # https://www.python.org/dev/peps/pep-0257/#handling-docstring-indentation + if not docstring: + return "" + # Convert tabs to spaces (following the normal Python rules) + # and split into a list of lines: + lines = docstring.expandtabs().splitlines() + # Determine minimum indentation (first line doesn't count): + indent = sys.maxsize + for line in lines[1:]: + stripped = line.lstrip() + if stripped: + indent = min(indent, len(line) - len(stripped)) + # Remove indentation (first line is special): + trimmed = [lines[0].strip()] + if indent < sys.maxsize: + last_line_idx = len(lines) - 2 + for i, line in enumerate(lines[1:]): + stripped_line = line[indent:].rstrip() + if stripped_line or i == last_line_idx: + trimmed.append(prefix + stripped_line) + else: + trimmed.append("") + # Return a single string: + return "\n".join(trimmed) + + if __name__ == "__main__": patched_main() diff --git a/tests/data/docstring.py b/tests/data/docstring.py new file mode 100644 index 00000000000..d16d483ca67 --- /dev/null +++ b/tests/data/docstring.py @@ -0,0 +1,93 @@ +class MyClass: + """Multiline + class docstring + """ + + def method(self): + """Multiline + method docstring + """ + pass + + +def foo(): + """This is a docstring with + some lines of text here + """ + return + + +def bar(): + '''This is another docstring + with more lines of text + ''' + return + + +def baz(): + '''"This" is a string with some + embedded "quotes"''' + return + + +def troz(): + '''Indentation with tabs + is just as OK + ''' + return + + +def zort(): + """Another + multiline + docstring + """ + pass + +# output + +class MyClass: + """Multiline + class docstring + """ + + def method(self): + """Multiline + method docstring + """ + pass + + +def foo(): + """This is a docstring with + some lines of text here + """ + return + + +def bar(): + """This is another docstring + with more lines of text + """ + return + + +def baz(): + '''"This" is a string with some + embedded "quotes"''' + return + + +def troz(): + """Indentation with tabs + is just as OK + """ + return + + +def zort(): + """Another + multiline + docstring + """ + pass diff --git a/tests/test_black.py b/tests/test_black.py index 66a0761ecf8..6dafb62ec65 100644 --- a/tests/test_black.py +++ b/tests/test_black.py @@ -365,6 +365,14 @@ def test_string_quotes(self) -> None: black.assert_equivalent(source, not_normalized) black.assert_stable(source, not_normalized, mode=mode) + @patch("black.dump_to_file", dump_to_stderr) + def test_docstring(self) -> None: + source, expected = read_data("docstring") + actual = fs(source) + self.assertFormatEqual(expected, actual) + black.assert_equivalent(source, actual) + black.assert_stable(source, actual, black.FileMode()) + @patch("black.dump_to_file", dump_to_stderr) def test_slices(self) -> None: source, expected = read_data("slices") From b96f9caceaa247ddfaeec0801952c5dc05c11960 Mon Sep 17 00:00:00 2001 From: Alex Vandiver Date: Mon, 14 Oct 2019 22:29:03 -0700 Subject: [PATCH 2/2] Extract the inner `_v` method to decrease complexity This reduces the cyclomatic complexity to a level that makes flake8 happy. --- black.py | 101 ++++++++++++++++++++++++++++--------------------------- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/black.py b/black.py index cee99161d47..2f872df4b7c 100644 --- a/black.py +++ b/black.py @@ -3653,62 +3653,65 @@ def _fixup_ast_constants( return node -def assert_equivalent(src: str, dst: str) -> None: - """Raise AssertionError if `src` and `dst` aren't equivalent.""" - - def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[str]: - """Simple visitor generating strings to compare ASTs by content.""" +def _stringify_ast( + node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0 +) -> Iterator[str]: + """Simple visitor generating strings to compare ASTs by content.""" - node = _fixup_ast_constants(node) + node = _fixup_ast_constants(node) - yield f"{' ' * depth}{node.__class__.__name__}(" + yield f"{' ' * depth}{node.__class__.__name__}(" - for field in sorted(node._fields): - # TypeIgnore has only one field 'lineno' which breaks this comparison - type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) - if sys.version_info >= (3, 8): - type_ignore_classes += (ast.TypeIgnore,) - if isinstance(node, type_ignore_classes): - break + for field in sorted(node._fields): + # TypeIgnore has only one field 'lineno' which breaks this comparison + type_ignore_classes = (ast3.TypeIgnore, ast27.TypeIgnore) + if sys.version_info >= (3, 8): + type_ignore_classes += (ast.TypeIgnore,) + if isinstance(node, type_ignore_classes): + break - try: - value = getattr(node, field) - except AttributeError: - continue + try: + value = getattr(node, field) + except AttributeError: + continue - yield f"{' ' * (depth+1)}{field}=" - - if isinstance(value, list): - for item in value: - # Ignore nested tuples within del statements, because we may insert - # parentheses and they change the AST. - if ( - field == "targets" - and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete)) - and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple)) - ): - for item in item.elts: - yield from _v(item, depth + 2) - elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): - yield from _v(item, depth + 2) - - elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)): - yield from _v(value, depth + 2) + yield f"{' ' * (depth+1)}{field}=" - else: - # Constant strings may be indented across newlines, if they are - # docstrings; fold spaces after newlines when comparing + if isinstance(value, list): + for item in value: + # Ignore nested tuples within del statements, because we may insert + # parentheses and they change the AST. if ( - isinstance(node, ast.Constant) - and field == "value" - and isinstance(value, str) + field == "targets" + and isinstance(node, (ast.Delete, ast3.Delete, ast27.Delete)) + and isinstance(item, (ast.Tuple, ast3.Tuple, ast27.Tuple)) ): - normalized = re.sub(r"\n[ \t]+", "\n ", value) - else: - normalized = value - yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}" + for item in item.elts: + yield from _stringify_ast(item, depth + 2) + elif isinstance(item, (ast.AST, ast3.AST, ast27.AST)): + yield from _stringify_ast(item, depth + 2) - yield f"{' ' * depth}) # /{node.__class__.__name__}" + elif isinstance(value, (ast.AST, ast3.AST, ast27.AST)): + yield from _stringify_ast(value, depth + 2) + + else: + # Constant strings may be indented across newlines, if they are + # docstrings; fold spaces after newlines when comparing + if ( + isinstance(node, ast.Constant) + and field == "value" + and isinstance(value, str) + ): + normalized = re.sub(r"\n[ \t]+", "\n ", value) + else: + normalized = value + yield f"{' ' * (depth+2)}{normalized!r}, # {value.__class__.__name__}" + + yield f"{' ' * depth}) # /{node.__class__.__name__}" + + +def assert_equivalent(src: str, dst: str) -> None: + """Raise AssertionError if `src` and `dst` aren't equivalent.""" try: src_ast = parse_ast(src) @@ -3728,8 +3731,8 @@ def _v(node: Union[ast.AST, ast3.AST, ast27.AST], depth: int = 0) -> Iterator[st f"This invalid output might be helpful: {log}" ) from None - src_ast_str = "\n".join(_v(src_ast)) - dst_ast_str = "\n".join(_v(dst_ast)) + src_ast_str = "\n".join(_stringify_ast(src_ast)) + dst_ast_str = "\n".join(_stringify_ast(dst_ast)) if src_ast_str != dst_ast_str: log = dump_to_file(diff(src_ast_str, dst_ast_str, "src", "dst")) raise AssertionError(