microsoft · kimadeline · Oct 23, 2020 · Oct 20, 2020 · Oct 20, 2020 · Oct 20, 2020
diff --git a/news/2 Fixes/14048.md b/news/2 Fixes/14048.md
@@ -0,0 +1 @@
+Update the logic for parsing and sending selected code to the REPL.
diff --git a/pythonFiles/normalizeSelection.py b/pythonFiles/normalizeSelection.py
@@ -0,0 +1,128 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import ast
+import textwrap
+import re
+import sys
+
+
+def split_lines(source):
+    """
+    Split selection lines in a version-agnostic way.
+
+    Python grammar only treats \r, \n, and \r\n as newlines.
+    But splitlines() in Python 3 has a much larger list: for example, it also includes \v, \f.
+    As such, this function will split lines across all Python versions.
+    """
+    return re.split(r"[\n\r]+", source)
+
+
+def _get_statements(selection):
+    """
+    Process a multiline selection into a list of its top-level statements.
+    This will remove empty newlines around and within the selection, dedent it,
+    and split it using the result of `ast.parse()`.
+    """
+
+    # Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
+    lines = (line for line in split_lines(selection) if line.strip() != "")
+
+    # Dedent the selection and parse it using the ast module.
+    # Note that leading comments in the selection will be discarded during parsing.
+    source = textwrap.dedent("\n".join(lines))
+    tree = ast.parse(source)
+
+    # We'll need the dedented lines to rebuild the selection.
+    lines = split_lines(source)
+
+    # Get the line ranges for top-level blocks returned from parsing the dedented text
+    # and split the selection accordingly.
+    # tree.body is a list of AST objects, which we rely on to extract top-level statements.
+    # If we supported Python 3.8+ only we could use the lineno and end_lineno attributes of each object
+    # to get the boundaries of each block.
+    # However, earlier Python versions only have the lineno attribute, which is the range start position (1-indexed).
+    # Therefore, to retrieve the end line of each block in a version-agnostic way we need to do
+    # `end = next_block.lineno - 1`
+    # for all blocks except the last one, which will will just run until the last line.
+    ends = [node.lineno - 1 for node in tree.body[1:]] + [len(lines)]
+    for node, end in zip(tree.body, ends):
+        # Given this selection:
+        # 1: if (m > 0 and
+        # 2:        n < 3):
+        # 3:     print('foo')
+        # 4: value = 'bar'
+        #
+        # The first block would have lineno = 1,and the second block lineno = 4
+        start = node.lineno - 1
+        block = "\n".join(lines[start:end])
+
+        # If the block is multiline, add an extra newline character at its end.
+        # This way, when joining blocks back together, there will be a blank line between each multiline statement
+        # and no blank lines between single-line statements, or it would look like this:
+        # >>> x = 22
+        # >>>
+        # >>> total = x + 30
+        # >>>
+        # Note that for the multiline parentheses case this newline is redundant,
+        # since the closing parenthesis terminates the statement already.
+        # This means that for this pattern we'll end up with:
+        # >>> x = [
+        # ...   1
+        # ... ]
+        # >>>
+        # >>> y = [
+        # ...   2
+        # ...]
+        if end - start > 1:
+            block += "\n"
+
+        yield block
+
+
+def normalize_lines(selection):
+    """
+    Normalize the text selection received from the extension and send it to the REPL.
+
+    If it is a single line selection, dedent it, append a newline and send it to the REPL.
+    Otherwise, sanitize the multiline selection before sending it to the REPL:
+    split it in a list of top-level statements
+    and add newlines between each of them to tell the REPL where each block ends.
+    """
+
+    try:
+        # Parse the selection into a list of top-level blocks.
+        # We don't differentiate between single and multiline statements
+        # because it's not a perf bottleneck,
+        # and the overhead from splitting and rejoining strings in the multiline case is one-off.
+        statements = _get_statements(selection)
+
+        # Insert a newline between each top-level statement, and append a newline to the selection.
+        source = "\n".join(statements) + "\n"
+    except:
+        # If there's a problem when parsing statements,
+        # append a blank line to end the block and send it as-is.
+        source = selection + "\n\n"
+
+    # `source` is a unicode instance at this point on Python 2,
+    # so if we used `sys.stdout.write` to send it to the REPL,
+    # Python will implicitly encode it using sys.getdefaultencoding(),
+    # which we don't want.
+    stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
+    stdout.write(source.encode("utf-8"))
+    stdout.flush()
+
+
+if __name__ == "__main__":
+    # This will fail on a large file.
+    # See https://github.com/microsoft/vscode-python/issues/14471
+    contents = sys.argv[1]
+    try:
+        default_encoding = sys.getdefaultencoding()
+        encoded_contents = contents.encode(default_encoding, "surrogateescape")
+        contents = encoded_contents.decode(default_encoding, "replace")
+    except (UnicodeError, LookupError):
+        pass
+    if isinstance(contents, bytes):
+        contents = contents.decode("utf8")
+    normalize_lines(contents)
diff --git a/pythonFiles/tests/test_normalize_selection.py b/pythonFiles/tests/test_normalize_selection.py
@@ -0,0 +1,191 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+import textwrap
+
+import normalizeSelection
+
+
+class TestNormalizationScript(object):
+    """Unit tests for the normalization script."""
+
+    def test_basicNormalization(self, capsys):
+        src = 'print("this is a test")'
+        expected = src + "\n"
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected
+
+    def test_moreThanOneLine(self, capsys):
+        src = textwrap.dedent(
+            """\
+            # Some rando comment
+
+            def show_something():
+                print("Something")
+            """
+        )
+        expected = textwrap.dedent(
+            """\
+            def show_something():
+                print("Something")
+
+            """
+        )
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected
+
+    def test_withHangingIndent(self, capsys):
+        src = textwrap.dedent(
+            """\
+            x = 22
+            y = 30
+            z = -10
+            result = x + y + z
+
+            if result == 42:
+                print("The answer to life, the universe, and everything")
+            """
+        )
+        expected = textwrap.dedent(
+            """\
+            x = 22
+            y = 30
+            z = -10
+            result = x + y + z
+            if result == 42:
+                print("The answer to life, the universe, and everything")
+
+            """
+        )
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected
+
+    def test_clearOutExtraneousNewlines(self, capsys):
+        src = textwrap.dedent(
+            """\
+            value_x = 22
+
+            value_y = 30
+
+            value_z = -10
+
+            print(value_x + value_y + value_z)
+
+            """
+        )
+        expectedResult = textwrap.dedent(
+            """\
+            value_x = 22
+            value_y = 30
+            value_z = -10
+            print(value_x + value_y + value_z)
+            """
+        )
+        normalizeSelection.normalize_lines(src)
+        result = capsys.readouterr()
+        assert result.out == expectedResult
+
+    def test_clearOutExtraLinesAndWhitespace(self, capsys):
+        src = textwrap.dedent(
+            """\
+            if True:
+                x = 22
+
+                y = 30
+
+                z = -10
+
+            print(x + y + z)
+
+            """
+        )
+        expectedResult = textwrap.dedent(
+            """\
+            if True:
+                x = 22
+                y = 30
+                z = -10
+
+            print(x + y + z)
+            """
+        )
+        normalizeSelection.normalize_lines(src)
+        result = capsys.readouterr()
+        assert result.out == expectedResult
+
+    def test_partialSingleLine(self, capsys):
+        src = "   print('foo')"
+        expected = textwrap.dedent(src) + "\n"
+        normalizeSelection.normalize_lines(src)
+        result = capsys.readouterr()
+        assert result.out == expected
+
+    def test_multiLineWithIndent(self, capsys):
+        src = """\
+
+        if (x > 0
+            and condition == True):
+            print('foo')
+        else:
+
+            print('bar')
+        """
+
+        expectedResult = textwrap.dedent(
+            """\
+        if (x > 0
+            and condition == True):
+            print('foo')
+        else:
+            print('bar')
+
+        """
+        )
+
+        normalizeSelection.normalize_lines(src)
+        result = capsys.readouterr()
+        assert result.out == expectedResult
+
+    def test_multiLineWithComment(self, capsys):
+        src = textwrap.dedent(
+            """\
+
+            def show_something():
+                # A comment
+                print("Something")
+            """
+        )
+        expected = textwrap.dedent(
+            """\
+            def show_something():
+                # A comment
+                print("Something")
+
+            """
+        )
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected
+
+    def test_exception(self, capsys):
+        src = "       if True:"
+        expected = src + "\n\n"
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected
+
+    def test_multilineException(self, capsys):
+        src = textwrap.dedent(
+            """\
+
+            def show_something():
+                if True:
+            """
+        )
+        expected = src + "\n\n"
+        normalizeSelection.normalize_lines(src)
+        captured = capsys.readouterr()
+        assert captured.out == expected