Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Code parsing for run selection in terminal - Python side #14457

Merged
merged 20 commits into from Oct 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions news/2 Fixes/14048.md
@@ -0,0 +1 @@
Update the logic for parsing and sending selected code to the REPL.
128 changes: 128 additions & 0 deletions pythonFiles/normalizeSelection.py
@@ -0,0 +1,128 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import ast
import textwrap
import re
import sys


def split_lines(source):
"""
Split selection lines in a version-agnostic way.

Python grammar only treats \r, \n, and \r\n as newlines.
But splitlines() in Python 3 has a much larger list: for example, it also includes \v, \f.
As such, this function will split lines across all Python versions.
"""
return re.split(r"[\n\r]+", source)


def _get_statements(selection):
"""
Process a multiline selection into a list of its top-level statements.
This will remove empty newlines around and within the selection, dedent it,
and split it using the result of `ast.parse()`.
"""

# Remove blank lines within the selection to prevent the REPL from thinking the block is finished.
lines = (line for line in split_lines(selection) if line.strip() != "")

# Dedent the selection and parse it using the ast module.
# Note that leading comments in the selection will be discarded during parsing.
source = textwrap.dedent("\n".join(lines))
tree = ast.parse(source)

# We'll need the dedented lines to rebuild the selection.
lines = split_lines(source)

# Get the line ranges for top-level blocks returned from parsing the dedented text
# and split the selection accordingly.
# tree.body is a list of AST objects, which we rely on to extract top-level statements.
# If we supported Python 3.8+ only we could use the lineno and end_lineno attributes of each object
# to get the boundaries of each block.
# However, earlier Python versions only have the lineno attribute, which is the range start position (1-indexed).
# Therefore, to retrieve the end line of each block in a version-agnostic way we need to do
# `end = next_block.lineno - 1`
# for all blocks except the last one, which will will just run until the last line.
ends = [node.lineno - 1 for node in tree.body[1:]] + [len(lines)]
for node, end in zip(tree.body, ends):
# Given this selection:
# 1: if (m > 0 and
# 2: n < 3):
# 3: print('foo')
# 4: value = 'bar'
#
# The first block would have lineno = 1,and the second block lineno = 4
start = node.lineno - 1
block = "\n".join(lines[start:end])

# If the block is multiline, add an extra newline character at its end.
# This way, when joining blocks back together, there will be a blank line between each multiline statement
# and no blank lines between single-line statements, or it would look like this:
# >>> x = 22
# >>>
# >>> total = x + 30
# >>>
# Note that for the multiline parentheses case this newline is redundant,
# since the closing parenthesis terminates the statement already.
# This means that for this pattern we'll end up with:
# >>> x = [
# ... 1
# ... ]
# >>>
# >>> y = [
# ... 2
# ...]
if end - start > 1:
block += "\n"

yield block


def normalize_lines(selection):
"""
Normalize the text selection received from the extension and send it to the REPL.

If it is a single line selection, dedent it, append a newline and send it to the REPL.
Otherwise, sanitize the multiline selection before sending it to the REPL:
split it in a list of top-level statements
and add newlines between each of them to tell the REPL where each block ends.
"""

try:
# Parse the selection into a list of top-level blocks.
# We don't differentiate between single and multiline statements
# because it's not a perf bottleneck,
# and the overhead from splitting and rejoining strings in the multiline case is one-off.
statements = _get_statements(selection)

# Insert a newline between each top-level statement, and append a newline to the selection.
source = "\n".join(statements) + "\n"
except:
# If there's a problem when parsing statements,
# append a blank line to end the block and send it as-is.
source = selection + "\n\n"

# `source` is a unicode instance at this point on Python 2,
# so if we used `sys.stdout.write` to send it to the REPL,
# Python will implicitly encode it using sys.getdefaultencoding(),
# which we don't want.
stdout = sys.stdout if sys.version_info < (3,) else sys.stdout.buffer
stdout.write(source.encode("utf-8"))
stdout.flush()


if __name__ == "__main__":
# This will fail on a large file.
# See https://github.com/microsoft/vscode-python/issues/14471
contents = sys.argv[1]
try:
default_encoding = sys.getdefaultencoding()
encoded_contents = contents.encode(default_encoding, "surrogateescape")
contents = encoded_contents.decode(default_encoding, "replace")
except (UnicodeError, LookupError):
pass
if isinstance(contents, bytes):
contents = contents.decode("utf8")
normalize_lines(contents)
191 changes: 191 additions & 0 deletions pythonFiles/tests/test_normalize_selection.py
@@ -0,0 +1,191 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import textwrap

import normalizeSelection


class TestNormalizationScript(object):
"""Unit tests for the normalization script."""

def test_basicNormalization(self, capsys):
src = 'print("this is a test")'
expected = src + "\n"
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected

def test_moreThanOneLine(self, capsys):
src = textwrap.dedent(
"""\
# Some rando comment

def show_something():
print("Something")
"""
)
expected = textwrap.dedent(
"""\
def show_something():
print("Something")

"""
)
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected

def test_withHangingIndent(self, capsys):
src = textwrap.dedent(
"""\
x = 22
y = 30
z = -10
result = x + y + z

if result == 42:
print("The answer to life, the universe, and everything")
"""
)
expected = textwrap.dedent(
"""\
x = 22
y = 30
z = -10
result = x + y + z
if result == 42:
print("The answer to life, the universe, and everything")

"""
)
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected

def test_clearOutExtraneousNewlines(self, capsys):
src = textwrap.dedent(
"""\
value_x = 22

value_y = 30

value_z = -10

print(value_x + value_y + value_z)

"""
)
expectedResult = textwrap.dedent(
"""\
value_x = 22
value_y = 30
value_z = -10
print(value_x + value_y + value_z)
"""
)
normalizeSelection.normalize_lines(src)
result = capsys.readouterr()
assert result.out == expectedResult

def test_clearOutExtraLinesAndWhitespace(self, capsys):
src = textwrap.dedent(
"""\
if True:
x = 22

y = 30

z = -10

print(x + y + z)

"""
)
expectedResult = textwrap.dedent(
"""\
if True:
x = 22
y = 30
z = -10

print(x + y + z)
"""
)
normalizeSelection.normalize_lines(src)
result = capsys.readouterr()
assert result.out == expectedResult

def test_partialSingleLine(self, capsys):
src = " print('foo')"
expected = textwrap.dedent(src) + "\n"
normalizeSelection.normalize_lines(src)
result = capsys.readouterr()
assert result.out == expected

def test_multiLineWithIndent(self, capsys):
src = """\

if (x > 0
and condition == True):
print('foo')
else:

print('bar')
"""

expectedResult = textwrap.dedent(
"""\
if (x > 0
and condition == True):
print('foo')
else:
print('bar')

"""
)

normalizeSelection.normalize_lines(src)
result = capsys.readouterr()
assert result.out == expectedResult

def test_multiLineWithComment(self, capsys):
src = textwrap.dedent(
"""\

def show_something():
# A comment
print("Something")
"""
)
expected = textwrap.dedent(
"""\
def show_something():
# A comment
print("Something")

"""
)
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected

def test_exception(self, capsys):
src = " if True:"
expected = src + "\n\n"
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected

def test_multilineException(self, capsys):
src = textwrap.dedent(
"""\

def show_something():
if True:
"""
)
expected = src + "\n\n"
normalizeSelection.normalize_lines(src)
captured = capsys.readouterr()
assert captured.out == expected