Skip to content

Commit

Permalink
Fix LST invariant when using <<- in the single-quoted context.
Browse files Browse the repository at this point in the history
Found this bug by testing the arena invariant over the 'wild' corpus.

TODO: Still have a bug in the double-quoted context.

- test/osh2oil.sh here-doc has a failing case for this
- test/arena.sh here-doc also has a failing case
  • Loading branch information
Andy Chu committed Aug 28, 2018
1 parent 58f2496 commit da4a686
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 34 deletions.
6 changes: 3 additions & 3 deletions core/reader.py
Expand Up @@ -95,7 +95,7 @@ def StringLineReader(s, arena):
# internally.


class VirtualLineReader(_Reader):
class HereDocLineReader(_Reader):
"""Used for here docs."""
def __init__(self, lines, arena):
"""
Expand All @@ -110,7 +110,7 @@ def __init__(self, lines, arena):
def GetLine(self):
if self.pos == self.num_lines:
return -1, None
line_id, line = self.lines[self.pos]
line_id, line, start_offset = self.lines[self.pos]

self.pos += 1
return line_id, line
return line_id, line[start_offset:]
4 changes: 2 additions & 2 deletions core/reader_test.py
Expand Up @@ -34,8 +34,8 @@ def testLineReadersAreEquivalent(self):
r2 = reader.FileLineReader(f, a2)

a3 = self.pool.NewArena()
lines = [(0, 'one\n'), (1, 'two')]
r3 = reader.VirtualLineReader(lines, a3)
lines = [(0, 'one\n', 0), (1, 'two', 0)]
r3 = reader.HereDocLineReader(lines, a3)

for a in [a1, a2, a3]:
a.PushSource('reader_test.py')
Expand Down
34 changes: 24 additions & 10 deletions osh/cmd_parse.py
Expand Up @@ -13,6 +13,7 @@
from asdl import const

from core import braces
from core import reader
from core import word
from core import util

Expand Down Expand Up @@ -86,7 +87,8 @@ def _MaybeReadHereDocs(self):
for h in self.pending_here_docs:
here_end_line = None
here_end_line_id = -1
lines = []
here_end_start_offset = 0
here_lines = []

# "If any character in word is quoted, the delimiter shall be formed by
# performing quote removal on word, and the here-document lines shall not
Expand Down Expand Up @@ -115,37 +117,49 @@ def _MaybeReadHereDocs(self):

# If op is <<-, strip off ALL leading tabs -- not spaces, and not just
# the first tab.
start_offset = 0
if h.op.id == Id.Redir_DLessDash:
line = line.lstrip('\t')
if line.rstrip() == delimiter:
here_end_line = line
n = len(line)
i = 0
while i < n:
if line[i] != '\t':
break
i += 1
start_offset = i

if line[start_offset:].rstrip() == delimiter:
here_end_line_id = line_id
here_end_line = line
here_end_start_offset = start_offset
break

lines.append((line_id, line))
here_lines.append((line_id, line, start_offset))

line_reader = reader.HereDocLineReader(here_lines, self.arena)

parts = []
if delim_quoted: # << 'EOF'
# Create a line_span and a token for each line.
tokens = []
for line_id, line in lines:
line_span = ast.line_span(line_id, 0, len(line))
for line_id, line, start_offset in here_lines:
line_span = ast.line_span(line_id, start_offset, len(line))
span_id = self.arena.AddLineSpan(line_span)
t = ast.token(Id.Lit_Chars, line, span_id)
t = ast.token(Id.Lit_Chars, line[start_offset:], span_id)
tokens.append(t)
# LiteralPart for each line.
h.stdin_parts = [ast.LiteralPart(t) for t in tokens]

else:
from osh import parse_lib # Avoid circular import
w_parser = parse_lib.MakeWordParserForHereDoc(lines, self.arena)
w_parser = parse_lib.MakeWordParserForHereDoc(line_reader, self.arena)

# NOTE: There can be different kinds of parse errors in here docs.
w = w_parser.ReadHereDocBody(h.stdin_parts)

# Create a span with the end terminator. Maintains the invariant that
# the spans "add up".
line_span = ast.line_span(here_end_line_id, 0, len(here_end_line))
line_span = ast.line_span(
here_end_line_id, here_end_start_offset, len(here_end_line))
h.here_end_span_id = self.arena.AddLineSpan(line_span)

del self.pending_here_docs[:] # No .clear() until Python 3.3.
Expand Down
15 changes: 10 additions & 5 deletions osh/osh.asdl
Expand Up @@ -2,7 +2,13 @@
--
-- Invariant: the source text can be reconstructed byte-for-byte from this
-- tree.
--
--
-- Exceptions:
-- * <<- here docs with leading tabs, since we don't want those for
-- conversion. We don't want files with mixed tabs and spaces.
-- * Found to be not strictly necessary for oil conversion
-- * foo() { } vs function foo { } -- ksh
--
-- The AST is composed of the builtin ASDL types (string, int, bool) and our
-- application type 'id', which is core.id_kind.Id.

Expand All @@ -28,17 +34,16 @@
-- * 1 + 2*3 vs. 1 + (2*3) or even 1 + ((2*3))
-- * [[ (1 == 1) ]] vs [[ 1 == 1 ]]
-- * $(( 1 + 2 )) vs $[1 + 2] (bash-specific, used by Aboriginal)
--
-- Found to be not strictly necessary for oil conversion
-- * foo() { } vs function foo { } -- ksh
-- TODO:
-- * remove redundancy of 'string val' in token. It should use the span.

module osh
{
-- A portion of a line, used for error messages.
-- TODO: Need arena_id, for different files
line_span = (int line_id, int col, int length)

-- A primitive token. NOTE: val is redundant with 'loc' for now. If we
-- A primitive token. NOTE: val is redundant with 'span_id' for now. If we
-- rewrite the parser in C++, we might care for memory footprint. But for
-- now this is convenient.
-- NOTE: identical strings can shared, if we care.
Expand Down
3 changes: 1 addition & 2 deletions osh/parse_lib.py
Expand Up @@ -64,8 +64,7 @@ def MakeParserForCompletion(code_str, arena):
return w_parser, c_parser


def MakeWordParserForHereDoc(lines, arena):
line_reader = reader.VirtualLineReader(lines, arena)
def MakeWordParserForHereDoc(line_reader, arena):
line_lexer = lexer.LineLexer(match.MATCHER, '', arena)
lx = lexer.Lexer(line_lexer, line_reader)
return word_parse.WordParser(lx, line_reader)
Expand Down
34 changes: 27 additions & 7 deletions test/arena.sh
Expand Up @@ -7,24 +7,44 @@ set -o nounset
set -o pipefail
set -o errexit

source test/common.sh
# TODO: Need include guard in test/common.sh! I ran into this before.
#source test/common.sh # for all-passing, etc.
source test/wild-runner.sh # For MANIFEST, etc.

compare() {
_compare() {
local path=$1

mkdir -p _tmp/arena
bin/osh --parse-and-print-arena $path > _tmp/arena/left.txt
diff -u $path _tmp/arena/left.txt
if diff -u $path _tmp/arena/left.txt; then
echo "$path"
else
return 1
fi
}

here-doc() {
compare test/arena/here-dq.sh
compare test/arena/here-sq.sh
compare test/arena/here-multiple.sh
_compare test/arena/here-dq.sh
_compare test/arena/here-sq.sh
_compare test/arena/here-multiple.sh
_compare test/arena/here-dq-indented.sh
}

tilde() {
compare test/arena/tilde.sh
_compare test/arena/tilde.sh
}

_compare-wild() {
local rel_path=$1
local abs_path=$2

_compare $abs_path
}

# Run on wild corpus
wild() {
wc -l $MANIFEST
cat $MANIFEST | xargs -n 2 -- $0 _compare-wild
}

readonly -a PASSING=(
Expand Down
7 changes: 7 additions & 0 deletions test/arena/here-dq-indented.sh
@@ -0,0 +1,7 @@
#!/bin/bash

cat <<-ONE
indented
body
ONE
OSH
24 changes: 19 additions & 5 deletions test/osh2oil.sh
Expand Up @@ -300,6 +300,7 @@ OIL
}

here-doc() {
# DQ context
osh0-oil3 << 'OSH' 3<< 'OIL'
cat <<ONE
echo $hi
Expand All @@ -310,6 +311,7 @@ echo $hi
"""
OIL
# SQ context
osh0-oil3 << 'OSH' 3<< 'OIL'
cat <<'ONE'
single quoted
Expand All @@ -320,19 +322,31 @@ single quoted
'''
OIL
# <<- is indented
# <<- in DQ context
osh0-oil3 << 'OSH' 3<< 'OIL'
cat <<-'ONE'
cat <<-ONE
indented
body
ONE
OSH
cat << '''
cat << """
indented
body
"""
OIL
# <<- in SQ context
osh0-oil3 << 'OSH' 3<< 'OIL'
cat <<-'ONE'
indented
body
'''
ONE
OSH
cat << '''
indented
body
'''
OIL
}
here-string() {
Expand Down

0 comments on commit da4a686

Please sign in to comment.