Skip to content

Commit

Permalink
New functions to detect and parse assignments.
Browse files Browse the repository at this point in the history
Wrote tests for them, but haven't hooked them up to the main codebase
yet.  The new _MakeAssignPair function invokes the arithmetic parser
where necessary.

Wrote a new _AppendMoreEnv function, using the 'prepared' or 'detected'
tuple.

Also:

- Notes on OSH architecture.  'alias' and array location assignment made
  things less clean.
  • Loading branch information
Andy Chu committed Sep 15, 2018
1 parent 3812c98 commit ea80189
Show file tree
Hide file tree
Showing 11 changed files with 323 additions and 43 deletions.
20 changes: 10 additions & 10 deletions core/alloc.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ def GetLineSpan(self, span_id):
try:
return self.spans[span_id]
except IndexError:
util.log('Span ID out of range: %d', span_id)
util.log('Span ID out of range: %d is greater than %d', span_id,
len(self.spans))
raise

def LastSpanId(self):
Expand All @@ -123,16 +124,15 @@ def GetDebugInfo(self, line_id):
return path, line_num


def CompletionArena(pool):
  """Return a new arena from `pool`, labelled as a temporary completion buffer.

  Args:
    pool: object providing NewArena().

  Returns:
    The new arena, after PushSource('<temp completion buffer>') was called
    on it.
  """
  completion_arena = pool.NewArena()
  completion_arena.PushSource('<temp completion buffer>')
  return completion_arena
def SideArena(source_name):
"""A new arena outside the main one.
For completion, $PS1 and $PS4, a[x++]=1, etc.

def PluginArena(source_name):
"""For PS4, etc."""
# TODO: Should there only be one pool? This isn't worked out yet.
Translation takes advantage of the fact that arenas have contiguous span_ids.
"""
# TODO: Should there only be one pool? This isn't worked out yet. Or just
# get rid of the pool concept?
pool = Pool()
arena = pool.NewArena()
arena.PushSource(source_name)
Expand Down
2 changes: 1 addition & 1 deletion core/cmd_exec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1421,7 +1421,7 @@ def __init__(self, parse_ctx, exec_opts, mem, word_ev):
self.mem = mem
self.word_ev = word_ev

self.arena = alloc.PluginArena('<$PS4>')
self.arena = alloc.SideArena('<$PS4>')
self.parse_cache = {} # PS4 value -> CompoundWord. PS4 is scoped.

def _EvalPS4(self):
Expand Down
2 changes: 1 addition & 1 deletion core/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,7 +610,7 @@ def __init__(self, pool, ev, comp_lookup, var_comp, parse_ctx):
self.parser = DummyParser() # TODO: remove

def Matches(self, buf, status_out):
arena = alloc.CompletionArena(self.pool)
arena = alloc.SideArena('<completion>')
w_parser, c_parser = self.parse_ctx.MakeParserForCompletion(buf, arena)
comp_type, prefix, comp_words = _GetCompletionType(
w_parser, c_parser, self.ev, status_out)
Expand Down
45 changes: 27 additions & 18 deletions core/word.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ def AsFuncName(w):
def AsArithVarName(w):
"""Returns a string if this word looks like an arith var; otherwise False.
NOTE: This can't be combined with LooksLikeAssignment because VarLike and
NOTE: This can't be combined with DetectAssignment because VarLike and
ArithVarLike must be different tokens. Otherwise _ReadCompoundWord will be
confused between array assignments foo=(1 2) and function calls foo(1, 2).
"""
Expand Down Expand Up @@ -378,14 +378,18 @@ def IsVarLike(w):
return _LiteralPartId(w.parts[0]) == Id.Lit_VarLike


def LooksLikeAssignment(w):
def DetectAssignment_OLD(w):
"""Tests whether a word looks like FOO=bar or FOO[x]=bar.
Returns:
(string, op, CompoundWord) if it looks like FOO=bar
False if it doesn't
(token left, # Lit_VarLike, Lit_ArrayLhsOpen, or Undefined_Tok
token? right, # Lit_ArrayLhsClose if it was detected
part_offset) # where to start the token, 0
TODO: could use assign_parse
Or (spid, k, (spid1, spid2), op, v)
spid1 and spid2 are [ and ]
Expand Down Expand Up @@ -440,23 +444,28 @@ def LooksLikeAssignment(w):
return name, op, rhs


# TODO:
# - local/declare should use this.
# - Doesn't work with 'readonly' or 'export'
# - global is parsed at the top level with LhsIndexedLike.
def LooksLikeLhsIndex(s):
"""Tests if a STRING looks like a[x + 1]=b
# After EvalStatic, do another round of lexing at runtime.
# Use osh/lex.py.
Returns:
(string, arith_expr) if it looks like a[x + 1]=b
LhsIndexedName?
False if it doesn't
def DetectAssignment(w):
  """Detect whether a compound word starts with an assignment left-hand side.

  Args:
    w: a word whose tag is word_e.CompoundWord (asserted).

  Returns:
    A 3-tuple (left_token, close_token, part_offset):

    - (first part's token, None, 1) if the first part is Lit_VarLike.
      Everything after the first part is the value.
    - (first part's token, closing part's token, index just past the closing
      part) if the first part is Lit_ArrayLhsOpen and a Lit_ArrayLhsClose
      part is found at index 2 or later.
    - (None, None, 0) if nothing was detected.
  """
  # PROBLEM: What arena tokens to use?
  assert w.tag == word_e.CompoundWord

  parts = w.parts
  num_parts = len(parts)
  if num_parts == 0:
    return None, None, 0

  first_part = parts[0]
  first_id = _LiteralPartId(first_part)

  if first_id == Id.Lit_VarLike:
    # Everything after the first token is the value.
    return first_part.token, None, 1

  # An index expression needs at least one part between the opening and the
  # closing part, so fewer than 3 parts (a[]=) can't be valid, and the search
  # for the closing part starts at index 2.
  if first_id == Id.Lit_ArrayLhsOpen and num_parts >= 3:
    for close_index in xrange(2, num_parts):
      candidate = parts[close_index]
      if _LiteralPartId(candidate) == Id.Lit_ArrayLhsClose:
        return first_part.token, candidate.token, close_index + 1

  # Nothing detected.  Could be 'foobar', or 'a[x+1+2/' without the closing ].
  return None, None, 0


def KeywordToken(w):
Expand Down
88 changes: 86 additions & 2 deletions core/word_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,97 @@

import unittest

from osh import word_parse_test
from osh.meta import Id
from core.util import log

from core import word # module under test


def _Detect(test, word_str, expected):
  """Parse word_str, run word.DetectAssignment() on it, and check the result.

  After the assertions, the detected tuple is also fed to
  cmd_parse._AppendMoreEnv and cmd_parse._MakeAssignPair.  Those two calls are
  only logged (result or error); they never fail the test.

  Args:
    test: the TestCase instance used for assertions.
    word_str: source text of a single word, e.g. 'a[x]=1'.
    expected: tuple (expected_left, expected_close, expected_part_offset).
      The first two are either None or the Id the corresponding returned
      token must have; the third is compared directly with the returned part
      offset.
  """
  # TODO: This function could be moved to test_lib.
  log('-'*80)
  arena, w = word_parse_test._assertReadWordWithArena(test, word_str)

  actual = word.DetectAssignment(w)
  left_token, close_token, part_offset = actual

  expected_left, expected_close, expected_part_offset = expected

  print(left_token, close_token, part_offset)
  print()

  if expected_left is None:
    test.assertEqual(None, left_token)
  else:
    test.assertEqual(expected_left, left_token.id)

  if expected_close is None:
    test.assertEqual(None, close_token)
  else:
    # Check the CLOSING token here, not the left token a second time.
    test.assertEqual(expected_close, close_token.id)

  test.assertEqual(expected_part_offset, part_offset)

  # Test that we can reparse input
  from osh import cmd_parse
  from osh import parse_lib
  from core import alloc

  parse_ctx = parse_lib.ParseContext(arena, {})

  if left_token and left_token.id in (Id.Lit_VarLike, Id.Lit_ArrayLhsOpen):
    more_env = []
    preparsed = (left_token, close_token, part_offset, w)

    # Best effort: any failure below is logged rather than raised, so it
    # doesn't fail the test.
    try:
      cmd_parse._AppendMoreEnv([preparsed], more_env)
    except Exception as e:
      log('Error: %s', e)
    else:
      log('more_env: %s', more_env)

    try:
      assign_pair = cmd_parse._MakeAssignPair(parse_ctx, preparsed)
    except Exception as e:
      log('Error: %s', e)
    else:
      log('assign_pair: %s', assign_pair)


class WordTest(unittest.TestCase):
  """Tests for the core.word module."""

  def testFoo(self):
    print(word)

  def testDetectLocation(self):
    """Table-driven check of word.DetectAssignment(), via _Detect()."""
    # Expected (left token Id, close token Id, part offset) tuples that occur
    # repeatedly in the table below.
    not_detected = (None, None, 0)
    var_like = (Id.Lit_VarLike, None, 1)
    indexed_3 = (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 3)
    indexed_5 = (Id.Lit_ArrayLhsOpen, Id.Lit_ArrayLhsClose, 5)

    table = [
        ('foobar', not_detected),
        ('a[x', not_detected),

        # Empty is not valid, there has to be at least one token.
        ('a[]=$foo$bar', not_detected),
        ('a[]+=$foo$bar', not_detected),

        ('s=1', var_like),
        ('s+=1', var_like),
        ('a[x]=1', indexed_3),
        ('a[x]+=1', indexed_3),
        ('a[x++]+=1', indexed_5),

        ('a=(1 2 3)', var_like),
        ('a+=(1 2 3)', var_like),

        # EmptyWord on RHS
        ('s=', var_like),
        ('a[x]=', indexed_3),

        # Tilde sub
        ('s=~foo', var_like),
        ('a[x]=~', indexed_3),
    ]
    for source_text, want in table:
      _Detect(self, source_text, want)

    # These don't parse, as they shouldn't.  But not the best error message.
    #w = assertReadWord(self, 'a[x]=(1 2 3)')
    #w = assertReadWord(self, 'a[x]+=(1 2 3)')


if __name__ == '__main__':
Expand Down
79 changes: 79 additions & 0 deletions doc/architecture-notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
Notes on OSH Architecture
-------------------------

## Where we (unfortunately) must re-parse previously parsed text

- alias expansion
- Array assignment like `a[x+1]=foo` (because breaking word boundaries like
`a[x + 1]=foo` causes a lot of problems, and I don't see it used.)

Each of these cases has implications for translation, because we break the
"arena invariant".

## Parser Lookahead

- `func() { echo hi; }` vs.
- `func=() # an array`

## Where the arena invariant is broken

- Here docs with <<-. The leading tab is lost, because we don't need it for
translation.

## Where VirtualLineReader is used

This isn't re-parsing, but it's re-reading.

- alias expansion
- HereDoc

## Where parsers are instantiated

- See `osh/parse_lib.py` and its callers.

## Where code strings are evaluated

- source and eval
- trap
- PS1 and PS4 (WordParser is used)
- completion hooks registered by `complete -F ls_complete_func ls`

## Parse errors at runtime (need line numbers)

- [ -a -a -a ]
- command line flag usage errors

## Where unicode is respected

- ${#s} -- length in code points
- ${s:1:2} -- offsets in code points
- ${x#?} and family (not yet implemented)

## Parse-time and Runtime Pairs

- echo -e '\x00\n' and echo $'\x00\n' (shared in OSH)
- test / [ and [[ (shared in OSH)

### Other Pairs

- expr and $(( )) (expr not in shell)
- later: find and our own language


## Dependencies

- Optional: readline

## Borrowed Code

- All of OPy:
  - pgen2
  - compiler2 from stdlib
  - byterun
- ASDL front end from CPython (heavily refactored)
- core/tdop.py: Heavily adapted from tinypy

## Generated Code

- See `build/dev.sh`

0 comments on commit ea80189

Please sign in to comment.