Skip to content

Commit

Permalink
[j8] Start parser
Browse files Browse the repository at this point in the history
Can parse dicts, but getting a weird spec test error
  • Loading branch information
Andy C committed Dec 27, 2023
1 parent e2e3980 commit 7b11180
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 19 deletions.
26 changes: 16 additions & 10 deletions builtin/json_ysh.py
Expand Up @@ -140,18 +140,24 @@ def Run(self, cmd_val):
posix.strerror(e.err_num))
return 1

if mylib.PYTHON:
try:
obj = yajl.loads(contents)
except ValueError as e:
self.errfmt.Print_('json read: %s' % e,
blame_loc=action_loc)
return 1

# TODO: use token directly
val = cpython._PyObjToValue(obj)
if self.is_j8:
p = j8.Parser(contents)
val = p.Parse()
self.mem.SetPlace(place, val, blame_loc)

else:
if mylib.PYTHON:
try:
obj = yajl.loads(contents)
except ValueError as e:
self.errfmt.Print_('json read: %s' % e,
blame_loc=action_loc)
return 1

# TODO: use token directly
val = cpython._PyObjToValue(obj)
self.mem.SetPlace(place, val, blame_loc)

else:
raise error.Usage(_JSON_ACTION_ERROR, action_loc)

Expand Down
120 changes: 113 additions & 7 deletions data_lang/j8.py
Expand Up @@ -30,19 +30,20 @@
<> is for non-J8 errors? For the = operator
"""

from _devbuild.gen.id_kind_asdl import Id, Id_t, Id_str
from _devbuild.gen.value_asdl import (value, value_e, value_t)

from asdl import format as fmt
from core import vm
from data_lang import pyj8
from data_lang import qsn
from mycpp import mylib
from mycpp.mylib import tagswitch, iteritems, log
from mycpp.mylib import tagswitch, iteritems, NewDict, log

_ = log
unused = pyj8

from typing import cast, Dict
from typing import cast, Dict, List, Tuple


class PrettyPrinter(object):
Expand Down Expand Up @@ -379,13 +380,118 @@ def Print(self, val, level=0):

class Parser(object):

def Parse(self, s):
# type: (str) -> value_t
def __init__(self, s):
    # type: (str) -> None
    """Set up a parser over the string `s`.

    No token is read here; Parse() fetches the first one.
    """
    # Raw input, kept so token text can be sliced out by position.
    self.s = s

    # State describing the current (lookahead) token.  Until the first
    # _Next() call there is no real token, hence Undefined_Tok.
    self.decoded = ''
    self.end_pos = 0
    self.start_pos = 0
    self.tok_id = Id.Undefined_Tok

    # Token source for `s`.
    self.lexer = pyj8.LexerDecoder(s)

def _Next(self):
    # type: () -> None
    """Advance the lookahead to the next token produced by the lexer.

    start_pos becomes the end of the token being left behind, so it is the
    start of the new token only if the lexer skipped nothing in between.
    """
    self.start_pos = self.end_pos

    next_id, next_end, next_decoded = self.lexer.Next()
    self.tok_id = next_id
    self.end_pos = next_end
    self.decoded = next_decoded
    #log('NEXT %s %s %s', Id_str(self.tok_id), self.end_pos, self.decoded or '-')

def _Eat(self, tok_id):
    # type: (Id_t) -> None
    """Require the current token to be `tok_id`, then advance past it.

    Raises:
      AssertionError: if the current token is a different kind.
    """
    # TODO: Need location info

    # Raised explicitly rather than with an `assert` statement: this checks
    # the *input*, and assert statements are removed when Python runs with
    # -O, which would let malformed documents through silently.
    if self.tok_id != tok_id:
        raise AssertionError("Expected %s, got %s" %
                             (Id_str(tok_id), Id_str(self.tok_id)))
    self._Next()

def _ParsePair(self):
    # type: () -> Tuple[str, value_t]
    """Parse one dict member, `string ':' value`, and return (key, value).

    Raises:
      AssertionError: if the current token is not a string.
    """
    if self.tok_id != Id.J8_AnyString:
        raise AssertionError(Id_str(self.tok_id))

    # Capture the decoded key before advancing overwrites self.decoded.
    key = self.decoded
    self._Next()

    self._Eat(Id.J8_Colon)

    return key, self._ParseValue()

def _ParseDict(self):
    # type: () -> value_t
    """
    key_value = string ':' value
    Dict      = '{' '}'
              | '{' key_value (',' key_value)* '}'

    On return the closing '}' has been consumed in BOTH alternatives, so the
    caller always sees the token that follows the dict.  If a key occurs more
    than once, the last value is the one kept.
    """
    # precondition
    assert self.tok_id == Id.J8_LBrace, Id_str(self.tok_id)

    d = NewDict()  # type: Dict[str, value_t]

    self._Next()
    if self.tok_id == Id.J8_RBrace:
        # Consume the '}' of an empty dict.  Without this, a nested '{}'
        # leaves its brace behind and the enclosing dict mistakes it for its
        # own terminator, dropping any members that follow.
        self._Next()
        return value.Dict(d)

    k, v = self._ParsePair()
    d[k] = v

    while self.tok_id == Id.J8_Comma:
        self._Next()
        k, v = self._ParsePair()
        d[k] = v

    self._Eat(Id.J8_RBrace)

    return value.Dict(d)

def _ParseList(self):
    # type: () -> value_t
    """
    List = '[' ']'
         | '[' value (',' value)* ']'

    On return the closing ']' has been consumed, so the caller sees the token
    that follows the list.
    """
    # precondition
    assert self.tok_id == Id.J8_LBracket, Id_str(self.tok_id)

    items = []  # type: List[value_t]

    self._Next()
    # NOTE(review): assumes Id.J8_RBracket is defined next to J8_LBracket in
    # frontend/id_kind_def.py -- confirm.
    if self.tok_id == Id.J8_RBracket:
        self._Next()
        return value.List(items)

    items.append(self._ParseValue())

    while self.tok_id == Id.J8_Comma:
        self._Next()
        items.append(self._ParseValue())

    self._Eat(Id.J8_RBracket)

    return value.List(items)

def _ParseValue(self):
    # type: () -> value_t
    """Parse one value starting at the current token and return it.

    Dispatches on the current token kind: dict, list, null, bool, number or
    string.  Scalar tokens are consumed here; dicts and lists are delegated.

    Raises:
      AssertionError: if the current token cannot start a value.
    """
    if self.tok_id == Id.J8_LBrace:
        return self._ParseDict()

    elif self.tok_id == Id.J8_LBracket:
        return self._ParseList()

    elif self.tok_id == Id.J8_Null:
        self._Next()
        return value.Null

    elif self.tok_id == Id.J8_Bool:
        # start_pos is where the PREVIOUS token ended, so this slice may
        # begin with whitespace the lexer skipped (e.g. after ':').  Looking
        # at the first character would then see a space and report False for
        # 'true'.  The slice always ends at the token's end, so test that.
        tok_text = self.s[self.start_pos:self.end_pos]
        b = value.Bool(tok_text.endswith('true'))
        self._Next()
        return b

    elif self.tok_id == Id.J8_Number:
        self._Next()
        # TODO: distinguish Int vs. Float
        # Until then the number is consumed but its value is discarded.
        return value.Null

    # UString, BString too
    elif self.tok_id == Id.J8_AnyString:
        str_val = value.Str(self.decoded)
        self._Next()
        return str_val

    else:
        part = self.s[self.start_pos:self.end_pos]
        raise AssertionError('Unexpected token %s %r' %
                             (Id_str(self.tok_id), part))

def Parse(self):
    # type: () -> value_t
    """Parse the string given to the constructor and return its value.

    - Can parse either J8 or JSON strings
    - Malformed input currently surfaces as AssertionError raised by the
      helper methods; there is no location info yet.
    - NOTE(review): whatever follows the first complete value is not
      inspected here.
    """
    # Load the first token, then descend.  (The old stub's early
    # `return None` is gone; it made these two lines unreachable.)
    self._Next()
    return self._ParseValue()

8 changes: 8 additions & 0 deletions data_lang/pyj8.py
Expand Up @@ -189,6 +189,14 @@ def __init__(self, s):
self.decoded = mylib.BufWriter()

def Next(self):
    # type: () -> Tuple[Id_t, int, Optional[str]]
    """Return the next token from _Next() that is not Id.Ignored_Space.

    The result has the same (token id, end position, decoded string) shape
    that _Next() returns.
    """
    tok_id, end_pos, decoded = self._Next()
    # Discard ignorable whitespace tokens; only the last read is reported.
    while tok_id == Id.Ignored_Space:
        tok_id, end_pos, decoded = self._Next()
    return tok_id, end_pos, decoded

def _Next(self):
# type: () -> Tuple[Id_t, int, Optional[str]]
"""
Note: match_func will return Id.Eol_Tok repeatedly the terminating NUL
Expand Down
1 change: 1 addition & 0 deletions frontend/id_kind_def.py
Expand Up @@ -703,6 +703,7 @@ def AddKinds(spec):
'Colon',

# Parsed
'Null',
'Bool',
'Number', # Int and Float

Expand Down
2 changes: 2 additions & 0 deletions frontend/lexer_def.py
Expand Up @@ -532,6 +532,8 @@ def R(pat, tok_type):
C('}', Id.J8_RBrace),
C(',', Id.J8_Comma),
C(':', Id.J8_Colon),

C('null', Id.J8_Null),
C('true', Id.J8_Bool),
C('false', Id.J8_Bool),

Expand Down
27 changes: 27 additions & 0 deletions spec/testdata/j8-read.sh
@@ -0,0 +1,27 @@
# Pipes JSON documents of increasing complexity into 'j8 read' and then
# evaluates '= _reply' after each one.
# NOTE(review): presumably 'j8 read' stores the parsed value in _reply and
# '=' prints it -- confirm against the builtin.

# Empty dict containing only whitespace
echo '{ }' | j8 read
= _reply

# NOTE(review): this 'exit' is live, so nothing below it runs.  The '#exit'
# lines further down look like the same debugging stop, disabled -- confirm
# this one is meant to stay while the spec test error is investigated.
exit

# Single string member
echo '{"k": "v"}' | j8 read
= _reply

#exit

# null value
echo '{"k": null}' | j8 read
= _reply

#exit

# Two members with number values
echo '{"k": 1, "k2": 2}' | j8 read
= _reply

#exit

# Nested dict
echo '{"k": {"k2": null}}' | j8 read
= _reply

#exit

# Nested dict followed by another member
echo '{"k": {"k2": "v2"}, "k3": null}' | j8 read
= _reply
8 changes: 6 additions & 2 deletions spec/ysh-json.test.sh
@@ -1,4 +1,4 @@
## oils_failures_allowed: 1
## oils_failures_allowed: 2
## tags: dev-minimal

#### usage errors
Expand Down Expand Up @@ -237,7 +237,11 @@ b"\u{1}\yff\u{1f}"
## END


#### j8 read

# Avoid conflict on stdin from spec test framework?

$SH $REPO_ROOT/spec/testdata/j8-read.sh


## STDOUT:
## END

0 comments on commit 7b11180

Please sign in to comment.