Skip to content

Commit

Permalink
[j8] Translate j8.Parser to C++
Browse files Browse the repository at this point in the history
It just needed a pyj8.PartIsUtf8() stub.

There are still too many allocations.
  • Loading branch information
Andy C committed Jan 5, 2024
1 parent c505aac commit 3d5776e
Show file tree
Hide file tree
Showing 14 changed files with 351 additions and 315 deletions.
3 changes: 2 additions & 1 deletion bin/NINJA_subgraph.py
Expand Up @@ -74,8 +74,9 @@ def NinjaGraph(ru):
deps=[
'//bin/text_files',
'//cpp/core',
'//cpp/libc',
'//cpp/data_lang',
'//cpp/fanos',
'//cpp/libc',
'//cpp/osh',
'//cpp/pgen2',
'//cpp/pylib',
Expand Down
40 changes: 18 additions & 22 deletions builtin/func_misc.py
Expand Up @@ -440,27 +440,23 @@ def Call(self, rd):
s = rd.PosStr()
rd.Done()

if mylib.PYTHON: # TODO: translate j8.Parser
p = j8.Parser(s)

try:
if self.is_j8:
val = p.ParseJ8()
else:
val = p.ParseJson()
except error.Decode as e:
# Right now I'm not exposing the original string, because that
# could lead to a memory leak in the _error Dict.
# The message quotes part of the string, and we could improve
# that. We could have a substring with context.
props = {
'start_pos': value.Int(e.start_pos),
'end_pos': value.Int(e.end_pos),
} # type: Dict[str, value_t]
# status code 4 is special, for encode/decode errors.
raise error.Structured(4, e.Message(), rd.LeftParenToken(),
props)
else:
val = value.Null
p = j8.Parser(s)

try:
if self.is_j8:
val = p.ParseJ8()
else:
val = p.ParseJson()
except error.Decode as e:
# Right now I'm not exposing the original string, because that
# could lead to a memory leak in the _error Dict.
# The message quotes part of the string, and we could improve
# that. We could have a substring with context.
props = {
'start_pos': value.Int(e.start_pos),
'end_pos': value.Int(e.end_pos),
} # type: Dict[str, value_t]
# status code 4 is special, for encode/decode errors.
raise error.Structured(4, e.Message(), rd.LeftParenToken(), props)

return val
28 changes: 13 additions & 15 deletions builtin/json_ysh.py
Expand Up @@ -129,21 +129,19 @@ def Run(self, cmd_val):
posix.strerror(e.err_num))
return 1

if mylib.PYTHON:
p = j8.Parser(contents)
try:
if self.is_j8:
val = p.ParseJ8()
else:
val = p.ParseJson()
except error.Decode as err:
# TODO: Need to show position info
self.errfmt.Print_('%s read: %s' %
(self.name, err.Message()),
blame_loc=action_loc)
return 1

self.mem.SetPlace(place, val, blame_loc)
p = j8.Parser(contents)
try:
if self.is_j8:
val = p.ParseJ8()
else:
val = p.ParseJson()
except error.Decode as err:
# TODO: Need to show position info
self.errfmt.Print_('%s read: %s' % (self.name, err.Message()),
blame_loc=action_loc)
return 1

self.mem.SetPlace(place, val, blame_loc)

else:
raise error.Usage(_JSON_ACTION_ERROR, action_loc)
Expand Down
7 changes: 7 additions & 0 deletions cpp/NINJA_subgraph.py
Expand Up @@ -80,6 +80,13 @@ def NinjaGraph(ru):
('clang', 'tsan'),
])

# Library target '//cpp/data_lang', built from the single source file
# cpp/data_lang.cc.
ru.cc_library(
'//cpp/data_lang',
srcs=[
'cpp/data_lang.cc',
],
)

# Note: depends on code generated by re2c
ru.cc_library(
'//cpp/frontend_match',
Expand Down
12 changes: 12 additions & 0 deletions cpp/data_lang.cc
@@ -0,0 +1,12 @@
// data_lang.cc

#include "cpp/data_lang.h"

namespace pyj8 {

// Returns true when the bytes of `s` at indices start .. end-1 form
// well-formed UTF-8, and false otherwise.  An empty range is well-formed.
//
// Validation is strict: overlong encodings, UTF-16 surrogate code points
// (U+D800..U+DFFF), code points above U+10FFFF, stray continuation bytes, and
// sequences cut off by `end` are all rejected.
//
// NOTE(review): assumes 0 <= start <= end <= length of `s`; the range is not
// bounds-checked here -- confirm that callers guarantee this.
// NOTE(review): the Python counterpart may be more lenient about surrogates
// depending on the interpreter's codec -- confirm the intended strictness.
bool PartIsUtf8(BigStr* s, int start, int end) {
  const unsigned char* bytes =
      reinterpret_cast<const unsigned char*>(s->data_);

  int i = start;
  while (i < end) {
    const unsigned char lead = bytes[i];

    // Single-byte (ASCII) code point.
    if (lead < 0x80) {
      ++i;
      continue;
    }

    // Number of continuation bytes, plus the allowed range of the FIRST
    // continuation byte.  Narrowing that range for specific lead bytes is
    // what rules out overlong forms, surrogates, and values > U+10FFFF.
    int num_cont = 0;
    unsigned char first_lo = 0x80;
    unsigned char first_hi = 0xBF;

    if (lead >= 0xC2 && lead <= 0xDF) {
      num_cont = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      num_cont = 2;
      if (lead == 0xE0) {
        first_lo = 0xA0;  // below this would be an overlong 3-byte form
      } else if (lead == 0xED) {
        first_hi = 0x9F;  // above this would encode a surrogate
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      num_cont = 3;
      if (lead == 0xF0) {
        first_lo = 0x90;  // below this would be an overlong 4-byte form
      } else if (lead == 0xF4) {
        first_hi = 0x8F;  // above this would exceed U+10FFFF
      }
    } else {
      // 0x80..0xBF: stray continuation byte; 0xC0/0xC1: always overlong;
      // 0xF5..0xFF: never valid in UTF-8.
      return false;
    }

    // The whole sequence must fit inside the requested range.
    if (end - i <= num_cont) {
      return false;
    }

    const unsigned char first = bytes[i + 1];
    if (first < first_lo || first > first_hi) {
      return false;
    }
    for (int k = 2; k <= num_cont; ++k) {
      if ((bytes[i + k] & 0xC0) != 0x80) {
        return false;
      }
    }

    i += num_cont + 1;
  }

  return true;
}

}  // namespace pyj8
12 changes: 12 additions & 0 deletions cpp/data_lang.h
@@ -0,0 +1,12 @@
#ifndef DATA_LANG_H
#define DATA_LANG_H

#include "mycpp/runtime.h"

namespace pyj8 {

// Presumably reports whether the part of `s` delimited by `start` and `end`
// is valid UTF-8 (inferred from the name).  Whether `end` is exclusive is not
// visible from this header -- TODO confirm against the definition in
// cpp/data_lang.cc.
bool PartIsUtf8(BigStr* s, int start, int end);

} // namespace pyj8

#endif // DATA_LANG_H
24 changes: 24 additions & 0 deletions cpp/frontend_match.cc
Expand Up @@ -87,6 +87,30 @@ Id_t BracketOther(BigStr* s) {
len(s));
}

// Thin adapter over the global-namespace ::MatchJ8Token().
//
// Passes the bytes of `s`, its length, and `pos` to the global matcher, which
// reports its result through two int out-parameters.  Those are repackaged as
// a (token id cast to Id_t, end position) pair.
Tuple2<Id_t, int> MatchJ8Token(BigStr* s, int pos) {
  const unsigned char* line = reinterpret_cast<const unsigned char*>(s->data_);

  int token_id;
  int token_end;
  ::MatchJ8Token(line, len(s), pos, &token_id, &token_end);

  return Tuple2<Id_t, int>(static_cast<Id_t>(token_id), token_end);
}

// Thin adapter over the global-namespace ::MatchJ8StrToken().
//
// Passes the bytes of `s`, its length, and `pos` to the global matcher, which
// reports its result through two int out-parameters.  Those are repackaged as
// a (token id cast to Id_t, end position) pair.
Tuple2<Id_t, int> MatchJ8StrToken(BigStr* s, int pos) {
  const unsigned char* line = reinterpret_cast<const unsigned char*>(s->data_);

  int token_id;
  int token_end;
  ::MatchJ8StrToken(line, len(s), pos, &token_id, &token_end);

  return Tuple2<Id_t, int>(static_cast<Id_t>(token_id), token_end);
}

// Thin adapter over the global-namespace ::MatchJsonStrToken().
//
// Passes the bytes of `s`, its length, and `pos` to the global matcher, which
// reports its result through two int out-parameters.  Those are repackaged as
// a (token id cast to Id_t, end position) pair.
Tuple2<Id_t, int> MatchJsonStrToken(BigStr* s, int pos) {
  const unsigned char* line = reinterpret_cast<const unsigned char*>(s->data_);

  int token_id;
  int token_end;
  ::MatchJsonStrToken(line, len(s), pos, &token_id, &token_end);

  return Tuple2<Id_t, int>(static_cast<Id_t>(token_id), token_end);
}

bool IsValidVarName(BigStr* s) {
return ::IsValidVarName(reinterpret_cast<const unsigned char*>(s->data_),
len(s));
Expand Down
4 changes: 4 additions & 0 deletions cpp/frontend_match.h
Expand Up @@ -59,6 +59,10 @@ Id_t BracketUnary(BigStr* s);
Id_t BracketBinary(BigStr* s);
Id_t BracketOther(BigStr* s);

// Token matchers for J8 / JSON input.  Each takes a string and a position and
// returns an (Id_t, int) pair; in cpp/frontend_match.cc the int is the end
// position reported by the global-namespace matcher of the same name.
Tuple2<Id_t, int> MatchJ8Token(BigStr* s, int pos);
Tuple2<Id_t, int> MatchJ8StrToken(BigStr* s, int pos);
Tuple2<Id_t, int> MatchJsonStrToken(BigStr* s, int pos);

//
// Other Matching Functions
//
Expand Down
1 change: 1 addition & 0 deletions cpp/preamble.h
Expand Up @@ -19,6 +19,7 @@
#include "_gen/frontend/types.asdl.h"
#include "_gen/ysh/grammar_nt.h"
#include "cpp/core.h"
#include "cpp/data_lang.h"
#include "cpp/fanos.h"
#include "cpp/frontend_flag_spec.h"
#include "cpp/frontend_match.h"
Expand Down

0 comments on commit 3d5776e

Please sign in to comment.