Revert "[refactor] Move EchoLexer to new SimplerLexer2"
This reverts commit 7bd2132.

We ended up not using SimpleLexer2 for data_lang/j8.

Even though SimpleLexer has more allocations, I think we need real
Tokens (which are currently heap objects) anyway.

There is a TODO to give precise error messages for BadBackslash in echo,
and I think for parse errors in globs too.  For example, unmatched
brackets could get precise location info under strict_glob.
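For context, a minimal sketch of the two interfaces as implied by the call
sites in this diff (the real classes live in frontend/match.py and
cpp/frontend_match.h; the match_func signature here is an assumption,
paraphrased from how the generated matchers are used):

  # Sketch only, not the real Oils code: both classes wrap a generated
  # match_func(s, pos) that returns (id, end_pos).

  class SimpleLexer(object):
    """Next() materializes each token value: one string allocation per token."""

    def __init__(self, match_func, s):
      self.match_func = match_func
      self.s = s
      self.pos = 0

    def Next(self):
      # type: () -> Tuple[int, str]
      id_, end_pos = self.match_func(self.s, self.pos)
      val = self.s[self.pos:end_pos]  # the extra allocation
      self.pos = end_pos
      return id_, val

  class SimpleLexer2(object):
    """Next() returns only (id, end_pos); callers must slice s themselves."""

    def __init__(self, match_func, s):
      self.match_func = match_func
      self.s = s
      self.pos = 0

    def Next(self):
      # type: () -> Tuple[int, int]
      id_, end_pos = self.match_func(self.s, self.pos)
      self.pos = end_pos
      return id_, end_pos

SimpleLexer2 skips the per-token string, but every caller then has to do the
pos = 0 / pos = end_pos bookkeeping deleted below, and a bare (id, end_pos)
pair is one step further from a real Token carrying location info, which the
BadBackslash and strict_glob TODOs need.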
Andy C committed Dec 29, 2023
1 parent 7bd2132 commit 7cca7dc
Showing 6 changed files with 14 additions and 99 deletions.
builtin/io_osh.py (4 additions, 6 deletions)

@@ -69,18 +69,17 @@ def Run(self, cmd_val):
 
     if arg.e:
       new_argv = []  # type: List[str]
-      for arg_str in argv:
+      for a in argv:
         parts = []  # type: List[str]
-        lex = match.EchoLexer(arg_str)
-        pos = 0
+        lex = match.EchoLexer(a)
         while not backslash_c:
-          id_, end_pos = lex.Next()
+          id_, s = lex.Next()
           if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
             break
 
           # Note: DummyToken is OK because EvalCStringToken() doesn't have any
           # syntax errors.
-          tok = lexer.DummyToken(id_, arg_str[pos:end_pos])
+          tok = lexer.DummyToken(id_, s)
           p = word_compile.EvalCStringToken(tok)
 
           # Unusual behavior: '\c' prints what is there and aborts processing!
@@ -89,7 +88,6 @@ def Run(self, cmd_val):
             break
 
           parts.append(p)
-          pos = end_pos
 
       new_argv.append(''.join(parts))
       if backslash_c:  # no more args either
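Both this loop and the printf_osh.py one below consume the lexer the same
way. Here is a self-contained toy of the restored shape; FakeEchoLexer and
_eval_c_token are hypothetical stand-ins for match.EchoLexer and
word_compile.EvalCStringToken, and only \t, \n, and \c are handled:

  EOL_TOK = -1

  class FakeEchoLexer(object):
    """Stand-in yielding (id, value) pairs, like the restored SimpleLexer."""

    def __init__(self, s):
      self.s = s
      self.pos = 0

    def Next(self):
      if self.pos >= len(self.s):
        return EOL_TOK, ''
      if self.s[self.pos] == '\\' and self.pos + 1 < len(self.s):
        val = self.s[self.pos:self.pos + 2]  # backslash escape
        self.pos += 2
        return 1, val
      val = self.s[self.pos]  # literal character
      self.pos += 1
      return 0, val

  def _eval_c_token(id_, val):
    # None mimics EvalCStringToken's signal for \c (abort processing);
    # unknown escapes pass through literally.
    if id_ == 1:
      return {'\\t': '\t', '\\n': '\n', '\\c': None}.get(val, val)
    return val

  parts = []
  lex = FakeEchoLexer('a\\tb\\cignored')
  while True:
    id_, s = lex.Next()
    if id_ == EOL_TOK:
      break
    p = _eval_c_token(id_, s)
    if p is None:  # \c: print what we have and stop
      break
    parts.append(p)
  print(repr(''.join(parts)))  # 'a\tb'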
builtin/printf_osh.py (2 additions, 4 deletions)

@@ -291,15 +291,14 @@ def _Format(self, parts, varargs, locs, out):
 
             c_parts = []  # type: List[str]
             lex = match.EchoLexer(s)
-            pos = 0
             while True:
-              id_, end_pos = lex.Next()
+              id_, tok_val = lex.Next()
               if id_ == Id.Eol_Tok:  # Note: This is really a NUL terminator
                 break
 
               # Note: DummyToken is OK because EvalCStringToken() doesn't have
               # any syntax errors.
-              tok = lexer.DummyToken(id_, s[pos:end_pos])
+              tok = lexer.DummyToken(id_, tok_val)
               p = word_compile.EvalCStringToken(tok)
 
               # Unusual behavior: '\c' aborts processing!
@@ -308,7 +307,6 @@ def _Format(self, parts, varargs, locs, out):
                 break
 
               c_parts.append(p)
-              pos = end_pos
             s = ''.join(c_parts)
 
         elif part.type.id == Id.Format_Time or typ in 'diouxX':
cpp/frontend_match.cc (2 additions, 37 deletions)

@@ -52,41 +52,6 @@ List<Tuple2<Id_t, BigStr*>*>* SimpleLexer::Tokens() {
   return tokens;
 }
 
-Tuple2<Id_t, int> SimpleLexer2::Next() {
-  int id;
-  int end_pos;
-  match_func_(reinterpret_cast<const unsigned char*>(s_->data_), len(s_), pos_,
-              &id, &end_pos);
-
-  pos_ = end_pos;
-  return Tuple2<Id_t, int>(static_cast<Id_t>(id), end_pos);
-}
-
-List<Tuple2<Id_t, BigStr*>*>* SimpleLexer2::Tokens() {
-  auto tokens = NewList<Tuple2<Id_t, BigStr*>*>();
-  int pos = 0;
-  while (true) {
-    auto tup2 = Next();
-    Id_t id = tup2.at0();
-    int end_pos = tup2.at1();
-
-    if (id == Id::Eol_Tok) {
-      break;
-    }
-    log("pos %d end_pos %d", pos, end_pos);
-
-    int len = end_pos - pos_;
-    BigStr* tok_val = NewStr(len);
-    memcpy(tok_val->data_, s_->data_ + pos_, len);  // copy the list item
-    tok_val->data_[len] = '\0';
-
-    // It's annoying that we have to put it on the heap
-    tokens->append(Alloc<Tuple2<Id_t, BigStr*>>(id, tok_val));
-    pos = end_pos;
-  }
-  return tokens;
-}
-
 SimpleLexer* BraceRangeLexer(BigStr* s) {
   return Alloc<SimpleLexer>(&MatchBraceRangeToken, s);
 }
@@ -95,8 +60,8 @@ SimpleLexer* GlobLexer(BigStr* s) {
   return Alloc<SimpleLexer>(&MatchGlobToken, s);
 }
 
-SimpleLexer2* EchoLexer(BigStr* s) {
-  return Alloc<SimpleLexer2>(&MatchEchoToken, s);
+SimpleLexer* EchoLexer(BigStr* s) {
+  return Alloc<SimpleLexer>(&MatchEchoToken, s);
 }
 
 List<Tuple2<Id_t, BigStr*>*>* HistoryTokens(BigStr* s) {
cpp/frontend_match.h (1 addition, 24 deletions)

@@ -44,36 +44,13 @@ class SimpleLexer {
   int pos_;
 };
 
-class SimpleLexer2 {
- public:
-  SimpleLexer2(MatchFunc match_func, BigStr* s)
-      : match_func_(match_func), s_(s), pos_(0) {
-  }
-
-  Tuple2<Id_t, int> Next();
-  List<Tuple2<Id_t, BigStr*>*>* Tokens();
-
-  static constexpr ObjHeader obj_header() {
-    return ObjHeader::ClassFixed(field_mask(), sizeof(SimpleLexer2));
-  }
-
-  static constexpr uint32_t field_mask() {
-    return maskbit(offsetof(SimpleLexer2, s_));
-  }
-
- private:
-  MatchFunc match_func_;
-  BigStr* s_;
-  int pos_;
-};
-
 //
 // Secondary Lexers
 //
 
 SimpleLexer* BraceRangeLexer(BigStr* s);
 SimpleLexer* GlobLexer(BigStr* s);
-SimpleLexer2* EchoLexer(BigStr* s);
+SimpleLexer* EchoLexer(BigStr* s);
 
 List<Tuple2<Id_t, BigStr*>*>* HistoryTokens(BigStr* s);
 List<Tuple2<Id_t, BigStr*>*>* Ps1Tokens(BigStr* s);
cpp/frontend_match_test.cc (0 additions, 23 deletions)

@@ -30,28 +30,6 @@ TEST lexer_test() {
   PASS();
 }
 
-TEST lexer2_test() {
-  match::SimpleLexer2* lex = match::EchoLexer(StrFromC("hi \\t there \\n"));
-
-  List<Tuple2<Id_t, BigStr*>*>* toks = lex->Tokens();
-  for (int i = 0; i < len(toks); i++) {
-    auto* t = toks->at(i);
-    int id = t->at0();
-    if (id == id__Eol_Tok) {
-      break;
-    }
-    log("id = %d", id);
-    log("val = %s", t->at1()->data_);
-  }
-
-  match::SimpleLexer* lex2 = match::BraceRangeLexer(kEmptyString);
-  auto t = lex2->Next();
-  int id = t.at0();
-  ASSERT_EQ(Id::Eol_Tok, id);
-
-  PASS();
-}
-
 TEST func_test() {
   ASSERT_EQ(Id::BoolUnary_G, match::BracketUnary(StrFromC("-G")));
   ASSERT_EQ(Id::Undefined_Tok, match::BracketUnary(StrFromC("-Gz")));
@@ -99,7 +77,6 @@ int main(int argc, char** argv) {
   GREATEST_MAIN_BEGIN();
 
   RUN_TEST(lexer_test);
-  RUN_TEST(lexer2_test);
   RUN_TEST(func_test);
   RUN_TEST(for_test_coverage);
 
frontend/match.py (5 additions, 5 deletions)

@@ -252,10 +252,10 @@ def Tokens(self):
     return tokens
 
 
-# Iterated over in builtin/io_osh.py
+# Iterated over in osh/builtin_pure.py
 def EchoLexer(s):
-  # type: (str) -> SimpleLexer2
-  return SimpleLexer2(ECHO_MATCHER, s)
+  # type: (str) -> SimpleLexer
+  return SimpleLexer(ECHO_MATCHER, s)
 
 
 def BraceRangeLexer(s):
@@ -283,13 +283,13 @@ def J8StrLexer(s):
 
 def HistoryTokens(s):
   # type: (str) -> List[Tuple[Id_t, str]]
-  lex = SimpleLexer2(HISTORY_MATCHER, s)
+  lex = SimpleLexer(HISTORY_MATCHER, s)
   return lex.Tokens()
 
 
 def Ps1Tokens(s):
   # type: (str) -> List[Tuple[Id_t, str]]
-  lex = SimpleLexer2(PS1_MATCHER, s)
+  lex = SimpleLexer(PS1_MATCHER, s)
   return lex.Tokens()
 
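With the revert, EchoLexer, HistoryTokens, and Ps1Tokens are all back on the
single SimpleLexer class. A hedged sketch of the Tokens() helper they rely
on, continuing the SimpleLexer sketch after the commit message (whether the
real method includes the final Eol_Tok in the list isn't visible in this
diff; here it stops before it):

  def Tokens(self):
    # type: () -> List[Tuple[int, str]]
    tokens = []
    while True:
      id_, val = self.Next()
      if id_ == Id.Eol_Tok:  # assumed sentinel handling
        break
      tokens.append((id_, val))
    return tokens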
