Permalink
Browse files

added substitution captures for lists, and proper verification of the…

… two new bytecodes
  • Loading branch information...
1 parent 28e86c8 commit f9d85985396adbf505571665e7130b453230b96a Fabio Mascarenhas committed Feb 10, 2009
Showing with 197 additions and 37 deletions.
  1. +6 −7 README
  2. +47 −0 list.lua
  3. +144 −28 lpeg.c
  4. +0 −2 test.lua
View
13 README
@@ -7,13 +7,12 @@ of items. This means that the pattern "abc" matches both the string
"abc" and the list { "a", "b", "c" }. The special thing about lists is
that its items can themselves be streams (other lists or strings).
-Simple captures in lists are working, the examples below show how they
-operate (produce the empty list, the single captured element, or a sublist),
-but substitution and dynamic captures are not supported on lists yet.
-
-Oh, and the verifier just validates both new opcodes without checking anything,
-I am going to add proper validation later. The compiler is generating correct code
-so this is not such a big deal right now. :-)
+List captures are working; the examples below show how they
+operate (producing the empty list, the single captured element, or a sublist).
+Substitution captures on a list slice produce a list with the replaced elements.
+Dynamic captures receive the list being matched and the current position, and
+must return the next position to match as their first result (or true to stay
+at the same position).
It's backward compatible with LPEG 0.9, but I have a new RE module called listre
that has a more list-friendly syntax, although I am not happy with its syntax yet,
View
@@ -354,4 +354,51 @@ assert(p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo" }} == "foo")
print("+")
+ops = { add = function (x, y) return x + y end,
+ sub = function (x, y) return x - y end,
+ mul = function (x, y) return x * y end,
+ div = function (x, y) return x / y end, }
+
+p = re.compile([[ exp <- { . -> '%0', <exp>, <exp> } -> eval / { "num", <.> } ]],
+ { eval = function (op, x, y)
+ return ops[op](x, y)
+ end })
+
+assert(p:match{{ "add", { "div", { "num", 8 }, { "num", 2 } }, { "num", 3 } }} == 7)
+
+print("+")
+
+p = re.compile([[ exp <- { <.> -> '%1', <exp>, <exp> } -> eval / { "num", <.> } ]],
+ { eval = function (op, x, y)
+ return ops[op](x, y)
+ end })
+
+assert(p:match{{ "add", { "div", { "num", 8 }, { "num", 2 } }, { "num", 3 } }} == 7)
+
+print("+")
+
+p = re.compile([[ { "foo", <~ "bar", (!"baz" .)*, "baz" -> 'foo' ~>, "boo" } ]])
+
+assert(select(5, p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo" }}) == "foo")
+assert(#p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo" }} == 5)
+
+print("+")
+
+p = re.compile([[ { "foo", <~ "bar", (!"baz" .)*, "baz" -> 'foo', "boo", "biz" ~>, "zoo" } ]])
+
+assert(select(5, p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }}) == "foo")
+assert(#p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }} == 7)
+assert(select(6, p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }}) == "boo")
+
+print("+")
+
+p = re.compile([[ { "foo", <~ "bar", (!"baz" .)*, "baz" -> upper, "boo", "biz" ~>, "zoo" } ]],
+ { upper = string.upper })
+
+assert(select(5, p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }}) == "BAZ")
+assert(#p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }} == 7)
+assert(select(6, p:match{{ "foo", "bar", "one", "two", "three", "baz", "boo", "biz", "zoo" }}) == "boo")
+
+print("+")
+
print("OK")
View
172 lpeg.c
@@ -808,10 +808,12 @@ static Stream *match (lua_State *L,
static int verify (lua_State *L, Instruction *op, const Instruction *p,
Instruction *e, int postable, int rule) {
- Stream dummy;
+ Stream dummy_s;
+ Stream dummy_l;
Stack back[MAXBACK];
int backtop = 0; /* point to first empty slot in back */
- dummy.kind = Sstring;
+ dummy_s.kind = Sstring;
+ dummy_l.kind = Slist;
while (p != e) {
switch ((Opcode)p->i.code) {
case IRet: {
@@ -822,10 +824,19 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
if (backtop >= MAXBACK)
return luaL_error(L, "too many pending calls/choices");
back[backtop].p = dest(0, p);
- back[backtop++].s = dummy;
+ back[backtop++].s = dummy_s;
p++;
continue;
}
+ case IOpen:
+ {
+ if (backtop >= MAXBACK)
+ return luaL_error(L, "too many pending calls/choices");
+ back[backtop].p = NULL;
+ back[backtop++].s = dummy_l;
+ p++;
+ continue;
+ }
case ICall: {
assert((p + 1)->i.code != IRet); /* no tail call */
if (backtop >= MAXBACK)
@@ -865,9 +876,11 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
continue;
}
}
- case IOpen:
case IClose:
{
+ assert(backtop > 0);
+ backtop--;
+ assert(back[backtop].s.kind == Slist);
p++;
continue;
}
@@ -895,7 +908,7 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
do {
if (backtop-- == 0)
return 1; /* no more backtracking */
- } while (back[backtop].s.kind == Sempty);
+ } while (back[backtop].s.kind == Sempty || back[backtop].p == NULL);
p = back[backtop].p;
continue;
}
@@ -910,7 +923,7 @@ static int verify (lua_State *L, Instruction *op, const Instruction *p,
goto fail; /* be liberal in this case */
}
case IFunc: {
- Stream *r = (p+1)->f((p+2)->buff, &dummy);
+ Stream *r = (p+1)->f((p+2)->buff, &dummy_s);
if (r == NULL) goto fail;
p += p->i.offset;
continue;
@@ -2205,8 +2218,21 @@ typedef struct StrAux {
static int getstrcaps (CapState *cs, StrAux *cps, int n) {
int k = n++;
+ int skind = cs->cap->s.kind;
cps[k].isstring = 1;
- cps[k].u.s.s = cs->cap->s.u.s.s;
+ switch(skind) {
+ case Sstring: cps[k].u.s.s = cs->cap->s.u.s.s; break;
+ case Slist: {
+ size_t l;
+ lua_rawgeti(cs->L, plistidx(cs->ptop), cs->cap->s.u.l.ref);
+ lua_rawgeti(cs->L, -1, cs->cap->s.u.l.cur);
+ cps[k].u.s.s = lua_tolstring(cs->L, -1, &l);
+ cps[k].u.s.e = cps[k].u.s.s + l;
+ lua_pop(cs->L, 2);
+ break;
+ }
+ default: luaL_error(cs->L, "no string captures for this stream");
+ }
if (!isfullcap(cs->cap++)) {
while (!isclosecap(cs->cap)) {
if (n >= MAXSTRCAPS) /* too many captures? */
@@ -2222,7 +2248,7 @@ static int getstrcaps (CapState *cs, StrAux *cps, int n) {
}
cs->cap++; /* skip close */
}
- cps[k].u.s.e = closeaddr(cs->cap - 1);
+ if(skind == Sstring) cps[k].u.s.e = closeaddr(cs->cap - 1);
return n;
}
@@ -2232,6 +2258,11 @@ static int getstrcaps (CapState *cs, StrAux *cps, int n) {
*/
static int addonestring (luaL_Buffer *b, CapState *cs, const char *what);
+/*
+** push next capture
+*/
+static int pushoneitem (int tab, CapState *cs, const char *what);
+
static void stringcap (luaL_Buffer *b, CapState *cs) {
StrAux cps[MAXSTRCAPS];
@@ -2261,22 +2292,67 @@ static void stringcap (luaL_Buffer *b, CapState *cs) {
}
}
+typedef struct SubstAux { /* destination handed to substcap for a substitution capture */
+ union { /* substcap selects the member from the kind of the captured stream */
+ luaL_Buffer *b; /* Sstring: buffer that receives the substituted text */
+ int tab; /* Slist: stack index of the table that receives the items */
+ } u;
+} SubstAux;
-static void substcap (luaL_Buffer *b, CapState *cs) {
- const char *curr = cs->cap->s.u.s.s;
- if (isfullcap(cs->cap)) /* no nested captures? */
- luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
- else {
- cs->cap++;
- while (!isclosecap(cs->cap)) {
- const char *next = cs->cap->s.u.s.s;
- luaL_addlstring(b, curr, next - curr); /* add text up to capture */
- if (addonestring(b, cs, "replacement") == 0) /* no capture value? */
- curr = next; /* keep original text in final result */
- else
- curr = closeaddr(cs->cap - 1); /* continue after match */
+static void substcap (SubstAux *sa, CapState *cs) { /* emit the substitution capture at cs->cap into sa: a buffer for string streams, a table for list streams */
+ switch(cs->cap->s.kind) {
+ case Sstring: {
+ luaL_Buffer *b = sa->u.b;
+ const char *curr = cs->cap->s.u.s.s;
+ if (isfullcap(cs->cap)) /* no nested captures? */
+ luaL_addlstring(b, curr, cs->cap->siz - 1); /* keep original text */
+ else {
+ cs->cap++; /* step to the first nested capture */
+ while (!isclosecap(cs->cap)) {
+ const char *next = cs->cap->s.u.s.s;
+ luaL_addlstring(b, curr, next - curr); /* add text up to capture */
+ if (addonestring(b, cs, "replacement") == 0) /* no capture value? */
+ curr = next; /* keep original text in final result */
+ else
+ curr = closeaddr(cs->cap - 1); /* continue after match */
+ }
+ luaL_addlstring(b, curr, cs->cap->s.u.s.s - curr); /* add last piece of text */
+ }
+ break;
+ }
+ case Slist: {
+ int tabidx = sa->u.tab;
+ int last = lua_objlen(cs->L, tabidx); /* current length of the destination table; new items are stored after it */
+ int curr = cs->cap->s.u.l.cur; /* list position recorded for this capture */
+ if (isfullcap(cs->cap)) { /* no nested captures? keep original */
+ lua_rawgeti(cs->L, plistidx(cs->ptop), cs->cap->s.u.l.ref); /* push the list the capture refers to */
+ lua_rawgeti(cs->L, -1, cs->cap->s.u.l.cur); /* push its item at the capture position; NOTE(review): only this one item is copied - confirm a full capture never spans more */
+ lua_rawseti(cs->L, tabidx, ++last); /* append the item to the destination (pops it) */
+ lua_pop(cs->L, 1); /* pop the list */
+ } else {
+ cs->cap++; /* step to the first nested capture */
+ lua_rawgeti(cs->L, plistidx(cs->ptop), cs->cap->s.u.l.ref); /* push the list, read as index -1 below; NOTE(review): ref is taken after cs->cap++ - presumably the same list, confirm */
+ while (!isclosecap(cs->cap)) {
+ int next = cs->cap->s.u.l.cur; /* position recorded for the nested capture */
+ for(; curr < next; curr++) { /* add items up to capture */
+ lua_rawgeti(cs->L, -1, curr);
+ lua_rawseti(cs->L, tabidx, ++last);
+ }
+ if (pushoneitem(tabidx, cs, "replacement") == 0) /* no capture value? */
+ curr = next; /* keep the original items in the final result */
+ else {
+ lua_rawseti(cs->L, tabidx, ++last); /* append the value on the stack top (pops it) */
+ curr = (cs->cap-1)->s.u.l.cur + 1; /* continue one past the position of the capture entry just passed */
+ }
+ }
+ for(; curr < cs->cap->s.u.l.cur; curr++) { /* add the remaining items up to the closing capture */
+ lua_rawgeti(cs->L, -1, curr);
+ lua_rawseti(cs->L, tabidx, ++last);
+ }
+ lua_pop(cs->L, 1); /* pop the list */
+ }
+ break;
}
- luaL_addlstring(b, curr, cs->cap->s.u.s.s - curr); /* add last piece of text */
}
cs->cap++; /* go to next capture */
}
@@ -2287,9 +2363,12 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
case Cstring:
stringcap(b, cs); /* add capture directly to buffer */
return 1;
- case Csubst:
- substcap(b, cs); /* add capture directly to buffer */
+ case Csubst: {
+ SubstAux sa;
+ sa.u.b = b;
+ substcap(&sa, cs); /* add capture directly to buffer */
return 1;
+ }
default: {
lua_State *L = cs->L;
int n = pushcapture(cs);
@@ -2304,6 +2383,29 @@ static int addonestring (luaL_Buffer *b, CapState *cs, const char *what) {
}
}
+static int pushoneitem (int tab, CapState *cs, const char *what) { /* produce one item for the capture at cs->cap; returns 1 for string/substitution captures, otherwise the count from pushcapture; NOTE(review): 'what' is never used in this body */
+ switch (captype(cs->cap)) {
+ case Cstring: {
+ luaL_Buffer b;
+ luaL_buffinit(cs->L, &b);
+ stringcap(&b, cs); /* expand the string capture into the temporary buffer */
+ luaL_pushresult(&b); /* leave the resulting string on the Lua stack */
+ return 1;
+ }
+ case Csubst: {
+ SubstAux sa; sa.u.tab = tab; /* nested substitution is given the same table index */
+ substcap(&sa, cs); /* NOTE(review): the list branch of substcap stores into 'tab' and leaves the stack balanced, yet 1 is returned and the caller then stores the stack top - confirm */
+ return 1;
+ }
+ default: {
+ lua_State *L = cs->L;
+ int n = pushcapture(cs); /* n: count returned by pushcapture */
+ if(n > 1) lua_pop(L, n - 1); /* pop the extra n - 1 values, leaving one */
+ return n; /* the original count, not the number of values left on the stack */
+ }
+ }
+}
+
static int pushcapture (CapState *cs) {
luaL_checkstack(cs->L, 4, "too many captures");
@@ -2348,10 +2450,24 @@ static int pushcapture (CapState *cs) {
return 1;
}
case Csubst: {
- luaL_Buffer b;
- luaL_buffinit(cs->L, &b);
- substcap(&b, cs);
- luaL_pushresult(&b);
+ switch(cs->cap->s.kind) {
+ case Sstring: {
+ SubstAux sa;
+ luaL_Buffer b;
+ luaL_buffinit(cs->L, &b);
+ sa.u.b = &b;
+ substcap(&sa, cs);
+ luaL_pushresult(&b);
+ break;
+ }
+ case Slist: {
+ SubstAux sa;
+ lua_newtable(cs->L);
+ sa.u.tab = lua_gettop(cs->L);
+ substcap(&sa, cs);
+ break;
+ }
+ }
return 1;
}
case Cgroup: {
View
@@ -479,7 +479,6 @@ assert(m.match(m.Cmt(m.Cg(m.Carg(3), "a") *
end) * "a",
"a", 1, false, 100, 1000) == 2*1001 + 3*100)
-
-- tests for Lua functions
t = {}
@@ -705,7 +704,6 @@ badgrammar({ -(lpeg.V(1) * 'a') }, "rule '1'")
assert(m.match({'a' * -lpeg.V(1)}, "aaa") == 2)
assert(m.match({'a' * -lpeg.V(1)}, "aaaa") == nil)
-
-- simple tests for maximum sizes:
local p = m.P"a"
for i=1,14 do p = p * p end

0 comments on commit f9d8598

Please sign in to comment.