Skip to content

Commit

Permalink
[eggex] Fix bug where unmatched groups wouldn't be null
Browse files Browse the repository at this point in the history
A test for the "lexer pattern" caught this.
  • Loading branch information
Andy C committed Dec 16, 2023
1 parent 0c052f5 commit 87410d9
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 7 deletions.
4 changes: 4 additions & 0 deletions builtin/func_eggex.py
Expand Up @@ -10,9 +10,12 @@
from core import state
from core import vm
from frontend import typed_args
from mycpp.mylib import log

from typing import List

_ = log

G = 0 # _match() _group()
S = 1 # _start()
E = 2 # _end()
Expand Down Expand Up @@ -91,6 +94,7 @@ def Call(self, rd):

rd.Done()

#log('group %d, s %r indices %s', i, m.s, m.indices)
return _GetMatch(m.s, m.indices, i, self.to_return,
rd.LeftParenToken())

Expand Down
13 changes: 11 additions & 2 deletions cpp/libc.cc
Expand Up @@ -126,8 +126,17 @@ List<int>* regex_search(BigStr* pattern, int flags, BigStr* str, int pos) {
if (match) {
int i;
for (i = 0; i < num_groups; i++) {
indices->append(pmatch[i].rm_so + pos);
indices->append(pmatch[i].rm_eo + pos);
int start = pmatch[i].rm_so;
if (start != -1) {
start += pos;
}
indices->append(start);

int end = pmatch[i].rm_eo;
if (end != -1) {
end += pos;
}
indices->append(end);
}
}

Expand Down
16 changes: 11 additions & 5 deletions pyext/libc.c
Expand Up @@ -216,11 +216,17 @@ func_regex_search(PyObject *self, PyObject *args) {
if (match == 0) {
int i;
for (i = 0; i < num_groups; i++) {
PyObject *start = PyInt_FromLong(pmatch[i].rm_so + pos);
PyList_SetItem(ret, 2*i, start);

PyObject *end = PyInt_FromLong(pmatch[i].rm_eo + pos);
PyList_SetItem(ret, 2*i + 1, end);
int start = pmatch[i].rm_so;
if (start != -1) {
start += pos;
}
PyList_SetItem(ret, 2*i, PyInt_FromLong(start));

int end = pmatch[i].rm_eo;
if (end != -1) {
end += pos;
}
PyList_SetItem(ret, 2*i + 1, PyInt_FromLong(end));
}
}

Expand Down
45 changes: 45 additions & 0 deletions spec/ysh-regex-api.test.sh
Expand Up @@ -193,6 +193,51 @@ pat=([[:digit:]]+)-
34-
## END

#### Str->leftMatch() can implement lexer pattern

# Walks a string token by token: search with a three-alternative eggex from
# the current position, print all three capture groups, then resume at the end
# of the whole match.  Groups belonging to alternatives that did not match are
# expected to print as 'null' (see STDOUT below).
# NOTE: despite the case title, the loop currently calls search(); see the
# TODO inside the loop.

shopt -s ysh:upgrade

# Three alternatives, each wrapped in its own capture group (groups 1, 2, 3).
# Per the expected output: group 1 captures '12', group 2 'ab', group 3 ' '.
var lexer = / <capture d+> | <capture [a-z]+> | <capture s+> /
#echo $lexer

var s = 'ab 12'

# This isn't OK
#var s = 'ab + 12 - cd'

# Offset in s where the next search starts
var pos = 0

while (true) {
echo "pos=$pos"

# TODO: use leftMatch()
var m = s->search(lexer, pos=pos)
# A falsy result ends the loop; in the expected output this happens at pos=5,
# the length of s.
if (not m) {
break
}
# TODO: add groups()
#var groups = [m => group(1), m => group(2), m => group(3)]
#json write --pretty=F (groups)
# Only one alternative matches per iteration, so two of the three groups are
# expected to be null each time.
echo "$[m => group(1)]/$[m => group(2)]/$[m => group(3)]/"

echo

# Advance to the end of the whole match (group 0)
setvar pos = m => end(0)
}

## STDOUT:
pos=0
null/ab/null/

pos=2
null/null/ /

pos=3
12/null/null/

pos=5
## END


#### Named captures with _match
shopt -s ysh:all
Expand Down

0 comments on commit 87410d9

Please sign in to comment.