Skip to content

Commit

Permalink
regsub converter: empty match advances by 1 byte to avoid loops
Browse files Browse the repository at this point in the history
  • Loading branch information
dirk-zimoch committed Feb 18, 2019
1 parent acf7efc commit 04906a5
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 39 deletions.
7 changes: 7 additions & 0 deletions docs/formats.html
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,13 @@ <h2>14. Regular Expression Substitution Pseudo-Converter (<code>%#/<em>regex</em>
Without <em>precision</em> (or 0), all matches are replaced.
</p>
<p>
When replacing multiple matches, the search for the next match starts directly after the
string that has just been substituted, so that the <em>subst</em> string itself will never be modified recursively.
<span class="new">
However, if an empty string is matched, searching advances by 1 character in order to
avoid matching the same empty string again.</span>
</p>
<p>
In input this converter pre-processes data received from the device before
following converters read it.
Converters preceding this one will read unmodified input.
Expand Down
87 changes: 48 additions & 39 deletions src/RegexpConverter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -197,54 +197,63 @@ static void regsubst(const StreamFormat& fmt, StreamBuffer& buffer, size_t start
debug("pcre_exec: no match\n");
break;
}
if (!(fmt.flags & sign_flag) && n < fmt.prec) // without + flag
{
// do not yet replace this match
c += ovector[1];
continue;
}
// replace subexpressions
l = ovector[1] - ovector[0];
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
for (r = 1; r < rc; r++)
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
s = subst;
debug("subs = \"%s\"\n", s.expand()());
for (r = 0; r < (int)s.length(); r++)

// no prec: replace all matches
// prec with + flag: replace first prec matches
// prec without + flag: replace only match number prec

if ((fmt.flags & sign_flag) || n >= fmt.prec)
{
debug("check \"%s\"\n", s.expand(r)());
if (s[r] == esc)
// replace subexpressions
debug("before [%d]= \"%s\"\n", ovector[0], buffer.expand(start+c,ovector[0])());
debug("match [%d]= \"%s\"\n", l, buffer.expand(start+c+ovector[0],l)());
for (r = 1; r < rc; r++)
debug("sub%d = \"%s\"\n", r, buffer.expand(start+c+ovector[r*2], ovector[r*2+1]-ovector[r*2])());
debug("after = \"%s\"\n", buffer.expand(start+c+ovector[1])());
s = subst;
debug("subs = \"%s\"\n", s.expand()());
for (r = 0; r < (int)s.length(); r++)
{
unsigned char ch = s[r+1];
debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1);
if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
debug("check \"%s\"\n", s.expand(r)());
if (s[r] == esc)
{
ch *= 2;
rl = ovector[ch+1] - ovector[ch];
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
r += rl - 1;
unsigned char ch = s[r+1];
debug("found escaped \\%u, in range 1-%d?\n", ch, rc-1);
if (ch != 0 && ch < rc) // escaped 1 - 9 : replace with subexpr
{
ch *= 2;
rl = ovector[ch+1] - ovector[ch];
debug("yes, replace \\%d: \"%s\"\n", ch/2, buffer.expand(start+c+ovector[ch], rl)());
s.replace(r, 2, buffer(start+c+ovector[ch]), rl);
r += rl - 1;
}
else
{
debug("no, use literal \\%u\n", ch);
s.remove(r, 1); // just remove escape
}
}
else
else if (s[r] == '&') // unescaped & : replace with match
{
debug("no, use literal \\%u\n", ch);
s.remove(r, 1); // just remove escape
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
s.replace(r, 1, buffer(start+c+ovector[0]), l);
r += l - 1;
}
else continue;
debug("subs = \"%s\"\n", s.expand()());
}
else if (s[r] == '&') // unescaped & : replace with match
{
debug("replace &: \"%s\"\n", buffer.expand(start+c+ovector[0], l)());
s.replace(r, 1, buffer(start+c+ovector[0]), l);
r += l - 1;
}
else continue;
debug("subs = \"%s\"\n", s.expand()());
buffer.replace(start+c+ovector[0], l, s);
length -= l;
length += s.length();
c += s.length();
}
c += ovector[0];
if (l == 0)
{
debug("pcre_exec: empty match\n");
c++; // Empty strings may lead to an endless loop. Match them only once.
}
buffer.replace(start+c+ovector[0], l, s);
length += s.length() - l;
c += ovector[0] + s.length();
if (n == fmt.prec) // max match reached
{
debug("pcre_exec: max match %d reached\n", n);
Expand Down

0 comments on commit 04906a5

Please sign in to comment.