Skip to content

Commit

Permalink
regcomp.pl - fixup intflags debug data to handle gaps properly
Browse files Browse the repository at this point in the history
We were not handling gaps in the sequence properly, and effectively
showing the wrong flag names or missing the last flag. Now we die if there
are any collisions or if any of the PREGf defines set more than one bit.
This also adds some crude tests to validate that intflags serialization is
working properly.

Note, extflags handles more complex scenarios and seems to handle this
gracefully already, hence the reason I haven't touched it as well.

This also tweaks a comment in lexical_debug.t which part of this was
cribbed from.
  • Loading branch information
demerphq authored and pjacklam committed May 20, 2023
1 parent 17e99cc commit 4641d56
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 20 deletions.
2 changes: 2 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -4514,6 +4514,8 @@ ext/re/re.pm re extension Perl module
ext/re/re.xs re extension external subroutines
ext/re/re_comp.h re extension wrapper for regcomp.h
ext/re/re_top.h re extension symbol hiding header
ext/re/t/intflags.pl Program used by intflags.t
ext/re/t/intflags.t Test that intflags are serialized properly
ext/re/t/lexical_debug.pl generate debug output for lexical re 'debug'
ext/re/t/lexical_debug.t test that lexical re 'debug' works
ext/re/t/qr.t test that qr// is a Regexp
Expand Down
2 changes: 1 addition & 1 deletion ext/re/re.pm
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ package re;
use strict;
use warnings;

our $VERSION = "0.43";
our $VERSION = "0.44";
our @ISA = qw(Exporter);
our @EXPORT_OK = qw{
is_regexp regexp_pattern
Expand Down
16 changes: 16 additions & 0 deletions ext/re/t/intflags.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use re 'Debug' => qw(DUMP FLAGS);
our $count;
my $code= '(?{$count++})';
my @p= (
qr/(foo)(?1)?/,
qr/\Gfoo/,
qr/.*foo/,
qr/^foo/,
qr/(foo(*THEN)bar|food)/,
qr/a.*b.*/,
qr/a{1,4}\Gfoo/,
qr/a+/,
do { use re 'eval'; qr/a$code/},
);

print STDERR "-OK-\n";
25 changes: 25 additions & 0 deletions ext/re/t/intflags.t
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!./perl

BEGIN {
require Config;
if (($Config::Config{'extensions'} !~ /\bre\b/) ){
print "1..0 # Skip -- Perl configured without re module\n";
exit 0;
}
}

use strict;

# must use a BEGIN or the prototypes wont be respected meaning
# tests could pass that shouldn't.
BEGIN { require "../../t/test.pl"; }
my $out = runperl(progfile => "t/intflags.pl", stderr => 1 );
like($out,qr/-OK-\n/, "intflags.pl ran to completion");

my %seen;
foreach my $line (split /\n/, $out) {
$line=~s/^r->intflags:\s+// or next;
length($_) and $seen{$_}++ for split /\s+/, $line;
}
is(0+keys %seen,14);
done_testing;
2 changes: 1 addition & 1 deletion ext/re/t/lexical_debug.t
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ BEGIN {
use strict;

# must use a BEGIN or the prototypes wont be respected meaning
# tests could pass that shouldn't
# tests could pass that shouldn't
BEGIN { require "../../t/test.pl"; }
my $out = runperl(progfile => "t/lexical_debug.pl", stderr => 1 );

Expand Down
23 changes: 19 additions & 4 deletions regen/regcomp.pl
Original file line number Diff line number Diff line change
Expand Up @@ -726,8 +726,10 @@ sub print_reg_intflags_name {
my %reverse;
my $REG_INTFLAGS_NAME_SIZE= 0;
my $hp= HeaderParser->new();
my $last_val = 0;
foreach my $file ("regcomp.h") {
$hp->read_file($file);
my @bit_tuples;
foreach my $line_info (@{$hp->lines}) {
next unless $line_info->{type} eq "content"
and $line_info->{sub_type} eq "#define";
Expand All @@ -745,13 +747,26 @@ sub print_reg_intflags_name {
my $hex= $3;
my $comment= $4;
my $val= hex($hex);
my $bin= sprintf "%b", $val;
if ($bin=~/1.*?1/) { die "Not expecting multiple bits in PREGf" }
my $bit= length($bin) - 1 ;
$comment= $comment ? " - $comment" : "";

printf $out qq(\t%-30s/* 0x%08x - %s%s */\n), qq("$abbr",),
$val, $define, $comment;
$REG_INTFLAGS_NAME_SIZE++;
if ($bit_tuples[$bit]) {
die "Duplicate PREGf bit '$bit': $define $val ($hex)";
}
$bit_tuples[$bit]= [ $bit, $val, $abbr, $define, $comment ];
}
}
foreach my $i (0..$#bit_tuples) {
my $bit_tuple= $bit_tuples[$i];
if (!$bit_tuple) {
$bit_tuple= [ $i, 1<<$i, "", "", "*UNUSED*" ];
}
my ($bit, $val, $abbr, $define, $comment)= @$bit_tuple;
printf $out qq(\t%-30s/* (1<<%2d) - 0x%08x - %s%s */\n),
qq("$abbr",), $bit, $val, $define, $comment;
}
$REG_INTFLAGS_NAME_SIZE=0+@bit_tuples;
}

print $out <<EOP;
Expand Down
29 changes: 15 additions & 14 deletions regnodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -2862,24 +2862,25 @@ EXTCONST char * const PL_reg_extflags_name[] = {
EXTCONST char * PL_reg_intflags_name[];
#else
EXTCONST char * const PL_reg_intflags_name[] = {
"SKIP", /* 0x00000001 - PREGf_SKIP */
"IMPLICIT", /* 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
"NAUGHTY", /* 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
"VERBARG_SEEN", /* 0x00000008 - PREGf_VERBARG_SEEN */
"CUTGROUP_SEEN", /* 0x00000010 - PREGf_CUTGROUP_SEEN */
"USE_RE_EVAL", /* 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
"NOSCAN", /* 0x00000040 - PREGf_NOSCAN */
"GPOS_SEEN", /* 0x00000100 - PREGf_GPOS_SEEN */
"GPOS_FLOAT", /* 0x00000200 - PREGf_GPOS_FLOAT */
"ANCH_MBOL", /* 0x00000400 - PREGf_ANCH_MBOL */
"ANCH_SBOL", /* 0x00000800 - PREGf_ANCH_SBOL */
"ANCH_GPOS", /* 0x00001000 - PREGf_ANCH_GPOS */
"RECURSE_SEEN", /* 0x00002000 - PREGf_RECURSE_SEEN */
"SKIP", /* (1<< 0) - 0x00000001 - PREGf_SKIP */
"IMPLICIT", /* (1<< 1) - 0x00000002 - PREGf_IMPLICIT - Converted .* to ^.* */
"NAUGHTY", /* (1<< 2) - 0x00000004 - PREGf_NAUGHTY - how exponential is this pattern? */
"VERBARG_SEEN", /* (1<< 3) - 0x00000008 - PREGf_VERBARG_SEEN */
"CUTGROUP_SEEN", /* (1<< 4) - 0x00000010 - PREGf_CUTGROUP_SEEN */
"USE_RE_EVAL", /* (1<< 5) - 0x00000020 - PREGf_USE_RE_EVAL - compiled with "use re 'eval'" */
"NOSCAN", /* (1<< 6) - 0x00000040 - PREGf_NOSCAN */
"", /* (1<< 7) - 0x00000080 - *UNUSED* */
"GPOS_SEEN", /* (1<< 8) - 0x00000100 - PREGf_GPOS_SEEN */
"GPOS_FLOAT", /* (1<< 9) - 0x00000200 - PREGf_GPOS_FLOAT */
"ANCH_MBOL", /* (1<<10) - 0x00000400 - PREGf_ANCH_MBOL */
"ANCH_SBOL", /* (1<<11) - 0x00000800 - PREGf_ANCH_SBOL */
"ANCH_GPOS", /* (1<<12) - 0x00001000 - PREGf_ANCH_GPOS */
"RECURSE_SEEN", /* (1<<13) - 0x00002000 - PREGf_RECURSE_SEEN */
};
#endif /* DOINIT */

#ifdef DEBUGGING
# define REG_INTFLAGS_NAME_SIZE 13
# define REG_INTFLAGS_NAME_SIZE 14
#endif

/* The following have no fixed length. U8 so we can do strchr() on it. */
Expand Down

0 comments on commit 4641d56

Please sign in to comment.