Skip to content

Commit

Permalink
study_chunk: avoid mutating regexp program within GOSUB
Browse files Browse the repository at this point in the history
gh16947 and gh17743: studying GOSUB may restudy in an inner call
(via a mix of recursion and enframing) something that an outer call
is in the middle of looking at.  Let the outer frame deal with it.

(CVE-2020-12723)

(cherry picked from commit c4033e740bd18d9fbe3456a9db2ec2053cdc5271)
  • Loading branch information
hvds authored and steve-m-hay committed May 17, 2020
1 parent 3295b48 commit 66bbb51
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 23 deletions.
2 changes: 1 addition & 1 deletion embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -2480,7 +2480,7 @@ Es |SSize_t|study_chunk |NN RExC_state_t *pRExC_state \
|NULLOK struct scan_data_t *data \
|I32 stopparen|U32 recursed_depth \
|NULLOK regnode_ssc *and_withp \
|U32 flags|U32 depth
|U32 flags|U32 depth|bool was_mutate_ok
Es |void |rck_elide_nothing|NN regnode *node
EsR |SV * |get_ANYOFM_contents|NN const regnode * n
EsRn |U32 |add_data |NN RExC_state_t* const pRExC_state \
Expand Down
2 changes: 1 addition & 1 deletion embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,7 @@
#define ssc_is_cp_posixl_init S_ssc_is_cp_posixl_init
#define ssc_or(a,b,c) S_ssc_or(aTHX_ a,b,c)
#define ssc_union(a,b,c) S_ssc_union(aTHX_ a,b,c)
#define study_chunk(a,b,c,d,e,f,g,h,i,j,k) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k)
#define study_chunk(a,b,c,d,e,f,g,h,i,j,k,l) S_study_chunk(aTHX_ a,b,c,d,e,f,g,h,i,j,k,l)
# endif
# if defined(PERL_IN_REGCOMP_C) || defined (PERL_IN_DUMP_C)
#define _invlist_dump(a,b,c,d) Perl__invlist_dump(aTHX_ a,b,c,d)
Expand Down
2 changes: 1 addition & 1 deletion proto.h
Original file line number Diff line number Diff line change
Expand Up @@ -5659,7 +5659,7 @@ PERL_STATIC_INLINE void S_ssc_union(pTHX_ regnode_ssc *ssc, SV* const invlist, c
#define PERL_ARGS_ASSERT_SSC_UNION \
assert(ssc); assert(invlist)
#endif
STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth);
STATIC SSize_t S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, SSize_t *minlenp, SSize_t *deltap, regnode *last, struct scan_data_t *data, I32 stopparen, U32 recursed_depth, regnode_ssc *and_withp, U32 flags, U32 depth, bool was_mutate_ok);
#define PERL_ARGS_ASSERT_STUDY_CHUNK \
assert(pRExC_state); assert(scanp); assert(minlenp); assert(deltap); assert(last)
#endif
Expand Down
54 changes: 35 additions & 19 deletions regcomp.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ typedef struct scan_frame {
regnode *next_regnode; /* next node to process when last is reached */
U32 prev_recursed_depth;
I32 stopparen; /* what stopparen do we use */
bool in_gosub; /* this or an outer frame is for GOSUB */

struct scan_frame *this_prev_frame; /* this previous frame */
struct scan_frame *prev_frame; /* previous frame */
Expand Down Expand Up @@ -4497,7 +4498,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
I32 stopparen,
U32 recursed_depth,
regnode_ssc *and_withp,
U32 flags, U32 depth)
U32 flags, U32 depth, bool was_mutate_ok)
/* scanp: Start here (read-write). */
/* deltap: Write maxlen-minlen here. */
/* last: Stop before this one. */
Expand Down Expand Up @@ -4576,6 +4577,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
node length to get a real minimum (because
the folded version may be shorter) */
bool unfolded_multi_char = FALSE;
/* avoid mutating ops if we are anywhere within the recursed or
* enframed handling for a GOSUB: the outermost level will handle it.
*/
bool mutate_ok = was_mutate_ok && !(frame && frame->in_gosub);
/* Peephole optimizer: */
DEBUG_STUDYDATA("Peep", data, depth, is_inf);
DEBUG_PEEP("Peep", scan, depth, flags);
Expand All @@ -4586,7 +4591,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
* parsing code, as each (?:..) is handled by a different invocation of
* reg() -- Yves
*/
JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);
if (mutate_ok)
JOIN_EXACT(scan,&min_subtract, &unfolded_multi_char, 0);

/* Follow the next-chain of the current node and optimize
away all the NOTHINGs from it.
Expand Down Expand Up @@ -4618,7 +4624,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* DEFINEP study_chunk() recursion */
(void)study_chunk(pRExC_state, &scan, &minlen,
&deltanext, next, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1);
recursed_depth, NULL, f, depth+1, mutate_ok);

scan = next;
} else
Expand Down Expand Up @@ -4686,7 +4692,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* recurse study_chunk() for each BRANCH in an alternation */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, next, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1);
recursed_depth, NULL, f, depth+1,
mutate_ok);

if (min1 > minnext)
min1 = minnext;
Expand Down Expand Up @@ -4753,9 +4760,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}

if (PERL_ENABLE_TRIE_OPTIMISATION &&
OP( startbranch ) == BRANCH )
{
if (PERL_ENABLE_TRIE_OPTIMISATION
&& OP(startbranch) == BRANCH
&& mutate_ok
) {
/* demq.

Assuming this was/is a branch we are dealing with: 'scan'
Expand Down Expand Up @@ -5210,6 +5218,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
newframe->stopparen = stopparen;
newframe->prev_recursed_depth = recursed_depth;
newframe->this_prev_frame= frame;
newframe->in_gosub = (
(frame && frame->in_gosub) || OP(scan) == GOSUB
);

DEBUG_STUDYDATA("frame-new", data, depth, is_inf);
DEBUG_PEEP("fnew", scan, depth, flags);
Expand Down Expand Up @@ -5367,16 +5378,17 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,

/* This temporary node can now be turned into EXACTFU, and
* must, as regexec.c doesn't handle it */
if (OP(next) == EXACTFU_S_EDGE) {
if (OP(next) == EXACTFU_S_EDGE && mutate_ok) {
OP(next) = EXACTFU;
}

if ( STR_LEN(next) == 1
&& isALPHA_A(* STRING(next))
&& ( OP(next) == EXACTFAA
|| ( OP(next) == EXACTFU
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next)))))
{
&& ! HAS_NONLATIN1_SIMPLE_FOLD_CLOSURE(* STRING(next))))
&& mutate_ok
) {
/* These differ in just one bit */
U8 mask = ~ ('A' ^ 'a');

Expand Down Expand Up @@ -5463,7 +5475,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
(mincount == 0
? (f & ~SCF_DO_SUBSTR)
: f)
,depth+1);
, depth+1, mutate_ok);

if (flags & SCF_DO_STCLASS)
data->start_class = oclass;
Expand Down Expand Up @@ -5529,7 +5541,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if ( OP(oscan) == CURLYX && data
&& data->flags & SF_IN_PAR
&& !(data->flags & SF_HAS_EVAL)
&& !deltanext && minnext == 1 ) {
&& !deltanext && minnext == 1
&& mutate_ok
) {
/* Try to optimize to CURLYN. */
regnode *nxt = NEXTOPER(oscan) + EXTRA_STEP_2ARGS;
regnode * const nxt1 = nxt;
Expand Down Expand Up @@ -5579,10 +5593,10 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
&& !(data->flags & SF_HAS_EVAL)
&& !deltanext /* atom is fixed width */
&& minnext != 0 /* CURLYM can't handle zero width */

/* Nor characters whose fold at run-time may be
* multi-character */
&& ! (RExC_seen & REG_UNFOLDED_MULTI_SEEN)
&& mutate_ok
) {
/* XXXX How to optimize if data == 0? */
/* Optimize to a simpler form. */
Expand Down Expand Up @@ -5635,7 +5649,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
/* recurse study_chunk() on optimised CURLYX => CURLYM */
study_chunk(pRExC_state, &nxt1, minlenp, &deltanext, nxt,
NULL, stopparen, recursed_depth, NULL, 0,
depth+1);
depth+1, mutate_ok);
}
else
oscan->flags = 0;
Expand Down Expand Up @@ -6040,7 +6054,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
/* recurse study_chunk() for lookahead body */
minnext = study_chunk(pRExC_state, &nscan, minlenp, &deltanext,
last, &data_fake, stopparen,
recursed_depth, NULL, f, depth+1);
recursed_depth, NULL, f, depth+1,
mutate_ok);
if (scan->flags) {
if ( deltanext < 0
|| deltanext > (I32) U8_MAX
Expand Down Expand Up @@ -6145,7 +6160,7 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
*minnextp = study_chunk(pRExC_state, &nscan, minnextp,
&deltanext, last, &data_fake,
stopparen, recursed_depth, NULL,
f, depth+1);
f, depth+1, mutate_ok);
if (scan->flags) {
assert(0); /* This code has never been tested since this
is normally not compiled */
Expand Down Expand Up @@ -6313,7 +6328,8 @@ Perl_re_printf( aTHX_ "LHS=%" UVuf " RHS=%" UVuf "\n",
/* optimise study_chunk() for TRIE */
minnext = study_chunk(pRExC_state, &scan, minlenp,
&deltanext, (regnode *)nextbranch, &data_fake,
stopparen, recursed_depth, NULL, f, depth+1);
stopparen, recursed_depth, NULL, f, depth+1,
mutate_ok);
}
if (nextbranch && PL_regkind[OP(nextbranch)]==BRANCH)
nextbranch= regnext((regnode*)nextbranch);
Expand Down Expand Up @@ -8106,7 +8122,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
&data, -1, 0, NULL,
SCF_DO_SUBSTR | SCF_WHILEM_VISITED_POS | stclass_flag
| (restudied ? SCF_TRIE_DOING_RESTUDY : 0),
0);
0, TRUE);


CHECK_RESTUDY_GOTO_butfirst(LEAVE_with_name("study_chunk"));
Expand Down Expand Up @@ -8235,7 +8251,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
SCF_DO_STCLASS_AND|SCF_WHILEM_VISITED_POS|(restudied
? SCF_TRIE_DOING_RESTUDY
: 0),
0);
0, TRUE);

CHECK_RESTUDY_GOTO_butfirst(NOOP);

Expand Down
26 changes: 25 additions & 1 deletion t/re/pat.t
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ BEGIN {
skip_all('no re module') unless defined &DynaLoader::boot_DynaLoader;
skip_all_without_unicode_tables();

plan tests => 869; # Update this when adding/deleting tests.
plan tests => 873; # Update this when adding/deleting tests.

run_tests() unless caller;

Expand Down Expand Up @@ -2141,6 +2141,30 @@ SKIP:
ok($result, "regexp correctly matched");
}

# gh16947: test regexp corruption (GOSUB)
{
fresh_perl_is(q{
'xy' =~ /x(?0)|x(?|y|y)/ && print 'ok'
}, 'ok', {}, 'gh16947: test regexp corruption (GOSUB)');
}
# gh16947: test fix doesn't break SUSPEND
{
fresh_perl_is(q{ 'sx' =~ m{ss++}i; print 'ok' },
'ok', {}, "gh16947: test fix doesn't break SUSPEND");
}

# gh17743: more regexp corruption via GOSUB
{
fresh_perl_is(q{
"0" =~ /((0(?0)|000(?|0000|0000)(?0))|)/; print "ok"
}, 'ok', {}, 'gh17743: test regexp corruption (1)');

fresh_perl_is(q{
"000000000000" =~ /(0(())(0((?0)())|000(?|\x{ef}\x{bf}\x{bd}|\x{ef}\x{bf}\x{bd}))|)/;
print "ok"
}, 'ok', {}, 'gh17743: test regexp corruption (2)');
}

} # End of sub run_tests

1;
Expand Down

0 comments on commit 66bbb51

Please sign in to comment.