From c0c2df29dfd1327cf928c3c54123570e2b49a505 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sat, 11 Apr 2026 22:15:14 +0200 Subject: [PATCH 1/2] `<regex>`: Avoid generating empty groups when parsing empty alternatives --- stl/inc/regex | 58 ++++++++----------- .../std/tests/VSO_0000000_regex_use/test.cpp | 22 +++++++ 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/stl/inc/regex b/stl/inc/regex index 770f84b718..84e136778d 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -1618,7 +1618,7 @@ public: _Node_base* _Begin_capture_group(unsigned int _Idx); void _Add_backreference(unsigned int _Idx); _Node_base* _Begin_if(_Node_base* _Start); - void _Else_if(_Node_base*, _Node_base*); + void _Else_if2(_Node_base*, _Node_base*); void _Add_rep(int _Min, int _Max, bool _Greedy); void _Negate(); _Root_node* _End_pattern(); @@ -2238,7 +2238,7 @@ private: void _Do_assert_group(bool); bool _Wrapped_disjunction(); void _Quantifier(); - bool _Alternative(); + void _Alternative2(); void _Disjunction(); void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep, bool _Nonreentrant); @@ -3661,21 +3661,25 @@ _Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { } template <class _FwdIt, class _Elem, class _RxTraits> -void _Builder2<_FwdIt, _Elem, _RxTraits>::_Else_if(_Node_base* _Start, _Node_base* _End) { // add else node - _Node_if* _Parent = static_cast<_Node_if*>(_Start->_Next); - _Node_base* _First = _End->_Next; - _End->_Next = nullptr; - _Node_base* _Last = _Current; - _Current = _End; - _End->_Next = nullptr; - _Last->_Next = _End; +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Else_if2(_Node_base* const _Start, _Node_base* const _End) { // add else node + auto _Parent = static_cast<_Node_if*>(_Start->_Next); while (_Parent->_Child) { _Parent = _Parent->_Child; } - _Parent->_Child = new _Node_if(_End); - _Parent->_Child->_Next = _First; - _First->_Prev = _Parent->_Child; + const auto _Elseif_node = new _Node_if(_End); + _Parent->_Child = 
_Elseif_node; + const auto _First = _End->_Next; + if (_First) { + _End->_Next = nullptr; + _Elseif_node->_Next = _First; + _First->_Prev = _Elseif_node; + _Current->_Next = _End; + } else { // empty alternative + _Elseif_node->_Next = _End; + } + + _Current = _End; } template <class _FwdIt, class _Elem, class _RxTraits> @@ -5812,13 +5816,12 @@ void _Parser2<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } template <class _FwdIt, class _Elem, class _RxTraits> -bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alternative +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative2() { // check for valid alternative bool _Found = false; - for (;;) { // concatenate valid elements + while (_Mchar != _Meta_eos && _Mchar != _Meta_bar + && (_Mchar != _Meta_rpar || _Disj_count == 0)) { // concatenate valid elements bool _Quant = true; - if (_Mchar == _Meta_eos || _Mchar == _Meta_bar || (_Mchar == _Meta_rpar && _Disj_count != 0)) { - return _Found; - } else if (_Mchar == _Meta_rpar && !(_L_flags & _L_paren_bal)) { + if (_Mchar == _Meta_rpar && !(_L_flags & _L_paren_bal)) { _Error(regex_constants::error_paren); } else if (_Mchar == _Meta_dot) { // add dot node _Nfa._Add_dot(); @@ -5883,26 +5886,15 @@ bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alt template <class _FwdIt, class _Elem, class _RxTraits> void _Parser2<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disjunction _Node_base* _Pos1 = _Nfa._Getmark(); - if (!_Alternative()) { - if (_Mchar != _Meta_bar) { - return; // zero-length alternative not followed by '|' - } - // zero-length leading alternative - _Node_base* _Pos3 = _Nfa._Begin_group(); - _Nfa._End_group(_Pos3); - } + _Alternative2(); - if (_Mchar == _Meta_bar) { + if (_Mchar == _Meta_bar) { // at least one more alternative _Node_base* _Pos2 = _Nfa._Begin_if(_Pos1); do { // append terms as long as we keep finding | characters _Next(); - if (!_Alternative()) { // zero-length trailing alternative - _Node_base* _Pos3 = _Nfa._Begin_group(); - _Nfa._End_group(_Pos3); - } - - _Nfa._Else_if(_Pos1, _Pos2); + 
_Alternative2(); + _Nfa._Else_if2(_Pos1, _Pos2); } while (_Mchar == _Meta_bar); } } diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index 1f82199935..3e0aba575e 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -2579,6 +2579,27 @@ void test_gh_6191() { } } +void test_gh_6249() { + // GH-6248: Avoid generating empty groups when parsing empty alternatives + for (const string re : {"|a|b", "a||b", "a|b|"}) { + g_regexTester.should_match("a", re); + g_regexTester.should_match("b", re); + g_regexTester.should_match("", re); + g_regexTester.should_not_match("c", re); + g_regexTester.should_not_match("ab", re); + } + + for (const string re : {"(?:|a|b)c", "(?:a||b)c", "(?:a|b|)c"}) { + g_regexTester.should_match("ac", re); + g_regexTester.should_match("bc", re); + g_regexTester.should_match("c", re); + g_regexTester.should_not_match("", re); + g_regexTester.should_not_match("a", re); + g_regexTester.should_not_match("b", re); + g_regexTester.should_not_match("abc", re); + } +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -2646,6 +2667,7 @@ int main() { test_gh_6181(); test_gh_6189(); test_gh_6191(); + test_gh_6249(); return g_regexTester.result(); } From 65f52e17399c797a88ad5cdb1a264c3b539719c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julian=20M=C3=BCller?= Date: Sat, 11 Apr 2026 22:59:15 +0200 Subject: [PATCH 2/2] fix comment --- tests/std/tests/VSO_0000000_regex_use/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index 3e0aba575e..1ece896659 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -2580,7 +2580,7 @@ void test_gh_6191() { } void test_gh_6249() { - // GH-6248: 
Avoid generating empty groups when parsing empty alternatives + // GH-6249: Avoid generating empty groups when parsing empty alternatives for (const string re : {"|a|b", "a||b", "a|b|"}) { g_regexTester.should_match("a", re); g_regexTester.should_match("b", re);