From 706ea92e7972b4d75537f5e8e7e9887ffd32a8df Mon Sep 17 00:00:00 2001 From: Alexander Bulancov <6594487+trinistr@users.noreply.github.com> Date: Tue, 3 Feb 2026 22:43:58 +0300 Subject: [PATCH 1/2] Add more tests for Regexp.linear_time? > The cache-based optimization now supports lookarounds and atomic groupings. That is, match for Regexp containing these extensions can now also be performed in linear time to the length of the input string. However, these cannot contain captures and cannot be nested. [Feature #19725] --- core/regexp/linear_time_spec.rb | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/core/regexp/linear_time_spec.rb b/core/regexp/linear_time_spec.rb index 2f3f81ed2..63b2f15f7 100644 --- a/core/regexp/linear_time_spec.rb +++ b/core/regexp/linear_time_spec.rb @@ -25,7 +25,27 @@ }.should complain(/warning: flags ignored/) end - it "returns true for positive lookarounds" do - Regexp.linear_time?(/(?:(?=a*)a)*/).should == true + it "returns true for positive lookahead" do + Regexp.linear_time?(/a*(?:(?=a*)a)*b/).should == true + end + + it "returns true for positive lookbehind" do + Regexp.linear_time?(/a*(?:(?<=a)a*)*b/).should == true + end + + it "returns true for negative lookahead" do + Regexp.linear_time?(/a*(?:(?!a*)a*)*b/).should == true + end + + it "returns true for negative lookbehind" do + Regexp.linear_time?(/a*(?:(?<!a)a*)*b/).should == true + end + + it "returns true for atomic groups" do + Regexp.linear_time?(/a*(?:(?>a)a*)*b/).should == true + end + + it "returns true for possessive quantifiers" do + Regexp.linear_time?(/a*(?:(?:a)?+a*)*b/).should == true end end From 5cf429dc49abf80fafff480600635c30e685584c Mon Sep 17 00:00:00 2001 From: Benoit Daloze Date: Wed, 4 Feb 2026 10:52:40 +0100 Subject: [PATCH 2/2] Expand Regexp.linear_time?
specs and clearly categorize specs which only pass on some Regexp engines --- core/regexp/linear_time_spec.rb | 45 +++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 8 deletions(-) diff --git a/core/regexp/linear_time_spec.rb b/core/regexp/linear_time_spec.rb index 63b2f15f7..f70021dfe 100644 --- a/core/regexp/linear_time_spec.rb +++ b/core/regexp/linear_time_spec.rb @@ -33,19 +33,48 @@ Regexp.linear_time?(/a*(?:(?<=a)a*)*b/).should == true end - it "returns true for negative lookahead" do - Regexp.linear_time?(/a*(?:(?!a*)a*)*b/).should == true - end - it "returns true for negative lookbehind" do Regexp.linear_time?(/a*(?:(?<!a)a*)*b/).should == true end - it "returns true for atomic groups" do - Regexp.linear_time?(/a*(?:(?>a)a*)*b/).should == true + # There are two known ways to make Regexp linear: + # * Using a DFA (deterministic finite-state automaton) Regexp engine, which always matches in linear time (e.g. TruffleRuby with TRegex) + # * Caching position and state to avoid catastrophic backtracking (e.g. CRuby: https://bugs.ruby-lang.org/issues/19104) + # + # Both approaches should be allowed and given that DFA Regexp engines + # are much faster there should be no specs preventing using them. + uses_regexp_caching = RUBY_ENGINE == 'ruby' + uses_dfa_regexp_engine = !uses_regexp_caching + + # The following specs should not be relied upon, + # they are here only to illustrate differences between Regexp engines.
+ guard -> { uses_regexp_caching } do + it "returns true for negative lookahead" do + Regexp.linear_time?(/a*(?:(?!a*)a*)*b/).should == true + end + + it "returns true for atomic groups" do + Regexp.linear_time?(/a*(?:(?>a)a*)*b/).should == true + end + + it "returns true for possessive quantifiers" do + Regexp.linear_time?(/a*(?:(?:a)?+a*)*b/).should == true + end + + it "returns true for positive lookbehind with capture group" do + Regexp.linear_time?(/.(?<=(a))/).should == true + end end - it "returns true for possessive quantifiers" do - Regexp.linear_time?(/a*(?:(?:a)?+a*)*b/).should == true + # The following specs should not be relied upon, + # they are here only to illustrate differences between Regexp engines. + guard -> { uses_dfa_regexp_engine } do + it "returns true for non-recursive subexpression call" do + Regexp.linear_time?(/(?<a>a){0}\g<a>/).should == true + end + + it "returns true for positive lookahead with capture group" do + Regexp.linear_time?(/x+(?=(a))/).should == true + end + end end