From 3656fd84cd1231c42928ef5333f4888b02e6658d Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Tue, 23 Sep 2025 11:12:12 -0500 Subject: [PATCH 1/3] Start of line in negative lookahead optimization fix Don't use the content of negative lookaheads when determining whether a start-of-line assertion can require that match has to start at the beginning of the subject. Fixes swiftlang/swift#81789. rdar://152119639 --- Sources/_StringProcessing/Regex/DSLTree.swift | 8 ++++++++ Tests/RegexTests/MatchTests.swift | 15 ++++++++++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 8d6a5fbc..5971cd93 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -791,6 +791,10 @@ extension DSLTree.Node { // Groups (and other parent nodes) defer to the child. case .nonCapturingGroup(let kind, let child): + // Don't let a negative lookahead affect this - need to continue to next sibling + if kind.isNegativeLookahead { + return nil + } options.beginScope() defer { options.endScope() } if case .changeMatchingOptions(let sequence) = kind.ast { @@ -902,6 +906,10 @@ extension DSLTree { public static var negativeLookahead: Self { .init(ast: .negativeLookahead) } + + internal var isNegativeLookahead: Bool { + self.ast == .negativeLookahead + } } @_spi(RegexBuilder) diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 27302cda..307ee3f9 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -46,7 +46,7 @@ func _firstMatch( ) throws -> (String, [String?])? { var regex = try Regex(regexStr, syntax: syntax).matchingSemantics(semanticLevel) let result = try regex.firstMatch(in: input) - + func validateSubstring(_ substringInput: Substring) throws { // Sometimes the characters we add to a substring merge with existing // string members. This messes up cross-validation, so skip the test. @@ -1629,6 +1629,14 @@ extension RegexTests { // engines generally enforce that lookbehinds are fixed width firstMatchTest( #"\d{3}(? Date: Mon, 29 Sep 2025 12:52:46 -0500 Subject: [PATCH 2/3] Add test cases for only-match-at-start with lookaheads --- Tests/RegexTests/CompileTests.swift | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Tests/RegexTests/CompileTests.swift b/Tests/RegexTests/CompileTests.swift index 7ea38490..d3129130 100644 --- a/Tests/RegexTests/CompileTests.swift +++ b/Tests/RegexTests/CompileTests.swift @@ -589,5 +589,9 @@ extension RegexTests { try expectCanOnlyMatchAtStart("(foo)?^bar", true) // The initial group must match "" try expectCanOnlyMatchAtStart("(?:foo)?^bar", true) try expectCanOnlyMatchAtStart("(foo)+^bar", false) // This can't actually match anywhere + + // Test lookahead assertions with anchor + try expectCanOnlyMatchAtStart("(?=^)foo", true) + try expectCanOnlyMatchAtStart("(?!^)foo", false) } } From 33adee45a6d82fd6f6fb818c1fc12a473cff9c7e Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 29 Sep 2025 13:03:39 -0500 Subject: [PATCH 3/3] Use correct issue number --- Tests/RegexTests/MatchTests.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 307ee3f9..e20beeaf 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -2884,7 +2884,8 @@ extension RegexTests { ) } - func testIssue81789() throws { + func testIssue815() throws { + // Original report from https://github.com/swiftlang/swift-experimental-string-processing/issues/815 let matches = "dispatchWithName".matches(of: #/(?!^)(With(?!No)|For|In|At|To)(?=[A-Z])/#) XCTAssert(matches[0].output == ("With", "With")) }