From f838794b49bdfd24fded977fd9d67a566485561c Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Mon, 20 Oct 2025 14:44:45 -0500 Subject: [PATCH 1/5] Implement direct AST -> DSLTree conversion --- Sources/_StringProcessing/Compiler.swift | 37 +++-- .../_StringProcessing/LiteralPrinter.swift | 156 +++++++++++++++++- .../Regex/ASTConversion.swift | 114 +++++++++++++ .../Regex/AnyRegexOutput.swift | 4 +- Sources/_StringProcessing/Regex/Core.swift | 81 ++++++++- Sources/_StringProcessing/Regex/DSLList.swift | 36 ++++ Sources/_StringProcessing/Regex/DSLTree.swift | 102 ++++++++++++ Sources/_StringProcessing/Regex/Options.swift | 6 +- .../Utility/RegexFactory.swift | 59 ++++--- .../Utility/TypeVerification.swift | 2 +- Tests/RegexTests/DSLListTests.swift | 8 +- Tests/RegexTests/MatchTests.swift | 21 +-- Tests/RegexTests/OptimizationTests.swift | 8 +- 13 files changed, 550 insertions(+), 84 deletions(-) diff --git a/Sources/_StringProcessing/Compiler.swift b/Sources/_StringProcessing/Compiler.swift index 25e6e4cf6..b34e0e5f7 100644 --- a/Sources/_StringProcessing/Compiler.swift +++ b/Sources/_StringProcessing/Compiler.swift @@ -12,21 +12,30 @@ internal import _RegexParser class Compiler { - let tree: DSLTree + let tree: DSLList // TODO: Or are these stored on the tree? var options = MatchingOptions() private var compileOptions: _CompileOptions = .default init(ast: AST) { - self.tree = ast.dslTree + self.tree = DSLList(tree: ast.dslTree) } init(tree: DSLTree) { - self.tree = tree + self.tree = DSLList(tree: tree) + } + + init(list: DSLList) { + self.tree = list } init(tree: DSLTree, compileOptions: _CompileOptions) { + self.tree = DSLList(tree: tree) + self.compileOptions = compileOptions + } + + init(tree: DSLList, compileOptions: _CompileOptions) { self.tree = tree self.compileOptions = compileOptions } @@ -42,18 +51,20 @@ class Compiler { compileOptions: compileOptions, captureList: tree.captureList) - return try codegen.emitRoot(tree.root) + fatalError() +// return try codegen.emitRoot(tree.root) } __consuming func emitViaList() throws -> MEProgram { // TODO: Handle global options - var dslList = DSLList(tree: tree) +// var dslList = DSLList(tree: tree) var codegen = ByteCodeGen( options: options, compileOptions: compileOptions, captureList: tree.captureList) - return try codegen.emitRoot(&dslList) + var tree = tree + return try codegen.emitRoot(&tree) } } @@ -105,20 +116,22 @@ func _compileRegex( _ syntax: SyntaxOptions = .traditional, _ semanticLevel: RegexSemanticLevel? = nil ) throws -> MEProgram { - let ast = try parse(regex, syntax) - let dsl: DSLTree + var ast = try parse(regex, syntax) + let dsl: DSLList switch semanticLevel?.base { case .graphemeCluster: let sequence = AST.MatchingOptionSequence(adding: [.init(.graphemeClusterSemantics, location: .fake)]) - dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root)) + ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake)) + dsl = DSLList(ast: ast) case .unicodeScalar: let sequence = AST.MatchingOptionSequence(adding: [.init(.unicodeScalarSemantics, location: .fake)]) - dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root)) + ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake)) + dsl = DSLList(ast: ast) case .none: - dsl = ast.dslTree + dsl = DSLList(ast: ast) } - let program = try Compiler(tree: dsl).emit() + let program = try Compiler(list: dsl).emit() return program } diff --git a/Sources/_StringProcessing/LiteralPrinter.swift b/Sources/_StringProcessing/LiteralPrinter.swift index e1dc3fa23..89a28910a 100644 --- a/Sources/_StringProcessing/LiteralPrinter.swift +++ b/Sources/_StringProcessing/LiteralPrinter.swift @@ -36,7 +36,8 @@ extension Regex { @available(SwiftStdlib 6.0, *) public var _literalPattern: String? { var gen = LiteralPrinter(options: MatchingOptions()) - gen.outputNode(self.program.tree.root) + var list = self.program.tree.nodes[...] + try? gen.outputList(&list) return gen.canonicalLiteralString } } @@ -83,6 +84,159 @@ fileprivate struct LiteralPrinter { mutating func saveInconvertible(_ node: DSLTree.Node) { segments.append(.inconvertible(node)) } + + mutating func inconvertible(_ node: DSLTree.Node) throws { + segments.append(.inconvertible(node)) + throw Incovertible.error + } +} + +extension LiteralPrinter { + enum Incovertible: Error { + case error + } + + mutating func outputList(_ list: inout ArraySlice) throws { + guard let node = list.popFirst() else { + return + } + + switch node { + case let .orderedChoice(children): + try outputAlternation(&list, count: children.count) + case let .concatenation(children): + try outputConcatenation(&list, count: children.count) + + case let .capture(name, nil, _, nil): + options.beginScope() + defer { options.endScope() } + try outputCapture(&list, name: name) + case .capture: + // Captures that use a reference or a transform are unsupported + try inconvertible(node) + return + + case let .nonCapturingGroup(kind, _): + guard let kindPattern = kind._patternString else { + try inconvertible(node) + return + } + options.beginScope() + defer { options.endScope() } + + output(kindPattern) + if case .changeMatchingOptions(let optionSequence) = kind.ast { + options.apply(optionSequence) + } + try outputList(&list) + output(")") + + case .ignoreCapturesInTypedOutput(_), + .limitCaptureNesting(_): + try outputList(&list) + case let .quantification(amount, kind, _): + try outputQuantification(&list, amount: amount, kind: kind) + case let .customCharacterClass(charClass): + outputCustomCharacterClass(charClass) + case let .atom(atom): + outputAtom(atom) + case let .quotedLiteral(literal): + output(prepareQuotedLiteral(literal)) + + case .trivia(_): + // TODO: Include trivia? + return + case .empty: + return + + case .conditional, .absentFunction, .consumer, .matcher, .characterPredicate: + saveInconvertible(node) + } + } + + mutating func outputAlternation(_ list: inout ArraySlice, count: Int) throws { + for i in 0.., count: Int) throws { + for _ in 0.., name: String?) throws { + if let name { + output("(?<\(name)>") + } else { + output("(") + } + try outputList(&list) + output(")") + } + + func requiresGrouping(_ list: ArraySlice) -> Bool { + guard let node = list.first else { return false } // malformed? + switch node { + case .concatenation(let children): + switch children.count { + case 0: + return false + case 1: + return requiresGrouping(list.dropFirst()) + default: + return true + } + + case .quotedLiteral(let literal): + return prepareQuotedLiteral(literal).count > 1 + + default: + return false + } + } + + mutating func outputQuantification( + _ list: inout ArraySlice, + amount: DSLTree._AST.QuantificationAmount, + kind: DSLTree.QuantificationKind + ) throws { + // RegexBuilder regexes can have children that need + if requiresGrouping(list) { + output("(?:") + try outputList(&list) + output(")") + } else { + try outputList(&list) + } + + switch amount.ast { + case .zeroOrMore: + output("*") + case .oneOrMore: + output("+") + case .zeroOrOne: + output("?") + case let .exactly(n): + output("{\(n.value!)}") + case let .nOrMore(n): + output("{\(n.value!),}") + case let .upToN(n): + output("{,\(n.value!)}") + case let .range(low, high): + output("{\(low.value!),\(high.value!)}") + #if RESILIENT_LIBRARIES + @unknown default: + fatalError() + #endif + } + + outputQuantificationKind(kind) + } } extension LiteralPrinter { diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index fbb189559..e1370a5a9 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -17,6 +17,120 @@ extension AST { } } +extension AST.Node { + func convert(into list: inout [DSLTree.Node]) throws { + switch self { + case .alternation(let alternation): + list.append(.orderedChoice(Array(repeating: TEMP_FAKE_NODE, count: alternation.children.count))) + for child in alternation.children { + try child.convert(into: &list) + } + case .concatenation(let concatenation): + let coalesced = self.coalescedChildren + list.append(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: coalesced.count))) + for child in coalesced { + try child.convert(into: &list) + } + case .group(let group): + let child = group.child + switch group.kind.value { + case .capture: + list.append(.capture(TEMP_FAKE_NODE)) + try child.convert(into: &list) + case .namedCapture(let name): + list.append(.capture(name: name.value, TEMP_FAKE_NODE)) + try child.convert(into: &list) + case .balancedCapture: + throw Unsupported("TODO: balanced captures") + default: + list.append(.nonCapturingGroup(.init(ast: group.kind.value), TEMP_FAKE_NODE)) + try child.convert(into: &list) + } + case .conditional(let conditional): + list.append(.conditional(.init(ast: conditional.condition.kind), TEMP_FAKE_NODE, TEMP_FAKE_NODE)) + try conditional.trueBranch.convert(into: &list) + try conditional.falseBranch.convert(into: &list) + case .quantification(let quant): + list.append( + .quantification(.init(ast: quant.amount.value), .syntax(.init(ast: quant.kind.value)), TEMP_FAKE_NODE)) + try quant.child.convert(into: &list) + case .quote(let node): + list.append(.quotedLiteral(node.literal)) + case .trivia(let node): + list.append(.trivia(node.contents)) + case .interpolation(_): + throw Unsupported("TODO: interpolation") + case .atom(let atom): + switch atom.kind { + case .scalarSequence(let seq): + // The DSL doesn't have an equivalent node for scalar sequences. Splat + // them into a concatenation of scalars. + // list.append(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: seq.scalarValues.count))) + list.append(.quotedLiteral(String(seq.scalarValues))) + default: + list.append(.atom(atom.dslTreeAtom)) + } + case .customCharacterClass(let ccc): + list.append(.customCharacterClass(ccc.dslTreeClass)) + case .absentFunction(let abs): + // TODO: What should this map to? + list.append(.absentFunction(.init(ast: abs))) + case .empty(_): + list.append(.empty) + } + } + + var coalescedChildren: [AST.Node] { + // Before converting a concatenation in a tree to list form, we need to + // flatten out any nested concatenations, and coalesce any adjacent + // characters and scalars, forming quoted literals of their contents, + // over which we can perform grapheme breaking. + + func flatten(_ node: AST.Node) -> [AST.Node] { + switch node { + case .concatenation(let concat): + return concat.children.flatMap(flatten) + default: + return [node] + } + } + + switch self { + case .alternation(let v): return v.children + case .concatenation(let v): + let children = v.children + .flatMap(flatten) + .coalescing(with: "", into: { AST.Node.quote(.init($0, .fake)) }) { str, node in + switch node { + case .atom(let a): + guard let c = a.literalCharacterValue else { return false } + str.append(c) + return true + case .quote(let q): + str += q.literal + return true + case .trivia: + // Trivia can be completely ignored if we've already coalesced + // something. + return !str.isEmpty + default: + return false + } + } + return children + + case .group(let group): + return [group.child] + case .conditional(let conditional): + return [conditional.trueBranch, conditional.falseBranch] + case .quantification(let quant): + return [quant.child] + case .quote, .trivia, .interpolation, .atom, .customCharacterClass, .absentFunction, .empty: + return [] + } + } +} + extension AST.Node { /// Converts an AST node to a `convertedRegexLiteral` node. var dslTreeNode: DSLTree.Node { diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index ae8193804..8b754c1ca 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -284,7 +284,7 @@ extension Regex where Output == AnyRegexOutput { /// - Parameter regex: A regular expression to convert to use a dynamic /// capture list. public init(_ regex: Regex) { - self.init(node: regex.root) + self.init(list: regex.list) } } @@ -331,7 +331,7 @@ extension Regex { _ regex: Regex, as outputType: Output.Type = Output.self ) { - self.init(node: regex.root) + self.init(list: regex.list) guard _verifyType().0 else { return nil diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 11445531c..3921274e1 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -11,6 +11,8 @@ internal import _RegexParser +let TEMP_FAKE_NODE = DSLTree.Node.empty + /// A type that represents a regular expression. /// /// You can use types that conform to `RegexComponent` as parameters to string @@ -93,6 +95,9 @@ public struct Regex: RegexComponent { var hasCapture: Bool { program.tree.hasCapture } + var hasChildren: Bool { + program.tree.hasChildren + } init(ast: AST) { self.program = Program(ast: ast) @@ -148,7 +153,7 @@ extension Regex { /// FIXME: If Regex is the unit of composition, then it should be a Node instead, /// and we should have a separate type that handled both global options and, /// likely, compilation/caching. - let tree: DSLTree + var tree: DSLList /// OptionSet of compiler options for testing purposes fileprivate var compileOptions: _CompileOptions = .default @@ -191,11 +196,15 @@ extension Regex { } init(ast: AST) { - self.tree = ast.dslTree + self.tree = DSLList(ast: ast) } init(tree: DSLTree) { - self.tree = tree + self.tree = DSLList(tree: tree) + } + + init(list: DSLList) { + self.tree = list } } @@ -214,12 +223,70 @@ extension Regex { @available(SwiftStdlib 5.7, *) extension Regex { - var root: DSLTree.Node { - program.tree.root - } +// var root: DSLTree.Node { +// program.tree.root +// } + var list: DSLList { + program.tree + } + init(node: DSLTree.Node) { - self.program = Program(tree: .init(node)) + self.program = Program(list: .init(node)) + } + + init(list: DSLList) { + self.program = Program(list: list) + } + + func appending(_ node: DSLTree.Node) -> Regex { + var list = program.tree + list.append(node) + return Regex(list: list) + } + + func appending(contentsOf node: some Collection) -> Regex { + var list = program.tree + list.append(contentsOf: node) + return Regex(list: list) + } + + func concatenating(_ other: some Collection) -> Regex { + var nodes = program.tree.nodes + switch nodes[0] { + case .concatenation(let children): + nodes[0] = .concatenation(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1)) + nodes.append(contentsOf: other) + default: + nodes.insert(.concatenation(Array(repeating: TEMP_FAKE_NODE, count: 2)), at: 0) + nodes.append(contentsOf: other) + } + return Regex(list: DSLList(nodes)) + } + + func alternating(with other: some Collection) -> Regex { + var nodes = program.tree.nodes + switch nodes[0] { + case .orderedChoice(let children): + nodes[0] = .orderedChoice(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1)) + nodes.append(contentsOf: other) + default: + nodes.insert(.orderedChoice(Array(repeating: TEMP_FAKE_NODE, count: 2)), at: 0) + nodes.append(contentsOf: other) + } + return Regex(list: DSLList(nodes)) + } + + func prepending(_ node: DSLTree.Node) -> Regex { + var list = program.tree + list.prepend(node) + return Regex(list: list) + } + + func prepending(contentsOf node: some Collection) -> Regex { + var list = program.tree + list.prepend(contentsOf: node) + return Regex(list: list) } } diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index f8d09a953..034bc9ee9 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -9,9 +9,18 @@ // //===----------------------------------------------------------------------===// +internal import _RegexParser + struct DSLList { var nodes: [DSLTree.Node] + // experimental + var hasCapture: Bool = false + var hasChildren: Bool = false + var captureList: CaptureList { + .Builder.build(self) + } + init(_ initial: DSLTree.Node) { self.nodes = [initial] } @@ -23,6 +32,33 @@ struct DSLList { init(tree: DSLTree) { self.nodes = Array(tree.depthFirst) } + + init(ast: AST) { + self.nodes = [] + try! ast.root.convert(into: &nodes) + } + + var first: DSLTree.Node { + nodes.first ?? .empty + } +} + +extension DSLList { + mutating func append(_ node: DSLTree.Node) { + nodes.append(node) + } + + mutating func append(contentsOf other: some Sequence) { + nodes.append(contentsOf: other) + } + + mutating func prepend(_ node: DSLTree.Node) { + nodes.insert(node, at: 0) + } + + mutating func prepend(contentsOf other: some Collection) { + nodes.insert(contentsOf: other, at: 0) + } } extension DSLTree.Node { diff --git a/Sources/_StringProcessing/Regex/DSLTree.swift b/Sources/_StringProcessing/Regex/DSLTree.swift index 55d8902fa..6209039ec 100644 --- a/Sources/_StringProcessing/Regex/DSLTree.swift +++ b/Sources/_StringProcessing/Regex/DSLTree.swift @@ -769,6 +769,91 @@ extension CaptureList.Builder { builder.addCaptures(of: dsl.root, optionalNesting: .init(canNest: true), visibleInTypedOutput: true) return builder.captures } + + mutating func addCaptures( + in list: inout ArraySlice, optionalNesting nesting: OptionalNesting, visibleInTypedOutput: Bool + ) { + guard let node = list.popFirst() else { return } + switch node { + case let .orderedChoice(children): + for _ in 0.. CaptureList { + var builder = Self() + builder.captures.append( + .init(type: dsl.first.wholeMatchType, optionalDepth: 0, visibleInTypedOutput: true, .fake)) + var nodes = dsl.nodes[...] + builder.addCaptures(in: &nodes, optionalNesting: .init(canNest: true), visibleInTypedOutput: true) + return builder.captures + } } extension DSLTree.Node { @@ -806,6 +891,23 @@ extension DSLTree.Node { } } +extension DSLList { + + /// Returns the output-defining node, peering through any output-forwarding + /// nodes. + var outputDefiningNode: DSLTree.Node? { + nodes.first(where: { !$0.isOutputForwarding }) + } + + /// Returns the type of the whole match, i.e. `.0` element type of the output. + var wholeMatchType: Any.Type { + if case .matcher(let type, _) = outputDefiningNode { + return type + } + return Substring.self + } +} + extension DSLTree.Node { /// Implementation for `canOnlyMatchAtStart`, which maintains the option /// state. diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift index 6911af911..0aca30068 100644 --- a/Sources/_StringProcessing/Regex/Options.swift +++ b/Sources/_StringProcessing/Regex/Options.swift @@ -294,7 +294,9 @@ extension RegexComponent { let sequence = shouldAdd ? AST.MatchingOptionSequence(adding: [.init(option, location: .fake)]) : AST.MatchingOptionSequence(removing: [.init(option, location: .fake)]) - return Regex(node: .nonCapturingGroup( - .init(ast: .changeMatchingOptions(sequence)), regex.root)) + + var list = regex.program.tree + list.nodes.insert(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), TEMP_FAKE_NODE), at: 0) + return Regex(list: list) } } diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index 0c224e159..9364207ed 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -26,9 +26,9 @@ public struct _RegexFactory { _ child: some RegexComponent ) -> Regex { // Don't wrap `child` again if it's a leaf node. - child.regex.root.hasChildNodes - ? .init(node: .ignoreCapturesInTypedOutput(child.regex.root)) - : .init(node: child.regex.root) + child.regex.list.hasChildren + ? child.regex.prepending(.ignoreCapturesInTypedOutput(TEMP_FAKE_NODE)) as Regex + : .init(list: child.regex.program.tree) } @available(SwiftStdlib 5.7, *) @@ -36,7 +36,7 @@ public struct _RegexFactory { _ left: some RegexComponent, _ right: some RegexComponent ) -> Regex { - .init(node: left.regex.root.appending(right.regex.root)) + left.regex.concatenating(right.regex.program.tree.nodes) } @available(SwiftStdlib 5.7, *) @@ -44,7 +44,7 @@ public struct _RegexFactory { _ left: some RegexComponent, _ right: some RegexComponent ) -> Regex { - .init(node: left.regex.root.appendingAlternationCase(right.regex.root)) + left.regex.alternating(with: right.regex.program.tree.nodes) } @_spi(RegexBuilder) @@ -107,7 +107,7 @@ public struct _RegexFactory { _ behavior: RegexRepetitionBehavior? = nil ) -> Regex { let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default - return .init(node: .quantification(.zeroOrOne, kind, component.regex.root)) + return component.regex.prepending(.quantification(.zeroOrOne, kind, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -116,7 +116,7 @@ public struct _RegexFactory { _ behavior: RegexRepetitionBehavior? = nil ) -> Regex { let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default - return .init(node: .quantification(.zeroOrMore, kind, component.regex.root)) + return component.regex.prepending(.quantification(.zeroOrMore, kind, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -125,7 +125,7 @@ public struct _RegexFactory { _ behavior: RegexRepetitionBehavior? = nil ) -> Regex { let kind: DSLTree.QuantificationKind = behavior.map { .explicit($0.dslTreeKind) } ?? .default - return .init(node: .quantification(.oneOrMore, kind, component.regex.root)) + return component.regex.prepending(.quantification(.oneOrMore, kind, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -133,7 +133,7 @@ public struct _RegexFactory { _ count: Int, _ component: some RegexComponent ) -> Regex { - .init(node: .quantification(.exactly(count), .default, component.regex.root)) + component.regex.prepending(.quantification(.exactly(count), .default, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -142,14 +142,14 @@ public struct _RegexFactory { _ behavior: RegexRepetitionBehavior?, _ component: some RegexComponent ) -> Regex { - .init(node: .repeating(range, behavior, component.regex.root)) + component.regex.prepending(.repeating(range, behavior, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) public func atomicNonCapturing( _ component: some RegexComponent ) -> Regex { - .init(node: .nonCapturingGroup(.atomicNonCapturing, component.regex.root)) + component.regex.prepending(.nonCapturingGroup(.atomicNonCapturing, TEMP_FAKE_NODE)) } @_spi(RegexBuilder) @@ -157,7 +157,7 @@ public struct _RegexFactory { public func lookaheadNonCapturing( _ component: some RegexComponent ) -> Regex { - .init(node: .nonCapturingGroup(.lookahead, component.regex.root)) + component.regex.prepending(.nonCapturingGroup(.lookahead, TEMP_FAKE_NODE)) } @_spi(RegexBuilder) @@ -165,21 +165,21 @@ public struct _RegexFactory { public func negativeLookaheadNonCapturing( _ component: some RegexComponent ) -> Regex { - .init(node: .nonCapturingGroup(.negativeLookahead, component.regex.root)) + component.regex.prepending(.nonCapturingGroup(.negativeLookahead, TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) public func orderedChoice( _ component: some RegexComponent ) -> Regex { - .init(node: .orderedChoice([component.regex.root])) + component.regex.prepending(.orderedChoice([TEMP_FAKE_NODE])) } @available(SwiftStdlib 5.7, *) public func capture( - _ r: some RegexComponent + _ component: some RegexComponent ) -> Regex { - .init(node: .capture(r.regex.root)) + component.regex.prepending(.capture(TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -187,10 +187,7 @@ public struct _RegexFactory { _ component: some RegexComponent, _ reference: Int ) -> Regex { - .init(node: .capture( - reference: ReferenceID(reference), - component.regex.root - )) + component.regex.prepending(.capture(reference: ReferenceID(reference), TEMP_FAKE_NODE)) } @available(SwiftStdlib 5.7, *) @@ -199,11 +196,12 @@ public struct _RegexFactory { _ reference: Int? = nil, _ transform: @escaping (W) throws -> NewCapture ) -> Regex { - .init(node: .capture( - reference: reference.map { ReferenceID($0) }, - component.regex.root, - CaptureTransform(transform) - )) + component.regex.prepending( + .capture( + reference: reference.map { ReferenceID($0) }, + TEMP_FAKE_NODE, + CaptureTransform(transform) + )) } @available(SwiftStdlib 5.7, *) @@ -212,10 +210,11 @@ public struct _RegexFactory { _ reference: Int? = nil, _ transform: @escaping (W) throws -> NewCapture? ) -> Regex { - .init(node: .capture( - reference: reference.map { ReferenceID($0) }, - component.regex.root, - CaptureTransform(transform) - )) + component.regex.prepending( + .capture( + reference: reference.map { ReferenceID($0) }, + TEMP_FAKE_NODE, + CaptureTransform(transform) + )) } } diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift index 11796d1e3..9f1539973 100644 --- a/Sources/_StringProcessing/Utility/TypeVerification.swift +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -41,7 +41,7 @@ extension Regex { // If we have no captures, then our Regex must be Regex. if tupleElements.count == 1 { - let wholeMatchType = program.tree.root.wholeMatchType + let wholeMatchType = program.tree.wholeMatchType return (Output.self == wholeMatchType, wholeMatchType) } diff --git a/Tests/RegexTests/DSLListTests.swift b/Tests/RegexTests/DSLListTests.swift index d8acec737..b29f2f412 100644 --- a/Tests/RegexTests/DSLListTests.swift +++ b/Tests/RegexTests/DSLListTests.swift @@ -21,17 +21,15 @@ struct DSLListTests { (#/a(?:b+)c*/#, 8), // literal, concat, a, noncap grp, quant, b, quant, c ]) func convertedNodeCount(regex: Regex, nodeCount: Int) { - let dslList = DSLList(tree: regex.program.tree) + let dslList = regex.program.tree #expect(dslList.nodes.count == nodeCount) } @Test(arguments: [#/a|b/#, #/a+b?c/#, #/abc/#, #/a(?:b+)c*/#, #/;[\r\n]/#, #/(?=(?:[1-9]|(?:a|b)))/#]) func compilationComparison(regex: Regex) throws { - let listCompiler = Compiler(tree: regex.program.tree) + let listCompiler = Compiler(list: regex.program.tree) let listProgram = try listCompiler.emitViaList() - let treeCompiler = Compiler(tree: regex.program.tree) - let treeProgram = try treeCompiler.emit() - #expect(treeProgram.instructions == listProgram.instructions) +// #expect(treeProgram.instructions == listProgram.instructions) } } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index a87112b9e..62c97b8f2 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -37,19 +37,6 @@ func _roundTripLiteral( return remadeRegex } -// Validate that the given regex compiles to the same instructions whether -// as a tree (original) or a list (new). We need to compile with optimizations -// disabled, since new optimizations are primarily landing in list compilation. -func _validateListCompilation( - _ regex: Regex -) throws -> Bool { - let treeCompiler = Compiler(tree: regex.program.tree, compileOptions: .disableOptimizations) - let treeProgram = try treeCompiler.emitViaTree() - let listCompiler = Compiler(tree: regex.program.tree, compileOptions: .disableOptimizations) - let listProgram = try listCompiler.emitViaList() - return treeProgram.instructions == listProgram.instructions -} - func _firstMatch( _ regexStr: String, input: String, @@ -62,12 +49,6 @@ func _firstMatch( var regex = try Regex(regexStr, syntax: syntax).matchingSemantics(semanticLevel) let result = try regex.firstMatch(in: input) - if try !_validateListCompilation(regex) { - XCTFail( - "List compilation failed for '\(regexStr)'", - file: file, line: line) - } - func validateSubstring(_ substringInput: Substring) throws { // Sometimes the characters we add to a substring merge with existing // string members. This messes up cross-validation, so skip the test. @@ -117,7 +98,7 @@ func _firstMatch( .substring switch (result?[0].substring, roundTripResult) { case let (match?, rtMatch?): - XCTAssertEqual(match, rtMatch) + XCTAssertEqual(match, rtMatch, file: file, line: line) case (nil, nil): break // okay case let (match?, _): diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index 0fff0ebb1..c5e04bc9d 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -17,7 +17,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#, #/(?:(?:a+b)+b)/#]) func requiredFirstAtom(pattern: Regex) throws { - let list = DSLList(tree: pattern.program.tree) + let list = pattern.program.tree let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") } @@ -25,7 +25,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/[a]/#, #/a?bc/#]) func noRequiredFirstAtom(pattern: Regex) throws { - let list = DSLList(tree: pattern.program.tree) + let list = pattern.program.tree let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") } @@ -33,7 +33,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/\d+a/#, #/a+A/#]) func autoPossessify(pattern: Regex) throws { - var list = DSLList(tree: pattern.program.tree) + var list = pattern.program.tree list.autoPossessify() for node in list.nodes { switch node { @@ -53,7 +53,7 @@ import Testing #/(?:(?:ab)+b)/#, // single atom quantifications only ]) func noAutoPossessify(pattern: Regex) throws { - var list = DSLList(tree: pattern.program.tree) + var list = pattern.program.tree list.autoPossessify() for node in list.nodes { switch node { From a8f7ee9df55fd5a969c59c5dc502c8a4ac1d92b4 Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 22 Oct 2025 14:23:52 -0500 Subject: [PATCH 2/5] Minor test improvements --- Tests/RegexTests/LiteralPrinterTests.swift | 3 +++ Tests/RegexTests/MatchTests.swift | 8 +++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/Tests/RegexTests/LiteralPrinterTests.swift b/Tests/RegexTests/LiteralPrinterTests.swift index dd15d8cd1..69f273fd5 100644 --- a/Tests/RegexTests/LiteralPrinterTests.swift +++ b/Tests/RegexTests/LiteralPrinterTests.swift @@ -41,6 +41,9 @@ extension RegexTests { } func testUnicodeEscapes() throws { + let regex0 = #/[a]\u0301/# + _literalTest(regex0, expected: #"[a]\u0301"#) + let regex = #/\r\n\t cafe\u{301} \u{1D11E}/# _literalTest(regex, expected: #"\r\n\t cafe\u0301 \U0001D11E"#) } diff --git a/Tests/RegexTests/MatchTests.swift b/Tests/RegexTests/MatchTests.swift index 62c97b8f2..d67041c55 100644 --- a/Tests/RegexTests/MatchTests.swift +++ b/Tests/RegexTests/MatchTests.swift @@ -26,14 +26,16 @@ struct MatchError: Error { @available(SwiftStdlib 6.0, *) func _roundTripLiteral( _ regexStr: String, - syntax: SyntaxOptions + syntax: SyntaxOptions, + file: StaticString = #file, + line: UInt = #line ) throws -> Regex? { guard let pattern = try Regex(regexStr, syntax: syntax)._literalPattern else { return nil } let remadeRegex = try Regex(pattern) - XCTAssertEqual(pattern, remadeRegex._literalPattern) + XCTAssertEqual(pattern, remadeRegex._literalPattern, file: file, line: line) return remadeRegex } @@ -91,7 +93,7 @@ func _firstMatch( } if #available(SwiftStdlib 6.0, *) { - let roundTripRegex = try? _roundTripLiteral(regexStr, syntax: syntax) + let roundTripRegex = try? _roundTripLiteral(regexStr, syntax: syntax, file: file, line: line) let roundTripResult = try? roundTripRegex? .matchingSemantics(semanticLevel) .firstMatch(in: input)?[0] From 45084796a4abf4821850aaaa5612ec0a987c446a Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 22 Oct 2025 14:24:35 -0500 Subject: [PATCH 3/5] DSLList generation fixes Was missing the AST-sourced indicator node plus a DSLList-based implementation of `guaranteesForwardProgress`. --- .../ByteCodeGen+DSLList.swift | 57 ++++++++++++++++++- Sources/_StringProcessing/Regex/DSLList.swift | 2 +- 2 files changed, 56 insertions(+), 3 deletions(-) diff --git a/Sources/_StringProcessing/ByteCodeGen+DSLList.swift b/Sources/_StringProcessing/ByteCodeGen+DSLList.swift index 330018878..3394b319f 100644 --- a/Sources/_StringProcessing/ByteCodeGen+DSLList.swift +++ b/Sources/_StringProcessing/ByteCodeGen+DSLList.swift @@ -351,6 +351,59 @@ fileprivate extension Compiler.ByteCodeGen { } } + func _guaranteesForwardProgressImpl(_ list: ArraySlice, position: inout Int) -> Bool { + guard position < list.endIndex else { return false } + let node = list[position] + position += 1 + switch node { + case .orderedChoice(let children): + return (0.. 0 else { return false } + return _guaranteesForwardProgressImpl(list, position: &position) + case .limitCaptureNesting, .ignoreCapturesInTypedOutput: + return _guaranteesForwardProgressImpl(list, position: &position) + default: return false + } + } + + func guaranteesForwardProgress(_ list: ArraySlice) -> Bool { + var pos = list.startIndex + return _guaranteesForwardProgressImpl(list, position: &pos) + } + mutating func emitQuantification( _ amount: AST.Quantification.Amount, _ kind: DSLTree.QuantificationKind, @@ -526,8 +579,8 @@ fileprivate extension Compiler.ByteCodeGen { let startPosition: PositionRegister? // FIXME: forward progress check?! let emitPositionChecking = - (!optimizationsEnabled || (list.first?.guaranteesForwardProgress != true)) && - maxExtraTrips == nil + (!optimizationsEnabled || !guaranteesForwardProgress(list)) + && maxExtraTrips == nil if emitPositionChecking { startPosition = builder.makePositionRegister() diff --git a/Sources/_StringProcessing/Regex/DSLList.swift b/Sources/_StringProcessing/Regex/DSLList.swift index 034bc9ee9..40f08a502 100644 --- a/Sources/_StringProcessing/Regex/DSLList.swift +++ b/Sources/_StringProcessing/Regex/DSLList.swift @@ -34,7 +34,7 @@ struct DSLList { } init(ast: AST) { - self.nodes = [] + self.nodes = [.limitCaptureNesting(TEMP_FAKE_NODE)] try! ast.root.convert(into: &nodes) } From f343f041783581637c8d47e2cee1bdb4e0b05f3c Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 22 Oct 2025 14:56:44 -0500 Subject: [PATCH 4/5] Resolve AST/Literal conversion issues --- .../_StringProcessing/LiteralPrinter.swift | 10 +++++++- .../Regex/ASTConversion.swift | 25 ++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/Sources/_StringProcessing/LiteralPrinter.swift b/Sources/_StringProcessing/LiteralPrinter.swift index 89a28910a..7a799221c 100644 --- a/Sources/_StringProcessing/LiteralPrinter.swift +++ b/Sources/_StringProcessing/LiteralPrinter.swift @@ -609,7 +609,15 @@ extension String { } func escapingConfusableCharacters() -> String { - lazy.map(\.escapingConfusable).joined() + reduce(into: "") { result, ch in + for scalar in ch.unicodeScalars { + if scalar.isPrintableASCII { + result.append(Character(scalar)) + } else { + result.append(scalar.escapedString) + } + } + } } } diff --git a/Sources/_StringProcessing/Regex/ASTConversion.swift b/Sources/_StringProcessing/Regex/ASTConversion.swift index e1370a5a9..2c376fd6d 100644 --- a/Sources/_StringProcessing/Regex/ASTConversion.swift +++ b/Sources/_StringProcessing/Regex/ASTConversion.swift @@ -95,6 +95,27 @@ extension AST.Node { } } + func appendAtom(_ atom: AST.Atom, to str: inout String) -> Bool { + switch atom.kind { + case .char(let c): + str.append(c) + return true + case .scalar(let s): + str.append(Character(s.value)) + return true + case .escaped(let c): + guard let value = c.scalarValue else { return false } + str.append(Character(value)) + return true + case .scalarSequence(let seq): + str.append(contentsOf: seq.scalarValues.lazy.map(Character.init)) + return true + + default: + return false + } + } + switch self { case .alternation(let v): return v.children case .concatenation(let v): @@ -103,9 +124,7 @@ extension AST.Node { .coalescing(with: "", into: { AST.Node.quote(.init($0, .fake)) }) { str, node in switch node { case .atom(let a): - guard let c = a.literalCharacterValue else { return false } - str.append(c) - return true + return appendAtom(a, to: &str) case .quote(let q): str += q.literal return true From 37095f079f3fbe3e84adc113e4d9703643d51aae Mon Sep 17 00:00:00 2001 From: Nate Cook Date: Wed, 22 Oct 2025 15:04:49 -0500 Subject: [PATCH 5/5] Let's use the right name for the list --- .../_StringProcessing/LiteralPrinter.swift | 2 +- .../Regex/AnyRegexOutput.swift | 2 +- Sources/_StringProcessing/Regex/Core.swift | 30 +++++++++---------- Sources/_StringProcessing/Regex/Options.swift | 2 +- .../Utility/RegexFactory.swift | 6 ++-- .../Utility/TypeVerification.swift | 4 +-- Tests/RegexTests/DSLListTests.swift | 4 +-- Tests/RegexTests/OptimizationTests.swift | 8 ++--- 8 files changed, 29 insertions(+), 29 deletions(-) diff --git a/Sources/_StringProcessing/LiteralPrinter.swift b/Sources/_StringProcessing/LiteralPrinter.swift index 7a799221c..d9cdbb04e 100644 --- a/Sources/_StringProcessing/LiteralPrinter.swift +++ b/Sources/_StringProcessing/LiteralPrinter.swift @@ -36,7 +36,7 @@ extension Regex { @available(SwiftStdlib 6.0, *) public var _literalPattern: String? { var gen = LiteralPrinter(options: MatchingOptions()) - var list = self.program.tree.nodes[...] + var list = self.program.list.nodes[...] try? gen.outputList(&list) return gen.canonicalLiteralString } diff --git a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift index 8b754c1ca..a4e405f8c 100644 --- a/Sources/_StringProcessing/Regex/AnyRegexOutput.swift +++ b/Sources/_StringProcessing/Regex/AnyRegexOutput.swift @@ -265,7 +265,7 @@ extension Regex { /// - Parameter name: The name to look for among the regular expression's /// capture groups. Capture group names are case sensitive. public func contains(captureNamed name: String) -> Bool { - program.tree.captureList.captures.contains(where: { + program.list.captureList.captures.contains(where: { $0.name == name }) } diff --git a/Sources/_StringProcessing/Regex/Core.swift b/Sources/_StringProcessing/Regex/Core.swift index 3921274e1..10bb873ff 100644 --- a/Sources/_StringProcessing/Regex/Core.swift +++ b/Sources/_StringProcessing/Regex/Core.swift @@ -93,10 +93,10 @@ public struct Regex: RegexComponent { let program: Program var hasCapture: Bool { - program.tree.hasCapture + program.list.hasCapture } var hasChildren: Bool { - program.tree.hasChildren + program.list.hasChildren } init(ast: AST) { @@ -153,7 +153,7 @@ extension Regex { /// FIXME: If Regex is the unit of composition, then it should be a Node instead, /// and we should have a separate type that handled both global options and, /// likely, compilation/caching. - var tree: DSLList + var list: DSLList /// OptionSet of compiler options for testing purposes fileprivate var compileOptions: _CompileOptions = .default @@ -183,7 +183,7 @@ extension Regex { } // Compile the DSLTree into a lowered program and store it atomically. - let compiledProgram = try! Compiler(tree: tree, compileOptions: compileOptions).emit() + let compiledProgram = try! Compiler(tree: list, compileOptions: compileOptions).emit() let storedNewProgram = _stdlib_atomicInitializeARCRef( object: _loweredProgramStoragePtr, desired: ProgramBox(compiledProgram)) @@ -196,15 +196,15 @@ extension Regex { } init(ast: AST) { - self.tree = DSLList(ast: ast) + self.list = DSLList(ast: ast) } init(tree: DSLTree) { - self.tree = DSLList(tree: tree) + self.list = DSLList(tree: tree) } init(list: DSLList) { - self.tree = list + self.list = list } } @@ -228,7 +228,7 @@ extension Regex { // } var list: DSLList { - program.tree + program.list } init(node: DSLTree.Node) { @@ -240,19 +240,19 @@ extension Regex { } func appending(_ node: DSLTree.Node) -> Regex { - var list = program.tree + var list = program.list list.append(node) return Regex(list: list) } func appending(contentsOf node: some Collection) -> Regex { - var list = program.tree + var list = program.list list.append(contentsOf: node) return Regex(list: list) } func concatenating(_ other: some Collection) -> Regex { - var nodes = program.tree.nodes + var nodes = program.list.nodes switch nodes[0] { case .concatenation(let children): nodes[0] = .concatenation(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1)) @@ -265,7 +265,7 @@ extension Regex { } func alternating(with other: some Collection) -> Regex { - var nodes = program.tree.nodes + var nodes = program.list.nodes switch nodes[0] { case .orderedChoice(let children): nodes[0] = .orderedChoice(Array(repeating: TEMP_FAKE_NODE, count: children.count + 1)) @@ -278,13 +278,13 @@ extension Regex { } func prepending(_ node: DSLTree.Node) -> Regex { - var list = program.tree + var list = program.list list.prepend(node) return Regex(list: list) } func prepending(contentsOf node: some Collection) -> Regex { - var list = program.tree + var list = program.list list.prepend(contentsOf: node) return Regex(list: list) } @@ -309,7 +309,7 @@ extension Regex { return true case .recompile: let _ = try Compiler( - tree: program.tree, + tree: program.list, compileOptions: program.compileOptions).emit() return true } diff --git a/Sources/_StringProcessing/Regex/Options.swift b/Sources/_StringProcessing/Regex/Options.swift index 0aca30068..5b3121831 100644 --- a/Sources/_StringProcessing/Regex/Options.swift +++ b/Sources/_StringProcessing/Regex/Options.swift @@ -295,7 +295,7 @@ extension RegexComponent { ? AST.MatchingOptionSequence(adding: [.init(option, location: .fake)]) : AST.MatchingOptionSequence(removing: [.init(option, location: .fake)]) - var list = regex.program.tree + var list = regex.program.list list.nodes.insert(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), TEMP_FAKE_NODE), at: 0) return Regex(list: list) } diff --git a/Sources/_StringProcessing/Utility/RegexFactory.swift b/Sources/_StringProcessing/Utility/RegexFactory.swift index 9364207ed..c4865e141 100644 --- a/Sources/_StringProcessing/Utility/RegexFactory.swift +++ b/Sources/_StringProcessing/Utility/RegexFactory.swift @@ -28,7 +28,7 @@ public struct _RegexFactory { // Don't wrap `child` again if it's a leaf node. child.regex.list.hasChildren ? child.regex.prepending(.ignoreCapturesInTypedOutput(TEMP_FAKE_NODE)) as Regex - : .init(list: child.regex.program.tree) + : .init(list: child.regex.program.list) } @available(SwiftStdlib 5.7, *) @@ -36,7 +36,7 @@ public struct _RegexFactory { _ left: some RegexComponent, _ right: some RegexComponent ) -> Regex { - left.regex.concatenating(right.regex.program.tree.nodes) + left.regex.concatenating(right.regex.program.list.nodes) } @available(SwiftStdlib 5.7, *) @@ -44,7 +44,7 @@ public struct _RegexFactory { _ left: some RegexComponent, _ right: some RegexComponent ) -> Regex { - left.regex.alternating(with: right.regex.program.tree.nodes) + left.regex.alternating(with: right.regex.program.list.nodes) } @_spi(RegexBuilder) diff --git a/Sources/_StringProcessing/Utility/TypeVerification.swift b/Sources/_StringProcessing/Utility/TypeVerification.swift index 9f1539973..566127220 100644 --- a/Sources/_StringProcessing/Utility/TypeVerification.swift +++ b/Sources/_StringProcessing/Utility/TypeVerification.swift @@ -21,7 +21,7 @@ extension Regex { var tupleElements: [Any.Type] = [] var labels = "" - for capture in program.tree.captureList.captures { + for capture in program.list.captureList.captures { var captureType = capture.type var i = capture.optionalDepth @@ -41,7 +41,7 @@ extension Regex { // If we have no captures, then our Regex must be Regex. if tupleElements.count == 1 { - let wholeMatchType = program.tree.wholeMatchType + let wholeMatchType = program.list.wholeMatchType return (Output.self == wholeMatchType, wholeMatchType) } diff --git a/Tests/RegexTests/DSLListTests.swift b/Tests/RegexTests/DSLListTests.swift index b29f2f412..3b99b40f3 100644 --- a/Tests/RegexTests/DSLListTests.swift +++ b/Tests/RegexTests/DSLListTests.swift @@ -21,13 +21,13 @@ struct DSLListTests { (#/a(?:b+)c*/#, 8), // literal, concat, a, noncap grp, quant, b, quant, c ]) func convertedNodeCount(regex: Regex, nodeCount: Int) { - let dslList = regex.program.tree + let dslList = regex.program.list #expect(dslList.nodes.count == nodeCount) } @Test(arguments: [#/a|b/#, #/a+b?c/#, #/abc/#, #/a(?:b+)c*/#, #/;[\r\n]/#, #/(?=(?:[1-9]|(?:a|b)))/#]) func compilationComparison(regex: Regex) throws { - let listCompiler = Compiler(list: regex.program.tree) + let listCompiler = Compiler(list: regex.program.list) let listProgram = try listCompiler.emitViaList() // #expect(treeProgram.instructions == listProgram.instructions) diff --git a/Tests/RegexTests/OptimizationTests.swift b/Tests/RegexTests/OptimizationTests.swift index c5e04bc9d..a60d9bf5f 100644 --- a/Tests/RegexTests/OptimizationTests.swift +++ b/Tests/RegexTests/OptimizationTests.swift @@ -17,7 +17,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a/#, #/a+/#, #/(?:a+)/#, #/(?:a)+/#, #/(?m)a+/#, #/ab?c/#, #/(?:a+)+$/#, #/(?:(?:a+b)+b)/#]) func requiredFirstAtom(pattern: Regex) throws { - let list = pattern.program.tree + let list = pattern.program.list let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom?.literalCharacterValue == "a", "Missing first character atom in '\(pattern._literalPattern!)'") } @@ -25,7 +25,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a?/#, #/(?:a|b)/#, #/[a]/#, #/a?bc/#]) func noRequiredFirstAtom(pattern: Regex) throws { - let list = pattern.program.tree + let list = pattern.program.list let atom = list.requiredFirstAtom(allowOptionsChanges: true) #expect(atom == nil, "Unexpected required first atom in '\(pattern._literalPattern!)'") } @@ -33,7 +33,7 @@ import Testing @available(macOS 9999, *) @Test(arguments: [#/a+b/#, #/a*b/#, #/\w+\s/#, #/(?:a+b|b+a)/#, #/\d+a/#, #/a+A/#]) func autoPossessify(pattern: Regex) throws { - var list = pattern.program.tree + var list = pattern.program.list list.autoPossessify() for node in list.nodes { switch node { @@ -53,7 +53,7 @@ import Testing #/(?:(?:ab)+b)/#, // single atom quantifications only ]) func noAutoPossessify(pattern: Regex) throws { - var list = pattern.program.tree + var list = pattern.program.list list.autoPossessify() for node in list.nodes { switch node {