Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
855 changes: 855 additions & 0 deletions Sources/_StringProcessing/ByteCodeGen+DSLList.swift

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ extension Compiler {
var hasEmittedFirstMatchableAtom = false

private let compileOptions: _CompileOptions
fileprivate var optimizationsEnabled: Bool {
internal var optimizationsEnabled: Bool {
!compileOptions.contains(.disableOptimizations)
}

Expand Down Expand Up @@ -61,7 +61,7 @@ extension Compiler.ByteCodeGen {
}
}

fileprivate extension Compiler.ByteCodeGen {
extension Compiler.ByteCodeGen {
mutating func emitAtom(_ a: DSLTree.Atom) throws {
defer {
if a.isMatchable {
Expand Down Expand Up @@ -809,7 +809,7 @@ fileprivate extension Compiler.ByteCodeGen {
default:
return false
}
case .convertedRegexLiteral(let node, _):
case .limitCaptureNesting(let node):
return tryEmitFastQuant(node, kind, minTrips, maxExtraTrips)
case .nonCapturingGroup(let groupKind, let node):
// .nonCapture nonCapturingGroups are ignored during compilation
Expand Down Expand Up @@ -1203,7 +1203,7 @@ fileprivate extension Compiler.ByteCodeGen {
switch node {
case .concatenation(let ch):
return ch.flatMap(flatten)
case .convertedRegexLiteral(let n, _), .ignoreCapturesInTypedOutput(let n):
case .ignoreCapturesInTypedOutput(let n), .limitCaptureNesting(let n):
return flatten(n)
default:
return [node]
Expand Down Expand Up @@ -1283,6 +1283,9 @@ fileprivate extension Compiler.ByteCodeGen {
case let .ignoreCapturesInTypedOutput(child):
try emitNode(child)

case let .limitCaptureNesting(child):
return try emitNode(child)

case .conditional:
throw Unsupported("Conditionals")

Expand All @@ -1306,9 +1309,6 @@ fileprivate extension Compiler.ByteCodeGen {
case let .quotedLiteral(s):
emitQuotedLiteral(s)

case let .convertedRegexLiteral(n, _):
return try emitNode(n)

case .absentFunction:
throw Unsupported("absent function")
case .consumer:
Expand Down Expand Up @@ -1359,8 +1359,6 @@ extension DSLTree.Node {
return false
case .quotedLiteral(let string):
return !string.isEmpty
case .convertedRegexLiteral(let node, _):
return node.guaranteesForwardProgress
case .consumer, .matcher:
// Allow zero width consumers and matchers
return false
Expand All @@ -1369,6 +1367,8 @@ extension DSLTree.Node {
case .quantification(let amount, _, let child):
let (atLeast, _) = amount.ast.bounds
return atLeast ?? 0 > 0 && child.guaranteesForwardProgress
case .limitCaptureNesting(let node), .ignoreCapturesInTypedOutput(let node):
return node.guaranteesForwardProgress
default: return false
}
}
Expand Down
15 changes: 15 additions & 0 deletions Sources/_StringProcessing/Compiler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ class Compiler {
}

__consuming func emit() throws -> MEProgram {
try emitViaList()
}

__consuming func emitViaTree() throws -> MEProgram {
// TODO: Handle global options
var codegen = ByteCodeGen(
options: options,
Expand All @@ -40,6 +44,17 @@ class Compiler {
captureList: tree.captureList)
return try codegen.emitRoot(tree.root)
}

__consuming func emitViaList() throws -> MEProgram {
// TODO: Handle global options
let dslList = DSLList(tree: tree)
var codegen = ByteCodeGen(
options: options,
compileOptions:
compileOptions,
captureList: tree.captureList)
return try codegen.emitRoot(dslList)
}
}

/// Hashable wrapper for `Any.Type`.
Expand Down
6 changes: 2 additions & 4 deletions Sources/_StringProcessing/LiteralPrinter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -116,11 +116,9 @@ extension LiteralPrinter {
outputNode(child)
output(")")

case let .ignoreCapturesInTypedOutput(child):
case let .ignoreCapturesInTypedOutput(child),
let .limitCaptureNesting(child):
outputNode(child)
case .convertedRegexLiteral(let node, _):
outputNode(node)

case let .quantification(amount, kind, node):
outputQuantification(amount, kind, node)
case let .customCharacterClass(charClass):
Expand Down
27 changes: 3 additions & 24 deletions Sources/_StringProcessing/PrintAsPattern.swift
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ extension PrettyPrinter {
case let .ignoreCapturesInTypedOutput(child):
printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel)

case let .limitCaptureNesting(child):
printAsPattern(convertedFromAST: child, isTopLevel: isTopLevel)

case .conditional:
print("/* TODO: conditional */")

Expand Down Expand Up @@ -258,20 +261,6 @@ extension PrettyPrinter {

break

case let .convertedRegexLiteral(.atom(a), _):
if let pattern = a._patternBase(&self), pattern.canBeWrapped {
printAtom(pattern.0)
return
}

break
case let .convertedRegexLiteral(.customCharacterClass(ccc), _):
if ccc.isSimplePrint {
printSimpleCCC(ccc)
return
}

break
default:
break
}
Expand Down Expand Up @@ -305,13 +294,6 @@ extension PrettyPrinter {
case let .quotedLiteral(v):
print(v._quoted)

case let .convertedRegexLiteral(n, _):
// FIXME: This recursion coordinates with back-off
// check above, so it should work out. Need a
// cleaner way to do this. This means the argument
// label is a lie.
printAsPattern(convertedFromAST: n, isTopLevel: isTopLevel)

case let .customCharacterClass(ccc):
printAsPattern(ccc)

Expand Down Expand Up @@ -1431,9 +1413,6 @@ extension DSLTree.Node {
result += node.getNamedCaptures()
}

case .convertedRegexLiteral(let node, _):
result += node.getNamedCaptures()

case .quantification(_, _, let node):
result += node.getNamedCaptures()

Expand Down
20 changes: 2 additions & 18 deletions Sources/_StringProcessing/Regex/ASTConversion.swift
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,13 @@ internal import _RegexParser

extension AST {
var dslTree: DSLTree {
return DSLTree(root.dslTreeNode)
return DSLTree(.limitCaptureNesting(root.dslTreeNode))
}
}

extension AST.Node {
/// Converts an AST node to its equivalent `DSLTree.Node`.
var dslTreeNode: DSLTree.Node {
func wrap(_ node: DSLTree.Node) -> DSLTree.Node {
switch node {
case .convertedRegexLiteral:
// FIXME: DSL can have one item concats
// assertionFailure("Double wrapping?")
return node
default:
break
}
// TODO: Should we do this for the
// single-concatenation child too, or should?
// we wrap _that_?
return .convertedRegexLiteral(node, .init(ast: self))
}

// Convert the top-level node without wrapping
func convert() throws -> DSLTree.Node {
switch self {
Expand Down Expand Up @@ -105,9 +90,8 @@ extension AST.Node {
}
}

// FIXME: make total function again
let converted = try! convert()
return wrap(converted)
return converted
}
}

Expand Down
96 changes: 96 additions & 0 deletions Sources/_StringProcessing/Regex/DSLList.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2025 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

/// A flat, ordered collection of `DSLTree.Node` values.
struct DSLList {
  /// The nodes held by this list, in order.
  var nodes: [DSLTree.Node]

  /// Creates a list that stores `nodes` as given.
  init(_ nodes: [DSLTree.Node]) {
    self.nodes = nodes
  }

  /// Creates a list whose only element is `initial`.
  init(_ initial: DSLTree.Node) {
    self.init([initial])
  }

  /// Creates a list from the elements produced by `tree.depthFirst`,
  /// in the order that sequence yields them.
  init(tree: DSLTree) {
    self.init(Array(tree.depthFirst))
  }
}

extension DSLTree.Node {
  /// The number of immediate child nodes this node reports.
  ///
  /// Choice and concatenation nodes report the size of their child array;
  /// every other case reports a fixed value of either one or zero.
  ///
  /// NOTE(review): `.conditional` is counted as a single child and
  /// `.absentFunction` as none — confirm both against how those cases
  /// are actually laid out by the consumers of this count.
  var directChildren: Int {
    return switch self {
    case .orderedChoice(let alternatives):
      alternatives.count

    case .concatenation(let components):
      components.count

    case .capture, .nonCapturingGroup, .quantification,
        .ignoreCapturesInTypedOutput, .limitCaptureNesting,
        .conditional:
      1

    case .trivia, .empty, .quotedLiteral,
        .consumer, .matcher, .characterPredicate,
        .customCharacterClass, .atom,
        .absentFunction:
      0
    }
  }
}

extension DSLTree {
  /// A single-pass sequence that yields the nodes reachable from a root,
  /// emitting each node before its children and visiting children from
  /// left to right.
  ///
  /// The children of a node are whatever the `getChildren` closure supplied
  /// at initialization returns for it.
  struct DepthFirst: Sequence, IteratorProtocol {
    typealias Element = DSLTree.Node

    /// Nodes waiting to be yielded; the last frame is yielded next.
    private var stack: [Frame]

    /// Produces the children to traverse for a given node.
    private let getChildren: (Element) -> [Element]

    /// A pending node paired with the children computed for it when it was
    /// pushed, so they are computed only once per node.
    private struct Frame {
      let node: Element
      let children: [Element]
    }

    /// Creates a traversal starting at `root`.
    ///
    /// - Parameters:
    ///   - root: The first node to be yielded.
    ///   - getChildren: Returns the children to traverse for a node.
    fileprivate init(
      root: Element,
      getChildren: @escaping (Element) -> [Element]
    ) {
      self.getChildren = getChildren
      self.stack = [Frame(node: root, children: getChildren(root))]
    }

    /// Returns the next node in the traversal, or `nil` once every node
    /// has been yielded.
    mutating func next() -> Element? {
      guard let top = stack.popLast() else { return nil }

      // Push children in reverse so leftmost comes out first.
      for child in top.children.reversed() {
        stack.append(Frame(node: child, children: getChildren(child)))
      }

      // Since we coalesce the children before adding them to the stack,
      // we need an exact matching number of children in the list's
      // concatenation node, so that it can provide the correct component
      // count. Reusing the frame's children (rather than recomputing them)
      // guarantees the emitted node agrees with what was just pushed.
      // This will go away/change when .concatenation only stores a count.
      return switch top.node {
      case .concatenation:
        .concatenation(top.children)
      default:
        top.node
      }
    }
  }

  /// A depth-first traversal of this tree starting at `root`, using each
  /// node's `coalescedChildren` as its children.
  var depthFirst: DepthFirst {
    DepthFirst(root: root, getChildren: {
      $0.coalescedChildren
    })
  }
}
Loading