Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 55 additions & 2 deletions Sources/_StringProcessing/ByteCodeGen+DSLList.swift
Original file line number Diff line number Diff line change
Expand Up @@ -351,6 +351,59 @@ fileprivate extension Compiler.ByteCodeGen {
}
}

/// Reports whether the subtree rooted at `list[position]` is guaranteed to
/// consume input whenever it matches.
///
/// `list` is read as a flattened, pre-order tree: the children of a node
/// directly follow it. For every node kind handled explicitly below,
/// `position` is advanced past the node *and all of its descendants*, no
/// matter what the answer is, so the caller's next read lands on the next
/// sibling rather than on a leftover descendant.
///
/// - Parameters:
///   - list: The flattened nodes to inspect.
///   - position: Index of the node to inspect; advanced past the inspected
///     subtree on return.
/// - Returns: `true` only if the subtree cannot match without consuming input.
///   `false` is the conservative answer and is also returned when `position`
///   is already at `list.endIndex`.
func _guaranteesForwardProgressImpl(_ list: ArraySlice<DSLTree.Node>, position: inout Int) -> Bool {
  guard position < list.endIndex else { return false }
  let node = list[position]
  position += 1
  switch node {
  case .orderedChoice(let children):
    // Every alternative has to make progress. Do not short-circuit: each
    // child must be walked so that `position` ends up after the last one.
    var everyChildProgresses = true
    for _ in 0..<children.count {
      let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
      everyChildProgresses = everyChildProgresses && childProgresses
    }
    return everyChildProgresses
  case .concatenation(let children):
    // One progressing child is enough, but the remaining children still have
    // to be walked to keep `position` aligned for the caller.
    var anyChildProgresses = false
    for _ in 0..<children.count {
      let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
      anyChildProgresses = anyChildProgresses || childProgresses
    }
    return anyChildProgresses
  case .capture(_, _, _, _):
    return _guaranteesForwardProgressImpl(list, position: &position)
  case .nonCapturingGroup(let kind, _):
    // Walk the group's child before looking at the kind. Returning early for
    // lookarounds would leave `position` on the child, and an enclosing
    // concatenation (e.g. the body of `(?:(?=a)\b)*`) would then mistake that
    // child for its own next element and wrongly report progress.
    let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
    switch kind.ast {
    case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind:
      // Lookarounds never consume input themselves.
      return false
    default:
      return childProgresses
    }
  case .atom(let atom):
    switch atom {
    case .changeMatchingOptions, .assertion: return false
    // Captures may be nil, so a backreference may be a zero-length match.
    case .backreference: return false
    default: return true
    }
  case .trivia, .empty:
    return false
  case .quotedLiteral(let string):
    return !string.isEmpty
  case .consumer, .matcher:
    // Allow zero width consumers and matchers
    return false
  case .customCharacterClass(let ccc):
    return ccc.guaranteesForwardProgress
  case .quantification(let amount, _, _):
    // As with lookarounds: always walk the quantified child, then decide.
    let childProgresses = _guaranteesForwardProgressImpl(list, position: &position)
    let (atLeast, _) = amount.ast.bounds
    // A missing or zero minimum means the quantification may match nothing.
    guard let atLeast, atLeast > 0 else { return false }
    return childProgresses
  case .limitCaptureNesting, .ignoreCapturesInTypedOutput:
    return _guaranteesForwardProgressImpl(list, position: &position)
  // NOTE(review): the remaining node kinds answer `false` without skipping
  // any children they may have in `list`; their layout is not visible from
  // here, so `position` may be left inside their subtree — confirm.
  default: return false
  }
}

/// Reports whether the node at the start of `list` is guaranteed to consume
/// input whenever it matches.
///
/// Entry point for `_guaranteesForwardProgressImpl`: the walk starts at
/// `list.startIndex` and the final cursor position is discarded.
func guaranteesForwardProgress(_ list: ArraySlice<DSLTree.Node>) -> Bool {
  var cursor = list.startIndex
  let makesProgress = _guaranteesForwardProgressImpl(list, position: &cursor)
  return makesProgress
}

mutating func emitQuantification(
_ amount: AST.Quantification.Amount,
_ kind: DSLTree.QuantificationKind,
Expand Down Expand Up @@ -526,8 +579,8 @@ fileprivate extension Compiler.ByteCodeGen {
let startPosition: PositionRegister?
// FIXME: forward progress check?!
let emitPositionChecking =
(!optimizationsEnabled || (list.first?.guaranteesForwardProgress != true)) &&
maxExtraTrips == nil
(!optimizationsEnabled || !guaranteesForwardProgress(list))
&& maxExtraTrips == nil

if emitPositionChecking {
startPosition = builder.makePositionRegister()
Expand Down
37 changes: 25 additions & 12 deletions Sources/_StringProcessing/Compiler.swift
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,30 @@
internal import _RegexParser

class Compiler {
let tree: DSLTree
let tree: DSLList

// TODO: Or are these stored on the tree?
var options = MatchingOptions()
private var compileOptions: _CompileOptions = .default

init(ast: AST) {
self.tree = ast.dslTree
self.tree = DSLList(tree: ast.dslTree)
}

init(tree: DSLTree) {
self.tree = tree
self.tree = DSLList(tree: tree)
}

init(list: DSLList) {
self.tree = list
}

init(tree: DSLTree, compileOptions: _CompileOptions) {
self.tree = DSLList(tree: tree)
self.compileOptions = compileOptions
}

init(tree: DSLList, compileOptions: _CompileOptions) {
self.tree = tree
self.compileOptions = compileOptions
}
Expand All @@ -42,18 +51,20 @@ class Compiler {
compileOptions:
compileOptions,
captureList: tree.captureList)
return try codegen.emitRoot(tree.root)
fatalError()
// return try codegen.emitRoot(tree.root)
}

__consuming func emitViaList() throws -> MEProgram {
// TODO: Handle global options
var dslList = DSLList(tree: tree)
// var dslList = DSLList(tree: tree)
var codegen = ByteCodeGen(
options: options,
compileOptions:
compileOptions,
captureList: tree.captureList)
return try codegen.emitRoot(&dslList)
var tree = tree
return try codegen.emitRoot(&tree)
}
}

Expand Down Expand Up @@ -105,20 +116,22 @@ func _compileRegex(
_ syntax: SyntaxOptions = .traditional,
_ semanticLevel: RegexSemanticLevel? = nil
) throws -> MEProgram {
let ast = try parse(regex, syntax)
let dsl: DSLTree
var ast = try parse(regex, syntax)
let dsl: DSLList

switch semanticLevel?.base {
case .graphemeCluster:
let sequence = AST.MatchingOptionSequence(adding: [.init(.graphemeClusterSemantics, location: .fake)])
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
dsl = DSLList(ast: ast)
case .unicodeScalar:
let sequence = AST.MatchingOptionSequence(adding: [.init(.unicodeScalarSemantics, location: .fake)])
dsl = DSLTree(.nonCapturingGroup(.init(ast: .changeMatchingOptions(sequence)), ast.dslTree.root))
ast.root = AST.Node.group(AST.Group(.init(faking: .changeMatchingOptions(sequence)), ast.root, .fake))
dsl = DSLList(ast: ast)
case .none:
dsl = ast.dslTree
dsl = DSLList(ast: ast)
}
let program = try Compiler(tree: dsl).emit()
let program = try Compiler(list: dsl).emit()
return program
}

Expand Down
166 changes: 164 additions & 2 deletions Sources/_StringProcessing/LiteralPrinter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ extension Regex {
@available(SwiftStdlib 6.0, *)
public var _literalPattern: String? {
var gen = LiteralPrinter(options: MatchingOptions())
gen.outputNode(self.program.tree.root)
var list = self.program.list.nodes[...]
try? gen.outputList(&list)
return gen.canonicalLiteralString
}
}
Expand Down Expand Up @@ -83,6 +84,159 @@ fileprivate struct LiteralPrinter {
/// Records `node` by appending an `.inconvertible` segment that wraps it to
/// `segments`. Does not throw; printing continues after the call.
mutating func saveInconvertible(_ node: DSLTree.Node) {
segments.append(.inconvertible(node))
}

/// Records `node` as an inconvertible segment and then aborts printing.
///
/// - Parameter node: The node that has no literal representation.
/// - Throws: `Incovertible.error`, always, after the segment is recorded.
mutating func inconvertible(_ node: DSLTree.Node) throws {
  // Record first so the segment is kept even though the error unwinds.
  saveInconvertible(node)
  throw Incovertible.error
}
}

extension LiteralPrinter {
  /// Error thrown to abandon printing once a node without a literal
  /// representation has been encountered.
  enum Incovertible: Error {
    case error
  }

  /// Prints the node at the front of `list`, together with its descendants,
  /// and removes everything it printed from `list`.
  ///
  /// `list` is consumed as a flattened, pre-order tree: after a node is
  /// popped, its children are the next elements. Does nothing when `list` is
  /// empty.
  ///
  /// - Parameter list: The remaining nodes; consumed from the front.
  /// - Throws: `Incovertible.error` when a node cannot be written as a regex
  ///   literal. The offending node is recorded as an inconvertible segment
  ///   before the error is thrown.
  mutating func outputList(_ list: inout ArraySlice<DSLTree.Node>) throws {
    guard let node = list.popFirst() else {
      return
    }

    switch node {
    case let .orderedChoice(children):
      try outputAlternation(&list, count: children.count)
    case let .concatenation(children):
      try outputConcatenation(&list, count: children.count)

    case let .capture(name, nil, _, nil):
      options.beginScope()
      defer { options.endScope() }
      try outputCapture(&list, name: name)
    case .capture:
      // Captures that use a reference or a transform are unsupported
      try inconvertible(node)

    case let .nonCapturingGroup(kind, _):
      guard let kindPattern = kind._patternString else {
        try inconvertible(node)
        return
      }
      options.beginScope()
      defer { options.endScope() }

      output(kindPattern)
      if case .changeMatchingOptions(let optionSequence) = kind.ast {
        options.apply(optionSequence)
      }
      try outputList(&list)
      output(")")

    case .ignoreCapturesInTypedOutput(_),
         .limitCaptureNesting(_):
      // Transparent wrappers: only their single child is printed.
      try outputList(&list)
    case let .quantification(amount, kind, _):
      try outputQuantification(&list, amount: amount, kind: kind)
    case let .customCharacterClass(charClass):
      outputCustomCharacterClass(charClass)
    case let .atom(atom):
      outputAtom(atom)
    case let .quotedLiteral(literal):
      output(prepareQuotedLiteral(literal))

    case .trivia(_):
      // TODO: Include trivia?
      return
    case .empty:
      return

    case .conditional, .absentFunction, .consumer, .matcher, .characterPredicate:
      // Abort like the other inconvertible cases above. Merely recording the
      // node and carrying on would keep printing from a list in which this
      // node's children (if any) have not been skipped.
      try inconvertible(node)
    }
  }

  /// Prints `count` consecutive subtrees from `list`, separated by `|`.
  mutating func outputAlternation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
    for i in 0..<count {
      if i != 0 {
        output("|")
      }
      try outputList(&list)
    }
  }

  /// Prints `count` consecutive subtrees from `list` back to back.
  mutating func outputConcatenation(_ list: inout ArraySlice<DSLTree.Node>, count: Int) throws {
    for _ in 0..<count {
      try outputList(&list)
    }
  }

  /// Prints a capture group, named when `name` is non-nil, around the next
  /// subtree in `list`.
  mutating func outputCapture(_ list: inout ArraySlice<DSLTree.Node>, name: String?) throws {
    if let name {
      output("(?<\(name)>")
    } else {
      output("(")
    }
    try outputList(&list)
    output(")")
  }

  /// Reports whether the subtree at the front of `list` has to be wrapped in
  /// a non-capturing group before a quantifier can be appended to it.
  ///
  /// Grouping is required when the subtree prints as more than one element:
  /// a concatenation or alternation with several children, or a quoted
  /// literal that prints as more than one character. Without the group, the
  /// quantifier would bind to the last element only (`ab*`, `a|b*`).
  func requiresGrouping(_ list: ArraySlice<DSLTree.Node>) -> Bool {
    guard let node = list.first else { return false } // malformed?

    // Shared rule for nodes that print nothing but their children.
    func wrapperRequiresGrouping(childCount: Int) -> Bool {
      switch childCount {
      case 0:
        return false
      case 1:
        // A single child prints exactly as it would on its own; it is the
        // element right after the wrapper node.
        return requiresGrouping(list.dropFirst())
      default:
        return true
      }
    }

    switch node {
    case .concatenation(let children):
      return wrapperRequiresGrouping(childCount: children.count)

    case .orderedChoice(let children):
      return wrapperRequiresGrouping(childCount: children.count)

    case .quotedLiteral(let literal):
      return prepareQuotedLiteral(literal).count > 1

    default:
      return false
    }
  }

  /// Prints the next subtree in `list` followed by the quantifier described
  /// by `amount` and `kind`.
  ///
  /// - Parameters:
  ///   - list: The remaining nodes; the quantified subtree is consumed.
  ///   - amount: The repetition amount to print (`*`, `+`, `?`, `{…}`).
  ///   - kind: The quantification kind, printed after the amount.
  /// - Throws: Rethrows any error from printing the quantified subtree.
  mutating func outputQuantification(
    _ list: inout ArraySlice<DSLTree.Node>,
    amount: DSLTree._AST.QuantificationAmount,
    kind: DSLTree.QuantificationKind
  ) throws {
    // RegexBuilder regexes can have children that need grouping before a
    // quantifier can be applied to them as a whole.
    if requiresGrouping(list) {
      output("(?:")
      try outputList(&list)
      output(")")
    } else {
      try outputList(&list)
    }

    // NOTE(review): the force-unwraps below assume every bound carries a
    // value; a bound without one would trap here — confirm against callers.
    switch amount.ast {
    case .zeroOrMore:
      output("*")
    case .oneOrMore:
      output("+")
    case .zeroOrOne:
      output("?")
    case let .exactly(n):
      output("{\(n.value!)}")
    case let .nOrMore(n):
      output("{\(n.value!),}")
    case let .upToN(n):
      output("{,\(n.value!)}")
    case let .range(low, high):
      output("{\(low.value!),\(high.value!)}")
    #if RESILIENT_LIBRARIES
    @unknown default:
      fatalError()
    #endif
    }

    outputQuantificationKind(kind)
  }
}

extension LiteralPrinter {
Expand Down Expand Up @@ -455,7 +609,15 @@ extension String {
}

func escapingConfusableCharacters() -> String {
lazy.map(\.escapingConfusable).joined()
reduce(into: "") { result, ch in
for scalar in ch.unicodeScalars {
if scalar.isPrintableASCII {
result.append(Character(scalar))
} else {
result.append(scalar.escapedString)
}
}
}
}
}

Expand Down
Loading
Loading