Skip to content

Commit

Permalink
Break out of quantification loop if there is no forward progress (swi…
Browse files Browse the repository at this point in the history
…ftlang#560)

This fixes infinite loops when we loop over an internal node that does not have any forward progress. Also included is an optimization to only emit the check/break instructions if we have a case that might result in an infinite loop (possibly non-progressing inner node + unlimited quantification)
  • Loading branch information
rctcwyvrn committed Jul 12, 2022
1 parent 65178f9 commit cf6868c
Show file tree
Hide file tree
Showing 9 changed files with 205 additions and 11 deletions.
65 changes: 65 additions & 0 deletions Sources/_StringProcessing/ByteCodeGen.swift
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,12 @@ fileprivate extension Compiler.ByteCodeGen {
decrement %minTrips and fallthrough
loop-body:
<if can't guarantee forward progress && extraTrips = nil>:
mov currentPosition %pos
evaluate the subexpression
<if can't guarantee forward progress && extraTrips = nil>:
if %pos is currentPosition:
goto exit
goto min-trip-count control block
exit-policy control block:
Expand Down Expand Up @@ -646,7 +651,28 @@ fileprivate extension Compiler.ByteCodeGen {
// <subexpression>
// branch min-trip-count
builder.label(loopBody)

// if we aren't sure if the child node will have forward progress and
// we have an unbounded quantification
let startPosition: PositionRegister?
let emitPositionChecking =
(!optimizationsEnabled || !child.guaranteesForwardProgress) &&
extraTrips == nil

if emitPositionChecking {
startPosition = builder.makePositionRegister()
builder.buildMoveCurrentPosition(into: startPosition!)
} else {
startPosition = nil
}
try emitNode(child)
if emitPositionChecking {
// in all quantifier cases, no matter what minTrips or extraTrips is,
// if we have a successful non-advancing match, branch to exit because it
// can match an arbitrary number of times
builder.buildCondBranch(to: exit, ifSamePositionAs: startPosition!)
}

if minTrips <= 1 {
// fallthrough
} else {
Expand Down Expand Up @@ -832,3 +858,42 @@ fileprivate extension Compiler.ByteCodeGen {
return nil
}
}

extension DSLTree.Node {
var guaranteesForwardProgress: Bool {
switch self {
case .orderedChoice(let children):
return children.allSatisfy { $0.guaranteesForwardProgress }
case .concatenation(let children):
return children.contains(where: { $0.guaranteesForwardProgress })
case .capture(_, _, let node, _):
return node.guaranteesForwardProgress
case .nonCapturingGroup(let kind, let child):
switch kind.ast {
case .lookahead, .negativeLookahead, .lookbehind, .negativeLookbehind:
return false
default: return child.guaranteesForwardProgress
}
case .atom(let atom):
switch atom {
case .changeMatchingOptions, .assertion: return false
default: return true
}
case .trivia, .empty:
return false
case .quotedLiteral(let string):
return !string.isEmpty
case .convertedRegexLiteral(let node, _):
return node.guaranteesForwardProgress
case .consumer, .matcher:
// Allow zero width consumers and matchers
return false
case .customCharacterClass:
return true
case .quantification(let amount, _, let child):
let (atLeast, _) = amount.ast.bounds
return atLeast ?? 0 > 0 && child.guaranteesForwardProgress
default: return false
}
}
}
10 changes: 7 additions & 3 deletions Sources/_StringProcessing/Engine/Backtracking.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,18 @@ extension Processor {
// The int registers store values that can be relevant to
// backtracking, such as the number of trips in a quantification.
var intRegisters: [Int]
// Same with position registers
var posRegisters: [Input.Index]

var destructure: (
pc: InstructionAddress,
pos: Position?,
stackEnd: CallStackAddress,
captureEnds: [_StoredCapture],
intRegisters: [Int]
intRegisters: [Int],
PositionRegister: [Input.Index]
) {
(pc, pos, stackEnd, captureEnds, intRegisters)
(pc, pos, stackEnd, captureEnds, intRegisters, posRegisters)
}
}

Expand All @@ -53,7 +56,8 @@ extension Processor {
pos: addressOnly ? nil : currentPosition,
stackEnd: .init(callStack.count),
captureEnds: storedCaptures,
intRegisters: registers.ints)
intRegisters: registers.ints,
posRegisters: registers.positions)
}
}

Expand Down
6 changes: 3 additions & 3 deletions Sources/_StringProcessing/Engine/InstPayload.swift
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,10 @@ extension Instruction.Payload {
interpretPair()
}

init(pos: PositionRegister, pos2: PositionRegister) {
self.init(pos, pos2)
init(addr: InstructionAddress, position: PositionRegister) {
self.init(addr, position)
}
var pairedPosPos: (PositionRegister, PositionRegister) {
var pairedAddrPos: (InstructionAddress, PositionRegister) {
interpretPair()
}

Expand Down
18 changes: 18 additions & 0 deletions Sources/_StringProcessing/Engine/Instruction.swift
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ extension Instruction {
///
case moveImmediate

/// Move the current position into a register
///
/// moveCurrentPosition(into: PositionRegister)
///
/// Operands:
/// - Position register to move into
case moveCurrentPosition

// MARK: General Purpose: Control flow

/// Branch to a new instruction
Expand All @@ -57,6 +65,16 @@ extension Instruction {
///
case condBranchZeroElseDecrement

/// Conditionally branch if the current position is the same as the register
///
/// condBranch(
/// to: InstAddr, ifSamePositionAs: PositionRegister)
///
/// Operands:
/// - Instruction address to branch to, if the position in the register is the same as currentPosition
/// - Position register to check against
case condBranchSamePosition

// TODO: Function calls

// MARK: - Matching
Expand Down
23 changes: 22 additions & 1 deletion Sources/_StringProcessing/Engine/MEBuilder.swift
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ extension MEProgram {
var nextIntRegister = IntRegister(0)
var nextCaptureRegister = CaptureRegister(0)
var nextValueRegister = ValueRegister(0)
var nextPositionRegister = PositionRegister(0)

// Special addresses or instructions
var failAddressToken: AddressToken? = nil
Expand Down Expand Up @@ -105,6 +106,14 @@ extension MEProgram.Builder {
fixup(to: t)
}

mutating func buildCondBranch(
to t: AddressToken,
ifSamePositionAs r: PositionRegister
) {
instructions.append(.init(.condBranchSamePosition, .init(position: r)))
fixup(to: t)
}

mutating func buildSave(_ t: AddressToken) {
instructions.append(.init(.save))
fixup(to: t)
Expand Down Expand Up @@ -211,6 +220,10 @@ extension MEProgram.Builder {
.init(value: value, capture: capture)))
}

mutating func buildMoveCurrentPosition(into r: PositionRegister) {
instructions.append(.init(.moveCurrentPosition, .init(position: r)))
}

mutating func buildBackreference(
_ cap: CaptureRegister
) {
Expand Down Expand Up @@ -257,7 +270,8 @@ extension MEProgram.Builder {
switch inst.opcode {
case .condBranchZeroElseDecrement:
payload = .init(addr: addr, int: inst.payload.int)

case .condBranchSamePosition:
payload = .init(addr: addr, position: inst.payload.position)
case .branch, .save, .saveAddress, .clearThrough:
payload = .init(addr: addr)

Expand All @@ -281,6 +295,7 @@ extension MEProgram.Builder {
regInfo.sequences = sequences.count
regInfo.ints = nextIntRegister.rawValue
regInfo.values = nextValueRegister.rawValue
regInfo.positions = nextPositionRegister.rawValue
regInfo.bitsets = asciiBitsets.count
regInfo.consumeFunctions = consumeFunctions.count
regInfo.assertionFunctions = assertionFunctions.count
Expand Down Expand Up @@ -421,6 +436,12 @@ extension MEProgram.Builder {
return r
}

mutating func makePositionRegister() -> PositionRegister {
let r = nextPositionRegister
defer { nextPositionRegister.rawValue += 1 }
return r
}

// TODO: A register-mapping helper struct, which could release
// registers without monotonicity required

Expand Down
16 changes: 13 additions & 3 deletions Sources/_StringProcessing/Engine/Processor.swift
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ extension Processor {
}

mutating func signalFailure() {
guard let (pc, pos, stackEnd, capEnds, intRegisters) =
guard let (pc, pos, stackEnd, capEnds, intRegisters, posRegisters) =
savePoints.popLast()?.destructure
else {
state = .fail
Expand All @@ -259,6 +259,7 @@ extension Processor {
callStack.removeLast(callStack.count - stackEnd.rawValue)
storedCaptures = capEnds
registers.ints = intRegisters
registers.positions = posRegisters
}

mutating func abort(_ e: Error? = nil) {
Expand Down Expand Up @@ -315,7 +316,10 @@ extension Processor {

registers[reg] = int
controller.step()

case .moveCurrentPosition:
let reg = payload.position
registers[reg] = currentPosition
controller.step()
case .branch:
controller.pc = payload.addr

Expand All @@ -327,7 +331,13 @@ extension Processor {
registers[int] -= 1
controller.step()
}

case .condBranchSamePosition:
let (addr, pos) = payload.pairedAddrPos
if registers[pos] == currentPosition {
controller.pc = addr
} else {
controller.step()
}
case .save:
let resumeAddr = payload.addr
let sp = makeSavePoint(resumeAddr)
Expand Down
14 changes: 14 additions & 0 deletions Sources/_StringProcessing/Engine/Registers.swift
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ extension Processor {
var ints: [Int]

var values: [Any]

var positions: [Input.Index]
}
}

Expand All @@ -66,6 +68,12 @@ extension Processor.Registers {
values[i.rawValue] = newValue
}
}
subscript(_ i: PositionRegister) -> Input.Index {
get { positions[i.rawValue] }
set {
positions[i.rawValue] = newValue
}
}
subscript(_ i: ElementRegister) -> Input.Element {
elements[i.rawValue]
}
Expand All @@ -89,6 +97,8 @@ extension Processor.Registers {
}

extension Processor.Registers {
static let sentinelIndex = "".startIndex

init(
_ program: MEProgram,
_ sentinel: String.Index
Expand Down Expand Up @@ -120,11 +130,15 @@ extension Processor.Registers {

self.values = Array(
repeating: SentinelValue(), count: info.values)
self.positions = Array(
repeating: Processor.Registers.sentinelIndex,
count: info.positions)
}

mutating func reset(sentinel: Input.Index) {
self.ints._setAll(to: 0)
self.values._setAll(to: SentinelValue())
self.positions._setAll(to: Processor.Registers.sentinelIndex)
}
}

Expand Down
36 changes: 36 additions & 0 deletions Tests/RegexTests/CompileTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -208,4 +208,40 @@ extension RegexTests {
expectProgram(for: "[abc]", semanticLevel: .unicodeScalar, doesNotContain: [.matchBitset])
expectProgram(for: "[abc]", semanticLevel: .unicodeScalar, contains: [.consumeBy])
}

func testQuantificationForwardProgressCompile() {
// Unbounded quantification + non forward progressing inner nodes
// Expect to emit the position checking instructions
expectProgram(for: #"(?:(?=a)){1,}"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\b)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:(?#comment))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:|)+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|)+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?i-i:))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?#comment))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?#comment)(?i-i:))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?i))+"#, contains: [.moveCurrentPosition, .condBranchSamePosition])

// Bounded quantification, don't emit position checking
expectProgram(for: #"(?:(?=a)){1,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\b)?"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:(?#comment)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:|){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?i-i:)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?#comment)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?#comment)(?i-i:)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(?:\w|(?i)){,4}"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])

// Inner node is a quantification that does not guarantee forward progress
expectProgram(for: #"(a*)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(a?)*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(a{,5})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"((\b){,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"((\b){1,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"((|){1,4})*"#, contains: [.moveCurrentPosition, .condBranchSamePosition])
// Inner node is a quantification that guarantees forward progress
expectProgram(for: #"(a+)*"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
expectProgram(for: #"(a{1,})*"#, doesNotContain: [.moveCurrentPosition, .condBranchSamePosition])
}
}
28 changes: 27 additions & 1 deletion Tests/RegexTests/MatchTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -1893,5 +1893,31 @@ extension RegexTests {
XCTAssertEqual(matches.count, 3)
}
}
}

func expectCompletion(regex: String, in target: String) {
let expectation = XCTestExpectation(description: "Run the given regex to completion")
Task.init {
let r = try! Regex(regex)
let val = target.matches(of: r).isEmpty
expectation.fulfill()
return val
}
wait(for: [expectation], timeout: 3.0)
}

func testQuantificationForwardProgress() {
expectCompletion(regex: #"(?:(?=a)){1,}"#, in: "aa")
expectCompletion(regex: #"(?:\b)+"#, in: "aa")
expectCompletion(regex: #"(?:(?#comment))+"#, in: "aa")
expectCompletion(regex: #"(?:|)+"#, in: "aa")
expectCompletion(regex: #"(?:\w|)+"#, in: "aa")
expectCompletion(regex: #"(?:\w|(?i-i:))+"#, in: "aa")
expectCompletion(regex: #"(?:\w|(?#comment))+"#, in: "aa")
expectCompletion(regex: #"(?:\w|(?#comment)(?i-i:))+"#, in: "aa")
expectCompletion(regex: #"(?:\w|(?i))+"#, in: "aa")
expectCompletion(regex: #"(a*)*"#, in: "aa")
expectCompletion(regex: #"(a?)*"#, in: "aa")
expectCompletion(regex: #"(a{,4})*"#, in: "aa")
expectCompletion(regex: #"((|)+)*"#, in: "aa")
}
}

0 comments on commit cf6868c

Please sign in to comment.