Skip to content

Commit

Permalink
fix: Support string escape sequences in matching definitions
Browse files Browse the repository at this point in the history
  • Loading branch information
rholshausen committed Jun 21, 2023
1 parent 3c78dc5 commit a2f7fbf
Show file tree
Hide file tree
Showing 2 changed files with 133 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -467,22 +467,98 @@ class MatcherDefinitionParser(private val lexer: MatcherDefinitionLexer) {
/**
 * Parses a single-quoted string literal at the lexer's current position.
 *
 * Leading whitespace is skipped and an opening `'` is required. Characters are then read
 * up to the closing `'`. A backslash immediately followed by a quote (`\'`) does not end
 * the string; both characters are copied into the raw text. The collected raw text is
 * passed to [processRawString], which expands the supported escape sequences.
 *
 * NOTE(review): a backslash directly before the closing quote (for example `'a\\'`) is
 * read as an escaped quote, so such a string is reported as unterminated - confirm
 * whether that is intended.
 *
 * @return `Ok` with the processed string, or `Err` when no opening quote is found, the
 * string is not terminated, or an escape sequence is rejected by [processRawString].
 */
fun string(): Result<String?, String> {
  lexer.skipWhitespace()
  return if (lexer.matchChar('\'')) {
    val raw = StringBuilder()
    var ch = lexer.nextChar()
    var ch2 = lexer.peekNextChar()
    // Stop at end of input (null) or at an unescaped closing quote.
    while (ch != null && ch != '\'') {
      raw.append(ch)
      if (ch == '\\' && ch2 == '\'') {
        // Escaped quote: keep it in the raw text and step over it so it does not
        // terminate the string.
        raw.append('\'')
        lexer.advance()
      }
      ch = lexer.nextChar()
      ch2 = lexer.peekNextChar()
    }

    if (ch == '\'') {
      processRawString(raw.toString())
    } else {
      Result.Err("Unterminated string found at index ${lexer.index}")
    }
  } else {
    Result.Err("Was expecting a string at index ${lexer.index}")
  }
}

/**
 * Expands backslash escape sequences in the raw contents of a quoted string.
 *
 * Supported escapes:
 *  - `\\` becomes a single backslash
 *  - `\b`, `\f`, `\n`, `\r`, `\t` become the matching control character
 *  - `\uXXXX` (exactly four characters) and `\u{X...}` (any length, closed by `}`) become
 *    the Unicode code point with that hexadecimal value
 *
 * Any other escaped character (for example `\w`, `\d` or `\'`) is copied through
 * unchanged together with its backslash, so regular expressions survive intact. A lone
 * backslash at the very end of the input is also copied as-is.
 *
 * @param rawString text between the quotes, with escape sequences still unexpanded
 * @return `Ok` with the expanded text, or `Err` when a unicode escape is truncated, is not
 * valid hexadecimal, or does not name a valid Unicode code point. The index in the error
 * message is the lexer's current index, not an offset into [rawString].
 */
fun processRawString(rawString: String): Result<String, String> {
  val buffer = StringBuilder(rawString.length)
  val end = rawString.length
  var pos = 0
  while (pos < end) {
    val ch = rawString[pos++]
    if (ch != '\\' || pos == end) {
      // Ordinary character, or a trailing backslash with nothing after it.
      buffer.append(ch)
      continue
    }
    when (val escaped = rawString[pos++]) {
      '\\' -> buffer.append('\\')
      'b' -> buffer.append('\u0008')
      'f' -> buffer.append('\u000C')
      'n' -> buffer.append('\n')
      'r' -> buffer.append('\r')
      't' -> buffer.append('\t')
      'u' -> {
        val digits: String
        if (pos < end && rawString[pos] == '{') {
          // Braced form: everything up to the matching '}' is the hex value.
          val close = rawString.indexOf('}', pos + 1)
          if (close < 0) {
            return Result.Err("Invalid unicode escape found at index ${lexer.index}")
          }
          digits = rawString.substring(pos + 1, close)
          pos = close + 1
        } else {
          // Fixed form: exactly four characters must follow the 'u'.
          if (end - pos < 4) {
            return Result.Err("Invalid unicode escape found at index ${lexer.index}")
          }
          digits = rawString.substring(pos, pos + 4)
          pos += 4
        }
        // Reject values that parse as an integer but are not code points (negative or
        // above U+10FFFF); appending those would throw instead of returning an Err.
        val code = digits.toIntOrNull(16)?.takeIf { Character.isValidCodePoint(it) }
          ?: return Result.Err("Invalid unicode escape found at index ${lexer.index}")
        buffer.appendCodePoint(code)
      }
      else -> {
        // Unknown escape: keep the backslash and the character untouched.
        buffer.append(ch)
        buffer.append(escaped)
      }
    }
  }
  return Result.Ok(buffer.toString())
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -165,4 +165,55 @@ class MatchingDefinitionParserSpec extends Specification {
"notEmpty( 'true' )" | 'true' | ValueType.String
'notEmpty(true)' | 'true' | ValueType.Boolean
}

// Data-driven check of MatcherDefinitionParser.string(): each row parses the quoted
// expression and compares the value of the returned result with the expected text.
// The expressions are Groovy double-quoted strings, so "\\" in this file is a single
// backslash in the parser input.
def 'parsing string values'() {
expect:
new MatcherDefinitionParser(new MatcherDefinitionLexer(expression)).string().value == result

where:

expression | result
// Plain strings, including the empty string, come back unchanged.
"''" | ''
"'Example value'" | 'Example value'
"'yyyy-MM-dd HH:mm:ssZZZZZ'" | 'yyyy-MM-dd HH:mm:ssZZZZZ'
"'2020-05-21 16:44:32+10:00'" | '2020-05-21 16:44:32+10:00'
// Escapes that are not recognised (\w, \d, \$, \., \') keep their backslash.
"'\\w{3}\\d+'" | "\\w{3}\\d+"
"'<?xml?><test/>'" | '<?xml?><test/>'
"'\\\$(\\.\\w+)+'" | "\\\$(\\.\\w+)+"
"'we don\\'t currently support parallelograms'" | "we don\\'t currently support parallelograms"
// Recognised control-character escapes are expanded.
"'\\b backspace'" | "\b backspace"
"'\\f formfeed'" | "\f formfeed"
"'\\n linefeed'" | "\n linefeed"
"'\\r carriage return'" | "\r carriage return"
"'\\t tab'" | "\t tab"
// Unicode escapes: four-digit form and braced form (code points above U+FFFF).
"'\\u0109 unicode hex code'" | "\u0109 unicode hex code"
"'\\u{1DF0B} unicode hex code'" | "${Character.toString(0x1DF0B)} unicode hex code"
"'\\u{1D400} unicode hex code'" | "𝐀 unicode hex code"
}

// Data-driven check of processRawString() called directly on unquoted raw text.
// The parser is built over an empty lexer input. The inputs are Groovy single-quoted
// strings, so '\\' in this file is one backslash in the raw text.
def 'process raw string'() {
expect:
new MatcherDefinitionParser(new MatcherDefinitionLexer("")).processRawString(expression).value == result

where:

expression | result
'' | ""
'Example value' | 'Example value'
// Unrecognised escapes are passed through with their backslash.
'not escaped \\$(\\.\\w+)+' | 'not escaped \\$(\\.\\w+)+'
// Two backslashes in the raw text collapse to one.
'escaped \\\\' | 'escaped \\'
// A single trailing backslash is kept as-is.
'slash at end \\' | 'slash at end \\'
}

// Malformed unicode escapes must produce a Result.Err rather than a value.
def "process raw string error test"() {
given:
def parser = new MatcherDefinitionParser(new MatcherDefinitionLexer("'invalid escape \\u in string'"))

expect:
// \u followed by non-hex text; the expected message carries index 0.
parser.processRawString("'invalid escape \\u in string'").errorValue() == "Invalid unicode escape found at index 0"
// Fewer than four characters after \u.
parser.processRawString('\\u0') instanceof Result.Err
parser.processRawString('\\u00') instanceof Result.Err
parser.processRawString('\\u000') instanceof Result.Err
// Braced form without a closing '}'.
parser.processRawString('\\u{000') instanceof Result.Err
}
}

0 comments on commit a2f7fbf

Please sign in to comment.