Skip to content

Commit

Permalink
fix: unescape unicode properly
Browse files Browse the repository at this point in the history
  • Loading branch information
mdvorak committed Apr 15, 2023
1 parent e494d09 commit e197ac1
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 31 deletions.
2 changes: 2 additions & 0 deletions fixtures/test-all.properties
Original file line number Diff line number Diff line change
Expand Up @@ -70,3 +70,5 @@ encodedHelloInJapanese = \u3053\u3093\u306b\u3061\u306f
# Using \u without being followed by four hexadecimal digits will throw an exception.
# But with more modern file encodings like UTF-8, you can directly use supported characters.
helloInJapanese = こんにちは
# Keys can also be encoded
\u3053\u3093\u306b\u3061\u306f = hello
34 changes: 20 additions & 14 deletions src/properties.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,6 @@ describe('data access', () => {
expect(result).toBeUndefined()

Check failure on line 116 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (latest)

Insert `··`

Check failure on line 116 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (14)

Insert `··`

Check failure on line 116 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (16)

Insert `··`

Check failure on line 116 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (18)

Insert `··`
})

Check failure on line 117 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (latest)

Replace `}` with `··}⏎····`

Check failure on line 117 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (14)

Replace `}` with `··}⏎····`

Check failure on line 117 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (16)

Replace `}` with `··}⏎····`

Check failure on line 117 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (18)

Replace `}` with `··}⏎····`


it('should return last value of duplicate key', () => {
const config: properties.Properties = {
lines: [

Check failure on line 121 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (latest)

Replace `⏎··········'key1=foo1',⏎··········'key2=foo2',⏎··········'key1=foo3'⏎········` with `'key1=foo1',·'key2=foo2',·'key1=foo3'`

Check failure on line 121 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (14)

Replace `⏎··········'key1=foo1',⏎··········'key2=foo2',⏎··········'key1=foo3'⏎········` with `'key1=foo1',·'key2=foo2',·'key1=foo3'`

Check failure on line 121 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (16)

Replace `⏎··········'key1=foo1',⏎··········'key2=foo2',⏎··········'key1=foo3'⏎········` with `'key1=foo1',·'key2=foo2',·'key1=foo3'`

Check failure on line 121 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (18)

Replace `⏎··········'key1=foo1',⏎··········'key2=foo2',⏎··········'key1=foo3'⏎········` with `'key1=foo1',·'key2=foo2',·'key1=foo3'`
Expand All @@ -129,6 +128,25 @@ describe('data access', () => {
const result = properties.get(config, 'key1')
expect(result).toBe('foo3')
})

it('should throw on invalid unicode sequence in key', () => {
const config: properties.Properties = {
lines: ['foo\\u23a=bar']
}

expect(() => properties.get(config, 'foo')).toThrowError()
})

it.each([

Check failure on line 140 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (latest)

Replace `⏎······['foo=bar\\u23a'],⏎······['foo=bar\\u23ax5']` with `['foo=bar\\u23a'],·['foo=bar\\u23ax5']])(`

Check failure on line 140 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (14)

Replace `⏎······['foo=bar\\u23a'],⏎······['foo=bar\\u23ax5']` with `['foo=bar\\u23a'],·['foo=bar\\u23ax5']])(`

Check failure on line 140 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (16)

Replace `⏎······['foo=bar\\u23a'],⏎······['foo=bar\\u23ax5']` with `['foo=bar\\u23a'],·['foo=bar\\u23ax5']])(`

Check failure on line 140 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (18)

Replace `⏎······['foo=bar\\u23a'],⏎······['foo=bar\\u23ax5']` with `['foo=bar\\u23a'],·['foo=bar\\u23ax5']])(`
['foo=bar\\u23a'],
['foo=bar\\u23ax5']
])('should throw on invalid unicode sequence in value %s', (line) => {

Check failure on line 143 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (latest)

Replace `])('should·throw·on·invalid·unicode·sequence·in·value·%s',·(line)` with `··'should·throw·on·invalid·unicode·sequence·in·value·%s',⏎······line`

Check failure on line 143 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (14)

Replace `])('should·throw·on·invalid·unicode·sequence·in·value·%s',·(line)` with `··'should·throw·on·invalid·unicode·sequence·in·value·%s',⏎······line`

Check failure on line 143 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (16)

Replace `])('should·throw·on·invalid·unicode·sequence·in·value·%s',·(line)` with `··'should·throw·on·invalid·unicode·sequence·in·value·%s',⏎······line`

Check failure on line 143 in src/properties.spec.ts

View workflow job for this annotation

GitHub Actions / Build (18)

Replace `])('should·throw·on·invalid·unicode·sequence·in·value·%s',·(line)` with `··'should·throw·on·invalid·unicode·sequence·in·value·%s',⏎······line`
const config: properties.Properties = {
lines: [line]
}

expect(() => properties.get(config, 'foo')).toThrowError()
})
})

describe('set value', () => {
Expand Down Expand Up @@ -364,6 +382,7 @@ describe('data access', () => {
'evenLikeThis\\': '',
hello: 'hello',
helloInJapanese: 'こんにちは',
'こんにちは': 'hello',
keyWithBackslashes: 'This has random backslashes',
'keyWithDelimiters:= ': 'This is the value for the key "keyWithDelimiters:= "',
'keyWitheven\\': 'this colon is not escaped',
Expand All @@ -383,19 +402,6 @@ describe('data access', () => {
})
})

describe('unescape', () => {
it.each([
['foo', 'foo'],
['\\:\\#\\!\\ ', ':#! '],
['a\\r\\f\\n\\t\\\\\\ ', 'a\r\f\n\t\\ '],
['\\u0000\\u0001', '\0\u0001'],
['\\u3053\\u3093\\u306b\\u3061\\u306f', 'こんにちは']
])('should unescape string "%s" to "%s"', (str: string, expected: string) => {
const result = properties.unescape(str)
expect(result).toEqual(expected)
})
})

describe('escapeKey', () => {
it.each([
['foo1', 'foo1'],
Expand Down
80 changes: 63 additions & 17 deletions src/properties.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ function* listPairs(lines: string[]): Generator<{
sep: string,
value: string,
skipSpace: boolean,
escapedNext: boolean
escapedNext: boolean,
unicode?: string
} =>
({state: State.START, start: -1, key: '', sep: '', value: '', skipSpace: true, escapedNext: false})

Expand All @@ -245,6 +246,20 @@ function* listPairs(lines: string[]): Generator<{
}
state.skipSpace = false

// Parse unicode
if (state.unicode) {
// Handle incomplete sequence
if (char === 'EOL') {
throw new Error(`Invalid unicode sequence at line ${line}`)
}

// Append and consume until it has correct length
state.unicode += char
if (state.unicode.length < 6) {
continue
}
}

// First char on the line
if (state.state === State.START) {
switch (char) {
Expand Down Expand Up @@ -272,6 +287,13 @@ function* listPairs(lines: string[]): Generator<{

// Key
if (state.state === State.KEY) {
// Special unicode handling
if (state.unicode) {
state.key += parseUnicode(state.unicode, line)
state.unicode = undefined
continue
}

switch (char) {
case 'EOL':
if (state.escapedNext) {
Expand Down Expand Up @@ -307,10 +329,20 @@ function* listPairs(lines: string[]): Generator<{
}
break
default:
// Normal char
// TODO handle unicode
state.key += state.escapedNext ? unescapeChar(char) : char
state.escapedNext = false
// Escape sequence
if (state.escapedNext) {
state.escapedNext = false
if (char === 'u') {
// Unicode
state.unicode = '0x'
} else {
// Special char
state.key += unescapeChar(char)
}
} else {
// Normal char
state.key += char
}
break
}
}
Expand Down Expand Up @@ -346,6 +378,13 @@ function* listPairs(lines: string[]): Generator<{

// Value
if (state.state === State.VALUE) {
// Special unicode handling
if (state.unicode) {
state.value += parseUnicode(state.unicode, line)
state.unicode = undefined
continue
}

switch (char) {
case 'EOL':
if (state.escapedNext) {
Expand All @@ -369,10 +408,19 @@ function* listPairs(lines: string[]): Generator<{
}
break
default:
// Normal char
// TODO handle unicode
state.value += state.escapedNext ? unescapeChar(char) : char
state.escapedNext = false
if (state.escapedNext) {
state.escapedNext = false
if (char === 'u') {
// Unicode
state.unicode = '0x'
} else {
// Special char
state.value += unescapeChar(char)
}
} else {
// Normal char
state.value += char
}
break
}
}
Expand All @@ -395,14 +443,12 @@ const unescapeChar = (c: string): string => {
}
}

/**
 * Unescape key or value.
 *
 * All backslash escapes are resolved in a single left-to-right pass:
 * - `\uXXXX` (exactly four hexadecimal digits) is replaced by the UTF-16 code unit it names;
 * - any other escaped character is translated by `unescapeChar`.
 *
 * A `\u` that is not followed by four hexadecimal digits is not treated as a unicode
 * escape; the `u` is handed to `unescapeChar` like any other escaped character.
 *
 * @param str Escaped string.
 * @return Actual string.
 */
export const unescape = (str: string): string =>
  str.replace(
    // Alternative 1 captures the four hex digits of a unicode escape,
    // alternative 2 captures the single character of every other escape.
    /\\(?:u([\da-fA-F]{4})|(.))/g,
    (_match: string, hex: string | undefined, char: string) =>
      hex !== undefined ? String.fromCharCode(parseInt(hex, 16)) : unescapeChar(char)
  )
/**
 * Turn a buffered unicode escape into the character it denotes.
 *
 * @param sequence Buffered escape in the form `0x` followed by exactly four hexadecimal digits.
 * @param line Line number, used only in the error message.
 * @return Single UTF-16 code unit with the value given by the hexadecimal digits.
 * @throws Error when `sequence` is not `0x` plus exactly four hexadecimal digits.
 */
const parseUnicode = (sequence: string, line: number): string => {
  const wellFormed = /^0x[\da-fA-F]{4}$/.test(sequence)
  if (wellFormed) {
    // Radix 16 accepts the leading "0x", so the buffer can be parsed as-is.
    const codeUnit = Number.parseInt(sequence, 16)
    return String.fromCharCode(codeUnit)
  }
  throw new Error(`Invalid unicode sequence at line ${line}`)
}

/**
* Escape property key.
Expand Down

0 comments on commit e197ac1

Please sign in to comment.