Skip to content

Commit

Permalink
Fix some more unicode edge cases with character entities.
Browse files Browse the repository at this point in the history
  • Loading branch information
renggli committed Mar 27, 2022
1 parent 043f989 commit f4c84cb
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 4 deletions.
12 changes: 8 additions & 4 deletions lib/src/xml/entities/default_mapping.dart
Expand Up @@ -27,19 +27,23 @@ class XmlDefaultEntityMapping extends XmlEntityMapping {
if (input.length > 1 && input[0] == '#') {
if (input.length > 2 && (input[1] == 'x' || input[1] == 'X')) {
// Hexadecimal character reference.
final value = int.tryParse(input.substring(2), radix: 16);
return value != null ? String.fromCharCode(value) : null;
return _decodeNumericEntity(input.substring(2), 16);
} else {
// Decimal character reference.
final value = int.tryParse(input.substring(1));
return value != null ? String.fromCharCode(value) : null;
return _decodeNumericEntity(input.substring(1), 10);
}
} else {
// Named character reference.
return entities[input];
}
}

/// Decodes the digits of a numeric character reference into a string.
///
/// [input] is the text that follows `&#` (decimal) or `&#x` (hexadecimal),
/// and [radix] is the base those digits are written in.
///
/// Returns the string for the referenced code point, or `null` if [input]
/// is empty, contains anything other than digits of the given [radix], or
/// denotes a value outside of the range `0..0x10FFFF`.
String? _decodeNumericEntity(String input, int radix) {
  // `int.tryParse` also accepts a leading '+' or '-' sign, which would let
  // references such as `&#+65;` or `&#-0;` slip through. A character
  // reference consists of digits only, so validate each code unit up front.
  for (final unit in input.codeUnits) {
    final int digit;
    if (0x30 <= unit && unit <= 0x39) {
      // '0'..'9'
      digit = unit - 0x30;
    } else {
      // Fold 'A'..'Z' onto 'a'..'z'; everything else is marked as invalid by
      // assigning a digit value that is never below the radix.
      final lower = unit | 0x20;
      digit = 0x61 <= lower && lower <= 0x7a ? lower - 0x61 + 10 : radix;
    }
    if (digit >= radix) return null;
  }
  final value = int.tryParse(input, radix: radix);
  // Reject unparsable input (including the empty string) and values that lie
  // outside of the Unicode code point range.
  if (value == null || value < 0 || 0x10FFFF < value) return null;
  return String.fromCharCode(value);
}

/// Returns [input] with every match of `_textPattern` replaced by the result
/// of calling `_textReplace` on that match.
@override
String encodeText(String input) {
  return input.replaceAllMapped(_textPattern, _textReplace);
}
Expand Down
14 changes: 14 additions & 0 deletions test/entity_test.dart
Expand Up @@ -27,6 +27,20 @@ void testDefaultMapping(XmlEntityMapping entityMapping) {
expect(entityMapping.decode('&#xInvalid;'), '&#xInvalid;');
expect(entityMapping.decode('&#XInvalid;'), '&#XInvalid;');
});
test('unicode', () {
  // Boundary code points, each referenced in decimal and hexadecimal form:
  // https://www.compart.com/en/unicode/U+0000
  // https://www.compart.com/en/unicode/U+10FFFF
  const expectations = {
    '&#0;': '\u0000',
    '&#x0000;': '\u0000',
    '&#1114111;': '\uDBFF\uDFFF',
    '&#x10FFFF;': '\uDBFF\uDFFF',
  };
  for (final entry in expectations.entries) {
    expect(entityMapping.decode(entry.key), entry.value);
  }
});
test('unicode invalid', () {
  // Negative values and values above U+10FFFF are not decoded; the input is
  // expected to come back unchanged.
  const inputs = ['&#-1;', '&#x-1;', '&#1114112;', '&#x110000;'];
  for (final input in inputs) {
    expect(entityMapping.decode(input), input);
  }
});
test('incomplete', () {
expect(entityMapping.decode('&'), '&');
expect(entityMapping.decode('&amp'), '&amp');
Expand Down

0 comments on commit f4c84cb

Please sign in to comment.