Skip to content

Commit

Permalink
Fix decoding of USVs greater than U+10FFFF (i.e. U+110000 and above)
Browse files Browse the repository at this point in the history
Add test cases too
  • Loading branch information
tim-eves committed Mar 1, 2017
1 parent 0159407 commit 348c11e
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
4 changes: 2 additions & 2 deletions src/inc/UtfCodec.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ struct _utf_codec<8>
private:
static const int8 sz_lut[16];
static const byte mask_lut[5];

static const uchar_t limit = 0x110000;

public:
typedef uint8 codeunit_t;
Expand Down Expand Up @@ -157,7 +157,7 @@ struct _utf_codec<8>
case 0: l = -1; return 0xFFFD;
}

if (l != seq_sz || toolong)
if (l != seq_sz || toolong || u >= limit)
{
l = -l;
return 0xFFFD;
Expand Down
3 changes: 3 additions & 0 deletions tests/utftest/utftest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ struct test8
unsigned char str[12];
};
struct test8 tests8[] = {
{ 0, 0, {0xF4, 0x90, 0x80, 0x80, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(4) [U+110000]
{ 0, 0, {0xC0, 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(2) [overlong encoding of U+0000]
{ 0, 0, {0xA0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} }, // bad(1) [stray continuation byte 0xA0]
{ 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0, 0} }, // U+7F, U+7FF, U+FFFF, U+10FFFF
{ 2, 3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, U+7FF, long(U+FFFF), U+10FFFF
{ 1, 1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, long(U+7FF), U+FFFF, U+10FFFF
Expand Down

0 comments on commit 348c11e

Please sign in to comment.