Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 34 additions & 10 deletions src/pk/asn1/der/utf8/der_decode_utf8_string.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
#ifdef LTC_DER

/**
Store a UTF8 STRING
Decode a UTF8 STRING and recover an array of unicode characters.
@param in The DER encoded UTF8 STRING
@param inlen The size of the DER UTF8 STRING
@param out [out] The array of utf8s stored (one per char)
@param outlen [in/out] The number of utf8s stored
@param out [out] The array of unicode characters (wchar_t*)
@param outlen [in/out] The number of unicode characters in the array
@return CRYPT_OK if successful
*/
int der_decode_utf8_string(const unsigned char *in, unsigned long inlen,
Expand Down Expand Up @@ -51,23 +51,47 @@ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen,
return CRYPT_INVALID_PACKET;
}

/* proceed to decode */
/* proceed to recover unicode characters from utf8 data.
for reference see Section 3 of RFC 3629:

https://tools.ietf.org/html/rfc3629#section-3
*/
for (y = 0; x < inlen; ) {
/* get first byte */
/* read first byte */
tmp = in[x++];

/* count number of bytes */
/* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes.
the form of those bytes must match a row in the following table:

0xxxxxxx
110xxxxx 10xxxxxx
1110xxxx 10xxxxxx 10xxxxxx
11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

the number of leading ones in the first byte (0,2,3,4) determines the
number of remaining bytes to read (0,1,2,3)
*/

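/* determine z, the number of leading one bits in the first byte.
this is done by left-shifting tmp one bit at a time (masked to 8 bits),
which clears the most-significant bits as they are counted; the scan
stops once z exceeds 4 so that over-long prefixes can be rejected */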
for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF);

if (z == 1 || z > 4 || (x + (z - 1) > inlen)) {
/* z should be in {0,2,3,4} */
if (z == 1 || z > 4) {
return CRYPT_INVALID_PACKET;
}

/* decode, grab upper bits */
/* right-shift tmp by z to move the remaining (least-significant) payload bits back to their original positions */
tmp >>= z;

/* grab remaining bytes */
if (z > 1) { --z; }
/* now update z so it equals the number of additional bytes to read */
if (z > 0) { --z; }

if (x + z > inlen) {
return CRYPT_INVALID_PACKET;
}

/* read remaining bytes */
while (z-- != 0) {
if ((in[x] & 0xC0) != 0x80) {
return CRYPT_INVALID_PACKET;
Expand Down
20 changes: 20 additions & 0 deletions tests/der_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1603,6 +1603,8 @@ int der_test(void)
static const unsigned char utf8_1_der[] = { 0x0C, 0x07, 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E };
static const wchar_t utf8_2[] = { 0xD55C, 0xAD6D, 0xC5B4 };
static const unsigned char utf8_2_der[] = { 0x0C, 0x09, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4 };
static const wchar_t utf8_3[] = { 0x05E9, 0x05DC, 0x05D5, 0x05DD };
static const unsigned char utf8_3_der[] = { 0x0C, 0x08, 0xD7, 0xA9, 0xD7, 0x9C, 0xD7, 0x95, 0xD7, 0x9D };

unsigned char utf8_buf[32];
wchar_t utf8_out[32];
Expand Down Expand Up @@ -1961,6 +1963,24 @@ tmp_time.off_hh);
return 1;
}

/* encode it */
x = sizeof(utf8_buf);
DO(der_encode_utf8_string(utf8_3, sizeof(utf8_3) / sizeof(utf8_3[0]), utf8_buf, &x));
if (x != sizeof(utf8_3_der) || memcmp(utf8_buf, utf8_3_der, x)) {
fprintf(stderr, "DER UTF8_3 encoded to %lu bytes\n", x);
for (y = 0; y < x; y++) fprintf(stderr, "%02x ", (unsigned)utf8_buf[y]);
fprintf(stderr, "\n");
return 1;
}
/* decode it */
y = sizeof(utf8_out) / sizeof(utf8_out[0]);
DO(der_decode_utf8_string(utf8_buf, x, utf8_out, &y));
if (y != (sizeof(utf8_3) / sizeof(utf8_3[0])) || memcmp(utf8_3, utf8_out, y * sizeof(wchar_t))) {
fprintf(stderr, "DER UTF8_3 decoded to %lu wchar_t\n", y);
for (x = 0; x < y; x++) fprintf(stderr, "%04lx ", (unsigned long)utf8_out[x]);
fprintf(stderr, "\n");
return 1;
}

der_set_test();
der_flexi_test();
Expand Down