diff --git a/src/pk/asn1/der/utf8/der_decode_utf8_string.c b/src/pk/asn1/der/utf8/der_decode_utf8_string.c index 2e40645bb..93a5e5ed2 100644 --- a/src/pk/asn1/der/utf8/der_decode_utf8_string.c +++ b/src/pk/asn1/der/utf8/der_decode_utf8_string.c @@ -11,11 +11,11 @@ #ifdef LTC_DER /** - Store a UTF8 STRING + Decode a UTF8 STRING and recover an array of unicode characters. @param in The DER encoded UTF8 STRING @param inlen The size of the DER UTF8 STRING - @param out [out] The array of utf8s stored (one per char) - @param outlen [in/out] The number of utf8s stored + @param out [out] The array of unicode characters (wchar_t*) + @param outlen [in/out] The number of unicode characters in the array @return CRYPT_OK if successful */ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen, @@ -51,23 +51,47 @@ int der_decode_utf8_string(const unsigned char *in, unsigned long inlen, return CRYPT_INVALID_PACKET; } - /* proceed to decode */ + /* proceed to recover unicode characters from utf8 data. + for reference see Section 3 of RFC 3629: + + https://tools.ietf.org/html/rfc3629#section-3 + */ for (y = 0; x < inlen; ) { - /* get first byte */ + /* read first byte */ tmp = in[x++]; - /* count number of bytes */ + /* a unicode character is recovered from a sequence of 1 to 4 utf8 bytes. + the form of those bytes must match a row in the following table: + + 0xxxxxxx + 110xxxxx 10xxxxxx + 1110xxxx 10xxxxxx 10xxxxxx + 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + + the number of leading ones in the first byte (0,2,3,4) determines the + number of remaining bytes to read (0,1,2,3) + */ + + /* determine z, the number of leading ones. + this is done by left-shifting tmp, which clears the ms-bits */ for (z = 0; (tmp & 0x80) && (z <= 4); z++, tmp = (tmp << 1) & 0xFF); - if (z == 1 || z > 4 || (x + (z - 1) > inlen)) { + /* z should be in {0,2,3,4} */ + if (z == 1 || z > 4) { return CRYPT_INVALID_PACKET; } - /* decode, grab upper bits */ + /* right-shift tmp to restore least-sig bits */ tmp >>= z; - /* grab remaining bytes */ - if (z > 1) { --z; } + /* now update z so it equals the number of additional bytes to read */ + if (z > 0) { --z; } + + if (x + z > inlen) { + return CRYPT_INVALID_PACKET; + } + + /* read remaining bytes */ while (z-- != 0) { if ((in[x] & 0xC0) != 0x80) { return CRYPT_INVALID_PACKET; diff --git a/tests/der_test.c b/tests/der_test.c index 633e3cd59..1ded3c2a8 100644 --- a/tests/der_test.c +++ b/tests/der_test.c @@ -1603,6 +1603,8 @@ int der_test(void) static const unsigned char utf8_1_der[] = { 0x0C, 0x07, 0x41, 0xE2, 0x89, 0xA2, 0xCE, 0x91, 0x2E }; static const wchar_t utf8_2[] = { 0xD55C, 0xAD6D, 0xC5B4 }; static const unsigned char utf8_2_der[] = { 0x0C, 0x09, 0xED, 0x95, 0x9C, 0xEA, 0xB5, 0xAD, 0xEC, 0x96, 0xB4 }; + static const wchar_t utf8_3[] = { 0x05E9, 0x05DC, 0x05D5, 0x05DD }; + static const unsigned char utf8_3_der[] = { 0x0C, 0x08, 0xD7, 0xA9, 0xD7, 0x9C, 0xD7, 0x95, 0xD7, 0x9D }; unsigned char utf8_buf[32]; wchar_t utf8_out[32]; @@ -1961,6 +1963,24 @@ tmp_time.off_hh); return 1; } + /* encode it */ + x = sizeof(utf8_buf); + DO(der_encode_utf8_string(utf8_3, sizeof(utf8_3) / sizeof(utf8_3[0]), utf8_buf, &x)); + if (x != sizeof(utf8_3_der) || memcmp(utf8_buf, utf8_3_der, x)) { + fprintf(stderr, "DER UTF8_3 encoded to %lu bytes\n", x); + for (y = 0; y < x; y++) fprintf(stderr, "%02x ", (unsigned)utf8_buf[y]); + fprintf(stderr, "\n"); + return 1; + } + /* decode it */ + y = sizeof(utf8_out) / sizeof(utf8_out[0]); + DO(der_decode_utf8_string(utf8_buf, x, utf8_out, &y)); + if (y != (sizeof(utf8_3) / sizeof(utf8_3[0])) || memcmp(utf8_3, utf8_out, y * sizeof(wchar_t))) { + fprintf(stderr, "DER UTF8_3 decoded to %lu wchar_t\n", y); + for (x = 0; x < y; x++) fprintf(stderr, "%04lx ", (unsigned long)utf8_out[x]); + fprintf(stderr, "\n"); + return 1; + } der_set_test(); der_flexi_test();