Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implementation of Email Address International support (RFC 8398) in OpenSSL #9654

Closed
wants to merge 10 commits into from
3 changes: 2 additions & 1 deletion crypto/asn1/tbl_standard.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ static const ASN1_STRING_TABLE tbl_standard[] = {
{NID_SNILS, 1, 11, B_ASN1_NUMERICSTRING, STABLE_NO_MASK},
{NID_countryCode3c, 3, 3, B_ASN1_PRINTABLESTRING, STABLE_NO_MASK},
{NID_countryCode3n, 3, 3, B_ASN1_NUMERICSTRING, STABLE_NO_MASK},
{NID_dnsName, 0, -1, B_ASN1_UTF8STRING, STABLE_NO_MASK}
{NID_dnsName, 0, -1, B_ASN1_UTF8STRING, STABLE_NO_MASK},
{NID_id_on_SmtpUTF8Mailbox, 1, ub_email_address, B_ASN1_UTF8STRING, STABLE_NO_MASK}
};

1 change: 1 addition & 0 deletions crypto/build.info
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ SOURCE[../libcrypto]=$UTIL_COMMON \
mem.c mem_sec.c \
cversion.c info.c cpt_err.c ebcdic.c uid.c o_time.c o_dir.c \
o_fopen.c getenv.c o_init.c init.c trace.c provider.c \
punycode.c \
$UPLINKSRC
SOURCE[../providers/libfips.a]=$UTIL_COMMON
SOURCE[../providers/liblegacy.a]=$UTIL_COMMON
Expand Down
338 changes: 338 additions & 0 deletions crypto/punycode.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
/*
* Copyright 2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/

#include <stddef.h>
beldmit marked this conversation as resolved.
Show resolved Hide resolved
#include <string.h>
#include <stdio.h>
#include <openssl/e_os2.h>
#include "crypto/punycode.h"

static const unsigned int base = 36;
static const unsigned int tmin = 1;
static const unsigned int tmax = 26;
static const unsigned int skew = 38;
static const unsigned int damp = 700;
static const unsigned int initial_bias = 72;
static const unsigned int initial_n = 0x80;
static const unsigned int maxint = 0xFFFFFFFF;
static const char delimiter = '-';

#define LABEL_BUF_SIZE 512

/*-
* Pseudocode:
*
* function adapt(delta,numpoints,firsttime):
* if firsttime then let delta = delta div damp
* else let delta = delta div 2
* let delta = delta + (delta div numpoints)
* let k = 0
* while delta > ((base - tmin) * tmax) div 2 do begin
* let delta = delta div (base - tmin)
* let k = k + base
* end
* return k + (((base - tmin + 1) * delta) div (delta + skew))
*/

static int adapt(unsigned int delta, unsigned int numpoints,
unsigned int firsttime)
{
unsigned int k = 0;

delta = (firsttime) ? delta / damp : delta / 2;
delta = delta + delta / numpoints;

while (delta > ((base - tmin) * tmax) / 2) {
delta = delta / (base - tmin);
k = k + base;
}

return k + (((base - tmin + 1) * delta) / (delta + skew));
}

static ossl_inline int is_basic(unsigned int a)
{
return (a < 0x80) ? 1 : 0;
}

/*-
* code points digit-values
* ------------ ----------------------
* 41..5A (A-Z) = 0 to 25, respectively
* 61..7A (a-z) = 0 to 25, respectively
* 30..39 (0-9) = 26 to 35, respectively
*/
static ossl_inline int digit_decoded(const unsigned char a)
{
if (a >= 0x41 && a <= 0x5A)
return a - 0x41;

if (a >= 0x61 && a <= 0x7A)
return a - 0x61;

if (a >= 0x30 && a <= 0x39)
return a - 0x30 + 26;

return -1;
}

/*-
* Pseudocode:
*
* function ossl_punycode_decode
* let n = initial_n
* let i = 0
* let bias = initial_bias
* let output = an empty string indexed from 0
* consume all code points before the last delimiter (if there is one)
* and copy them to output, fail on any non-basic code point
* if more than zero code points were consumed then consume one more
* (which will be the last delimiter)
* while the input is not exhausted do begin
* let oldi = i
* let w = 1
* for k = base to infinity in steps of base do begin
* consume a code point, or fail if there was none to consume
* let digit = the code point's digit-value, fail if it has none
* let i = i + digit * w, fail on overflow
* let t = tmin if k <= bias {+ tmin}, or
* tmax if k >= bias + tmax, or k - bias otherwise
* if digit < t then break
* let w = w * (base - t), fail on overflow
* end
* let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
* let n = n + i div (length(output) + 1), fail on overflow
* let i = i mod (length(output) + 1)
* {if n is a basic code point then fail}
* insert n into output at position i
* increment i
* end
*/

int ossl_punycode_decode(const char *pEncoded, const size_t enc_len,
unsigned int *pDecoded, unsigned int *pout_length)
{
unsigned int n = initial_n;
unsigned int i = 0;
unsigned int bias = initial_bias;
size_t processed_in = 0, written_out = 0;
unsigned int max_out = *pout_length;

unsigned int basic_count = 0;
unsigned int loop;

for (loop = 0; loop < enc_len; loop++) {
if (pEncoded[loop] == delimiter)
basic_count = loop;
}

if (basic_count > 0) {
if (basic_count > max_out)
return 0;

for (loop = 0; loop < basic_count; loop++) {
if (is_basic(pEncoded[loop]) == 0)
return 0;

pDecoded[loop] = pEncoded[loop];
written_out++;
}
processed_in = basic_count + 1;
}

for (loop = processed_in; loop < enc_len;) {
unsigned int oldi = i;
unsigned int w = 1;
unsigned int k, t;
int digit;

for (k = base;; k += base) {
if (loop >= enc_len)
return 0;

digit = digit_decoded(pEncoded[loop]);
loop++;

if (digit < 0)
return 0;
if ((unsigned int)digit > (maxint - i) / w)
return 0;

i = i + digit * w;
t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;

if ((unsigned int)digit < t)
break;

if (w > maxint / (base - t))
return 0;
w = w * (base - t);
}

bias = adapt(i - oldi, written_out + 1, (oldi == 0));
if (i / (written_out + 1) > maxint - n)
return 0;
n = n + i / (written_out + 1);
i %= (written_out + 1);

if (written_out > max_out)
return 0;

memmove(pDecoded + i + 1, pDecoded + i,
(written_out - i) * sizeof *pDecoded);
pDecoded[i] = n;
i++;
written_out++;
}

*pout_length = written_out;
return 1;
}

/*
* Encode a code point using UTF-8
* return number of bytes on success, 0 on failure
* (also produces U+FFFD, which uses 3 bytes on failure)
*/
static int codepoint2utf8(unsigned char *out, unsigned long utf)
{
if (utf <= 0x7F) {
/* Plain ASCII */
out[0] = (unsigned char)utf;
out[1] = 0;
return 1;
} else if (utf <= 0x07FF) {
/* 2-byte unicode */
out[0] = (unsigned char)(((utf >> 6) & 0x1F) | 0xC0);
out[1] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
out[2] = 0;
return 2;
} else if (utf <= 0xFFFF) {
/* 3-byte unicode */
out[0] = (unsigned char)(((utf >> 12) & 0x0F) | 0xE0);
out[1] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
out[2] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
out[3] = 0;
return 3;
} else if (utf <= 0x10FFFF) {
/* 4-byte unicode */
out[0] = (unsigned char)(((utf >> 18) & 0x07) | 0xF0);
out[1] = (unsigned char)(((utf >> 12) & 0x3F) | 0x80);
out[2] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
out[3] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
out[4] = 0;
return 4;
} else {
/* error - use replacement character */
out[0] = (unsigned char)0xEF;
out[1] = (unsigned char)0xBF;
out[2] = (unsigned char)0xBD;
out[3] = 0;
return 0;
}
}

/*-
* Return values:
* 1 - ok, *outlen contains valid buf length
* 0 - ok but buf was too short, *outlen contains valid buf length
* -1 - bad string passed
*/

int ossl_a2ulabel(const char *in, char *out, size_t *outlen)
{
/*-
* Domain name has some parts consisting of ASCII chars joined with dot.
* If a part is shorter than 5 chars, it becomes U-label as is.
* If it does not start with xn--, it becomes U-label as is.
* Otherwise we try to decode it.
*/
char *outptr = out;
const char *inptr = in;
size_t size = 0;
int result = 1;

unsigned int buf[LABEL_BUF_SIZE]; /* It's a hostname */
if (out == NULL)
result = 0;

while (1) {
char *tmpptr = strchr(inptr, '.');
size_t delta = (tmpptr) ? (size_t)(tmpptr - inptr) : strlen(inptr);

if (strncmp(inptr, "xn--", 4) != 0) {
size += delta + 1;

if (size >= *outlen - 1)
result = 0;

if (result > 0) {
memcpy(outptr, inptr, delta + 1);
outptr += delta + 1;
}
} else {
unsigned int bufsize = LABEL_BUF_SIZE;
unsigned int i;

if (ossl_punycode_decode(inptr + 4, delta - 4, buf, &bufsize) <= 0)
return -1;

for (i = 0; i < bufsize; i++) {
unsigned char seed[6];
size_t utfsize = codepoint2utf8(seed, buf[i]);
if (utfsize == 0)
return -1;

size += utfsize;
if (size >= *outlen - 1)
result = 0;

if (result > 0) {
memcpy(outptr, seed, utfsize);
outptr += utfsize;
}
}

if (tmpptr != NULL) {
*outptr = '.';
outptr++;
size++;
if (size >= *outlen - 1)
result = 0;
}
}

if (tmpptr == NULL)
break;

inptr = tmpptr + 1;
}

return result;
}

/*-
* a MUST be A-label
* u MUST be U-label
* Returns 0 if compared values are equal
* 1 if not
* -1 in case of errors
*/

int ossl_a2ucompare(const char *a, const char *u)
{
char a_ulabel[LABEL_BUF_SIZE];
size_t a_size = sizeof(a_ulabel);

if (ossl_a2ulabel(a, a_ulabel, &a_size) <= 0) {
return -1;
}

return (strcmp(a_ulabel, u) == 0) ? 0 : 1;
}