From 9950bc39365e809b385b3ba94cf3eca2c5be8e02 Mon Sep 17 00:00:00 2001
From: Andreas Rumpf
Date: Mon, 13 Jun 2016 13:37:49 +0200
Subject: [PATCH] fixes #3045

---
 lib/pure/punycode.nim              | 166 +++++++++++++++++++++++++++++
 web/news/version_0_15_released.rst |   2 +
 2 files changed, 168 insertions(+)
 create mode 100644 lib/pure/punycode.nim

diff --git a/lib/pure/punycode.nim b/lib/pure/punycode.nim
new file mode 100644
index 000000000000..4f35de48734a
--- /dev/null
+++ b/lib/pure/punycode.nim
@@ -0,0 +1,166 @@
+
+import strutils
+import unicode
+
+# issue #3045
+
+const
+  Base = 36
+  TMin = 1
+  TMax = 26
+  Skew = 38
+  Damp = 700
+  InitialBias = 72
+  InitialN = 128
+  Delimiter = '-'
+
+type
+  PunyError* = object of Exception
+
+proc decodeDigit(x: char): int {.raises: [PunyError].} =
+  if '0' <= x and x <= '9':
+    result = ord(x) - (ord('0') - 26)
+  elif 'A' <= x and x <= 'Z':
+    result = ord(x) - ord('A')
+  elif 'a' <= x and x <= 'z':
+    result = ord(x) - ord('a')
+  else:
+    raise newException(PunyError, "Bad input")
+
+proc encodeDigit(digit: int): Rune {.raises: [PunyError].} =
+  if 0 <= digit and digit < 26:
+    result = Rune(digit + ord('a'))
+  elif 26 <= digit and digit < 36:
+    result = Rune(digit + (ord('0') - 26))
+  else:
+    raise newException(PunyError, "internal error in punycode encoding")
+
+proc isBasic(c: char): bool = ord(c) < 0x80
+proc isBasic(r: Rune): bool = int(r) < 0x80
+
+proc adapt(delta, numPoints: int, first: bool): int =
+  var d = if first: delta div Damp else: delta div 2
+  d += d div numPoints
+  var k = 0
+  while d > ((Base-TMin)*TMax) div 2:
+    d = d div (Base - TMin)
+    k += Base
+  result = k + (Base - TMin + 1) * d div (d + Skew)
+
+proc encode*(prefix, s: string): string {.raises: [PunyError].} =
+  ## Encode a string that may contain Unicode.
+  ## Prepend `prefix` to the result
+  result = prefix
+  var (d, n, bias) = (0, InitialN, InitialBias)
+  var (b, remaining) = (0, 0)
+  for r in s.runes:
+    if r.isBasic:
+      # basic Ascii character
+      inc b
+      result.add($r)
+    else:
+      # special character
+      inc remaining
+
+  var h = b
+  if b > 0:
+    result.add(Delimiter) # we have some Ascii chars
+  while remaining != 0:
+    var m: int = high(int32)
+    for r in s.runes:
+      if m > int(r) and int(r) >= n:
+        m = int(r)
+    d += (m - n) * (h + 1)
+    if d < 0:
+      raise newException(PunyError, "invalid label " & s)
+    n = m
+    for r in s.runes:
+      if int(r) < n:
+        inc d
+        if d < 0:
+          raise newException(PunyError, "invalid label " & s)
+        continue
+      if int(r) > n:
+        continue
+      var q = d
+      var k = Base
+      while true:
+        var t = k - bias
+        if t < TMin:
+          t = TMin
+        elif t > TMax:
+          t = TMax
+        if q < t:
+          break
+        result.add($encodeDigit(t + (q - t) mod (Base - t)))
+        q = (q - t) div (Base - t)
+        k += Base
+      result.add($encodeDigit(q))
+      bias = adapt(d, h + 1, h == b)
+      d = 0
+      inc h
+      dec remaining
+    inc d
+    inc n
+
+proc encode*(s: string): string {.raises: [PunyError].} =
+  ## Encode a string that may contain Unicode. Prefix is empty.
+  result = encode("", s)
+
+proc decode*(encoded: string): string {.raises: [PunyError].} =
+  ## Decode a Punycode-encoded string
+  var
+    n = InitialN
+    i = 0
+    bias = InitialBias
+  var d = rfind(encoded, Delimiter)
+  result = ""
+
+  if d > 0:
+    # found Delimiter
+    for j in 0..<d:
+      var c = encoded[j] # char
+      if not c.isBasic:
+        raise newException(PunyError, "Encoded contains a non-basic char")
+      result.add(c) # add the character
+    inc d
+  else:
+    d = 0 # set to first index
+
+  while (d < len(encoded)):
+    var oldi = i
+    var w = 1
+    var k = Base
+    while true:
+      if d == len(encoded):
+        raise newException(PunyError, "Bad input: " & encoded)
+      var c = encoded[d]; inc d
+      var digit = int(decodeDigit(c))
+      if digit > (high(int32) - i) div w:
+        raise newException(PunyError, "Too large a value: " & $digit)
+      i += digit * w
+      var t: int
+      if k <= bias:
+        t = TMin
+      elif k >= bias + TMax:
+        t = TMax
+      else:
+        t = k - bias
+      if digit < t:
+        break
+      w *= Base - t
+      k += Base
+    bias = adapt(i - oldi, runelen(result) + 1, oldi == 0)
+
+    if i div (runelen(result) + 1) > high(int32) - n:
+      raise newException(PunyError, "Value too large")
+
+    n += i div (runelen(result) + 1)
+    i = i mod (runelen(result) + 1)
+    insert(result, $Rune(n), i)
+    inc i
+
+when isMainModule:
+  assert(decode(encode("", "bücher")) == "bücher")
+  assert(decode(encode("münchen")) == "münchen")
+  assert encode("xn--", "münchen") == "xn--mnchen-3ya"
diff --git a/web/news/version_0_15_released.rst b/web/news/version_0_15_released.rst
index 5b515e748f95..7adc545d437e 100644
--- a/web/news/version_0_15_released.rst
+++ b/web/news/version_0_15_released.rst
@@ -10,6 +10,8 @@ Some text here.
 Changes affecting backwards compatibility
 -----------------------------------------
 
+- De-deprecated ``re.nim`` because we have too much code using it
+  and it got the basic API right.
 
 Library Additions
 -----------------