Skip to content

Commit

Permalink
The native IDNA implementation freaks out about two adjacent dots.
Browse files Browse the repository at this point in the history
  • Loading branch information
sporkmonger committed Mar 24, 2014
1 parent 9d1e577 commit 350f88e
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 5 deletions.
22 changes: 17 additions & 5 deletions lib/addressable/idna/native.rb
Expand Up @@ -21,23 +21,35 @@
module Addressable
  # IDNA helpers that delegate to the native IDN bindings
  # (IDN::Punycode, IDN::Stringprep and IDN::Idna).
  #
  # Every entry point coerces its argument with #to_s before handing it to
  # the native layer, so Symbols and other non-String values are accepted.
  module IDNA
    # Punycode-encodes the given value.
    #
    # @param value [#to_s] The text to encode.
    # @return The result of IDN::Punycode.encode for the coerced string.
    def self.punycode_encode(value)
      IDN::Punycode.encode(value.to_s)
    end

    # Decodes a Punycode value.
    #
    # @param value [#to_s] The Punycode text to decode.
    # @return The result of IDN::Punycode.decode for the coerced string.
    def self.punycode_decode(value)
      IDN::Punycode.decode(value.to_s)
    end

    # Applies NFKC normalization to the given value.
    #
    # @param value [#to_s] The text to normalize.
    # @return The result of IDN::Stringprep.nfkc_normalize for the coerced
    #   string.
    def self.unicode_normalize_kc(value)
      IDN::Stringprep.nfkc_normalize(value.to_s)
    end

    # Converts a host name to its ASCII form, one label at a time.
    #
    # The whole host name is never passed to IDN::Idna.toASCII: the native
    # implementation reportedly fails on two adjacent dots, so empty labels
    # are passed through untouched and only non-empty labels are converted.
    #
    # @param value [#to_s] The host name to convert.
    # @return [String] The converted labels re-joined with '.'.
    def self.to_ascii(value)
      # A limit of -1 keeps trailing empty labels, so "example.com." keeps
      # its final dot after the join.
      value.to_s.split('.', -1).map do |segment|
        segment.empty? ? '' : IDN::Idna.toASCII(segment)
      end.join('.')
    end

    # Converts a host name to its Unicode form, one label at a time.
    #
    # Mirrors .to_ascii: empty labels (adjacent or trailing dots) are kept
    # as-is and only non-empty labels are handed to IDN::Idna.toUnicode.
    #
    # @param value [#to_s] The host name to convert.
    # @return [String] The converted labels re-joined with '.'.
    def self.to_unicode(value)
      value.to_s.split('.', -1).map do |segment|
        segment.empty? ? '' : IDN::Idna.toUnicode(segment)
      end.join('.')
    end
  end
end
4 changes: 4 additions & 0 deletions lib/addressable/idna/pure.rb
Expand Up @@ -64,6 +64,7 @@ module IDNA
# Converts from a Unicode internationalized domain name to an ASCII
# domain name as described in RFC 3490.
def self.to_ascii(input)
input = input.to_s unless input.is_a?(String)
input = input.dup
if input.respond_to?(:force_encoding)
input.force_encoding(Encoding::ASCII_8BIT)
Expand All @@ -89,6 +90,7 @@ def self.to_ascii(input)
# Converts from an ASCII domain name to a Unicode internationalized
# domain name as described in RFC 3490.
def self.to_unicode(input)
input = input.to_s unless input.is_a?(String)
parts = input.split('.')
parts.map! do |part|
if part =~ /^#{ACE_PREFIX}/
Expand Down Expand Up @@ -121,6 +123,7 @@ def self.unicode_normalize_kc(input)
# The input string.
# @return [String] The downcased result.
def self.unicode_downcase(input)
input = input.to_s unless input.is_a?(String)
unpacked = input.unpack("U*")
unpacked.map! { |codepoint| lookup_unicode_lowercase(codepoint) }
return unpacked.pack("U*")
Expand Down Expand Up @@ -376,6 +379,7 @@ class PunycodeBigOutput < StandardError; end
class PunycodeOverflow < StandardError; end

def self.punycode_encode(unicode)
unicode = unicode.to_s unless unicode.is_a?(String)
input = unicode.unpack("U*")
output = [0] * (ACE_MAX_LENGTH + 1)
input_length = input.size
Expand Down
12 changes: 12 additions & 0 deletions spec/addressable/idna_spec.rb
Expand Up @@ -126,6 +126,12 @@
"\357\276\257"
).should == "xn--4ud"
end

# Regression example: a host name with an empty label (two adjacent dots)
# must come back from to_ascii unchanged.
it "should handle two adjacent '.'s correctly" do
Addressable::IDNA.to_ascii(
"example..host"
).should == "example..host"
end
end

shared_examples_for "converting from ASCII to unicode" do
Expand Down Expand Up @@ -189,6 +195,12 @@
).should == "\341\206\265"
end

# Regression example: a host name with an empty label (two adjacent dots)
# must come back from to_unicode unchanged.
it "should handle two adjacent '.'s correctly" do
Addressable::IDNA.to_unicode(
"example..host"
).should == "example..host"
end

it "should normalize 'string' correctly" do
Addressable::IDNA.unicode_normalize_kc(:'string').should == "string"
Addressable::IDNA.unicode_normalize_kc("string").should == "string"
Expand Down

0 comments on commit 350f88e

Please sign in to comment.