Permalink
Browse files

Merge branch 'master' of github.com:sporkmonger/addressable

  • Loading branch information...
sporkmonger committed Feb 4, 2013
2 parents 654cdf0 + cc3ee19 commit 2069f252a747b89adf70ba9673cb3e39789f1ff0
Showing with 134 additions and 13 deletions.
  1. +1 −0 addressable.gemspec
  2. +4 −4 lib/addressable/template.rb
  3. +52 −9 lib/addressable/uri.rb
  4. +76 −0 spec/addressable/uri_spec.rb
  5. +1 −0 tasks/gem.rake
View
@@ -35,4 +35,5 @@ Gem::Specification.new do |s|
s.add_dependency(%q<rspec>, [">= 2.9.0"])
s.add_dependency(%q<launchy>, [">= 0.3.2"])
end
+ s.license = "Apache License 2.0"
end
@@ -515,11 +515,11 @@ def variable_defaults
private
def ordered_variable_defaults
@ordered_variable_defaults ||= (
- expansions, expansion_regexp = parse_template_pattern(pattern)
+ expansions, _ = parse_template_pattern(pattern)
expansions.map do |capture|
- _, operator, varlist = *capture.match(EXPRESSION)
+ _, _, varlist = *capture.match(EXPRESSION)
varlist.split(',').map do |varspec|
- name = varspec[VARSPEC, 1]
+ varspec[VARSPEC, 1]
end
end.flatten
)
@@ -553,7 +553,7 @@ def transform_partial_capture(mapping, capture, processor = nil)
_, operator, varlist = *capture.match(EXPRESSION)
is_first = true
varlist.split(',').inject('') do |acc, varspec|
- _, name, modifier = *varspec.match(VARSPEC)
+ _, name, _ = *varspec.match(VARSPEC)
value = mapping[name]
if value
operator = '&' if !is_first && operator == '?'
View
@@ -315,6 +315,12 @@ def self.join(*uris)
# value is the reserved plus unreserved character classes specified in
# <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
#
+ # @param [String] upcase_encoded
+ # A string of characters that may already be percent encoded, and whose
+ # encodings should be upcased. This allows normalization of percent
+ # encodings for characters not included in the
+ # <code>character_class</code>.
+ #
# @return [String] The encoded component.
#
# @example
@@ -327,7 +333,8 @@ def self.join(*uris)
# )
# => "simple%2Fexample"
def self.encode_component(component, character_class=
- CharacterClasses::RESERVED + CharacterClasses::UNRESERVED)
+ CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
+ upcase_encoded='')
return nil if component.nil?
begin
@@ -356,9 +363,15 @@ def self.encode_component(component, character_class=
component = component.dup
component.force_encoding(Encoding::ASCII_8BIT)
end
- return component.gsub(character_class) do |sequence|
+ component.gsub!(character_class) do |sequence|

This comment has been minimized.

Show comment Hide comment
@sporkmonger

sporkmonger Apr 15, 2013

Owner

Yeah, this turned out to have some pretty nasty hard-to-debug side-effects ultimately. Lesson learned: Don't accidentally change your inputs.

@sporkmonger

sporkmonger Apr 15, 2013

Owner

Yeah, this turned out to have some pretty nasty hard-to-debug side-effects ultimately. Lesson learned: Don't accidentally change your inputs.

(sequence.unpack('C*').map { |c| "%" + ("%02x" % c).upcase }).join
end
+ if upcase_encoded.length > 0
+ component.gsub!(/%(#{upcase_encoded.chars.map do |char|
+ char.unpack('C*').map { |c| '%02x' % c }.join
+ end.join('|')})/i) { |s| s.upcase }
+ end
+ return component
end
class << self
@@ -380,11 +393,15 @@ class << self
# <code>Addressable::URI</code>. All other values are invalid. Defaults
# to <code>String</code>.
#
+ # @param [String] leave_encoded
+ # A string of characters to leave encoded. If a percent encoded character
+ # in this list is encountered then it will remain percent encoded.
+ #
# @return [String, Addressable::URI]
# The unencoded component or URI.
# The return type is determined by the <code>return_type</code>
# parameter.
- def self.unencode(uri, return_type=String)
+ def self.unencode(uri, return_type=String, leave_encoded='')
return nil if uri.nil?
begin
@@ -398,7 +415,8 @@ def self.unencode(uri, return_type=String)
"got #{return_type.inspect}"
end
result = uri.gsub(/%[0-9a-f]{2}/i) do |sequence|
- sequence[1..3].to_i(16).chr
+ c = sequence[1..3].to_i(16).chr
+ leave_encoded.include?(c) ? sequence : c
end
result.force_encoding("utf-8") if result.respond_to?(:force_encoding)
if return_type == String
@@ -433,6 +451,13 @@ class << self
# value is the reserved plus unreserved character classes specified in
# <a href="http://www.ietf.org/rfc/rfc3986.txt">RFC 3986</a>.
#
+ # @param [String] leave_encoded
+ # When <code>character_class</code> is a <code>String</code> then
+ # <code>leave_encoded</code> is a string of characters that should remain
+ # percent encoded while normalizing the component; if they appear percent
+ # encoded in the original component, then they will be upcased ("%2f"
+ # normalized to "%2F") but otherwise left alone.
+ #
# @return [String] The normalized component.
#
# @example
@@ -447,8 +472,15 @@ class << self
# Addressable::URI::CharacterClasses::UNRESERVED
# )
# => "simple%2Fexample"
+ # Addressable::URI.normalize_component(
+ # "one%20two%2fthree%26four",
+ # "0-9a-zA-Z &/",
+ # "/"
+ # )
+ # => "one two%2Fthree&four"
def self.normalize_component(component, character_class=
- CharacterClasses::RESERVED + CharacterClasses::UNRESERVED)
+ CharacterClasses::RESERVED + CharacterClasses::UNRESERVED,
+ leave_encoded='')
return nil if component.nil?
begin
@@ -462,19 +494,29 @@ def self.normalize_component(component, character_class=
"Expected String or Regexp, got #{character_class.inspect}"
end
if character_class.kind_of?(String)
- character_class = /[^#{character_class}]/
+ leave_re = if leave_encoded.length > 0
+ character_class << '%'
+
+ "|%(?!#{leave_encoded.chars.map do |char|
+ seq = char.unpack('C*').map { |c| '%02x' % c }.join
+ [seq.upcase, seq.downcase]
+ end.flatten.join('|')})"
+ end
+
+ character_class = /[^#{character_class}]#{leave_re}/
end
if component.respond_to?(:force_encoding)
# We can't perform regexps on invalid UTF sequences, but
# here we need to, so switch to ASCII.
component = component.dup
component.force_encoding(Encoding::ASCII_8BIT)
end
- unencoded = self.unencode_component(component)
+ unencoded = self.unencode_component(component, String, leave_encoded)
begin
encoded = self.encode_component(
Addressable::IDNA.unicode_normalize_kc(unencoded),
- character_class
+ character_class,
+ leave_encoded
)
rescue ArgumentError
encoded = self.encode_component(unencoded)
@@ -1391,7 +1433,8 @@ def normalized_query
(self.query.split("&", -1).map do |pair|
Addressable::URI.normalize_component(
pair,
- Addressable::URI::CharacterClasses::QUERY.sub("\\&", "")
+ Addressable::URI::CharacterClasses::QUERY.sub("\\&", ""),
+ '+'
)
end).join("&")
end)
@@ -3160,6 +3160,10 @@ def to_s
it "should have query_values of {'q' => 'a b'}" do
@uri.query_values.should == {'q' => 'a b'}
end
+
+ it "should have a normalized query of 'q=a+b'" do
+ @uri.normalized_query.should == "q=a+b"
+ end
end
describe Addressable::URI, "when parsed from " +
@@ -3175,6 +3179,43 @@ def to_s
it "should have query_values of {'q' => 'a+b'}" do
@uri.query_values.should == {'q' => 'a+b'}
end
+
+ it "should have a normalized query of 'q=a%2Bb'" do
+ @uri.normalized_query.should == "q=a%2Bb"
+ end
+end
+
+describe Addressable::URI, "when parsed from " +
+ "'http://example.com/?v=%7E&w=%&x=%25&y=%2B&z=C%CC%A7'" do
+ before do
+ @uri = Addressable::URI.parse("http://example.com/?v=%7E&w=%&x=%25&y=%2B&z=C%CC%A7")
+ end
+
+ it "should have a normalized query of 'v=~&w=%25&x=%25&y=%2B&z=%C3%87'" do
+ @uri.normalized_query.should == "v=~&w=%25&x=%25&y=%2B&z=%C3%87"
+ end
+end
+
+describe Addressable::URI, "when parsed from " +
+ "'http://example.com/?v=%7E&w=%&x=%25&y=+&z=C%CC%A7'" do
+ before do
+ @uri = Addressable::URI.parse("http://example.com/?v=%7E&w=%&x=%25&y=+&z=C%CC%A7")
+ end
+
+ it "should have a normalized query of 'v=~&w=%25&x=%25&y=+&z=%C3%87'" do
+ @uri.normalized_query.should == "v=~&w=%25&x=%25&y=+&z=%C3%87"
+ end
+end
+
+describe Addressable::URI, "when parsed from " +
+ "'http://example.com/sound%2bvision'" do
+ before do
+ @uri = Addressable::URI.parse("http://example.com/sound%2bvision")
+ end
+
+ it "should have a normalized path of '/sound+vision'" do
+ @uri.normalized_path.should == '/sound+vision'
+ end
end
describe Addressable::URI, "when parsed from " +
@@ -4633,6 +4674,19 @@ def to_str
end
end
+describe Addressable::URI, "when normalizing a string but leaving some characters encoded" do
+ it "should result in correct percent encoded sequence" do
+ Addressable::URI.normalize_component("%58X%59Y%5AZ", "0-9a-zXY", "Y").should ==
+ "XX%59Y%5A%5A"
+ end
+end
+
+describe Addressable::URI, "when encoding a string with existing encodings to upcase" do
+ it "should result in correct percent encoded sequence" do
+ Addressable::URI.encode_component("JK%4c", "0-9A-IKM-Za-z%", "L").should == "%4AK%4C"
+ end
+end
+
describe Addressable::URI, "when encoding a multibyte string" do
it "should result in correct percent encoded sequence" do
Addressable::URI.encode_component("günther").should == "g%C3%BCnther"
@@ -4678,6 +4732,20 @@ def to_str
end
end
+describe Addressable::URI, "when partially unencoding a string" do
+ it "should unencode all characters by default" do
+ Addressable::URI.unencode('%%25~%7e+%2b', String).should == '%%~~++'
+ end
+
+ it "should unencode characters not in leave_encoded" do
+ Addressable::URI.unencode('%%25~%7e+%2b', String, '~').should == '%%~%7e++'
+ end
+
+ it "should leave characters in leave_encoded alone" do
+ Addressable::URI.unencode('%%25~%7e+%2b', String, '%~+').should == '%%25~%7e+%2b'
+ end
+end
+
describe Addressable::URI, "when unencoding a bogus object" do
it "should raise a TypeError" do
(lambda do
@@ -5025,6 +5093,14 @@ def to_str
@uri.query_values = @uri.query_values(Array)
@uri.to_s.should == original_uri
end
+
+ describe 'when a hash with mixed types is assigned to query_values' do
+ it 'should not raise an error' do
+ pending 'Issue #94' do
+ expect { subject.query_values = { "page" => "1", :page => 2 } }.to_not raise_error ArgumentError
+ end
+ end
+ end
end
describe Addressable::URI, "when assigning path values" do
View
@@ -28,6 +28,7 @@ namespace :gem do
s.email = "bob@sporkmonger.com"
s.homepage = RUBY_FORGE_URL
s.rubyforge_project = RUBY_FORGE_PROJECT
+ s.license = "Apache License 2.0"
end
Gem::PackageTask.new(GEM_SPEC) do |p|

0 comments on commit 2069f25

Please sign in to comment.