From c1e5fbbb59101c039e8b657c8052e152c572d5ac Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Wed, 26 Jan 2022 18:35:30 -0800 Subject: [PATCH] Avoid use of regexps for parsing parameter keys This avoids a RegexpError when trying to parse long key input. It also avoids an invalid InvalidParameterError when trying to parse non-UTF8 keys, which was only raised previously because regexps were used without marking them as ASCII-8BIT. Flip the depth parameter to QueryParser#normalize_params to be the current parsing depth, instead of a downward counter from the maximum depth. Fix a bunch of questionable behavior in parameter parsing when using [ and ] outside cases that are expected. Treat [ and ] as normal characters if the occur outside expected usage. This leaves one questionable parameter parsing behavior that also existed previously, which is that: a[b]c is parsed the same as a[b][c]. Fixes #1704. --- lib/rack/multipart/parser.rb | 2 +- lib/rack/query_parser.rb | 56 +++++++++++++++++++++++++++++------- test/spec_request.rb | 16 +++++++++++ test/spec_utils.rb | 19 ++++++++++-- 4 files changed, 78 insertions(+), 15 deletions(-) diff --git a/lib/rack/multipart/parser.rb b/lib/rack/multipart/parser.rb index f06a2ab3f..31724fc32 100644 --- a/lib/rack/multipart/parser.rb +++ b/lib/rack/multipart/parser.rb @@ -205,7 +205,7 @@ def result @collector.each do |part| part.get_data do |data| tag_multipart_encoding(part.filename, part.content_type, part.name, data) - @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit) + @query_parser.normalize_params(@params, part.name, data, 0) end end MultipartInfo.new @params.to_params_hash, @collector.find_all(&:file?).map(&:body) diff --git a/lib/rack/query_parser.rb b/lib/rack/query_parser.rb index 9a85a4cae..239bcf53e 100644 --- a/lib/rack/query_parser.rb +++ b/lib/rack/query_parser.rb @@ -72,7 +72,7 @@ def parse_nested_query(qs, separator = nil) (qs || '').split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p| k, v = p.split('=', 2).map! { |s| unescape(s) } - normalize_params(params, k, v, param_depth_limit) + normalize_params(params, k, v, 0) end end @@ -85,11 +85,37 @@ def parse_nested_query(qs, separator = nil) # the structural types represented by two different parameter names are in # conflict, a ParameterTypeError is raised. def normalize_params(params, name, v, depth) - raise RangeError if depth <= 0 + raise RangeError if depth >= param_depth_limit + + if !name + # nil name, treat same as empty string (required by tests) + k = after = '' + elsif depth == 0 + # Start of parsing, don't treat [] or [ at start of string specially + if start = name.index('[', 1) + # Start of parameter nesting, use part before brackets as key + k = name[0, start] + after = name[start, name.length] + else + # Plain parameter with no nesting + k = name + after = '' + end + elsif name.start_with?('[]') + # Array nesting + k = '[]' + after = name[2, name.length] + elsif name.start_with?('[') && (start = name.index(']', 1)) + # Hash nesting, use the part inside brackets as the key + k = name[1, start-1] + after = name[start+1, name.length] + else + # Probably malformed input, nested but not starting with [ + # treat full name as key for backwards compatibility. + k = name + after = '' + end - name =~ %r(\A[\[\]]*([^\[\]]+)\]*) - k = $1 || '' - after = $' || '' v ||= String.new if k.empty? @@ -101,26 +127,34 @@ def normalize_params(params, name, v, depth) end if after == '' - params[k] = v + if k == '[]' && depth != 0 + return [v] + else + params[k] = v + end elsif after == "[" params[name] = v elsif after == "[]" params[k] ||= [] raise ParameterTypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array) params[k] << v - elsif after =~ %r(^\[\]\[([^\[\]]+)\]$) || after =~ %r(^\[\](.+)$) - child_key = $1 + elsif after.start_with?('[]') + # Recognize x[][y] (hash inside array) parameters + unless after[2] == '[' && after.end_with?(']') && (child_key = after[3, after.length-4]) && !child_key.empty? && !child_key.index('[') && !child_key.index(']') + # Handle other nested array parameters + child_key = after[2, after.length] + end params[k] ||= [] raise ParameterTypeError, "expected Array (got #{params[k].class.name}) for param `#{k}'" unless params[k].is_a?(Array) if params_hash_type?(params[k].last) && !params_hash_has_key?(params[k].last, child_key) - normalize_params(params[k].last, child_key, v, depth - 1) + normalize_params(params[k].last, child_key, v, depth + 1) else - params[k] << normalize_params(make_params, child_key, v, depth - 1) + params[k] << normalize_params(make_params, child_key, v, depth + 1) end else params[k] ||= make_params raise ParameterTypeError, "expected Hash (got #{params[k].class.name}) for param `#{k}'" unless params_hash_type?(params[k]) - params[k] = normalize_params(params[k], after, v, depth - 1) + params[k] = normalize_params(params[k], after, v, depth + 1) end params diff --git a/test/spec_request.rb b/test/spec_request.rb index 7371823a3..d0b0df342 100644 --- a/test/spec_request.rb +++ b/test/spec_request.rb @@ -1527,6 +1527,22 @@ def params end } + (24..27).each do |exp| + length = 2**exp + it "handles ASCII NUL input of #{length} bytes" do + mr = Rack::MockRequest.env_for("/", + "REQUEST_METHOD" => 'POST', + :input => "\0"*length) + req = make_request mr + req.query_string.must_equal "" + req.GET.must_be :empty? + keys = req.POST.keys + keys.length.must_equal 1 + keys.first.length.must_equal(length-1) + keys.first.must_equal("\0"*(length-1)) + end + end + class NonDelegate < Rack::Request def delegate?; false; end end diff --git a/test/spec_utils.rb b/test/spec_utils.rb index a26739276..b2d75f271 100644 --- a/test/spec_utils.rb +++ b/test/spec_utils.rb @@ -259,9 +259,8 @@ def assert_nested_query(exp, act) must_raise(Rack::Utils::ParameterTypeError). message.must_equal "expected Array (got String) for param `y'" - lambda { Rack::Utils.parse_nested_query("foo%81E=1") }. - must_raise(Rack::Utils::InvalidParameterError). - message.must_equal "invalid byte sequence in UTF-8" + Rack::Utils.parse_nested_query("foo%81E=1"). + must_equal "foo\x81E"=>"1" end it "only moves to a new array when the full key has been seen" do @@ -276,6 +275,20 @@ def assert_nested_query(exp, act) ] end + it "handles unexpected use of [ and ] in parameter keys as normal characters" do + Rack::Utils.parse_nested_query("[]=1&[a]=2&b[=3&c]=4"). + must_equal "[]" => "1", "[a]" => "2", "b[" => "3", "c]" => "4" + + Rack::Utils.parse_nested_query("d[[]=5&e][]=6&f[[]]=7"). + must_equal "d" => {"[" => "5"}, "e]" => ["6"], "f" => { "[" => { "]" => "7" } } + + Rack::Utils.parse_nested_query("g[h]i=8&j[k]l[m]=9"). + must_equal "g" => { "h" => { "i" => "8" } }, "j" => { "k" => { "l[m]" =>"9" } } + + Rack::Utils.parse_nested_query("l[[[[[[[[]]]]]]]=10"). + must_equal "l"=>{"[[[[[[["=>{"]]]]]]"=>"10"}} + end + it "allow setting the params hash class to use for parsing query strings" do begin default_parser = Rack::Utils.default_query_parser