Skip to content

Commit

Permalink
Split form/query parsing into two steps (#2038)
Browse files Browse the repository at this point in the history
* Split form/query parsing into two steps

First we parse the raw input into a stream of [key, value] pairs, and
only after that do we expand that into the deep params hash.

This allows a user to operate directly on the pair stream if they need
to apply different semantics, without needing to rewind the input, and
without creating a conflict with anything else (like a middleware) that
wants to use Rack's standard GET / POST hash format.
  • Loading branch information
matthewd authored and ioquatix committed Mar 12, 2023
1 parent 91f0c4b commit 7215fa7
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 42 deletions.
2 changes: 2 additions & 0 deletions lib/rack/constants.rb
Expand Up @@ -54,11 +54,13 @@ module Rack
RACK_RESPONSE_FINISHED = 'rack.response_finished'
RACK_REQUEST_FORM_INPUT = 'rack.request.form_input'
RACK_REQUEST_FORM_HASH = 'rack.request.form_hash'
RACK_REQUEST_FORM_PAIRS = 'rack.request.form_pairs'
RACK_REQUEST_FORM_VARS = 'rack.request.form_vars'
RACK_REQUEST_FORM_ERROR = 'rack.request.form_error'
RACK_REQUEST_COOKIE_HASH = 'rack.request.cookie_hash'
RACK_REQUEST_COOKIE_STRING = 'rack.request.cookie_string'
RACK_REQUEST_QUERY_HASH = 'rack.request.query_hash'
RACK_REQUEST_QUERY_PAIRS = 'rack.request.query_pairs'
RACK_REQUEST_QUERY_STRING = 'rack.request.query_string'
RACK_METHODOVERRIDE_ORIGINAL_METHOD = 'rack.methodoverride.original_method'
end
25 changes: 25 additions & 0 deletions lib/rack/multipart.rb
Expand Up @@ -13,6 +13,31 @@ module Rack
module Multipart
MULTIPART_BOUNDARY = "AaB03x"

# Accumulator for multipart form data, conforming to the QueryParser API.
# In future, the Parser could return the pair list directly, but that would
# change its API.
class ParamList # :nodoc:
  class << self
    # Builds the empty accumulator that parsed pairs get appended to.
    def make_params
      new
    end

    # Records +key+ and +value+ as a single pair on +params+. No nested
    # expansion of the key is performed here.
    def normalize_params(params, key, value)
      params << [key, value]
    end
  end

  def initialize
    @pairs = []
  end

  # Appends one [key, value] pair and returns the underlying pair array.
  def <<(pair)
    @pairs.push(pair)
  end

  # Despite the name, this returns the accumulated array of pairs, not a Hash.
  def to_params_hash
    @pairs
  end
end

class << self
def parse_multipart(env, params = Rack::Utils.default_query_parser)
io = env[RACK_INPUT]
Expand Down
51 changes: 34 additions & 17 deletions lib/rack/query_parser.rb
Expand Up @@ -37,19 +37,42 @@ def initialize(params_class, _key_space_limit=(not_deprecated = true; nil), para
@param_depth_limit = param_depth_limit
end

# Stolen from Mongrel, with some small modifications:
# Originally stolen from Mongrel, now with some modifications:
# Parses a query string by breaking it up at the '&'. You can also use this
# to parse cookies by changing the characters used in the second parameter
# (which defaults to '&').
def parse_query(qs, separator = nil, &unescaper)
unescaper ||= method(:unescape)
#
# Returns an array of 2-element arrays, where the first element is the
# key and the second element is the value.
def split_query(qs, separator = nil, &unescaper)
  pairs = []

  # A nil or empty query string yields no pairs.
  if qs && !qs.empty?
    # Fall back to this parser's own unescape when no block is supplied.
    unescaper ||= method(:unescape)

    # Use a known separator regexp when one exists, otherwise build one that
    # splits on any of the given characters followed by optional spaces.
    splitter = separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP

    qs.split(splitter).each do |p|
      # Skip empty segments such as those produced by "&&".
      next if p.empty?
      pair = p.split('=', 2).map!(&unescaper)
      # A bare key with no '=' gets a nil value; "key=" keeps "".
      pair << nil if pair.length == 1
      pairs << pair
    end
  end

  pairs
rescue ArgumentError => e
  # Any ArgumentError raised while splitting or unescaping is re-raised as
  # InvalidParameterError, keeping the original message and backtrace.
  raise InvalidParameterError, e.message, e.backtrace
end

(qs || '').split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p|
next if p.empty?
k, v = p.split('=', 2).map!(&unescaper)
# Parses a query string by breaking it up at the '&'. You can also use this
# to parse cookies by changing the characters used in the second parameter
# (which defaults to '&').
#
# Returns a hash where each value is a string (when a key only appears once)
# or an array of strings (when a key appears more than once).
def parse_query(qs, separator = nil, &unescaper)
params = make_params

split_query(qs, separator, &unescaper).each do |k, v|
if cur = params[k]
if cur.class == Array
params[k] << v
Expand All @@ -61,7 +84,7 @@ def parse_query(qs, separator = nil, &unescaper)
end
end

return params.to_h
params.to_h
end

# parse_nested_query expands a query string into structural types. Supported
Expand All @@ -72,17 +95,11 @@ def parse_query(qs, separator = nil, &unescaper)
def parse_nested_query(qs, separator = nil)
params = make_params

unless qs.nil? || qs.empty?
(qs || '').split(separator ? (COMMON_SEP[separator] || /[#{separator}] */n) : DEFAULT_SEP).each do |p|
k, v = p.split('=', 2).map! { |s| unescape(s) }

_normalize_params(params, k, v, 0)
end
split_query(qs, separator).each do |k, v|
_normalize_params(params, k, v, 0)
end

return params.to_h
rescue ArgumentError => e
raise InvalidParameterError, e.message, e.backtrace
params.to_h
end

# normalize_params recursively expands parameters into structural types. If
Expand Down
84 changes: 69 additions & 15 deletions lib/rack/request.rb
Expand Up @@ -483,11 +483,22 @@ def parseable_data?
# Returns the data received in the query string.
def GET
  # Reuse the memoized hash only while it still belongs to the current
  # query string.
  if get_header(RACK_REQUEST_QUERY_STRING) == query_string
    query_hash = get_header(RACK_REQUEST_QUERY_HASH)
    return query_hash if query_hash
  end

  # Otherwise expand the [key, value] pair list into the params hash and
  # memoize the result.
  set_header(RACK_REQUEST_QUERY_HASH, expand_params(query_param_list))
end

def query_param_list
  if get_header(RACK_REQUEST_QUERY_STRING) == query_string
    # The pairs were already split for this exact query string.
    get_header(RACK_REQUEST_QUERY_PAIRS)
  else
    query_pairs = split_query(query_string, '&')
    set_header RACK_REQUEST_QUERY_STRING, query_string
    # Clear any hash memoized for a previous query string; GET rebuilds it
    # from the pairs on demand.
    set_header RACK_REQUEST_QUERY_HASH, nil
    set_header(RACK_REQUEST_QUERY_PAIRS, query_pairs)
  end
end

Expand All @@ -496,32 +507,53 @@ def GET
# This method supports both application/x-www-form-urlencoded and
# multipart/form-data.
def POST
  # The memoized hash is only valid for the very same input object.
  same_input = get_header(RACK_REQUEST_FORM_INPUT).equal?(get_header(RACK_INPUT))
  memoized = get_header(RACK_REQUEST_FORM_HASH) if same_input
  return memoized if memoized

  # Expand the body's [key, value] pairs into the params hash and memoize it.
  set_header(RACK_REQUEST_FORM_HASH, expand_params(body_param_list))
end

def body_param_list
if error = get_header(RACK_REQUEST_FORM_ERROR)
raise error.class, error.message, cause: error.cause
end

begin
if get_header(RACK_INPUT).nil?
raise "Missing rack.input"
elsif get_header(RACK_REQUEST_FORM_INPUT) == get_header(RACK_INPUT)
get_header(RACK_REQUEST_FORM_HASH)
rack_input = get_header(RACK_INPUT)

form_pairs = nil

# If the form data has already been memoized from the same
# input:
if get_header(RACK_REQUEST_FORM_INPUT).equal?(rack_input)
if form_pairs = get_header(RACK_REQUEST_FORM_PAIRS)
return form_pairs
end
end

if rack_input.nil?
form_pairs = []
elsif form_data? || parseable_data?
unless set_header(RACK_REQUEST_FORM_HASH, parse_multipart)
form_vars = get_header(RACK_INPUT).read
unless form_pairs = Rack::Multipart.extract_multipart(self, Rack::Multipart::ParamList)
form_vars = rack_input.read

# Fix for Safari Ajax postings that always append \0
# form_vars.sub!(/\0\z/, '') # performance replacement:
form_vars.slice!(-1) if form_vars.end_with?("\0")

set_header RACK_REQUEST_FORM_VARS, form_vars
set_header RACK_REQUEST_FORM_HASH, parse_query(form_vars, '&')
form_pairs = split_query(form_vars, '&')
end
set_header RACK_REQUEST_FORM_INPUT, get_header(RACK_INPUT)
get_header RACK_REQUEST_FORM_HASH
else
set_header RACK_REQUEST_FORM_INPUT, get_header(RACK_INPUT)
set_header(RACK_REQUEST_FORM_HASH, {})
form_pairs = []
end

set_header RACK_REQUEST_FORM_INPUT, rack_input
set_header RACK_REQUEST_FORM_HASH, nil
set_header(RACK_REQUEST_FORM_PAIRS, form_pairs)
rescue => error
set_header(RACK_REQUEST_FORM_ERROR, error)
raise
Expand Down Expand Up @@ -661,6 +693,28 @@ def parse_multipart
Rack::Multipart.extract_multipart(self, query_parser)
end

def split_query(query, d = '&')
  # Prefer the request's own parser; fall back step by step to one that
  # actually implements split_query.
  parser = query_parser
  parser = Utils.default_query_parser unless parser.respond_to?(:split_query)
  parser = QueryParser.make_default(0) unless parser.respond_to?(:split_query)

  parser.split_query(query, d)
end

def expand_params(pairs, query_parser = query_parser())
  # Start from the parser's own empty params container.
  accumulator = query_parser.make_params

  # Feed every [key, value] pair through the parser, in order.
  pairs.each { |name, val| query_parser.normalize_params(accumulator, name, val) }

  accumulator.to_params_hash
end

def split_header(value)
  # A missing header yields no entries.
  return [] unless value

  # Entries are separated by commas and/or whitespace.
  value.strip.split(/[,\s]+/)
end
Expand Down
16 changes: 6 additions & 10 deletions test/spec_request.rb
Expand Up @@ -572,11 +572,12 @@ def self.req(headers)
end

it "parse the query string" do
  req = make_request(Rack::MockRequest.env_for("/?foo=bar&quux=bla"))
  req.query_string.must_equal "foo=bar&quux=bla"
  req.GET.must_equal "foo" => "bar", "quux" => "bla"
  req.POST.must_be :empty?
  req.params.must_equal "foo" => "bar", "quux" => "bla"

  # A bare key ("nothing") carries no value and is reported as nil, while
  # "empty=" has an explicit empty value and stays "". The hash views must
  # agree with the pair list on this.
  request = make_request(Rack::MockRequest.env_for("/?foo=bar&quux=bla&nothing&empty="))
  request.query_string.must_equal "foo=bar&quux=bla&nothing&empty="
  request.GET.must_equal "foo" => "bar", "quux" => "bla", "nothing" => nil, "empty" => ""
  request.POST.must_be :empty?
  request.params.must_equal "foo" => "bar", "quux" => "bla", "nothing" => nil, "empty" => ""
  request.query_param_list.must_equal [["foo", "bar"], ["quux", "bla"], ["nothing", nil], ["empty", ""]]
end

it "not truncate query strings containing semi-colons #543 only in POST" do
Expand Down Expand Up @@ -696,11 +697,6 @@ def initialize(*)
message.must_equal "invalid %-encoding (a%)"
end

it "raise if rack.input is missing" do
  # An env with no rack.input at all.
  request_without_input = make_request({})
  read_form = lambda { request_without_input.POST }
  read_form.must_raise RuntimeError
end

it "parse POST data when method is POST and no content-type given" do
req = make_request \
Rack::MockRequest.env_for("/?foo=quux",
Expand Down

0 comments on commit 7215fa7

Please sign in to comment.