Skip to content

Commit

Permalink
Fix request URL normalisation for bare domain and 8-bit characters (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
c960657 committed Aug 2, 2023
1 parent 2cbdff9 commit 8891d89
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 9 deletions.
10 changes: 8 additions & 2 deletions app/lib/request.rb
Expand Up @@ -76,8 +76,8 @@ class Request
HTTP::URI.new(
scheme: uri.normalized_scheme,
authority: uri.normalized_authority,
path: Addressable::URI.normalize_path(uri.path),
query: uri.query
path: Addressable::URI.normalize_path(encode_non_ascii(uri.path)).presence || '/',
query: encode_non_ascii(uri.query)
)
end

Expand Down Expand Up @@ -151,6 +151,12 @@ def valid_url?(url)
%w(http https).include?(parsed_url.scheme) && parsed_url.host.present?
end

NON_ASCII_PATTERN = /[^\x00-\x7F]+/

def encode_non_ascii(str)
str&.gsub(NON_ASCII_PATTERN) { |substr| CGI.escape(substr.encode(Encoding::UTF_8)) }
end

def http_client
HTTP.use(:auto_inflate).use(normalize_uri: { normalizer: URI_NORMALIZER }).follow(max_hops: 3)
end
Expand Down
67 changes: 60 additions & 7 deletions spec/lib/request_spec.rb
Expand Up @@ -95,6 +95,33 @@
end
end

context 'with bare domain URL' do
let(:url) { 'http://example.com' }

before do
stub_request(:get, 'http://example.com')
end

it 'normalizes path' do
subject.perform do |response|
expect(response.request.uri.path).to eq '/'
end
end

it 'normalizes path used for request signing' do
subject.perform

headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /'
end

it 'normalizes path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET / HTTP/1.1'
end
end
end

context 'with unnormalized URL' do
let(:url) { 'HTTP://EXAMPLE.com:80/foo%41%3A?bar=%41%3A#baz' }

Expand All @@ -114,18 +141,31 @@
end
end

it 'does modify path' do
it 'does not modify path' do
subject.perform do |response|
expect(response.request.uri.path).to eq '/foo%41%3A'
end
end

it 'does modify query string' do
it 'does not modify query string' do
subject.perform do |response|
expect(response.request.uri.query).to eq 'bar=%41%3A'
end
end

it 'does not modify path used for request signing' do
subject.perform

headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /foo%41%3A'
end

it 'does not modify path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET /foo%41%3A?bar=%41%3A HTTP/1.1'
end
end

it 'strips fragment' do
subject.perform do |response|
expect(response.request.uri.fragment).to be_nil
Expand All @@ -134,22 +174,35 @@
end

context 'with non-ASCII URL' do
let(:url) { 'http://éxample.com/föo?bär=1' }
let(:url) { 'http://éxample.com:81/föo?bär=1' }

before do
stub_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1')
stub_request(:get, 'http://xn--xample-9ua.com:81/f%C3%B6o?b%C3%A4r=1')
end

it 'IDN-encodes host' do
subject.perform do |response|
expect(response.request.uri.authority).to eq 'xn--xample-9ua.com'
expect(response.request.uri.authority).to eq 'xn--xample-9ua.com:81'
end
end

it 'percent-escapes path and query string' do
it 'IDN-encodes host in Host header' do
subject.perform do |response|
expect(response.request.headers['Host']).to eq 'xn--xample-9ua.com'
end
end

it 'percent-escapes path used for request signing' do
subject.perform

expect(a_request(:get, 'http://xn--xample-9ua.com/f%C3%B6o?b%C3%A4r=1')).to have_been_made
headers = subject.instance_variable_get(:@headers)
expect(headers[Request::REQUEST_TARGET]).to eq 'get /f%C3%B6o'
end

it 'normalizes path used in request line' do
subject.perform do |response|
expect(response.request.headline).to eq 'GET /f%C3%B6o?b%C3%A4r=1 HTTP/1.1'
end
end
end

Expand Down

0 comments on commit 8891d89

Please sign in to comment.