diff --git a/context/index.yaml b/context/index.yaml index c22c9b2f..ace76e06 100644 --- a/context/index.yaml +++ b/context/index.yaml @@ -20,14 +20,6 @@ files: - path: middleware.md title: Middleware description: This guide explains how to build and use HTTP middleware with `Protocol::HTTP::Middleware`. -- path: hypertext-references.md - title: Hypertext References - description: This guide explains how to use `Protocol::HTTP::Reference` for constructing - and manipulating hypertext references (URLs with parameters). -- path: url-parsing.md - title: URL Parsing - description: This guide explains how to use `Protocol::HTTP::URL` for parsing and - manipulating URL components, particularly query strings and parameters. - path: streaming.md title: Streaming description: This guide gives an overview of how to implement streaming requests diff --git a/guides/hypertext-references/readme.md b/guides/hypertext-references/readme.md deleted file mode 100644 index 698a65b5..00000000 --- a/guides/hypertext-references/readme.md +++ /dev/null @@ -1,140 +0,0 @@ -# Hypertext References - -This guide explains how to use `Protocol::HTTP::Reference` for constructing and manipulating hypertext references (URLs with parameters). - -## Overview - -{ruby Protocol::HTTP::Reference} is used to construct "hypertext references" which consist of a path and URL-encoded parameters. References provide a rich API for URL construction, path manipulation, and parameter handling. 
- -## Basic Construction - -``` ruby -require 'protocol/http/reference' - -# Simple reference with parameters: -reference = Protocol::HTTP::Reference.new("/search", nil, nil, {q: 'kittens', limit: 10}) -reference.to_s -# => "/search?q=kittens&limit=10" - -# Parse existing URLs: -reference = Protocol::HTTP::Reference.parse("/api/users?page=2&sort=name#results") -reference.path # => "/api/users" -reference.query # => "page=2&sort=name" -reference.fragment # => "results" - -# To get parameters as a hash, decode the query string: -parameters = Protocol::HTTP::URL.decode(reference.query) -parameters # => {"page" => "2", "sort" => "name"} -``` - -## Path Manipulation - -References support sophisticated path manipulation including relative path resolution: - -``` ruby -base = Protocol::HTTP::Reference.new("/api/v1/users") - -# Append paths: -user_detail = base.with(path: "123") -user_detail.to_s # => "/api/v1/users/123" - -# Relative path navigation: -parent = user_detail.with(path: "../groups", pop: true) -parent.to_s # => "/api/v1/groups" - -# Absolute path replacement: -root = user_detail.with(path: "/status") -root.to_s # => "/status" -``` - -## Advanced Parameter Handling - -``` ruby -# Complex parameter structures: -reference = Protocol::HTTP::Reference.new("/search", nil, nil, { - filters: { - category: "books", - price: {min: 10, max: 50} - }, - tags: ["fiction", "mystery"] -}) - -reference.to_s -# => "/search?filters[category]=books&filters[price][min]=10&filters[price][max]=50&tags[]=fiction&tags[]=mystery" - -# Parameter merging: -base = Protocol::HTTP::Reference.new("/api", nil, nil, {version: "v1", format: "json"}) -extended = base.with(parameters: {detailed: true}, merge: true) -extended.to_s -# => "/api?version=v1&format=json&detailed=true" - -# Parameter replacement (using merge: false): -replaced = base.with(parameters: {format: "xml"}, merge: false) -replaced.to_s -# => "/api?format=xml" -``` - -## Merge Behavior and Query Strings - -The `merge` 
parameter controls both parameter handling and query string behavior: - -``` ruby -# Create a reference with both query string and parameters: -ref = Protocol::HTTP::Reference.new("/api", "existing=query", nil, {version: "v1"}) -ref.to_s -# => "/api?existing=query&version=v1" - -# merge: true (default) - keeps existing query string: -merged = ref.with(parameters: {new: "argument"}, merge: true) -merged.to_s -# => "/api?existing=query&version=v1&new=argument" - -# merge: false with new parameters - clears query string: -replaced = ref.with(parameters: {new: "argument"}, merge: false) -replaced.to_s -# => "/api?new=argument" - -# merge: false without new parameters - keeps everything: -unchanged = ref.with(path: "v2", merge: false) -unchanged.to_s -# => "/api/v2?existing=query&version=v1" -``` - -## URL Encoding and Special Characters - -References handle URL encoding automatically: - -``` ruby -# Spaces and special characters: -reference = Protocol::HTTP::Reference.new("/search", nil, nil, { - q: "hello world", - filter: "price > $10" -}) -reference.to_s -# => "/search?q=hello%20world&filter=price%20%3E%20%2410" - -# Unicode support: -unicode_ref = Protocol::HTTP::Reference.new("/files", nil, nil, { - name: "résumé.pdf", - emoji: "😀" -}) -unicode_ref.to_s -# => "/files?name=r%C3%A9sum%C3%A9.pdf&emoji=%F0%9F%98%80" -``` - -## Reference Merging - -References can be merged following RFC2396 URI resolution rules: - -``` ruby -base = Protocol::HTTP::Reference.new("/docs/guide/") -relative = Protocol::HTTP::Reference.new("../api/reference.html") - -merged = base + relative -merged.to_s # => "/docs/api/reference.html" - -# Absolute references override completely -absolute = Protocol::HTTP::Reference.new("/completely/different/path") -result = base + absolute -result.to_s # => "/completely/different/path" -``` diff --git a/guides/links.yaml b/guides/links.yaml index 221cf402..4ff4a0b3 100644 --- a/guides/links.yaml +++ b/guides/links.yaml @@ -6,10 +6,6 @@ headers: order: 3 
middleware: order: 4 -hypertext-references: - order: 5 -url-parsing: - order: 6 streaming: order: 7 design-overview: diff --git a/guides/url-parsing/readme.md b/guides/url-parsing/readme.md deleted file mode 100644 index a3e0679e..00000000 --- a/guides/url-parsing/readme.md +++ /dev/null @@ -1,130 +0,0 @@ -# URL Parsing - -This guide explains how to use `Protocol::HTTP::URL` for parsing and manipulating URL components, particularly query strings and parameters. - -## Overview - -{ruby Protocol::HTTP::URL} provides utilities for parsing and manipulating URL components, particularly query strings and parameters. It offers robust encoding/decoding capabilities for complex parameter structures. - -While basic query parameter encoding follows the `application/x-www-form-urlencoded` standard, there is no universal standard for serializing complex nested structures (arrays, nested objects) in URLs. Different frameworks use varying conventions for these cases, and this implementation follows common patterns where possible. 
- -## Basic Query Parameter Parsing - -``` ruby -require 'protocol/http/url' - -# Parse query parameters from a URL: -reference = Protocol::HTTP::Reference.parse("/search?q=ruby&category=programming&page=2") -parameters = Protocol::HTTP::URL.decode(reference.query) -# => {"q" => "ruby", "category" => "programming", "page" => "2"} - -# Symbolize keys for easier access: -parameters = Protocol::HTTP::URL.decode(reference.query, symbolize_keys: true) -# => {:q => "ruby", :category => "programming", :page => "2"} -``` - -## Complex Parameter Structures - -The URL module handles nested parameters, arrays, and complex data structures: - -``` ruby -# Array parameters: -query = "tags[]=ruby&tags[]=programming&tags[]=web" -parameters = Protocol::HTTP::URL.decode(query) -# => {"tags" => ["ruby", "programming", "web"]} - -# Nested hash parameters: -query = "user[name]=John&user[email]=john@example.com&user[preferences][theme]=dark" -parameters = Protocol::HTTP::URL.decode(query) -# => {"user" => {"name" => "John", "email" => "john@example.com", "preferences" => {"theme" => "dark"}}} - -# Mixed structures: -query = "filters[categories][]=books&filters[categories][]=movies&filters[price][min]=10&filters[price][max]=100" -parameters = Protocol::HTTP::URL.decode(query) -# => {"filters" => {"categories" => ["books", "movies"], "price" => {"min" => "10", "max" => "100"}}} -``` - -## Encoding Parameters to Query Strings - -``` ruby -# Simple parameters: -parameters = {"search" => "protocol-http", "limit" => "20"} -query = Protocol::HTTP::URL.encode(parameters) -# => "search=protocol-http&limit=20" - -# Array parameters: -parameters = {"tags" => ["ruby", "http", "protocol"]} -query = Protocol::HTTP::URL.encode(parameters) -# => "tags[]=ruby&tags[]=http&tags[]=protocol" - -# Nested parameters: -parameters = { - user: { - profile: { - name: "Alice", - settings: { - notifications: true, - theme: "light" - } - } - } -} -query = Protocol::HTTP::URL.encode(parameters) -# => 
"user[profile][name]=Alice&user[profile][settings][notifications]=true&user[profile][settings][theme]=light" -``` - -## URL Escaping and Unescaping - -``` ruby -# Escape special characters: -Protocol::HTTP::URL.escape("hello world!") -# => "hello%20world%21" - -# Escape path components (preserves path separators): -Protocol::HTTP::URL.escape_path("/path/with spaces/file.html") -# => "/path/with%20spaces/file.html" - -# Unescape percent-encoded strings: -Protocol::HTTP::URL.unescape("hello%20world%21") -# => "hello world!" - -# Handle Unicode characters: -Protocol::HTTP::URL.escape("café") -# => "caf%C3%A9" - -Protocol::HTTP::URL.unescape("caf%C3%A9") -# => "café" -``` - -## Scanning and Processing Query Strings - -For custom processing, you can scan query strings directly: - -``` ruby -query = "name=John&age=30&active=true" - -Protocol::HTTP::URL.scan(query) do |key, value| - puts "#{key}: #{value}" -end -# Output: -# name: John -# age: 30 -# active: true -``` - -## Security and Limits - -The URL module includes built-in protection against deeply nested parameter attacks: - -``` ruby -# This will raise an error to prevent excessive nesting: -begin - Protocol::HTTP::URL.decode("a[b][c][d][e][f][g][h][i]=value") -rescue ArgumentError => error - puts error.message - # => "Key length exceeded limit!" -end - -# You can adjust the maximum nesting level: -Protocol::HTTP::URL.decode("a[b][c]=value", 5) # Allow up to 5 levels of nesting -``` diff --git a/lib/protocol/http/cookie.rb b/lib/protocol/http/cookie.rb index adf1f0d0..249a50a9 100644 --- a/lib/protocol/http/cookie.rb +++ b/lib/protocol/http/cookie.rb @@ -4,59 +4,66 @@ # Copyright, 2019-2025, by Samuel Williams. # Copyright, 2022, by Herrick Fang. -require_relative "url" +require_relative "quoted_string" module Protocol module HTTP # Represents an individual cookie key-value pair. class Cookie + # Valid cookie name characters according to RFC 6265. 
+ # cookie-name = token (RFC 2616 defines token) + VALID_COOKIE_KEY = /\A#{TOKEN}\z/.freeze + + # Valid cookie value characters according to RFC 6265. + # cookie-value = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE ) + # cookie-octet = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E + # Excludes control chars, whitespace, DQUOTE, comma, semicolon, and backslash + VALID_COOKIE_VALUE = /\A[\x21\x23-\x2B\x2D-\x3A\x3C-\x5B\x5D-\x7E]*\z/.freeze + # Initialize the cookie with the given name, value, and directives. # - # @parameter name [String] The name of the cookiel, e.g. "session_id". + # @parameter name [String] The name of the cookie, e.g. "session_id". # @parameter value [String] The value of the cookie, e.g. "1234". # @parameter directives [Hash] The directives of the cookie, e.g. `{"path" => "/"}`. - def initialize(name, value, directives) + # @raises [ArgumentError] If the name or value contains invalid characters. + def initialize(name, value, directives = nil) + unless VALID_COOKIE_KEY.match?(name) + raise ArgumentError, "Invalid cookie name: #{name.inspect}" + end + + if value && !VALID_COOKIE_VALUE.match?(value) + raise ArgumentError, "Invalid cookie value: #{value.inspect}" + end + @name = name @value = value @directives = directives end # @attribute [String] The name of the cookie. - attr :name + attr_accessor :name # @attribute [String] The value of the cookie. - attr :value + attr_accessor :value # @attribute [Hash] The directives of the cookie. - attr :directives - - # Encode the name of the cookie. - def encoded_name - URL.escape(@name) - end - - # Encode the value of the cookie. - def encoded_value - URL.escape(@value) - end + attr_accessor :directives # Convert the cookie to a string. # # @returns [String] The string representation of the cookie. 
def to_s - buffer = String.new.b + buffer = String.new - buffer << encoded_name << "=" << encoded_value + buffer << @name << "=" << @value if @directives - @directives.collect do |key, value| + @directives.each do |key, value| buffer << ";" + buffer << key - case value - when String - buffer << key << "=" << value - when TrueClass - buffer << key + if value != true + buffer << "=" << value.to_s end end end @@ -74,11 +81,7 @@ def self.parse(string) key, value = head.split("=", 2) directives = self.parse_directives(directives) - self.new( - URL.unescape(key), - URL.unescape(value), - directives, - ) + self.new(key, value, directives) end # Parse a list of strings into a hash of directives. diff --git a/lib/protocol/http/header/accept.rb b/lib/protocol/http/header/accept.rb index 040fa9f2..773af895 100644 --- a/lib/protocol/http/header/accept.rb +++ b/lib/protocol/http/header/accept.rb @@ -5,7 +5,7 @@ # Copyright, 2025, by William T. Nelson. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/accept_charset.rb b/lib/protocol/http/header/accept_charset.rb index 2774db6e..470240c1 100644 --- a/lib/protocol/http/header/accept_charset.rb +++ b/lib/protocol/http/header/accept_charset.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/accept_encoding.rb b/lib/protocol/http/header/accept_encoding.rb index 898ea967..92149368 100644 --- a/lib/protocol/http/header/accept_encoding.rb +++ b/lib/protocol/http/header/accept_encoding.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. 
require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/accept_language.rb b/lib/protocol/http/header/accept_language.rb index b9e8b469..a21a836c 100644 --- a/lib/protocol/http/header/accept_language.rb +++ b/lib/protocol/http/header/accept_language.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/digest.rb b/lib/protocol/http/header/digest.rb index 74370bd7..7a6b63d5 100644 --- a/lib/protocol/http/header/digest.rb +++ b/lib/protocol/http/header/digest.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/quoted_string.rb b/lib/protocol/http/header/quoted_string.rb deleted file mode 100644 index 25c3db36..00000000 --- a/lib/protocol/http/header/quoted_string.rb +++ /dev/null @@ -1,49 +0,0 @@ -# frozen_string_literal: true - -# Released under the MIT License. -# Copyright, 2025, by Samuel Williams. - -module Protocol - module HTTP - module Header - # According to https://tools.ietf.org/html/rfc7231#appendix-C - TOKEN = /[!#$%&'*+\-.^_`|~0-9A-Z]+/i - - QUOTED_STRING = /"(?:.(?!(?<!\\)"))*.?"/ - - # https://tools.ietf.org/html/rfc7231#section-5.3.1 - QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/ - - # Handling of HTTP quoted strings. - module QuotedString - # Unquote a "quoted-string" value according to <https://tools.ietf.org/html/rfc7230#section-3.2.6>. It should already match the QUOTED_STRING pattern above by the parser. - def self.unquote(value, normalize_whitespace = true) - value = value[1...-1] - - value.gsub!(/\\(.)/, '\1') - - if normalize_whitespace - # LWS = [CRLF] 1*( SP | HT ) - value.gsub!(/[\r\n]+\s+/, " ") - end - - return value - end - - QUOTES_REQUIRED = /[()<>@,;:\\"\/\[\]?={} \t]/ - - # Quote a string for HTTP header values if required. 
- # - # @raises [ArgumentError] if the value contains invalid characters like control characters or newlines. - def self.quote(value, force = false) - # Check if quoting is required: - if value =~ QUOTES_REQUIRED or force - "\"#{value.gsub(/["\\]/, '\\\\\0')}\"" - else - value - end - end - end - end - end -end diff --git a/lib/protocol/http/header/server_timing.rb b/lib/protocol/http/header/server_timing.rb index 5ddf80b7..74444aa1 100644 --- a/lib/protocol/http/header/server_timing.rb +++ b/lib/protocol/http/header/server_timing.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/header/te.rb b/lib/protocol/http/header/te.rb index 9a3e2412..e331db13 100644 --- a/lib/protocol/http/header/te.rb +++ b/lib/protocol/http/header/te.rb @@ -4,7 +4,7 @@ # Copyright, 2025, by Samuel Williams. require_relative "split" -require_relative "quoted_string" +require_relative "../quoted_string" require_relative "../error" module Protocol diff --git a/lib/protocol/http/quoted_string.rb b/lib/protocol/http/quoted_string.rb new file mode 100644 index 00000000..b794d44b --- /dev/null +++ b/lib/protocol/http/quoted_string.rb @@ -0,0 +1,47 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. + +module Protocol + module HTTP + # According to https://tools.ietf.org/html/rfc7231#appendix-C + TOKEN = /[!#$%&'*+\-.^_`|~0-9A-Z]+/i + + QUOTED_STRING = /"(?:.(?!(?<!\\)"))*.?"/ + + # https://tools.ietf.org/html/rfc7231#section-5.3.1 + QVALUE = /0(\.[0-9]{0,3})?|1(\.[0]{0,3})?/ + + # Handling of HTTP quoted strings. + module QuotedString + # Unquote a "quoted-string" value according to <https://tools.ietf.org/html/rfc7230#section-3.2.6>. It should already match the QUOTED_STRING pattern above by the parser. 
+ def self.unquote(value, normalize_whitespace = true) + value = value[1...-1] + + value.gsub!(/\\(.)/, '\1') + + if normalize_whitespace + # LWS = [CRLF] 1*( SP | HT ) + value.gsub!(/[\r\n]+\s+/, " ") + end + + return value + end + + QUOTES_REQUIRED = /[()<>@,;:\\"\/\[\]?={} \t]/ + + # Quote a string for HTTP header values if required. + # + # @raises [ArgumentError] if the value contains invalid characters like control characters or newlines. + def self.quote(value, force = false) + # Check if quoting is required: + if value =~ QUOTES_REQUIRED or force + "\"#{value.gsub(/["\\]/, '\\\\\0')}\"" + else + value + end + end + end + end +end \ No newline at end of file diff --git a/lib/protocol/http/reference.rb b/lib/protocol/http/reference.rb deleted file mode 100644 index 5d0d5a3a..00000000 --- a/lib/protocol/http/reference.rb +++ /dev/null @@ -1,253 +0,0 @@ -# frozen_string_literal: true - -# Released under the MIT License. -# Copyright, 2018-2025, by Samuel Williams. - -require_relative "url" - -module Protocol - module HTTP - # A relative reference, excluding any authority. The path part of an HTTP request. - class Reference - include Comparable - - # Generate a reference from a path and user parameters. The path may contain a `#fragment` or `?query=parameters`. - def self.parse(path = "/", parameters = nil) - base, fragment = path.split("#", 2) - path, query = base.split("?", 2) - - self.new(path, query, fragment, parameters) - end - - # Initialize the reference. - # - # @parameter path [String] The path component, e.g. `/foo/bar/index.html`. - # @parameter query [String | Nil] The un-parsed query string, e.g. 'x=10&y=20'. - # @parameter fragment [String | Nil] The fragment, the part after the '#'. - # @parameter parameters [Hash | Nil] User supplied parameters that will be appended to the query part. 
- def initialize(path = "/", query = nil, fragment = nil, parameters = nil) - @path = path - @query = query - @fragment = fragment - @parameters = parameters - end - - # @attribute [String] The path component, e.g. `/foo/bar/index.html`. - attr_accessor :path - - # @attribute [String] The un-parsed query string, e.g. 'x=10&y=20'. - attr_accessor :query - - # @attribute [String] The fragment, the part after the '#'. - attr_accessor :fragment - - # @attribute [Hash] User supplied parameters that will be appended to the query part. - attr_accessor :parameters - - # Freeze the reference. - # - # @returns [Reference] The frozen reference. - def freeze - return self if frozen? - - @path.freeze - @query.freeze - @fragment.freeze - @parameters.freeze - - super - end - - # Implicit conversion to an array. - # - # @returns [Array] The reference as an array, `[path, query, fragment, parameters]`. - def to_ary - [@path, @query, @fragment, @parameters] - end - - # Compare two references. - # - # @parameter other [Reference] The other reference to compare. - # @returns [Integer] -1, 0, 1 if the reference is less than, equal to, or greater than the other reference. - def <=> other - to_ary <=> other.to_ary - end - - # Type-cast a reference. - # - # @parameter reference [Reference | String] The reference to type-cast. - # @returns [Reference] The type-casted reference. - def self.[] reference - if reference.is_a? self - return reference - else - return self.parse(reference) - end - end - - # @returns [Boolean] Whether the reference has parameters. - def parameters? - @parameters and !@parameters.empty? - end - - # @returns [Boolean] Whether the reference has a query string. - def query? - @query and !@query.empty? - end - - # @returns [Boolean] Whether the reference has a fragment. - def fragment? - @fragment and !@fragment.empty? - end - - # Append the reference to the given buffer. - def append(buffer = String.new) - if query? - buffer << URL.escape_path(@path) << "?" 
<< @query - buffer << "&" << URL.encode(@parameters) if parameters? - else - buffer << URL.escape_path(@path) - buffer << "?" << URL.encode(@parameters) if parameters? - end - - if fragment? - buffer << "#" << URL.escape(@fragment) - end - - return buffer - end - - # Convert the reference to a string, e.g. `/foo/bar/index.html?x=10&y=20#section` - # - # @returns [String] The reference as a string. - def to_s - append - end - - # Merges two references as specified by RFC2396, similar to `URI.join`. - def + other - other = self.class[other] - - self.class.new( - expand_path(self.path, other.path, true), - other.query, - other.fragment, - other.parameters, - ) - end - - # Just the base path, without any query string, parameters or fragment. - def base - self.class.new(@path, nil, nil, nil) - end - - # Update the reference with the given path, parameters and fragment. - # - # @parameter path [String] Append the string to this reference similar to `File.join`. - # @parameter parameters [Hash] Append the parameters to this reference. - # @parameter fragment [String] Set the fragment to this value. - # @parameter pop [Boolean] If the path contains a trailing filename, pop the last component of the path before appending the new path. - # @parameter merge [Boolean] If the parameters are specified, merge them with the existing parameters, otherwise replace them (including query string). 
- def with(path: nil, parameters: false, fragment: @fragment, pop: false, merge: true) - if merge - # Merge mode: combine new parameters with existing, keep query: - # parameters = (@parameters || {}).merge(parameters || {}) - if @parameters - if parameters - parameters = @parameters.merge(parameters) - else - parameters = @parameters - end - elsif !parameters - parameters = @parameters - end - - query = @query - else - # Replace mode: use new parameters if provided, clear query when replacing: - if parameters == false - # No new parameters provided, keep existing: - parameters = @parameters - query = @query - else - # New parameters provided, replace and clear query: - # parameters = parameters - query = nil - end - end - - if path - path = expand_path(@path, path, pop) - else - path = @path - end - - self.class.new(path, query, fragment, parameters) - end - - private - - def split(path) - if path.empty? - [path] - else - path.split("/", -1) - end - end - - def expand_absolute_path(path, parts) - parts.each do |part| - if part == ".." - path.pop - elsif part == "." - # Do nothing. - else - path << part - end - end - - if path.first != "" - path.unshift("") - end - end - - def expand_relative_path(path, parts) - parts.each do |part| - if part == ".." and path.any? - path.pop - elsif part == "." - # Do nothing. - else - path << part - end - end - end - - # @param pop [Boolean] whether to remove the last path component of the base path, to conform to URI merging behaviour, as defined by RFC2396. - def expand_path(base, relative, pop = true) - if relative.start_with? "/" - return relative - end - - path = split(base) - - # RFC2396 Section 5.2: - # 6) a) All but the last segment of the base URI's path component is - # copied to the buffer. In other words, any characters after the - # last (right-most) slash character, if any, are excluded. 
- path.pop if pop or path.last == "" - - parts = split(relative) - - # Absolute path: - if path.first == "" - expand_absolute_path(path, parts) - else - expand_relative_path(path, parts) - end - - return path.join("/") - end - end - end -end diff --git a/lib/protocol/http/url.rb b/lib/protocol/http/url.rb deleted file mode 100644 index 581e4c62..00000000 --- a/lib/protocol/http/url.rb +++ /dev/null @@ -1,149 +0,0 @@ -# frozen_string_literal: true - -# Released under the MIT License. -# Copyright, 2019-2024, by Samuel Williams. -# Copyright, 2022, by Herrick Fang. - -module Protocol - module HTTP - # Helpers for working with URLs. - module URL - # Escapes a string using percent encoding, e.g. `a b` -> `a%20b`. - # - # @parameter string [String] The string to escape. - # @returns [String] The escaped string. - def self.escape(string, encoding = string.encoding) - string.b.gsub(/([^a-zA-Z0-9_.\-]+)/) do |m| - "%" + m.unpack("H2" * m.bytesize).join("%").upcase - end.force_encoding(encoding) - end - - # Unescapes a percent encoded string, e.g. `a%20b` -> `a b`. - # - # @parameter string [String] The string to unescape. - # @returns [String] The unescaped string. - def self.unescape(string, encoding = string.encoding) - string.b.gsub(/%(\h\h)/) do |hex| - Integer($1, 16).chr - end.force_encoding(encoding) - end - - # Matches characters that are not allowed in a URI path segment. According to RFC 3986 Section 3.3 (https://tools.ietf.org/html/rfc3986#section-3.3), a valid path segment consists of "pchar" characters. This pattern identifies characters that must be percent-encoded when included in a URI path segment. - NON_PATH_CHARACTER_PATTERN = /([^a-zA-Z0-9_\-\.~!$&'()*+,;=:@\/]+)/.freeze - - # Escapes non-path characters using percent encoding. In other words, this method escapes characters that are not allowed in a URI path segment. According to RFC 3986 Section 3.3 (https://tools.ietf.org/html/rfc3986#section-3.3), a valid path segment consists of "pchar" characters. 
This method percent-encodes characters that are not "pchar" characters. - # - # @parameter path [String] The path to escape. - # @returns [String] The escaped path. - def self.escape_path(path) - encoding = path.encoding - path.b.gsub(NON_PATH_CHARACTER_PATTERN) do |m| - "%" + m.unpack("H2" * m.bytesize).join("%").upcase - end.force_encoding(encoding) - end - - # Encodes a hash or array into a query string. This method is used to encode query parameters in a URL. For example, `{"a" => 1, "b" => 2}` is encoded as `a=1&b=2`. - # - # @parameter value [Hash | Array | Nil] The value to encode. - # @parameter prefix [String] The prefix to use for keys. - def self.encode(value, prefix = nil) - case value - when Array - return value.map {|v| - self.encode(v, "#{prefix}[]") - }.join("&") - when Hash - return value.map {|k, v| - self.encode(v, prefix ? "#{prefix}[#{escape(k.to_s)}]" : escape(k.to_s)) - }.reject(&:empty?).join("&") - when nil - return prefix - else - raise ArgumentError, "value must be a Hash" if prefix.nil? - - return "#{prefix}=#{escape(value.to_s)}" - end - end - - # Scan a string for URL-encoded key/value pairs. - # @yields {|key, value| ...} - # @parameter key [String] The unescaped key. - # @parameter value [String] The unescaped key. - def self.scan(string) - string.split("&") do |assignment| - next if assignment.empty? - - key, value = assignment.split("=", 2) - - yield unescape(key), value.nil? ? value : unescape(value) - end - end - - # Split a key into parts, e.g. `a[b][c]` -> `["a", "b", "c"]`. - # - # @parameter name [String] The key to split. - # @returns [Array(String)] The parts of the key. - def self.split(name) - name.scan(/([^\[]+)|(?:\[(.*?)\])/)&.tap do |parts| - parts.flatten! - parts.compact! - end - end - - # Assign a value to a nested hash. - # - # @parameter keys [Array(String)] The parts of the key. - # @parameter value [Object] The value to assign. - # @parameter parent [Hash] The parent hash. 
- def self.assign(keys, value, parent) - top, *middle = keys - - middle.each_with_index do |key, index| - if key.nil? or key.empty? - parent = (parent[top] ||= Array.new) - top = parent.size - - if nested = middle[index+1] and last = parent.last - top -= 1 unless last.include?(nested) - end - else - parent = (parent[top] ||= Hash.new) - top = key - end - end - - parent[top] = value - end - - # Decode a URL-encoded query string into a hash. - # - # @parameter string [String] The query string to decode. - # @parameter maximum [Integer] The maximum number of keys in a path. - # @parameter symbolize_keys [Boolean] Whether to symbolize keys. - # @returns [Hash] The decoded query string. - def self.decode(string, maximum = 8, symbolize_keys: false) - parameters = {} - - self.scan(string) do |name, value| - keys = self.split(name) - - if keys.empty? - raise ArgumentError, "Invalid key path: #{name.inspect}!" - end - - if keys.size > maximum - raise ArgumentError, "Key length exceeded limit!" - end - - if symbolize_keys - keys.collect!{|key| key.empty? ? nil : key.to_sym} - end - - self.assign(keys, value, parameters) - end - - return parameters - end - end - end -end diff --git a/readme.md b/readme.md index b57c2ae0..241316e9 100644 --- a/readme.md +++ b/readme.md @@ -22,10 +22,6 @@ Please see the [project documentation](https://socketry.github.io/protocol-http/ - [Middleware](https://socketry.github.io/protocol-http/guides/middleware/index) - This guide explains how to build and use HTTP middleware with `Protocol::HTTP::Middleware`. - - [Hypertext References](https://socketry.github.io/protocol-http/guides/hypertext-references/index) - This guide explains how to use `Protocol::HTTP::Reference` for constructing and manipulating hypertext references (URLs with parameters). 
- - - [URL Parsing](https://socketry.github.io/protocol-http/guides/url-parsing/index) - This guide explains how to use `Protocol::HTTP::URL` for parsing and manipulating URL components, particularly query strings and parameters. - - [Streaming](https://socketry.github.io/protocol-http/guides/streaming/index) - This guide gives an overview of how to implement streaming requests and responses. - [Design Overview](https://socketry.github.io/protocol-http/guides/design-overview/index) - This guide explains the high level design of `protocol-http` in the context of wider design patterns that can be used to implement HTTP clients and servers. @@ -34,6 +30,15 @@ Please see the [project documentation](https://socketry.github.io/protocol-http/ Please see the [project releases](https://socketry.github.io/protocol-http/releases/index) for all releases. +### Unreleased + + - **Breaking Change**: Move `Protocol::HTTP::Header::QuotedString` to `Protocol::HTTP::QuotedString` for better reusability. + - **Breaking Change**: Handle cookie key/value pairs using `QuotedString` as per RFC 6265. + - Don't use URL encoding for cookie key/value. + - **Breaking Change**: Remove `Protocol::HTTP::URL` and `Protocol::HTTP::Reference` – replaced by `Protocol::URL` gem. + - `Protocol::HTTP::URL` -\> `Protocol::URL::Encoding`. + - `Protocol::HTTP::Reference` -\> `Protocol::URL::Reference`. + ### v0.54.0 - Introduce rich support for `Header::Digest`, `Header::ServerTiming`, `Header::TE`, `Header::Trailer` and `Header::TransferEncoding`. @@ -77,17 +82,13 @@ Please see the [project releases](https://socketry.github.io/protocol-http/relea - Ensure chunks are flushed if required, when streaming. 
-### v0.30.0 - - - [`Request[]` and `Response[]` Keyword Arguments](https://socketry.github.io/protocol-http/releases/index#request[]-and-response[]-keyword-arguments) - - [Interim Response Handling](https://socketry.github.io/protocol-http/releases/index#interim-response-handling) - ## See Also - [protocol-http1](https://github.com/socketry/protocol-http1) — HTTP/1 client/server implementation using this interface. - [protocol-http2](https://github.com/socketry/protocol-http2) — HTTP/2 client/server implementation using this interface. + - [protocol-url](https://github.com/socketry/protocol-url) — URL parsing and manipulation library. - [async-http](https://github.com/socketry/async-http) — Asynchronous HTTP client and server, supporting multiple HTTP protocols & TLS. - [async-websocket](https://github.com/socketry/async-websocket) — Asynchronous client and server WebSockets. diff --git a/releases.md b/releases.md index 174f8815..419b0010 100644 --- a/releases.md +++ b/releases.md @@ -1,5 +1,14 @@ # Releases +## Unreleased + + - **Breaking Change**: Move `Protocol::HTTP::Header::QuotedString` to `Protocol::HTTP::QuotedString` for better reusability. + - **Breaking Change**: Handle cookie key/value pairs using `QuotedString` as per RFC 6265. + - Don't use URL encoding for cookie key/value. + - **Breaking Change**: Remove `Protocol::HTTP::URL` and `Protocol::HTTP::Reference` – replaced by `Protocol::URL` gem. + - `Protocol::HTTP::URL` -> `Protocol::URL::Encoding`. + - `Protocol::HTTP::Reference` -> `Protocol::URL::Reference`. + ## v0.54.0 - Introduce rich support for `Header::Digest`, `Header::ServerTiming`, `Header::TE`, `Header::Trailer` and `Header::TransferEncoding`. diff --git a/test/protocol/http/cookie.rb b/test/protocol/http/cookie.rb new file mode 100644 index 00000000..d31740c6 --- /dev/null +++ b/test/protocol/http/cookie.rb @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +# Released under the MIT License. +# Copyright, 2025, by Samuel Williams. 
+ +require "protocol/http/cookie" + +describe Protocol::HTTP::Cookie do + describe "#initialize" do + it "accepts valid cookie names" do + cookie = Protocol::HTTP::Cookie.new("session_id", "123") + expect(cookie.name).to be == "session_id" + expect(cookie.value).to be == "123" + end + + it "accepts valid cookie values with allowed characters" do + # Test cookie-octet range: !#$%&'()*+-./0-9:;<=>?@A-Z[]^_`a-z{|}~ + cookie = Protocol::HTTP::Cookie.new("test", "abc123!#$%&'()*+-./:") + expect(cookie.value).to be == "abc123!#$%&'()*+-./:" + end + + it "rejects cookie names with invalid characters" do + expect do + Protocol::HTTP::Cookie.new("session id", "123") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie name/) + end + + it "rejects cookie names with semicolon" do + expect do + Protocol::HTTP::Cookie.new("session;id", "123") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie name/) + end + + it "rejects cookie values with control characters" do + expect do + Protocol::HTTP::Cookie.new("session", "123\n456") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie value/) + end + + it "rejects cookie values with semicolon" do + expect do + Protocol::HTTP::Cookie.new("session", "123;456") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie value/) + end + + it "rejects cookie values with comma" do + expect do + Protocol::HTTP::Cookie.new("session", "123,456") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie value/) + end + + it "rejects cookie values with backslash" do + expect do + Protocol::HTTP::Cookie.new("session", "123\\456") + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie value/) + end + + it "rejects cookie values with double quote" do + expect do + Protocol::HTTP::Cookie.new("session", '"quoted"') + end.to raise_exception(ArgumentError, message: be =~ /Invalid cookie value/) + end + + it "accepts nil value" do + cookie = 
Protocol::HTTP::Cookie.new("session", nil) + expect(cookie.value).to be_nil + end + end + + describe "#to_s" do + it "returns cookie name and value" do + cookie = Protocol::HTTP::Cookie.new("session", "abc123") + expect(cookie.to_s).to be == "session=abc123" + end + + it "includes directives" do + cookie = Protocol::HTTP::Cookie.new("session", "123", {"path" => "/", "secure" => true}) + expect(cookie.to_s).to be == "session=123;path=/;secure" + end + end + + describe ".parse" do + it "parses simple cookie" do + cookie = Protocol::HTTP::Cookie.parse("session=123") + expect(cookie.name).to be == "session" + expect(cookie.value).to be == "123" + end + + it "parses cookie with equals in value" do + cookie = Protocol::HTTP::Cookie.parse("session=123==") + expect(cookie.name).to be == "session" + expect(cookie.value).to be == "123==" + end + + it "parses cookie with directives" do + cookie = Protocol::HTTP::Cookie.parse("session=123; path=/; secure") + expect(cookie.name).to be == "session" + expect(cookie.value).to be == "123" + expect(cookie.directives).to be == {"path" => "/", "secure" => true} + end + end +end diff --git a/test/protocol/http/header/cookie.rb b/test/protocol/http/header/cookie.rb index 515a07d5..b560df0d 100644 --- a/test/protocol/http/header/cookie.rb +++ b/test/protocol/http/header/cookie.rb @@ -39,21 +39,21 @@ end end - with "session=123==; secure" do + with "session=abc123; secure" do it "can parse cookies" do expect(cookies).to have_keys("session") session = cookies["session"] expect(session).to have_attributes( name: be == "session", - value: be == "123==", + value: be == "abc123", ) expect(session.directives).to have_keys("secure") end it "has string representation" do session = cookies["session"] - expect(session.to_s).to be == "session=123%3D%3D;secure" + expect(session.to_s).to be == "session=abc123;secure" end end end diff --git a/test/protocol/http/header/quoted_string.rb b/test/protocol/http/quoted_string.rb similarity index 91% rename 
from test/protocol/http/header/quoted_string.rb rename to test/protocol/http/quoted_string.rb index a464c9a1..060eac27 100644 --- a/test/protocol/http/header/quoted_string.rb +++ b/test/protocol/http/quoted_string.rb @@ -3,9 +3,9 @@ # Released under the MIT License. # Copyright, 2025, by Samuel Williams. -require "protocol/http/header/quoted_string" +require "protocol/http/quoted_string" -describe Protocol::HTTP::Header::QuotedString do +describe Protocol::HTTP::QuotedString do with ".unquote" do it "ignores linear whitespace" do quoted_string = subject.unquote(%Q{"Hello\r\n World"}) diff --git a/test/protocol/http/reference.rb b/test/protocol/http/reference.rb deleted file mode 100644 index 780697a0..00000000 --- a/test/protocol/http/reference.rb +++ /dev/null @@ -1,237 +0,0 @@ -# frozen_string_literal: true - -# Released under the MIT License. -# Copyright, 2018-2025, by Samuel Williams. - -require "protocol/http/reference" - -describe Protocol::HTTP::Reference do - let(:reference) {subject.new} - - with "#base" do - let(:reference) {subject.new("/foo/bar", "foo=bar", "baz", {x: 10})} - - it "returns reference with only the path" do - expect(reference.base).to have_attributes( - path: be == reference.path, - parameters: be_nil, - fragment: be_nil, - ) - end - end - - with "#+" do - let(:absolute) {subject["/foo/bar"]} - let(:relative) {subject["foo/bar"]} - let(:up) {subject["../baz"]} - - it "can add a relative path" do - expect(reference + relative).to be == absolute - end - - it "can add an absolute path" do - expect(reference + absolute).to be == absolute - end - - it "can add an absolute path" do - expect(relative + absolute).to be == absolute - end - - it "can remove relative parts" do - expect(absolute + up).to be == subject["/baz"] - end - end - - with "#freeze" do - it "can freeze reference" do - expect(reference.freeze).to be_equal(reference) - expect(reference).to be(:frozen?) 
- end - end - - with "#with" do - it "can nest paths" do - reference = subject.new("/foo") - expect(reference.path).to be == "/foo" - - nested_resource = reference.with(path: "bar") - expect(nested_resource.path).to be == "/foo/bar" - end - - it "can update path" do - copy = reference.with(path: "foo/bar.html") - expect(copy.path).to be == "/foo/bar.html" - end - - it "can append path components" do - copy = reference.with(path: "foo/").with(path: "bar/") - - expect(copy.path).to be == "/foo/bar/" - end - - it "can append empty path components" do - copy = reference.with(path: "") - - expect(copy.path).to be == reference.path - end - - it "can append parameters" do - copy = reference.with(parameters: {x: 10}) - - expect(copy.parameters).to be == {x: 10} - end - - it "can merge parameters" do - copy = reference.with(parameters: {x: 10}).with(parameters: {y: 20}) - - expect(copy.parameters).to be == {x: 10, y: 20} - end - - it "can copy parameters" do - copy = reference.with(parameters: {x: 10}).with(path: "foo") - - expect(copy.parameters).to be == {x: 10} - expect(copy.path).to be == "/foo" - end - - it "can replace path with absolute path" do - copy = reference.with(path: "foo").with(path: "/bar") - - expect(copy.path).to be == "/bar" - end - - it "can replace path with relative path" do - copy = reference.with(path: "foo").with(path: "../../bar") - - expect(copy.path).to be == "/bar" - end - - with "#query" do - let(:reference) {subject.new("foo/bar/baz.html", "x=10", nil, nil)} - - it "can replace query" do - copy = reference.with(parameters: nil, merge: false) - - expect(copy.parameters).to be_nil - expect(copy.query).to be_nil - end - - it "keeps existing query when merge: false with no parameters" do - copy = reference.with(fragment: "new-fragment", merge: false) - - # Original had no parameters: - expect(copy.parameters).to be_nil - - # Query should be preserved: - expect(copy.query).to be == "x=10" - - # Fragment should be updated: - 
expect(copy.fragment).to be == "new-fragment" - end - end - - with "parameters and query" do - let(:reference) {subject.new("foo/bar/baz.html", "x=10", nil, {y: 20, z: 30})} - - it "keeps existing parameters and query when merge: false with no new parameters" do - copy = reference.with(fragment: "new-fragment", merge: false) - - # Original parameters preserved: - expect(copy.parameters).to be == {y: 20, z: 30} - - # Query should be preserved: - expect(copy.query).to be == "x=10" - - # Fragment should be updated: - expect(copy.fragment).to be == "new-fragment" - end - end - - with "relative path" do - let(:reference) {subject.new("foo/bar/baz.html", nil, nil, nil)} - - it "can compute new relative path" do - copy = reference.with(path: "../index.html", pop: true) - - expect(copy.path).to be == "foo/index.html" - end - - it "can compute relative path with more uplevels" do - copy = reference.with(path: "../../../index.html", pop: true) - - expect(copy.path).to be == "../index.html" - end - end - end - - with "empty query string" do - let(:reference) {subject.new("/", "", nil, {})} - - it "it should not append query string" do - expect(reference.to_s).not.to be(:include?, "?") - end - - it "can add a relative path" do - result = reference + subject["foo/bar"] - - expect(result.to_s).to be == "/foo/bar" - end - end - - with "empty fragment" do - let(:reference) {subject.new("/", nil, "", nil)} - - it "it should not append query string" do - expect(reference.to_s).not.to be(:include?, "#") - end - end - - describe Protocol::HTTP::Reference.parse("path with spaces/image.jpg") do - it "encodes whitespace" do - expect(subject.to_s).to be == "path%20with%20spaces/image.jpg" - end - end - - describe Protocol::HTTP::Reference.parse("path", array: [1, 2, 3]) do - it "encodes array" do - expect(subject.to_s).to be == "path?array[]=1&array[]=2&array[]=3" - end - end - - describe Protocol::HTTP::Reference.parse("path_with_underscores/image.jpg") do - it "doesn't touch 
underscores" do - expect(subject.to_s).to be == "path_with_underscores/image.jpg" - end - end - - describe Protocol::HTTP::Reference.parse("index", "my name" => "Bob Dole") do - it "encodes query" do - expect(subject.to_s).to be == "index?my%20name=Bob%20Dole" - end - end - - describe Protocol::HTTP::Reference.parse("index#All Your Base") do - it "encodes fragment" do - expect(subject.to_s).to be == "index\#All%20Your%20Base" - end - end - - describe Protocol::HTTP::Reference.parse("I/❤️/UNICODE", face: "😀") do - it "encodes unicode" do - expect(subject.to_s).to be == "I/%E2%9D%A4%EF%B8%8F/UNICODE?face=%F0%9F%98%80" - end - end - - describe Protocol::HTTP::Reference.parse("foo?bar=10&baz=20", yes: "no") do - it "can use existing query parameters" do - expect(subject.to_s).to be == "foo?bar=10&baz=20&yes=no" - end - end - - describe Protocol::HTTP::Reference.parse("foo#frag") do - it "can use existing fragment" do - expect(subject.fragment).to be == "frag" - expect(subject.to_s).to be == "foo#frag" - end - end -end diff --git a/test/protocol/http/url.rb b/test/protocol/http/url.rb deleted file mode 100644 index ec0088e2..00000000 --- a/test/protocol/http/url.rb +++ /dev/null @@ -1,91 +0,0 @@ -# frozen_string_literal: true - -# Released under the MIT License. -# Copyright, 2019-2025, by Samuel Williams. -# Copyright, 2022, by Herrick Fang. 
- -require "protocol/http/url" - -ValidParameters = Sus::Shared("valid parameters") do |parameters, query_string = nil| - let(:encoded) {Protocol::HTTP::URL.encode(parameters)} - - if query_string - it "can encode #{parameters.inspect}" do - expect(encoded).to be == query_string - end - end - - let(:decoded) {Protocol::HTTP::URL.decode(encoded)} - - it "can round-trip #{parameters.inspect}" do - expect(decoded).to be == parameters - end -end - -describe Protocol::HTTP::URL do - it_behaves_like ValidParameters, {"foo" => "bar"}, "foo=bar" - it_behaves_like ValidParameters, {"foo" => ["1", "2", "3"]}, "foo[]=1&foo[]=2&foo[]=3" - - it_behaves_like ValidParameters, {"foo" => {"bar" => "baz"}}, "foo[bar]=baz" - it_behaves_like ValidParameters, {"foo" => [{"bar" => "baz"}]}, "foo[][bar]=baz" - - it_behaves_like ValidParameters, {"foo" => [{"bar" => "baz"}, {"bar" => "bob"}]} - - RoundTrippedParameters = Sus::Shared("round-tripped parameters") do - let(:encoded) {Protocol::HTTP::URL.encode(parameters)} - let(:decoded) {Protocol::HTTP::URL.decode(encoded, symbolize_keys: true)} - - it "can round-trip parameters" do - expect(decoded).to be == parameters - end - end - - with "basic parameters" do - let(:parameters) {{x: "10", y: "20"}} - - it_behaves_like RoundTrippedParameters - end - - with "nested parameters" do - let(:parameters) {{things: [{x: "10"}, {x: "20"}]}} - - it_behaves_like RoundTrippedParameters - end - - with "nil values" do - let(:parameters) {{x: nil}} - - it_behaves_like RoundTrippedParameters - end - - with "nil values in arrays" do - let(:parameters) {{x: ["1", nil, "2"]}} - - it_behaves_like RoundTrippedParameters - end - - with ".decode" do - it "fails on deeply nested parameters" do - expect do - Protocol::HTTP::URL.decode("a[b][c][d][e][f][g][h][i]=10") - end.to raise_exception(ArgumentError, message: be =~ /Key length exceeded/) - end - - it "fails with missing key" do - expect do - Protocol::HTTP::URL.decode("=foo") - end.to 
raise_exception(ArgumentError, message: be =~ /Invalid key/) - end - - it "fails with empty pairs" do - expect(Protocol::HTTP::URL.decode("a=1&&b=2")).to be == {"a" => "1", "b" => "2"} - expect(Protocol::HTTP::URL.decode("a&&b")).to be == {"a" => nil, "b" => nil} - end - end - - with ".unescape" do - it "succeds with hex characters" do - expect(Protocol::HTTP::URL.unescape("%3A")).to be == ":" - end - end -end