From f6da21949f0b0a2707c2d5cb9ded92b297b9ccc7 Mon Sep 17 00:00:00 2001 From: Marcin Romaszewicz Date: Tue, 19 May 2026 06:51:41 -0700 Subject: [PATCH] Percent-encode deepObject parameter wire output Closes: #131 MarshalDeepObject emitted raw bytes for values, map keys, and paramName. Previously masked by oapi-codegen v2.6.0's client re-encoding via url.Values.Encode(); v2.7.0 stopped re-encoding (PR #2307), exposing the bug as 400s against RFC3986-compliant servers and silent query-string injection from '&' in values. The fix passes values, each path segment, and the paramName through url.QueryEscape, matching the v2.6.0-effective behavior. Adds TestDeepObject_URLEncoding (51 sub-cases) covering reserved chars, non-ASCII, emoji, control bytes across values, map keys, and param names. Documents the unmarshal-side ambiguity for literal [ ] in keys (inherent to the deepObject format, matches qs/Rails behavior). --- README.md | 186 +++++++++++++++++++++++++++++++++++++- deepobject.go | 39 ++++++-- deepobject_test.go | 217 +++++++++++++++++++++++++++++++++++++++++++-- styleparam_test.go | 9 +- 4 files changed, 432 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 7392812e..2baa02d3 100644 --- a/README.md +++ b/README.md @@ -3,4 +3,188 @@ ⚠️ This README may be for the latest development version, which may contain unreleased changes. Please ensure you're looking at the README for the latest release version. -This provides any runtime-specific code that the generated code that oapi-codegen generates may need, and therefore is expected to be used with [deepmap/oapi-codegen](https://github.com/deepmap/oapi-codegen). +This package provides helper functions for marshaling and unmarshalling HTTP +parameters in headers, cookies, and query arguments in various formats, as well +as functions for reading and writing form encoded data representing models. + +You won't need to use this package directly, since it's imported be the boilerplate +from `oapi-codegen`, however, you do need to use the correct version needed by +the code generator, since it makes assumptions about runtime behavior. + +## Parameter Handling + +OpenAPI 3.x parameters are characterized by three orthogonal attributes: +**style**, **location**, and **explode**. The serialized form on the wire is +determined by the combination of all three. + +### Styles + +Parameters come in the following styles (all defined by the OpenAPI 3.x spec): + +- **`simple`** — comma-separated values. The default for path and header + parameters. +- **`label`** — values prefixed with `.`, separated by `.` (explode) or `,` + (no explode). Path parameters only. +- **`matrix`** — values prefixed with `;name=`, repeated (explode) or + comma-separated (no explode). Path parameters only. +- **`form`** — `name=value` pairs joined with `&`. The default for query and + cookie parameters. +- **`spaceDelimited`** — array elements joined by literal spaces (no + explode); behaves identically to `form` when exploded. Query parameters, + arrays only. +- **`pipeDelimited`** — array elements joined by literal `|` (no explode); + behaves identically to `form` when exploded. Query parameters, arrays + only. +- **`deepObject`** — nested bracket notation, e.g. `name[field]=value`. + Query parameters, objects only, must be exploded. + +### Locations + +Each style is only valid in specific parameter locations: + +| Location | Allowed styles | +|----------|---------------| +| `path` | `simple`, `label`, `matrix` | +| `query` | `form`, `spaceDelimited`, `pipeDelimited`, `deepObject` | +| `header` | `simple` | +| `cookie` | `form` | + +### Explode + +Each style can be `explode: true` or `explode: false`, which changes how +multi-value parameters (arrays and objects) are packed. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
StyleTypeexplode: falseexplode: true
simpleprimitive 555
array [3,4,5]3,4,53,4,5
object {role:"admin", firstName:"Alex"}firstName,Alex,role,adminfirstName=Alex,role=admin
labelprimitive 5.5.5
array [3,4,5].3,4,5.3.4.5
object {role:"admin", firstName:"Alex"}.firstName,Alex,role,admin.firstName=Alex.role=admin
matrixprimitive 5;id=5;id=5
array [3,4,5];id=3,4,5;id=3;id=4;id=5
object {role:"admin", firstName:"Alex"};id=firstName,Alex,role,admin;firstName=Alex;role=admin
formprimitive 5id=5id=5
array [3,4,5]id=3,4,5id=3&id=4&id=5
object {role:"admin", firstName:"Alex"}id=firstName,Alex,role,adminfirstName=Alex&role=admin
spaceDelimitedprimitivenot supported
array [3,4,5]id=3 4 5id=3&id=4&id=5
objectnot supported
pipeDelimitedprimitivenot supported
array [3,4,5]id=3|4|5id=3&id=4&id=5
objectnot supported
deepObjectprimitivenot supported
array [3,4,5]id[0]=3&id[1]=4&id[2]=5 (explode required)
object {role:"admin", firstName:"Alex"}id[firstName]=Alex&id[role]=admin (explode required)
+ +> The above outputs are shown unescaped for readability. In real use, values +> destined for query parameters are passed through `url.QueryEscape` (or +> `url.PathEscape` for path parameters), so reserved characters and +> non-ASCII bytes are percent-encoded on the wire. + +### Parameter Limitations + +The OpenAPI 3.x parameter styles are convenient but each has at least one +sharp edge. The list below documents behaviors that surprise users and the +cases where round-tripping is not possible in principle. + +#### Encoding + +- **Query and path values are percent-encoded.** Reserved characters + (`&`, `=`, `#`, `?`, etc.) and non-ASCII bytes are escaped via + `url.QueryEscape` / `url.PathEscape`. Spaces in query values are encoded + as `+` (form-urlencoded convention), matching `url.Values.Encode()`. +- **Header values are passed through raw.** Per RFC 7230 §3.2.6, header + field values may contain visible ASCII plus space/tab; bytes ≥ `0x80` are + `obs-text` and explicitly marked obsolete in RFC 9110. There is no + generally-agreed mechanism for transporting non-ASCII text in arbitrary + header values (RFC 8187 covers only header *parameters* like + `Content-Disposition` `filename*=`). If your spec puts non-ASCII or + control characters into a header parameter, the wire format is + RFC-noncompliant and proxies may strip or reject the request. +- **Cookie values are passed through raw.** Per RFC 6265 §4.1.1, cookie + values may not contain `CTL`, whitespace, `"`, `,`, `;`, `\`, or any byte + ≥ `0x80`. Most cookie libraries URL-encode by convention, but this + package does not — if your spec puts spaces or non-ASCII into a cookie + parameter, the value will not be RFC 6265-conformant. +- **Map keys are percent-encoded for `deepObject` only.** For `simple`, + `label`, `matrix`, and `form` styles, map keys are emitted raw. If your + map keys are non-ASCII or contain URL-reserved characters, the wire + representation will not be encoded. +- **`allowReserved`** (`StyleParamOptions.AllowReserved`) is a query-only + option per the OpenAPI 3.x spec, and only applies to *values*, not + parameter names or map keys. + +#### `deepObject` + +- **Bracket notation is structural, not data.** `MarshalDeepObject` + percent-encodes literal `[` and `]` inside values and map keys as `%5B` + / `%5D` on the wire. However, once a server calls `url.ParseQuery`, those + escapes are decoded back to `[` and `]` — at which point a key like + `p[a[b]]` is ambiguous between `{p: {a: {b: ...}}}` and + `{p: {"a[b]": ...}}`. `UnmarshalDeepObject` cannot distinguish these + cases and adopts the same greedy left-to-right parse used by qs (Node), + Rails `Rack::Utils.parse_nested_query`, and similar libraries: every + unescaped `[` opens a new nesting level. **Literal `[` and `]` inside + map keys cannot be round-tripped.** Use a different separator in + user-supplied keys if this matters to you. +- **OpenAPI 3.x defines `deepObject` only for object schemas.** This + package extends it to maps and arrays for convenience (arrays are + numerically indexed: `p[0]`, `p[1]`, …), but other tooling may not + accept that wire form. +- **`deepObject` requires `explode: true`.** Non-exploded `deepObject` has + no well-defined wire format; an error is returned. + +#### `spaceDelimited` / `pipeDelimited` + +- **Array-only.** Per the OpenAPI spec, these styles only apply to arrays + of primitives. Passing a primitive or object returns an error. +- **Exploded form degenerates to `form`.** When `explode: true`, the + separator becomes `&` (not space or pipe), so the output is + byte-identical to `form` exploded. The space/pipe separator only takes + effect when `explode: false`. This is per the OpenAPI spec, but it + surprises many users. +- **Unexploded `spaceDelimited` is RFC-fragile.** Literal spaces in a + query string are tolerated by most parsers but are not RFC 3986- + conformant; `+` would be the form-urlencoded canonical form for space, + but `spaceDelimited` is defined to use literal `%20`-equivalent space + (the value bytes themselves are then encoded normally). + +#### Type-conversion edge cases + +- **`null`** in a struct field marshals to the literal string `"null"` in + `deepObject` output. There is no distinct OpenAPI representation for + absent vs. JSON-null in query parameters. +- **`time.Time` and `types.Date`** are formatted via RFC 3339 and + `2006-01-02` respectively when used as primitives in any style. If you + want a different format, wrap the field in a type that implements + `encoding.TextMarshaler`. +- **`types.UUID`** stringifies to the canonical hyphenated 36-character + form; query/path location escaping is a no-op (UUIDs are in the + unreserved set). +- **`json.Marshaler` is consulted for structs**, then the result is + re-decoded with `UseNumber()` and re-styled. Numbers therefore retain + their original precision, but the round-trip through JSON means struct + field tags are honored (not raw Go field names). + diff --git a/deepobject.go b/deepobject.go index 10a9c211..ac19386f 100644 --- a/deepobject.go +++ b/deepobject.go @@ -54,18 +54,24 @@ func marshalDeepObject(in interface{}, path []string) ([]string, error) { default: // Now, for a concrete value, we will turn the path elements // into a deepObject style set of subscripts. [a, b, c] turns into - // [a][b][c] - prefix := "[" + strings.Join(path, "][") + "]" + // [a][b][c]. Path segments may originate from user-controlled map + // keys, so each segment is percent-encoded; the literal '[' and ']' + // remain as structural delimiters. + encoded := make([]string, len(path)) + for i, p := range path { + encoded[i] = url.QueryEscape(p) + } + prefix := "[" + strings.Join(encoded, "][") + "]" var value string if t == nil { value = "null" } else { - value = fmt.Sprintf("%v", t) + value = url.QueryEscape(fmt.Sprintf("%v", t)) } result = []string{ - prefix + fmt.Sprintf("=%s", value), + prefix + "=" + value, } } return result, nil @@ -93,9 +99,12 @@ func MarshalDeepObject(i interface{}, paramName string) (string, error) { return "", fmt.Errorf("error traversing JSON structure: %w", err) } - // Prefix the param name to each subscripted field. + // Prefix the param name to each subscripted field. The param name is + // percent-encoded to keep the wire output ASCII-clean even if the spec + // declares a non-identifier parameter name. + encodedParamName := url.QueryEscape(paramName) for i := range fields { - fields[i] = paramName + fields[i] + fields[i] = encodedParamName + fields[i] } return strings.Join(fields, "&"), nil } @@ -135,6 +144,24 @@ func makeFieldOrValue(paths [][]string, values []string) fieldOrValue { return f } +// UnmarshalDeepObject decodes deepObject-style query parameters (e.g. +// `filter[name]=alice&filter[role]=admin`) into dst, using paramName as the +// outer prefix. +// +// Encoding: MarshalDeepObject percent-encodes values, map keys, and the +// param name itself, so any byte sequence — including non-ASCII text and URL +// reserved characters in values or in map keys — round-trips correctly. The +// '[' and ']' characters that appear at the top level of each fragment are +// always structural; literal brackets inside data are encoded as %5B/%5D on +// the wire. +// +// Known limitation: literal '[' or ']' inside map keys cannot be round- +// tripped. After url.ParseQuery decodes %5B/%5D back to '['/']', the parser +// cannot distinguish a structural bracket from a literal bracket that was +// part of a key (e.g. `p[a[b]]` is ambiguous between `{p: {a: {b: ...}}}` +// and `{p: {"a[b]": ...}}`). This matches the behavior of every other +// deepObject implementation (qs, Rails, PHP); the deepObject style itself +// does not define an escape mechanism for brackets inside keys. func UnmarshalDeepObject(dst interface{}, paramName string, params url.Values) error { return unmarshalDeepObject(dst, paramName, params, false) } diff --git a/deepobject_test.go b/deepobject_test.go index 7425a9f0..3855600f 100644 --- a/deepobject_test.go +++ b/deepobject_test.go @@ -2,9 +2,11 @@ package runtime import ( "net/url" + "reflect" "strings" "testing" "time" + "unicode" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -102,15 +104,14 @@ func TestDeepObject(t *testing.T) { marshaled, err := MarshalDeepObject(srcObj, "p") require.NoError(t, err) - require.EqualValues(t, "p[ab][0]=true&p[ao][0][Foo]=bar&p[ao][0][Is]=true&p[ao][1][Foo]=baz&p[ao][1][Is]=false&p[aop][0][Foo]=a&p[aop][1][Foo]=b&p[aop][1][count]=2&p[as][0]=hello&p[as][1]=world&p[b]=true&p[d]=2020-02-01&p[f]=4.2&p[i]=12&p[m][additional]=1&p[o][ID]=456&p[o][Name]=Joe Schmoe&p[oas][0]=foo&p[oas][1]=bar&p[ob]=true&p[od]=2020-02-01&p[of]=3.7&p[oi]=5&p[om][additional]=1&p[onas][names][0]=Bill&p[onas][names][1]=Frank&p[oo][ID]=123&p[oo][Name]=Marcin Romaszewicz", marshaled) + // Spaces in values are encoded as '+' per url.QueryEscape (form-encoded + // convention, matching the pre-v2.7.0 behavior of url.Values.Encode()). + require.EqualValues(t, "p[ab][0]=true&p[ao][0][Foo]=bar&p[ao][0][Is]=true&p[ao][1][Foo]=baz&p[ao][1][Is]=false&p[aop][0][Foo]=a&p[aop][1][Foo]=b&p[aop][1][count]=2&p[as][0]=hello&p[as][1]=world&p[b]=true&p[d]=2020-02-01&p[f]=4.2&p[i]=12&p[m][additional]=1&p[o][ID]=456&p[o][Name]=Joe+Schmoe&p[oas][0]=foo&p[oas][1]=bar&p[ob]=true&p[od]=2020-02-01&p[of]=3.7&p[oi]=5&p[om][additional]=1&p[onas][names][0]=Bill&p[onas][names][1]=Frank&p[oo][ID]=123&p[oo][Name]=Marcin+Romaszewicz", marshaled) - params := make(url.Values) - marshaledParts := strings.Split(marshaled, "&") - for _, p := range marshaledParts { - parts := strings.Split(p, "=") - require.Equal(t, 2, len(parts)) - params.Set(parts[0], parts[1]) - } + // Use url.ParseQuery for the round-trip — it decodes percent-escapes and + // '+' back to literal characters, matching what a real HTTP server does. + params, err := url.ParseQuery(marshaled) + require.NoError(t, err) var dstObj AllFields err = UnmarshalDeepObject(&dstObj, "p", params) @@ -242,3 +243,203 @@ func TestDeepObject_NonIndexedArray(t *testing.T) { assert.Equal(t, []string{"a", "b"}, dst.Vals) }) } + +// assertDeepObjectWireSafe asserts the marshaled deepObject output is safe to +// use on the wire and round-trips correctly. Three invariants: +// +// 1. Every byte is 7-bit ASCII. RFC 3986 requires non-ASCII bytes to be +// percent-encoded; Go's net/url is lenient and won't surface the bug, so +// we check the bytes directly. +// 2. The output survives a full URL round trip: +// URL.RawQuery → URL.String() → url.Parse() → URL.Query() +// This catches characters (notably '#') that ParseQuery accepts in +// isolation but that break when the string is assigned to URL.RawQuery +// and re-parsed as part of a full URL. +// 3. Re-parsed values + UnmarshalDeepObject reconstruct the original. +func assertDeepObjectWireSafe(t *testing.T, marshaled, paramName string, dst, src interface{}) { + t.Helper() + + if i := strings.IndexFunc(marshaled, func(r rune) bool { + return r > unicode.MaxASCII + }); i >= 0 { + t.Fatalf("non-ASCII rune at byte offset %d; deepObject output must be percent-encoded; raw=%q", i, marshaled) + } + + u, err := url.Parse("http://example.test/path") + require.NoError(t, err) + u.RawQuery = marshaled + roundtrippedURL := u.String() + + parsedURL, err := url.Parse(roundtrippedURL) + require.NoError(t, err, "URL round-trip failed; raw=%q url=%q", marshaled, roundtrippedURL) + require.Empty(t, parsedURL.Fragment, + "unexpected URL fragment %q after round-trip — unescaped '#' leaked into RawQuery; raw=%q", + parsedURL.Fragment, marshaled) + + parsedQuery := parsedURL.Query() + for k := range parsedQuery { + require.True(t, strings.HasPrefix(k, paramName+"["), + "unexpected top-level key %q after URL round-trip; raw=%q", k, marshaled) + } + + err = UnmarshalDeepObject(dst, paramName, parsedQuery) + require.NoError(t, err, "unmarshal failed; raw=%q", marshaled) + assert.Equal(t, src, reflect.ValueOf(dst).Elem().Interface(), + "round-trip mismatch; raw=%q", marshaled) +} + +// TestDeepObject_URLEncoding pins issue +// https://github.com/oapi-codegen/runtime/issues/131: the marshaller must +// percent-encode reserved characters and non-ASCII bytes in values, in map +// keys, and in the param name itself. Without this, an `&` in a value +// silently injects an extra query parameter and non-ASCII content produces +// invalid URIs that RFC3986-compliant servers reject with 400. +func TestDeepObject_URLEncoding(t *testing.T) { + type Inner struct { + Name string `json:"name"` + } + type Outer struct { + Field string `json:"field"` + Inner Inner `json:"inner"` + } + + // Adversarial value cases. Each case puts the bad string in Field, with a + // safe constant value in Inner.Name so we can also detect injection (a + // stray top-level "inner.name=safe" pair would prove the value escaped its + // enclosing param). + valueCases := []struct { + name string + val string + }{ + {"empty", ""}, + {"plain ascii", "Alex"}, + {"space", "a b c"}, + {"ampersand", "a&admin=true"}, + {"equals", "k=v"}, + {"question mark", "what?"}, + {"hash fragment", "before#after"}, + {"plus literal", "1+2=3"}, + {"semicolon", "a;b;c"}, + {"comma", "a,b,c"}, + {"slash", "a/b/c"}, + {"colon", "key:value"}, + {"at sign", "user@host"}, + {"percent literal", "100%"}, + {"square brackets", "[bracketed]"}, + {"curly braces", "{json}"}, + {"reserved kitchen sink", "&=?#+;,/:@!$'()*[]"}, + {"single quote", "it's"}, + {"double quote", `say "hi"`}, + {"backtick", "`code`"}, + {"backslash", `a\b`}, + {"tab", "a\tb"}, + {"newline", "a\nb"}, + {"carriage return", "a\rb"}, + {"null byte", "a\x00b"}, + {"cjk hiragana", "こんにちは"}, + {"cjk han", "你好世界"}, + {"cyrillic", "Привет"}, + {"arabic", "مرحبا"}, + {"hebrew", "שלום"}, + {"thai", "สวัสดี"}, + {"emoji 4-byte", "🚀"}, + {"emoji zwj sequence", "👨‍👩‍👧‍👦"}, + {"combining accents", "é"}, // e + combining acute + {"surrogate-range", "\U0001F600\U0001F601"}, + {"mixed unicode and reserved", "filter&q=こんにちは"}, + {"long repeated reserved", strings.Repeat("&", 64)}, + } + + for _, tc := range valueCases { + t.Run("value/"+tc.name, func(t *testing.T) { + src := Outer{Field: tc.val, Inner: Inner{Name: "safe"}} + + marshaled, err := MarshalDeepObject(src, "p") + require.NoError(t, err) + + var dst Outer + assertDeepObjectWireSafe(t, marshaled, "p", &dst, src) + }) + } + + // Adversarial map-key cases. JSON-encoded structs cannot produce + // arbitrary keys, but map[string]string can, and that's a real user + // pattern (filters[name]=... where the filter key comes from a user). + keyCases := []struct { + name string + key string + }{ + {"plain ascii", "name"}, + {"space in key", "first name"}, + {"ampersand in key", "a&b"}, + {"equals in key", "a=b"}, + {"dollar prefix", "$eq"}, + {"non-ascii key", "名前"}, + {"emoji key", "🔑"}, + {"reserved kitchen sink key", "&=?#+;,/:@"}, + } + + for _, tc := range keyCases { + t.Run("key/"+tc.name, func(t *testing.T) { + src := map[string]string{tc.key: "safe"} + + marshaled, err := MarshalDeepObject(src, "p") + require.NoError(t, err) + + var dst map[string]string + assertDeepObjectWireSafe(t, marshaled, "p", &dst, src) + }) + } + + // Adversarial param-name. The codegen emits the parameter name as + // declared in the OpenAPI spec, which is usually a safe identifier, but + // nothing in the spec forbids spaces or non-ASCII names — and the + // marshaller prepends the name to each fragment without escaping. + paramNameCases := []string{ + "plain", + "with space", + "with&", + "フィルター", + "🔥", + } + for _, pn := range paramNameCases { + t.Run("paramName/"+pn, func(t *testing.T) { + src := map[string]string{"name": "safe"} + + marshaled, err := MarshalDeepObject(src, pn) + require.NoError(t, err) + + // Marshaled output must be 7-bit ASCII regardless of paramName + // content. We can't use the full helper here because the helper + // checks for `paramName+"["` literally in parsed keys; once + // encoded, the prefix is the percent-encoded form. Just check the + // ASCII invariant and that the output parses. + if i := strings.IndexFunc(marshaled, func(r rune) bool { + return r > unicode.MaxASCII + }); i >= 0 { + t.Fatalf("non-ASCII rune at byte offset %d in paramName-prefixed output; raw=%q", i, marshaled) + } + + u, err := url.Parse("http://example.test/path") + require.NoError(t, err) + u.RawQuery = marshaled + parsedURL, err := url.Parse(u.String()) + require.NoError(t, err, "URL round-trip failed; raw=%q", marshaled) + require.Empty(t, parsedURL.Fragment, "unexpected fragment; raw=%q", marshaled) + }) + } + + // Non-ASCII assertion: the issue text spells out the exact expected + // percent-encoded form for "こんにちは". Lock that in so we don't drift to + // some other encoder. + t.Run("non-ascii uses utf-8 percent-encoding (RFC3986 §2.5)", func(t *testing.T) { + src := map[string]string{"name": "こんにちは"} + marshaled, err := MarshalDeepObject(src, "p") + require.NoError(t, err) + // "こんにちは" → E3 81 93 E3 82 93 E3 81 AB E3 81 A1 E3 81 AF. + // Each byte becomes %XX; url.QueryEscape uses uppercase hex. + assert.Contains(t, marshaled, + "%E3%81%93%E3%82%93%E3%81%AB%E3%81%A1%E3%81%AF", + "expected UTF-8 percent-encoded value; got %q", marshaled) + }) +} diff --git a/styleparam_test.go b/styleparam_test.go index 3fe6a298..4ecbc6c1 100644 --- a/styleparam_test.go +++ b/styleparam_test.go @@ -882,14 +882,15 @@ func TestStyleParamNameEncoding(t *testing.T) { assert.EqualValues(t, ";id=5", result) }) - t.Run("deepObject param name not yet encoded", func(t *testing.T) { - // NOTE: MarshalDeepObject handles its own serialization and does not - // currently encode param names. This documents the current behavior. + t.Run("deepObject param name is encoded", func(t *testing.T) { + // MarshalDeepObject percent-encodes the param name so non-identifier + // names (e.g. those containing reserved URI chars) produce wire-safe + // output. See issue #131. type Obj struct { Name string `json:"name"` } result, err := StyleParamWithOptions("deepObject", true, "filter[]", Obj{Name: "foo"}, opts) assert.NoError(t, err) - assert.EqualValues(t, "filter[][name]=foo", result) + assert.EqualValues(t, "filter%5B%5D[name]=foo", result) }) }