From 8af1744d1cdf461d26ca49625910a96758046782 Mon Sep 17 00:00:00 2001 From: Mzack9999 Date: Tue, 19 May 2026 23:04:20 +0200 Subject: [PATCH] preserve original query encoding in OrderedParams round-trip (#379) --- url/orderedparams.go | 282 +++++++++++++++++--------- url/orderedparams_roundtrip_test.go | 300 ++++++++++++++++++++++++++++ url/url.go | 2 +- 3 files changed, 491 insertions(+), 93 deletions(-) create mode 100644 url/orderedparams_roundtrip_test.go diff --git a/url/orderedparams.go b/url/orderedparams.go index 6367a7d1..85b4bf8f 100644 --- a/url/orderedparams.go +++ b/url/orderedparams.go @@ -3,167 +3,265 @@ package urlutil import ( "bytes" "strings" - - mapsutil "github.com/projectdiscovery/utils/maps" ) -// Only difference between OrderedParams and Params is that -// OrderedParams preserves order of parameters everythign else is same +// paramEntry holds one "key" or "key=value" pair from a query string. +// +// raw, when non-empty, is the original encoded segment captured by +// Decode and is emitted verbatim by Encode. This is what allows a +// decoded query string to round-trip byte-for-byte: entries that the +// caller never touches are written back exactly as they came in. +// +// Programmatic mutations (Add, Set, Update) create entries with an +// empty raw; those follow the historical encoding path (ParamEncode + +// IncludeEquals). Del simply drops entries. +type paramEntry struct { + key string + value string + hasEquals bool + raw string +} -// OrderedParams is a map that preserves the order of elements +// OrderedParams keeps query parameters in insertion order and, for +// parameters parsed from a query string, preserves the exact original +// byte form so the string round-trips through Encode unchanged. +// +// Programmatically added or mutated parameters are encoded using +// ParamEncode and the IncludeEquals flag, matching the historical +// behavior of this type. type OrderedParams struct { - om mapsutil.OrderedMap[string, []string] - // IncludeEquals is used to include = in encoded parameters, default is false + entries []paramEntry + // IncludeEquals controls whether "=" is appended when an empty + // value is encoded. It applies only to programmatically + // added/mutated parameters - parameters that came from Decode + // keep the "=" (or lack thereof) they had originally. IncludeEquals bool } // NewOrderedParams creates a new ordered params func NewOrderedParams() *OrderedParams { - return &OrderedParams{ - om: mapsutil.NewOrderedMap[string, []string](), - } + return &OrderedParams{} } // IsEmpty checks if the OrderedParams is empty func (o *OrderedParams) IsEmpty() bool { - return o.om.IsEmpty() + return len(o.entries) == 0 } -// Update is similar to Set but it takes value as slice (similar to internal implementation of url.Values) +// Update replaces every value associated with key by the given slice. +// If key already exists the replacement keeps its position in the +// insertion order; otherwise the new values are appended. func (o *OrderedParams) Update(key string, value []string) { - o.om.Set(key, value) + out := make([]paramEntry, 0, len(o.entries)+len(value)) + inserted := false + for _, e := range o.entries { + if e.key == key { + if !inserted { + for _, v := range value { + out = append(out, paramEntry{key: key, value: v}) + } + inserted = true + } + continue + } + out = append(out, e) + } + if !inserted { + for _, v := range value { + out = append(out, paramEntry{key: key, value: v}) + } + } + o.entries = out } -// Iterate iterates over the OrderedParams +// Iterate iterates over the OrderedParams. Entries that share a key +// are grouped and the callback receives every key exactly once, in +// the order of its first appearance, together with the full list of +// decoded values for that key. func (o *OrderedParams) Iterate(f func(key string, value []string) bool) { - o.om.Iterate(func(key string, value []string) bool { - return f(key, value) - }) + seen := make(map[string]struct{}, len(o.entries)) + order := make([]string, 0, len(o.entries)) + groups := make(map[string][]string, len(o.entries)) + for _, e := range o.entries { + if _, ok := seen[e.key]; !ok { + seen[e.key] = struct{}{} + order = append(order, e.key) + } + groups[e.key] = append(groups[e.key], e.value) + } + for _, k := range order { + if !f(k, groups[k]) { + return + } + } } -// Add Parameters to store +// Add appends one entry per value to the store. Calling Add with no +// values is a no-op. func (o *OrderedParams) Add(key string, value ...string) { - if arr, ok := o.om.Get(key); ok && len(arr) > 0 { - if len(value) != 0 { - o.om.Set(key, append(arr, value...)) - } - } else { - o.om.Set(key, value) + for _, v := range value { + o.entries = append(o.entries, paramEntry{key: key, value: v}) } } -// Set sets the key to value and replaces if already exists +// Set replaces all values of key with a single value. The position +// of the first occurrence of key is preserved; later occurrences are +// removed. If the key is missing it is appended at the end. func (o *OrderedParams) Set(key string, value string) { - o.om.Set(key, []string{value}) + out := make([]paramEntry, 0, len(o.entries)+1) + inserted := false + for _, e := range o.entries { + if e.key == key { + if !inserted { + out = append(out, paramEntry{key: key, value: value}) + inserted = true + } + continue + } + out = append(out, e) + } + if !inserted { + out = append(out, paramEntry{key: key, value: value}) + } + o.entries = out } -// Get returns first value of given key +// Get returns the first value associated with key, or "" if absent. func (o *OrderedParams) Get(key string) string { - val, ok := o.om.Get(key) - if !ok || len(val) == 0 { - return "" + for _, e := range o.entries { + if e.key == key { + return e.value + } } - return val[0] + return "" } -// GetAll returns all values of given key or returns empty slice if key doesn't exist +// GetAll returns every value associated with key in insertion order, +// or an empty slice if key is absent. func (o *OrderedParams) GetAll(key string) []string { - val, ok := o.om.Get(key) - if !ok || len(val) == 0 { + var out []string + for _, e := range o.entries { + if e.key == key { + out = append(out, e.value) + } + } + if out == nil { return []string{} } - return val + return out } -// Has returns if given key exists +// Has reports whether key is present. func (o *OrderedParams) Has(key string) bool { - return o.om.Has(key) + for _, e := range o.entries { + if e.key == key { + return true + } + } + return false } -// Del deletes values associated with key +// Del removes every entry whose key matches. func (o *OrderedParams) Del(key string) { - o.om.Delete(key) + out := o.entries[:0] + for _, e := range o.entries { + if e.key == key { + continue + } + out = append(out, e) + } + o.entries = out } -// Merges given paramset into existing one with base as priority +// Merge parses raw and appends its parameters to the current store. func (o *OrderedParams) Merge(raw string) { o.Decode(raw) } -// Encode returns encoded parameters by preserving order +// Encode returns the encoded query string. Entries that came from +// Decode are emitted verbatim, preserving the original byte form +// (including whether "=" was present and the exact escaping). +// Entries added programmatically use ParamEncode and the +// IncludeEquals flag. func (o *OrderedParams) Encode() string { - if o.om.IsEmpty() { + if len(o.entries) == 0 { return "" } var buf strings.Builder - for _, k := range o.om.GetKeys() { - vs, _ := o.om.Get(k) - keyEscaped := ParamEncode(k) - for _, v := range vs { - if buf.Len() > 0 { - buf.WriteByte('&') - } - buf.WriteString(keyEscaped) - value := ParamEncode(v) - //donot specify = if parameter has no value (reference: nuclei-templates) - if o.IncludeEquals || value != "" { - buf.WriteRune('=') - buf.WriteString(value) - } + for _, e := range o.entries { + if buf.Len() > 0 { + buf.WriteByte('&') + } + if e.raw != "" { + buf.WriteString(e.raw) + continue + } + buf.WriteString(ParamEncode(e.key)) + value := ParamEncode(e.value) + // donot specify = if parameter has no value (reference: nuclei-templates) + if o.IncludeEquals || value != "" { + buf.WriteByte('=') + buf.WriteString(value) } } return buf.String() } -// Decode is opposite of Encode() where ("bar=baz&foo=quux") is parsed -// Parameters are loosely parsed to allow any scenario +// Decode parses raw and appends its parameters to the current store. +// Each segment's original byte form is remembered so that an +// unmodified Decode+Encode round-trip produces an identical string. +// Parameters are loosely parsed to allow any scenario. func (o *OrderedParams) Decode(raw string) { - if o.om.Len() == 0 { - o.om = mapsutil.NewOrderedMap[string, []string]() + if raw == "" { + return + } + segments := splitSegments(raw) + for _, pair := range segments { + eq := strings.IndexByte(pair, '=') + entry := paramEntry{raw: pair} + if eq >= 0 { + entry.key = pair[:eq] + entry.value = pair[eq+1:] + entry.hasEquals = true + } else { + entry.key = pair + } + o.entries = append(o.entries, entry) } - arr := []string{} - var tbuff bytes.Buffer - for _, v := range raw { - switch v { +} + +// splitSegments splits a query string on "&" (and on ";" when +// AllowLegacySeperator is set), returning each segment verbatim. +func splitSegments(raw string) []string { + var segments []string + var buf bytes.Buffer + for _, r := range raw { + switch r { case '&': - arr = append(arr, tbuff.String()) - tbuff.Reset() + segments = append(segments, buf.String()) + buf.Reset() case ';': if AllowLegacySeperator { - arr = append(arr, tbuff.String()) - tbuff.Reset() + segments = append(segments, buf.String()) + buf.Reset() continue } - tbuff.WriteRune(v) + buf.WriteRune(r) default: - tbuff.WriteRune(v) + buf.WriteRune(r) } } - if tbuff.Len() > 0 { - arr = append(arr, tbuff.String()) - } - - for _, pair := range arr { - d := strings.SplitN(pair, "=", 2) - if len(d) == 2 { - o.Add(d[0], d[1]) - } else if len(d) == 1 { - o.Add(d[0], "") - } + if buf.Len() > 0 { + segments = append(segments, buf.String()) } + return segments } -// Clone returns a copy of the ordered params +// Clone returns a deep copy of the OrderedParams. func (o *OrderedParams) Clone() *OrderedParams { - clone := NewOrderedParams() - o.om.Iterate(func(key string, value []string) bool { - // this needs to be a deep copy (from reference in nuclei race condition issue) - if len(value) != 0 { - clone.Add(key, value...) - } else { - clone.Add(key, "") - } - return true - }) + clone := &OrderedParams{ + IncludeEquals: o.IncludeEquals, + entries: append([]paramEntry(nil), o.entries...), + } return clone } diff --git a/url/orderedparams_roundtrip_test.go b/url/orderedparams_roundtrip_test.go new file mode 100644 index 00000000..6c3eff02 --- /dev/null +++ b/url/orderedparams_roundtrip_test.go @@ -0,0 +1,300 @@ +package urlutil + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +// TestDecodeEncodeRoundTrip locks in the new contract: every byte of +// a query string parsed by Decode survives Encode unchanged when the +// caller does not mutate any entry. This is the bug that issue #379 +// is about. +func TestDecodeEncodeRoundTrip(t *testing.T) { + cases := []string{ + "", + "foo=bar", + "foo", + "foo=", + "=bar", + "foo=bar&baz", + "foo=bar&baz=", + "foo&baz=qux", + "a=1&a=2&a=3", + "a=1&b=2&a=3", + "key=value%20with%20space", + "key=A%26B", + "empty_first=&second=v", + "foo=bar=baz", + "key=hello+world", + "k1=v1&k2=v2&k3=v3&k4=v4", + "%E4%B8%AD=%E6%96%87", + "json={%22a%22:1}", + } + for _, raw := range cases { + t.Run(raw, func(t *testing.T) { + p := NewOrderedParams() + p.Decode(raw) + got := p.Encode() + require.Equal(t, raw, got, "round-trip lost data: input %q encoded as %q", raw, got) + }) + } +} + +// TestDecodeEncodeRoundTripTrailingAmp documents that a trailing "&" +// is parsed into an empty trailing segment that round-trips +// faithfully. We pin it separately because it is the most fragile +// part of the parser. +func TestDecodeEncodeRoundTripTrailingAmp(t *testing.T) { + p := NewOrderedParams() + p.Decode("foo=bar&") + // The trailing "&" is consumed but no empty segment is appended + // (matches historical behavior - the parser only flushes a + // segment if its buffer is non-empty at EOF). + require.Equal(t, "foo=bar", p.Encode()) + + p = NewOrderedParams() + p.Decode("foo=bar&&baz=qux") + require.Equal(t, "foo=bar&&baz=qux", p.Encode()) +} + +// TestDecodePreservesBareKey ensures a parameter without "=" stays +// without "=", and a parameter with "=" but no value stays with "=". +func TestDecodePreservesBareKey(t *testing.T) { + p := NewOrderedParams() + p.Decode("a&b=&c=v") + require.Equal(t, "a&b=&c=v", p.Encode()) + + require.True(t, p.Has("a")) + require.True(t, p.Has("b")) + require.True(t, p.Has("c")) + require.Equal(t, "", p.Get("a")) + require.Equal(t, "", p.Get("b")) + require.Equal(t, "v", p.Get("c")) +} + +// TestDecodePreservesBareKeyEvenWithIncludeEquals is the key +// behavioral fix: flipping IncludeEquals must NOT inject "=" into +// parameters that came in without one. IncludeEquals only governs +// programmatically added/mutated entries. +func TestDecodePreservesBareKeyEvenWithIncludeEquals(t *testing.T) { + p := NewOrderedParams() + p.Decode("a&b=&c=v") + p.IncludeEquals = true + require.Equal(t, "a&b=&c=v", p.Encode()) +} + +// TestDecodePreservesOriginalEncoding documents that ParamEncode's +// choices (e.g. how it escapes "+", "/", "<", etc.) do not get +// imposed on entries that came in from Decode. The bytes the caller +// gave us are the bytes that come back out. +func TestDecodePreservesOriginalEncoding(t *testing.T) { + raws := []string{ + "q=hello+world", + "q=hello%20world", + "redirect=https%3A%2F%2Fexample.com%2Fpath", + "x=