diff --git a/QsNet/Internal/Encoder.cs b/QsNet/Internal/Encoder.cs
index dd08133..8b60f97 100644
--- a/QsNet/Internal/Encoder.cs
+++ b/QsNet/Internal/Encoder.cs
@@ -1,7 +1,9 @@
using System;
using System.Collections;
using System.Collections.Generic;
+using System.Globalization;
using System.Linq;
+using System.Runtime.CompilerServices;
using System.Text;
using QsNet.Enums;
using QsNet.Models;
@@ -11,34 +13,144 @@ namespace QsNet.Internal;
///
/// A helper class for encoding data into a query string format.
///
+/// <remarks>
+/// <para>
+/// <b>Performance notes</b>: This type sits on hot paths. It relies on <c>Utils.Encode</c> for percent-encoding.
+/// The UTF-8 encoder path uses precomputed ASCII lookup tables for RFC 3986/1738 unreserved sets to fast-scan
+/// ASCII and avoid per-char predicate cost. Latin-1 branches are intentionally left unchanged to preserve legacy
+/// behavior and measurements.
+/// </para>
+/// <para>
+/// <b>Semantics</b>: RFC3986 by default; RFC1738 maps space to '+' and leaves '(' and ')' unescaped (other bytes
+/// identical). When list format is <c>comma</c>, the separator comma between elements is written literally and never
+/// re-encoded; commas originating inside element values are encoded as "%2C". When <c>allowDots</c> and
+/// <c>encodeDotInKeys</c> are both true, '.' in keys is encoded as "%2E" to avoid ambiguity.
+/// </para>
+/// <para>
+/// <b>Safety</b>: The implementation avoids unsafe code. If an unsafe micro-optimization is
+/// considered in the future, only add it when dedicated benchmarks show a real win and all unit/compat tests pass.
+/// Encoding semantics must remain identical.
+/// </para>
+/// <para><b>Thread-safety</b>: Stateless; safe to use concurrently.</para>
+/// <para>
+/// <b>Benchmarks</b>: See <c>UtilsEncodeBenchmarks</c>. Any change here or in <c>Utils.Encode</c> should be
+/// validated against the UTF-8 and Latin-1 datasets (ascii-safe, latin1-fallback, reserved-heavy, utf8-mixed) to
+/// prevent regressions.
+/// </para>
+/// </remarks>
internal static class Encoder
{
private static readonly Formatter IdentityFormatter = s => s;
///
- /// Encodes the given data into a query string format.
+ /// Converts <paramref name="value"/> to a culture-invariant string.
+ /// Booleans become "true"/"false"; numeric types use InvariantCulture; null becomes an empty string.
///
- /// The data to encode; can be any type.
- /// If true, will not encode undefined values.
- /// A dictionary for tracking cyclic references.
- /// An optional prefix for the encoded string.
- /// A generator for array prefixes.
- /// If true, uses comma for array encoding.
- /// If true, allows empty lists in the output.
- /// If true, handles nulls strictly.
- /// If true, skips null values in the output.
- /// If true, encodes dots in keys.
- /// An optional custom encoder function.
- /// An optional date serializer function.
- /// An optional sorter for keys.
- /// An optional filter to apply to the data.
- /// If true, allows dots in keys.
- /// The format to use for encoding (default is RFC3986).
- /// A custom formatter function.
- /// If true, only encodes values without keys.
- /// The character encoding to use (default is UTF-8).
- /// If true, adds a '?' prefix to the output.
- /// The encoded result.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static string ToInvariantString(object? value) => value switch
+    {
+        // Special cases: a missing value renders as empty text, booleans as lowercase literals.
+        null => string.Empty,
+        bool flag => flag ? "true" : "false",
+        // Built-in numeric types are formatted with the invariant culture.
+        int n => n.ToString(CultureInfo.InvariantCulture),
+        long n => n.ToString(CultureInfo.InvariantCulture),
+        short n => n.ToString(CultureInfo.InvariantCulture),
+        sbyte n => n.ToString(CultureInfo.InvariantCulture),
+        uint n => n.ToString(CultureInfo.InvariantCulture),
+        ulong n => n.ToString(CultureInfo.InvariantCulture),
+        ushort n => n.ToString(CultureInfo.InvariantCulture),
+        byte n => n.ToString(CultureInfo.InvariantCulture),
+        double n => n.ToString(CultureInfo.InvariantCulture),
+        float n => n.ToString(CultureInfo.InvariantCulture),
+        decimal n => n.ToString(CultureInfo.InvariantCulture),
+        char symbol => symbol.ToString(),
+        // Everything else falls back to the type's own ToString(); a null result becomes empty text.
+        _ => value.ToString() ?? string.Empty
+    };
+
+ // Encodes a single element for the comma-join fast path.
+ // - Uses the provided encoder (or Utils.Encode) according to `format` and `cs`.
+ // - The comma separator between elements is appended by the caller and is never re‑encoded.
+ // - Any commas that originate *inside* a value are percent-encoded as "%2C" to preserve round‑trip semantics.
+ // - RFC3986 is the default; RFC1738 differs in space handling (space => '+') and keeps '(' and ')' unescaped.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static void AppendCommaEncodedValue(
+        StringBuilder sb,
+        object? value,
+        Encoding cs,
+        Format format,
+        ValueEncoder? encoder
+    )
+    {
+        // Prefer the caller-supplied encoder; otherwise use the built-in percent-encoder.
+        var text = encoder is null ? Utils.Encode(value, cs, format) : encoder(value, cs, format);
+
+        // A raw comma left inside the element would read as a separator, so escape it as "%2C".
+#if NETSTANDARD2_0
+        var hasComma = text.IndexOf(',') >= 0;
+        sb.Append(hasComma ? text.Replace(",", "%2C") : text);
+#else
+        var hasComma = text.Contains(',', StringComparison.Ordinal);
+        sb.Append(hasComma ? text.Replace(",", "%2C", StringComparison.Ordinal) : text);
+#endif
+    }
+
+    // Leaf test: null is a leaf only when skipNulls is set; strings, byte arrays and anything
+    // accepted by Utils.IsNonNullishPrimitive are leaves.
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private static bool IsLeaf(object? v, bool skipNulls) => v is null
+        ? skipNulls
+        : v is string or byte[] || Utils.IsNonNullishPrimitive(v, skipNulls);
+
+ ///
+ /// Encodes <paramref name="data"/> into query-string fragments.
+ /// Returns either a single "key=value" fragment (as a string), a sequence of fragments (as an IEnumerable boxed as
+ /// object),
+ /// or an empty array when nothing should be emitted. Callers are expected to flatten and join with '&'.
+ ///
+ /// The value to encode; may be any object, dictionary, list/array, or primitive.
+ /// If true, treats the current value as logically undefined (missing) and emits nothing.
+ ///
+ /// Cycle-detection frame used across recursion; pass the current frame to detect
+ /// self-references.
+ ///
+ /// Optional prefix for the current key path (e.g., an existing query or parent key).
+ /// Function that produces the key for array elements (indices, brackets, or comma mode).
+ ///
+ /// When using the comma list format, if true, appends "[]" to the key for single-element
+ /// arrays to preserve round‑trip parsing.
+ ///
+ /// If true, encodes empty lists as "key[]"; otherwise, empty lists produce no output.
+ /// If true, encodes null as the bare key (e.g., "k"); otherwise encodes as "k=".
+ /// If true, omits pairs whose value is null; also enables a leaf fast-path for cycle detection.
+ ///
+ /// If true and <paramref name="allowDots"/> is true, encodes '.' in keys as "%2E"
+ /// to avoid ambiguity.
+ ///
+ /// Optional custom value encoder; when null, falls back to Utils.Encode .
+ ///
+ /// Optional serializer for <see cref="DateTime"/> values (ISO 8601 by default); applied to
+ /// comma arrays as well.
+ ///
+ /// Optional key sort comparer; when null, a faster unsorted path is used.
+ ///
+ /// Optional filter. If a FunctionFilter , it's applied to the current object/value; if an
+ /// IterableFilter , its iterable provides the key set.
+ ///
+ ///
+ /// If true, uses dotted notation for object navigation (e.g., "a.b"); otherwise uses bracket
+ /// notation (e.g., "a[b]").
+ ///
+ /// Target escaping rules (RFC3986 by default; RFC1738 maps spaces to '+').
+ /// Post-processing applied to each emitted string fragment; default is identity.
+ /// If true, values are encoded but keys are not passed to <paramref name="encoder"/>.
+ /// Character encoding for the encoder (UTF-8 by default).
+ /// If true, prepends '?' to the very first fragment (useful for top-level calls).
+ ///
+ /// A string fragment, a sequence of fragments, or an empty array when no output is produced. The caller is responsible
+ /// for joining with '&'.
+ ///
public static object Encode(
object? data,
bool undefined,
@@ -66,33 +178,39 @@ public static object Encode(
var cs = charset ?? Encoding.UTF8;
var gen = generateArrayPrefix ?? ListFormat.Indices.GetGenerator();
- var isCommaGen = ReferenceEquals(gen, ListFormat.Comma.GetGenerator());
+ var commaGen = ListFormat.Comma.GetGenerator();
+ var isCommaGen = gen == commaGen;
var crt = commaRoundTrip ?? isCommaGen;
var keyPrefixStr = prefix ?? (addQueryPrefix ? "?" : "");
var obj = data;
+ // Only encode '.' when both AllowDots and EncodeDotInKeys are true (preserves legacy behavior when AllowDots == false).
+ var dotsAndEncode = allowDots && encodeDotInKeys;
var objKey = data; // identity key
var tmpSc = sideChannel;
var step = 0;
var found = false;
- while (!found)
- {
- tmpSc = tmpSc.Parent;
- if (tmpSc is null)
- break;
- step++;
- if (objKey is not null && tmpSc.TryGet(objKey, out var pos))
+ // Fast path (#3): skip cycle detection when the current value is a leaf.
+ // Leaves never recurse, so they can’t participate in cycles.
+ if (!IsLeaf(data, skipNulls))
+ while (!found)
{
- if (pos == step)
- throw new InvalidOperationException("Cyclic object value");
- found = true;
- }
+ tmpSc = tmpSc.Parent;
+ if (tmpSc is null)
+ break;
+ step++;
+ if (objKey is not null && tmpSc.TryGet(objKey, out var pos))
+ {
+ if (pos == step)
+ throw new InvalidOperationException("Cyclic object value");
+ found = true;
+ }
- if (tmpSc.Parent is null)
- step = 0;
- }
+ if (tmpSc.Parent is null)
+ step = 0;
+ }
if (filter is FunctionFilter ff)
obj = ff.Function(keyPrefixStr, obj);
@@ -127,22 +245,17 @@ public static object Encode(
{
if (encoder == null)
{
- var s = obj switch
- {
- bool b => b ? "true" : "false",
- _ => obj?.ToString() ?? ""
- };
+ var s = ToInvariantString(obj);
return $"{fmt(keyPrefixStr)}={fmt(s)}";
}
- var keyPart = encodeValuesOnly ? keyPrefixStr : encoder(keyPrefixStr, null, null);
- var valuePart = encoder(obj, null, null);
+ var keyPart = encodeValuesOnly ? keyPrefixStr : encoder(keyPrefixStr, cs, format);
+ var valuePart = encoder(obj, cs, format);
return $"{fmt(keyPart)}={fmt(valuePart)}";
}
- var values = new List();
if (undefined)
- return values;
+ return Array.Empty();
// Detect sequence once and cache materialization for index access / counts
var isSeq = false;
@@ -150,19 +263,252 @@ public static object Encode(
if (obj is IEnumerable seq0 and not string and not IDictionary)
{
isSeq = true;
- seqList = seq0.Cast().ToList();
+ if (obj is List already)
+ seqList = already;
+ else
+ seqList = seq0.Cast().ToList();
+ }
+
+ // Fast path (#1): when no sorting is requested, avoid building objKeys and
+ // iterate the structure directly to eliminate extra allocations and lookups.
+ if (sort == null && !(isCommaGen && obj is IEnumerable and not string and not IDictionary) &&
+ filter is not IterableFilter)
+ {
+#if NETSTANDARD2_0
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefixFast = encodeDotInKeys && keyPrefixStr.IndexOf('.') >= 0
+ ? keyPrefixStr.Replace(".", "%2E")
+ : keyPrefixStr;
+#else
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefixFast = encodeDotInKeys && keyPrefixStr.Contains('.', StringComparison.Ordinal)
+ ? keyPrefixStr.Replace(".", "%2E", StringComparison.Ordinal)
+ : keyPrefixStr;
+#endif
+ var adjustedPrefixFast =
+ crt && isSeq && seqList is { Count: 1 }
+ ? $"{encodedPrefixFast}[]"
+ : encodedPrefixFast;
+
+ if (allowEmptyLists && isSeq && seqList is { Count: 0 })
+ return $"{adjustedPrefixFast}[]";
+
+ // Fast path (#5): mark side-channel once per parent instead of per child
+ var markSideChannelFast = objKey is not null && (obj is IDictionary || isSeq);
+ if (markSideChannelFast)
+ sideChannel.Set(objKey!, step);
+
+ List valuesFast;
+
+ void AddKv(object? keyObj, object? val)
+ {
+ if (skipNulls && val is null)
+ return;
+
+ var keyStr = keyObj?.ToString() ?? string.Empty;
+ var encodedKey = keyStr;
+#if NETSTANDARD2_0
+ if (dotsAndEncode && keyStr.IndexOf('.') >= 0)
+ encodedKey = keyStr.Replace(".", "%2E");
+#else
+ if (dotsAndEncode && keyStr.Contains('.', StringComparison.Ordinal))
+ encodedKey = keyStr.Replace(".", "%2E", StringComparison.Ordinal);
+#endif
+ var keyPrefixFast =
+ isSeq
+ ? gen(adjustedPrefixFast, encodedKey)
+ : allowDots
+ ? $"{adjustedPrefixFast}.{encodedKey}"
+ : $"{adjustedPrefixFast}[{encodedKey}]";
+
+ // Removed per-iteration sideChannel.Set
+
+ var childSc = IsLeaf(val, skipNulls) ? sideChannel : new SideChannelFrame(sideChannel);
+
+ var encoded = Encode(
+ val,
+ false,
+ childSc,
+ keyPrefixFast,
+ gen,
+ crt,
+ allowEmptyLists,
+ strictNullHandling,
+ skipNulls,
+ encodeDotInKeys,
+ encoder,
+ serializeDate,
+ sort,
+ filter,
+ allowDots,
+ format,
+ fmt,
+ encodeValuesOnly,
+ cs,
+ addQueryPrefix
+ );
+
+ switch (encoded)
+ {
+ case List enList:
+ valuesFast.AddRange(enList);
+ break;
+ case IEnumerable en and not string:
+ {
+ foreach (var item in en)
+ valuesFast.Add(item);
+ break;
+ }
+ default:
+ valuesFast.Add(encoded);
+ break;
+ }
+ }
+
+ switch (obj)
+ {
+ case IDictionary dObj:
+ valuesFast = new List(dObj.Count);
+ foreach (var kv in dObj)
+ AddKv(kv.Key, kv.Value);
+ return valuesFast;
+ case IDictionary dStr:
+ valuesFast = new List(dStr.Count);
+ foreach (var kv in dStr)
+ AddKv(kv.Key, kv.Value);
+ return valuesFast;
+ case IDictionary map:
+ valuesFast = new List(map.Count);
+ foreach (DictionaryEntry de in map)
+ AddKv(de.Key, de.Value);
+ return valuesFast;
+ case Array arr:
+ valuesFast = new List(arr.Length);
+ for (var i = 0; i < arr.Length; i++)
+ AddKv(i, arr.GetValue(i));
+ return valuesFast;
+ case IList list:
+ valuesFast = new List(list.Count);
+ for (var i = 0; i < list.Count; i++)
+ AddKv(i, list[i]);
+ return valuesFast;
+ default:
+ if (isSeq && seqList != null)
+ {
+ valuesFast = new List(seqList.Count);
+ for (var i = 0; i < seqList.Count; i++)
+ AddKv(i, seqList[i]);
+ return valuesFast;
+ }
+
+ break;
+ }
+ // If we fall through (very uncommon), continue with the generic path below.
+ }
+
+ // Fast path (#2): comma-joined arrays -> build the joined value once and short-circuit the generic path.
+ if (isCommaGen && obj is IEnumerable enumerableC and not string and not IDictionary && sort == null &&
+ filter is not IterableFilter)
+ {
+#if NETSTANDARD2_0
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefixC = encodeDotInKeys && keyPrefixStr.IndexOf('.') >= 0
+ ? keyPrefixStr.Replace(".", "%2E")
+ : keyPrefixStr;
+#else
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefixC = encodeDotInKeys && keyPrefixStr.Contains('.', StringComparison.Ordinal)
+ ? keyPrefixStr.Replace(".", "%2E", StringComparison.Ordinal)
+ : keyPrefixStr;
+#endif
+ // Materialize once for count checks and iteration
+ var listC = seqList ?? enumerableC.Cast().ToList();
+ var adjustedPrefixC = crt && listC.Count == 1 ? $"{encodedPrefixC}[]" : encodedPrefixC;
+
+ // Honor empty list handling semantics
+ if (allowEmptyLists && listC.Count == 0)
+ return $"{adjustedPrefixC}[]";
+ if (listC.Count == 0)
+ return Array.Empty();
+
+ string joinedC;
+ if (encodeValuesOnly && encoder != null)
+ {
+ // Stream-encode each element and append literal commas between them.
+ var sbJoined = new StringBuilder(listC.Count * 8);
+ for (var i = 0; i < listC.Count; i++)
+ {
+ if (i > 0)
+ sbJoined.Append(
+ ','); // The separator comma is literal and never re-encoded; only commas originating inside element values become "%2C".
+ AppendCommaEncodedValue(sbJoined, listC[i], cs, format, encoder);
+ }
+
+ joinedC = sbJoined.ToString();
+
+ // Match legacy semantics: if the joined value is empty, treat it like `null`.
+ if (!string.IsNullOrEmpty(joinedC)) return $"{fmt(adjustedPrefixC)}={fmt(joinedC)}";
+ if (skipNulls)
+ return Array.Empty();
+
+ if (strictNullHandling)
+ return !encodeValuesOnly
+ ? fmt(encoder(adjustedPrefixC, cs, format))
+ : adjustedPrefixC;
+ // not strict: fall through to return `key=` below
+
+ // In values-only mode we do not encode the key via `encoder`.
+ return $"{fmt(adjustedPrefixC)}={fmt(joinedC)}";
+ }
+
+ // Join raw string representations; apply encoder to the full result if provided.
+ var tmp = new List(listC.Count);
+ foreach (var el in listC)
+ tmp.Add(ToInvariantString(el));
+ joinedC = string.Join(",", tmp);
+
+ // Match legacy semantics: if the joined value is empty, treat it like `null`.
+ if (string.IsNullOrEmpty(joinedC))
+ {
+ if (skipNulls)
+ return Array.Empty();
+
+ if (strictNullHandling)
+ return encoder != null && !encodeValuesOnly
+ ? fmt(encoder(adjustedPrefixC, cs, format))
+ : adjustedPrefixC;
+ // not strict: fall through to return `key=` below
+ }
+
+ if (encoder == null) return $"{fmt(adjustedPrefixC)}={fmt(joinedC)}";
+ var keyPartC = encoder(adjustedPrefixC, cs, format);
+ var valuePartC = encoder(joinedC, cs, format);
+ return $"{fmt(keyPartC)}={fmt(valuePartC)}";
}
List objKeys;
+ var commaElementsAlreadyEncoded = false;
if (isCommaGen && obj is IEnumerable enumerable and not string and not IDictionary)
{
- List strings = [];
+ List strings;
+ if (obj is List listObj)
+ strings = new List(listObj.Count);
+ else if (enumerable is ICollection { Count: > 0 } coll0)
+ strings = new List(coll0.Count);
+ else
+ strings = [];
+
if (encodeValuesOnly && encoder != null)
+ {
foreach (var el in enumerable)
- strings.Add(el is null ? "" : encoder(el.ToString(), null, null));
+ strings.Add(el is null ? "" : encoder(el, cs, format));
+ commaElementsAlreadyEncoded = true;
+ }
else
+ {
foreach (var el in enumerable)
strings.Add(el?.ToString() ?? "");
+ }
if (strings.Count != 0)
{
@@ -229,9 +575,19 @@ public static object Encode(
objKeys.Sort(Comparer.Create(sort));
}
- values.Capacity = Math.Max(values.Capacity, objKeys.Count);
+ var values = new List(objKeys.Count);
- var encodedPrefix = encodeDotInKeys ? keyPrefixStr.Replace(".", "%2E") : keyPrefixStr;
+#if NETSTANDARD2_0
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefix = encodeDotInKeys && keyPrefixStr.IndexOf('.') >= 0
+ ? keyPrefixStr.Replace(".", "%2E")
+ : keyPrefixStr;
+#else
+ // Intentionally gate on encodeDotInKeys only to preserve legacy behavior when AllowDots = false
+ var encodedPrefix = encodeDotInKeys && keyPrefixStr.Contains('.', StringComparison.Ordinal)
+ ? keyPrefixStr.Replace(".", "%2E", StringComparison.Ordinal)
+ : keyPrefixStr;
+#endif
var adjustedPrefix =
crt && isSeq && seqList is { Count: 1 }
? $"{encodedPrefix}[]"
@@ -240,6 +596,15 @@ public static object Encode(
if (allowEmptyLists && isSeq && seqList is { Count: 0 })
return $"{adjustedPrefix}[]";
+ // Fast path (#5): mark side-channel once per parent instead of per element
+ var markSideChannel = objKey is not null && (obj is IDictionary || isSeq);
+ if (markSideChannel)
+ sideChannel.Set(objKey!, step);
+
+ // Fast path (#4): hoist child-encoder decision out of the loop.
+ // For comma-joined arrays in values-only mode, do not re-encode the joined string.
+ var childEncoderForElements = commaElementsAlreadyEncoded ? null : encoder;
+
for (var i = 0; i < objKeys.Count; i++)
{
var key = objKeys[i];
@@ -336,8 +701,7 @@ IConvertible when int.TryParse(key.ToString(), out var parsed) =>
break;
}
- case IEnumerable ie
- and not string:
+ case IEnumerable and not string:
{
var idx = key switch
{
@@ -345,7 +709,7 @@ IConvertible when int.TryParse(key.ToString(), out var parsed) =>
IConvertible when int.TryParse(key.ToString(), out var parsed) => parsed,
_ => -1
};
- var list2 = seqList ?? ie.Cast().ToList();
+ var list2 = seqList!;
if ((uint)idx < (uint)list2.Count)
{
value = list2[idx];
@@ -371,29 +735,23 @@ IConvertible when int.TryParse(key.ToString(), out var parsed) => parsed,
var keyStr = key?.ToString() ?? "";
var encodedKey = keyStr;
#if NETSTANDARD2_0
- if (allowDots && encodeDotInKeys && keyStr.IndexOf('.') >= 0)
+ if (dotsAndEncode && keyStr.IndexOf('.') >= 0)
encodedKey = keyStr.Replace(".", "%2E");
#else
- if (allowDots && encodeDotInKeys && keyStr.Contains('.', StringComparison.Ordinal))
+ if (dotsAndEncode && keyStr.Contains('.', StringComparison.Ordinal))
encodedKey = keyStr.Replace(".", "%2E", StringComparison.Ordinal);
#endif
var keyPrefix =
- obj is IEnumerable and not string and not IDictionary
+ isSeq
? gen(adjustedPrefix, encodedKey)
: allowDots
? $"{adjustedPrefix}.{encodedKey}"
: $"{adjustedPrefix}[{encodedKey}]";
- if (objKey is not null && obj is IDictionary or IEnumerable and not string)
- sideChannel.Set(objKey, step);
-
- var childSc = new SideChannelFrame(sideChannel);
+ // Removed per-iteration sideChannel.Set
- var childEncoder =
- isCommaGen && encodeValuesOnly && obj is IEnumerable and not string
- ? null
- : encoder;
+ var childSc = IsLeaf(value, skipNulls) ? sideChannel : new SideChannelFrame(sideChannel);
var encoded = Encode(
value,
@@ -406,7 +764,7 @@ obj is IEnumerable and not string and not IDictionary
strictNullHandling,
skipNulls,
encodeDotInKeys,
- childEncoder,
+ childEncoderForElements,
serializeDate,
sort,
filter,
@@ -418,7 +776,9 @@ obj is IEnumerable and not string and not IDictionary
addQueryPrefix
);
- if (encoded is IEnumerable en and not string)
+ if (encoded is List enList)
+ values.AddRange(enList);
+ else if (encoded is IEnumerable en and not string)
foreach (var item in en)
values.Add(item);
else
diff --git a/QsNet/Internal/Utils.cs b/QsNet/Internal/Utils.cs
index e52d4b6..aabdeab 100644
--- a/QsNet/Internal/Utils.cs
+++ b/QsNet/Internal/Utils.cs
@@ -22,11 +22,6 @@ internal static class Utils
internal static partial class Utils
#endif
{
- ///
- /// The maximum length of a segment to encode in a single pass.
- ///
- private const int SegmentLimit = 1024;
-
///
/// A regex to match percent-encoded characters in the format %XX.
///
@@ -42,21 +37,6 @@ private static Regex MyRegex()
private static partial Regex MyRegex();
#endif
- ///
- /// A regex to match Unicode percent-encoded characters in the format %uXXXX.
- ///
-#if NETSTANDARD2_0
- private static readonly Regex MyRegex1Instance = new("%u[0-9a-f]{4}", RegexOptions.IgnoreCase);
-
- private static Regex MyRegex1()
- {
- return MyRegex1Instance;
- }
-#else
- [GeneratedRegex("%u[0-9a-f]{4}", RegexOptions.IgnoreCase, "en-GB")]
- private static partial Regex MyRegex1();
-#endif
-
///
/// Merges two objects, where the source object overrides the target object. If the source is a
/// Dictionary, it will merge its entries into the target. If the source is an IEnumerable, it will append
@@ -371,20 +351,60 @@ out var code
return sb.ToString();
}
+ // Precomputed 128-entry ASCII membership tables, indexed by char code (true = char is copied through unescaped)
+ // RFC 3986 unreserved: - . _ ~ 0-9 A-Z a-z
+ private static readonly bool[] UnreservedTable3986 =
+ CreateAsciiTable("-._~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ // RFC 1738 mode: the RFC 3986 unreserved set plus '(' and ')'
+ private static readonly bool[] UnreservedTable1738 =
+ CreateAsciiTable("()-._~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ // Legacy Latin-1 safe sets (differences from the UTF-8 tables above):
+ // - '+' is safe (NOT encoded)
+ // - '~' is NOT safe (WILL be encoded)
+ // RFC3986 variant (no parentheses)
+ private static readonly bool[] Latin1SafeTable3986 =
+ CreateAsciiTable("+-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+ // RFC1738 variant: same as above plus '(' and ')'
+ private static readonly bool[] Latin1SafeTable1738 =
+ CreateAsciiTable("()+-._0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
+
+    // Builds a 128-slot lookup in which slot n is true exactly when (char)n occurs in chars.
+    private static bool[] CreateAsciiTable(string chars)
+    {
+        var membership = new bool[128];
+        for (var pos = 0; pos < chars.Length; pos++) membership[chars[pos]] = true;
+        return membership;
+    }
+
+ private const string Utf8ReplacementPercent = "%EF%BF%BD"; // percent-encoded UTF-8 for U+FFFD
+
///
/// Encodes a value into a URL-encoded string.
///
/// The value to encode.
- /// The character encoding to use for encoding. Defaults to UTF-8.
+ ///
+ /// The character encoding to use for encoding. Defaults to UTF-8. If set to ISO‑8859‑1 (Latin‑1),
+ /// legacy rules apply (see remarks).
+ ///
/// The encoding format to use. Defaults to RFC 3986.
/// The encoded string.
+ ///
+ /// UTF‑8 mode uses precomputed ASCII lookups and a two‑strategy loop (copy runs of safe ASCII or escape‑heavy).
+ /// Latin‑1 mode preserves legacy behavior: '+' is considered safe; '~' is not.
+ /// UTF-16 code units beyond 0xFF are emitted as percent‑encoded numeric entities (e.g., <c>%26%23{code}%3B</c>),
+ /// which decode back to <c>&amp;#{code};</c>. Use <c>InterpretNumericEntities</c> after decoding
+ /// if you need those entities resolved to Unicode.
+ ///
public static string Encode(object? value, Encoding? encoding = null, Format? format = null)
{
encoding ??= Encoding.UTF8;
format ??= Format.Rfc3986;
var fmt = format.GetValueOrDefault();
- // These cannot be encoded
+ // Non-scalar inputs (maps/sequences/Undefined) are not encoded by design: return empty.
if (value is IEnumerable and not string and not byte[] or IDictionary or Undefined)
return string.Empty;
@@ -397,122 +417,508 @@ public static string Encode(object? value, Encoding? encoding = null, Format? fo
if (string.IsNullOrEmpty(str))
return string.Empty;
- var nonNullStr = str!;
-
- if (Equals(encoding, Encoding.GetEncoding("ISO-8859-1")))
+ var s = str!;
+ var len = s.Length;
+
+ // Latin-1 (ISO-8859-1) path with an ASCII fast-path.
+ // Legacy rules in this mode:
+ // - '+' is treated as safe (never percent-encoded).
+ // - '~' is NOT safe.
+ // - UTF-16 code units > 0xFF are emitted as percent-encoded numeric entities ("%26%23{code}%3B"),
+ // which decode back to "&#{code};". Call InterpretNumericEntities(...) afterwards
+ // if you need those resolved to Unicode characters.
+ if (encoding.CodePage == 28591)
{
-#pragma warning disable CS0618 // Type or member is obsolete
- return MyRegex1()
- .Replace(
- Escape(str!, fmt),
- match =>
+ var table = HexTable.Table;
+
+ if (fmt == Format.Rfc1738)
+ {
+ // Legacy behavior: in Latin-1 mode, treat '+' as safe (do not percent-encode)
+ // Scan to first unsafe ASCII (anything non-ASCII is unsafe for this pass)
+ var asciiSafe = Latin1SafeTable1738;
+ var i = 0;
+ while (i < len && s[i] <= 0x7F && asciiSafe[s[i]]) i++;
+ if (i == len)
+ return s; // all safe ASCII
+
+ // Sample to decide escape density
+ var sampleEnd = Math.Min(len, i + 64);
+ var unsafeCount = 0;
+ for (var k = i; k < sampleEnd; k++)
+ {
+ var ch = s[k];
+ if (ch > 0x7F || !asciiSafe[ch])
+ unsafeCount++;
+ }
+
+ var escapeHeavy = unsafeCount * 4 >= (sampleEnd - i) * 3; // ≥75% unsafe
+ var cap = escapeHeavy ? len >= int.MaxValue / 3 ? int.MaxValue : len * 3 : len + 16;
+ var sb = new StringBuilder(cap);
+
+ if (!escapeHeavy)
+ {
+ var lastSafe = 0;
+ for (var idx = 0; idx < len; idx++)
{
-#if NETSTANDARD2_0
- var code = int.Parse(match.Value.Substring(2), NumberStyles.HexNumber,
- CultureInfo.InvariantCulture);
-#else
- var code = int.Parse(match.Value[2..], NumberStyles.HexNumber, CultureInfo.InvariantCulture);
-#endif
- return $"%26%23{code}%3B";
+ int c = s[idx];
+ var safeAscii = c <= 0x7F && asciiSafe[c];
+ if (safeAscii)
+ continue;
+
+ // flush preceding safe run
+ if (idx > lastSafe)
+ sb.Append(s, lastSafe, idx - lastSafe);
+
+ switch (c)
+ {
+ case 0x20:
+ sb.Append('+'); // RFC1738 space
+ break;
+ case <= 0xFF:
+ sb.Append(table[c]); // %XX for Latin-1 bytes
+ break;
+ default:
+ // For non-Latin1 code units, emit percent-encoded numeric entity: %26%23{code}%3B
+ sb.Append("%26%23");
+ sb.Append(c.ToString(CultureInfo.InvariantCulture));
+ sb.Append("%3B");
+ break;
+ }
+
+ lastSafe = idx + 1;
+ }
+
+ if (lastSafe < len)
+ sb.Append(s, lastSafe, len - lastSafe);
+ }
+ else
+ {
+ // Escape-heavy mode: no run bookkeeping
+ if (i > 0) sb.Append(s, 0, i);
+
+                    for (var j = i; j < len; j++)
+                    {
+                        int c = s[j];
+                        switch (c)
+                        {
+                            case 0x20: // RFC1738 space -> '+', same as the run-copy branch, so output never depends on the density heuristic
+                                sb.Append('+');
+                                break;
+                            case <= 0x7F when asciiSafe[c]: // safe ASCII is copied through unchanged
+                                sb.Append((char)c);
+                                break;
+                            case <= 0xFF: // remaining ASCII / Latin-1 code units become %XX
+                                sb.Append(table[c]);
+                                break;
+                            default: // code unit > 0xFF: percent-encoded decimal numeric entity, %26%23{code}%3B
+                                sb.Append("%26%23").Append(c.ToString(CultureInfo.InvariantCulture)).Append("%3B");
+                                break;
+                        }
+                    }
- );
-#pragma warning restore CS0618 // Type or member is obsolete
+ }
+
+ return sb.ToString();
+ }
+ else
+ {
+ // Legacy behavior: in Latin-1 mode, treat '+' as safe (do not percent-encode)
+ // RFC3986 path (no parentheses allowed)
+ var asciiSafe = Latin1SafeTable3986;
+ var i = 0;
+ while (i < len && s[i] <= 0x7F && asciiSafe[s[i]]) i++;
+ if (i == len)
+ return s; // all safe ASCII
+
+ var sampleEnd = Math.Min(len, i + 64);
+ var unsafeCount = 0;
+ for (var k = i; k < sampleEnd; k++)
+ {
+ var ch = s[k];
+ if (ch > 0x7F || !asciiSafe[ch])
+ unsafeCount++;
+ }
+
+ var escapeHeavy = unsafeCount * 4 >= (sampleEnd - i) * 3; // ≥75% unsafe
+ var cap = escapeHeavy ? len >= int.MaxValue / 3 ? int.MaxValue : len * 3 : len + 16;
+ var sb = new StringBuilder(cap);
+
+ if (!escapeHeavy)
+ {
+ var lastSafe = 0;
+ for (var idx = 0; idx < len; idx++)
+ {
+ int c = s[idx];
+ var safeAscii = c <= 0x7F && asciiSafe[c];
+ if (safeAscii)
+ continue;
+
+ if (idx > lastSafe)
+ sb.Append(s, lastSafe, idx - lastSafe);
+
+ if (c <= 0xFF)
+ {
+ sb.Append(table[c]);
+ }
+ else
+ {
+ sb.Append("%26%23");
+ sb.Append(c);
+ sb.Append("%3B");
+ }
+
+ lastSafe = idx + 1;
+ }
+
+ if (lastSafe < len)
+ sb.Append(s, lastSafe, len - lastSafe);
+ }
+ else
+ {
+ if (i > 0) sb.Append(s, 0, i);
+
+ for (var j = i; j < len; j++)
+ {
+ int c = s[j];
+
+ switch (c)
+ {
+ case <= 0x7F when asciiSafe[c]:
+ sb.Append((char)c);
+ continue;
+ case <= 0xFF:
+ sb.Append(table[c]);
+ break;
+ default:
+ sb.Append("%26%23");
+ sb.Append(c);
+ sb.Append("%3B");
+ break;
+ }
+ }
+ }
+
+ return sb.ToString();
+ }
}
- var buffer = new StringBuilder();
- var j = 0;
+ // UTF-8 path with two strategies:
+ // 1) run-copy mode for mixed/mostly-safe inputs (lazy flush of safe runs)
+ // 2) escape-heavy mode for mostly-unsafe inputs (big prealloc, simpler loop)
- while (j < nonNullStr.Length)
+ if (fmt == Format.Rfc1738)
{
- // Take up to SegmentLimit characters, but never split a surrogate pair across the boundary.
- var remaining = nonNullStr.Length - j;
- var segmentLen = remaining >= SegmentLimit ? SegmentLimit : remaining;
+ // Scan to first unsafe ASCII (anything non-ASCII is unsafe-by-definition for this pass)
+ var asciiUnreserved = UnreservedTable1738;
+ var i = 0;
+ while (i < len && s[i] <= 0x7F && asciiUnreserved[s[i]]) i++;
+ if (i == len)
+ return s; // all safe ASCII
+
+ // Sample up to 64 chars after first unsafe to decide whether it's escape-heavy
+ var sampleEnd = Math.Min(len, i + 64);
+ var unsafeCount = 0;
+ for (var k = i; k < sampleEnd; k++)
+ {
+ var ch = s[k];
+ if (ch > 0x7F || !asciiUnreserved[ch])
+ unsafeCount++;
+ }
- // If the last char of this segment is a high surrogate and the next char exists and is a low surrogate,
- // shrink the segment by one so the pair is encoded together in the next iteration.
- if (
- segmentLen < remaining &&
- char.IsHighSurrogate(nonNullStr[j + segmentLen - 1]) &&
- char.IsLowSurrogate(nonNullStr[j + segmentLen])
- )
- segmentLen--; // keep the high surrogate with its low surrogate in the next chunk
+ var escapeHeavy = unsafeCount * 4 >= (sampleEnd - i) * 3; // ≥75% unsafe
+ var cap = escapeHeavy ? len >= int.MaxValue / 3 ? int.MaxValue : len * 3 : len + 16;
+ var sb = new StringBuilder(cap);
+ var table = HexTable.Table;
- var segment = nonNullStr.Substring(j, segmentLen);
+ if (!escapeHeavy)
+ {
+ var lastSafe = 0;
+ for (var idx = 0; idx < len; idx++)
+ {
+ int c = s[idx];
+ var safeAscii = c <= 0x7F && asciiUnreserved[c];
+ if (safeAscii)
+ continue;
+
+ // flush preceding safe run
+ if (idx > lastSafe)
+ sb.Append(s, lastSafe, idx - lastSafe);
+ // fast UTF-8 encode, surrogate-aware
+ if (c == 0x20)
+ {
+ sb.Append('+'); // RFC1738 space
+ }
+ else if ((uint)c < 0x80)
+ {
+ sb.Append(table[c]);
+ }
+ else if (c < 0x800)
+ {
+ sb.Append(table[0xC0 | (c >> 6)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ else if ((uint)(c - 0xD800) <= 0x07FF)
+ {
+ // Surrogates range
+ if ((uint)(c - 0xD800) <= 0x03FF && idx + 1 < len)
+ {
+ int d = s[idx + 1];
+ if ((uint)(d - 0xDC00) <= 0x03FF)
+ {
+ var codePoint = 0x10000 + (((c - 0xD800) << 10) | (d - 0xDC00));
+ sb.Append(table[0xF0 | (codePoint >> 18)]);
+ sb.Append(table[0x80 | ((codePoint >> 12) & 0x3F)]);
+ sb.Append(table[0x80 | ((codePoint >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (codePoint & 0x3F)]);
+ idx++; // consume low surrogate
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent); // unpaired high surrogate
+ }
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent); // unpaired low surrogate
+ }
+ }
+ else
+ {
+ sb.Append(table[0xE0 | (c >> 12)]);
+ sb.Append(table[0x80 | ((c >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+
+ lastSafe = idx + 1;
+ }
+
+ if (lastSafe < len)
+ sb.Append(s, lastSafe, len - lastSafe);
+ }
+ else
+ {
+ // Escape-heavy mode: no run bookkeeping, big prealloc
+ if (i > 0) sb.Append(s, 0, i);
+
+ for (var j = i; j < len; j++)
+ {
+ int c = s[j];
+ if ((uint)c < 0x80)
+ {
+ if (c == 0x20)
+ {
+ sb.Append('+'); // RFC1738 space
+ continue;
+ }
+ if (asciiUnreserved[c])
+ {
+ sb.Append((char)c);
+ continue;
+ }
+
+ sb.Append(table[c]);
+ }
+ else if (c < 0x800)
+ {
+ sb.Append(table[0xC0 | (c >> 6)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ else if ((uint)(c - 0xD800) <= 0x07FF)
+ {
+ if ((uint)(c - 0xD800) <= 0x03FF && j + 1 < len)
+ {
+ int d = s[j + 1];
+ if ((uint)(d - 0xDC00) <= 0x03FF)
+ {
+ var codePoint = 0x10000 + (((c - 0xD800) << 10) | (d - 0xDC00));
+ sb.Append(table[0xF0 | (codePoint >> 18)]);
+ sb.Append(table[0x80 | ((codePoint >> 12) & 0x3F)]);
+ sb.Append(table[0x80 | ((codePoint >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (codePoint & 0x3F)]);
+ j++;
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent);
+ }
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent);
+ }
+ }
+ else
+ {
+ sb.Append(table[0xE0 | (c >> 12)]);
+ sb.Append(table[0x80 | ((c >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ }
+ }
+
+ return sb.ToString();
+ }
+ else
+ {
+ // RFC3986 path (no parentheses allowed)
+ var asciiUnreserved = UnreservedTable3986;
var i = 0;
- while (i < segment.Length)
+ while (i < len && s[i] <= 0x7F && asciiUnreserved[s[i]]) i++;
+ if (i == len)
+ return s;
+
+ var sampleEnd = Math.Min(len, i + 64);
+ var unsafeCount = 0;
+ for (var k = i; k < sampleEnd; k++)
{
- var c = (int)segment[i];
+ var ch = s[k];
+ if (ch > 0x7F || !asciiUnreserved[ch])
+ unsafeCount++;
+ }
- switch (c)
+ var escapeHeavy = unsafeCount * 4 >= (sampleEnd - i) * 3; // ≥75% unsafe
+ var cap = escapeHeavy ? len >= int.MaxValue / 3 ? int.MaxValue : len * 3 : len + 16;
+ var sb = new StringBuilder(cap);
+ var table = HexTable.Table;
+
+ if (!escapeHeavy)
+ {
+ var lastSafe = 0;
+ for (var idx = 0; idx < len; idx++)
{
- case 0x2D or 0x2E or 0x5F or 0x7E:
- case >= 0x30 and <= 0x39:
- case >= 0x41 and <= 0x5A:
- case >= 0x61 and <= 0x7A:
- case 0x28 or 0x29 when fmt == Format.Rfc1738:
- buffer.Append(segment[i]);
- i++;
- continue;
- // ASCII
- case < 0x80:
- buffer.Append(HexTable.Table[c]);
- i++;
- continue;
- // 2 bytes
- case < 0x800:
- buffer.Append(HexTable.Table[0xC0 | (c >> 6)]);
- buffer.Append(HexTable.Table[0x80 | (c & 0x3F)]);
- i++;
- continue;
- case < 0xD800:
- // 3 bytes
- case >= 0xE000:
- buffer.Append(HexTable.Table[0xE0 | (c >> 12)]);
- buffer.Append(HexTable.Table[0x80 | ((c >> 6) & 0x3F)]);
- buffer.Append(HexTable.Table[0x80 | (c & 0x3F)]);
- i++;
+ int c = s[idx];
+ var safeAscii = c <= 0x7F && asciiUnreserved[c];
+ if (safeAscii)
continue;
+
+ if (idx > lastSafe)
+ sb.Append(s, lastSafe, idx - lastSafe);
+
+ // fast UTF-8 encode, surrogate-aware
+ if ((uint)c < 0x80)
+ {
+ sb.Append(table[c]);
+ }
+ else if (c < 0x800)
+ {
+ sb.Append(table[0xC0 | (c >> 6)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ else if ((uint)(c - 0xD800) <= 0x07FF)
+ {
+ // Surrogates range
+ if ((uint)(c - 0xD800) <= 0x03FF && idx + 1 < len)
+ {
+ int d = s[idx + 1];
+ if ((uint)(d - 0xDC00) <= 0x03FF)
+ {
+ var codePoint = 0x10000 + (((c - 0xD800) << 10) | (d - 0xDC00));
+ sb.Append(table[0xF0 | (codePoint >> 18)]);
+ sb.Append(table[0x80 | ((codePoint >> 12) & 0x3F)]);
+ sb.Append(table[0x80 | ((codePoint >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (codePoint & 0x3F)]);
+ idx++; // consume low surrogate
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent); // unpaired high surrogate
+ }
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent); // unpaired low surrogate
+ }
+ }
+ else
+ {
+ sb.Append(table[0xE0 | (c >> 12)]);
+ sb.Append(table[0x80 | ((c >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+
+ lastSafe = idx + 1;
}
- // 4 bytes (surrogate pair) – only if valid pair; otherwise treat as 3-byte fallback
- if (i + 1 >= segment.Length || !char.IsSurrogatePair(segment[i], segment[i + 1]))
+ if (lastSafe < len)
+ sb.Append(s, lastSafe, len - lastSafe);
+ }
+ else
+ {
+ if (i > 0) sb.Append(s, 0, i);
+
+ for (var j = i; j < len; j++)
{
- // Fallback: percent-encode the single surrogate code unit to remain lossless
- buffer.Append(HexTable.Table[0xE0 | (c >> 12)]);
- buffer.Append(HexTable.Table[0x80 | ((c >> 6) & 0x3F)]);
- buffer.Append(HexTable.Table[0x80 | (c & 0x3F)]);
- i++;
- continue;
- }
+ int c = s[j];
+ if ((uint)c < 0x80)
+ {
+ if (asciiUnreserved[c])
+ {
+ sb.Append((char)c);
+ continue;
+ }
- var nextC = segment[i + 1];
- var codePoint = char.ConvertToUtf32((char)c, nextC);
- buffer.Append(HexTable.Table[0xF0 | (codePoint >> 18)]);
- buffer.Append(HexTable.Table[0x80 | ((codePoint >> 12) & 0x3F)]);
- buffer.Append(HexTable.Table[0x80 | ((codePoint >> 6) & 0x3F)]);
- buffer.Append(HexTable.Table[0x80 | (codePoint & 0x3F)]);
- i += 2; // Skip the next character as it's part of the surrogate pair
+ sb.Append(table[c]);
+ }
+ else if (c < 0x800)
+ {
+ sb.Append(table[0xC0 | (c >> 6)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ else if ((uint)(c - 0xD800) <= 0x07FF)
+ {
+ if ((uint)(c - 0xD800) <= 0x03FF && j + 1 < len)
+ {
+ int d = s[j + 1];
+ if ((uint)(d - 0xDC00) <= 0x03FF)
+ {
+ var codePoint = 0x10000 + (((c - 0xD800) << 10) | (d - 0xDC00));
+ sb.Append(table[0xF0 | (codePoint >> 18)]);
+ sb.Append(table[0x80 | ((codePoint >> 12) & 0x3F)]);
+ sb.Append(table[0x80 | ((codePoint >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (codePoint & 0x3F)]);
+ j++;
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent);
+ }
+ }
+ else
+ {
+ sb.Append(Utf8ReplacementPercent);
+ }
+ }
+ else
+ {
+ sb.Append(table[0xE0 | (c >> 12)]);
+ sb.Append(table[0x80 | ((c >> 6) & 0x3F)]);
+ sb.Append(table[0x80 | (c & 0x3F)]);
+ }
+ }
}
- j += segment.Length; // advance by the actual processed count
+ return sb.ToString();
}
-
- return buffer.ToString();
}
///
- /// Decodes a URL-encoded string into its original form.
+ /// Decodes a URL-encoded string.
///
/// The URL-encoded string to decode.
/// The character encoding to use for decoding. Defaults to UTF-8.
/// The decoded string, or null if the input is null.
+ /// <remarks>
+ /// In UTF‑8 mode this delegates to the underlying URL-decoding routine.
+ /// In Latin‑1 mode it decodes %XX byte escapes and leaves characters beyond 0xFF as numeric entities
+ /// (e.g., <c>&amp;#12345;</c>) if they were produced by the encoder. Call
+ /// <see cref="InterpretNumericEntities"/> to convert those entities to Unicode code points if desired.
+ /// </remarks>
public static string? Decode(string? str, Encoding? encoding = null)
{
encoding ??= Encoding.UTF8;
var strWithoutPlus = str?.Replace('+', ' ');
- if (Equals(encoding, Encoding.GetEncoding("ISO-8859-1")))
+ if (encoding.CodePage == 28591) // ISO-8859-1 (Latin-1)
try
{
return MyRegex()
@@ -750,7 +1156,10 @@ void AddOne(object? x)
/// Checks if a value is a non-nullish primitive type.
///
/// The value to check.
- /// If true, empty strings and URIs are not considered non-nullish.
+ /// <param name="skipNulls">
+ /// If true, empty strings and values with an empty textual form are treated as
+ /// nullish.
+ /// </param>
/// True if the value is a non-nullish primitive, false otherwise.
public static bool IsNonNullishPrimitive(object? value, bool skipNulls = false)
{
@@ -844,14 +1253,13 @@ public static string InterpretNumericEntities(string str)
if (j < n && str[j] == ';' && j > startDigits)
{
- int code;
#if NETSTANDARD2_0
var digits = str.Substring(startDigits, j - startDigits);
var ok = int.TryParse(
digits,
hex ? NumberStyles.HexNumber : NumberStyles.Integer,
CultureInfo.InvariantCulture,
- out code
+ out var code
);
#else
var digits = str.AsSpan(startDigits, j - startDigits);
@@ -859,7 +1267,7 @@ out code
digits,
hex ? NumberStyles.HexNumber : NumberStyles.Integer,
CultureInfo.InvariantCulture,
- out code
+ out var code
);
#endif
if (!ok)
diff --git a/benchmarks/QsNet.Benchmarks/EncodeBenchmarks.cs b/benchmarks/QsNet.Benchmarks/EncodeBenchmarks.cs
new file mode 100644
index 0000000..aa7f459
--- /dev/null
+++ b/benchmarks/QsNet.Benchmarks/EncodeBenchmarks.cs
@@ -0,0 +1,144 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Order;
+using QsNet;
+using QsNet.Models;
+using QsNet.Enums;
+
+namespace QsNet.Benchmarks;
+
+/// <summary>
+/// End-to-end benchmark of <c>Qs.Encode</c> over a synthetic object graph.
+/// </summary>
+/// <remarks>
+/// The graph is rebuilt in <see cref="Setup"/> from a fixed random seed, so the input for a given
+/// parameter combination is reproducible from run to run.
+/// </remarks>
+[MemoryDiagnoser]
+[SimpleJob(RuntimeMoniker.Net80)]
+[Orderer(SummaryOrderPolicy.FastestToSlowest)]
+public class EncodeBenchmarks
+{
+    /// <summary>Dot-handling variants; each maps onto the AllowDots / EncodeDotInKeys option pair.</summary>
+    public enum DotMode
+    {
+        None, // AllowDots=false, EncodeDotInKeys=false
+        AllowDots, // AllowDots=true, EncodeDotInKeys=false
+        AllowDotsAndEncode // AllowDots=true, EncodeDotInKeys=true
+    }
+
+    // Size & shape
+
+    /// <summary>Number of string elements in the list stored under key "a".</summary>
+    [Params(10, 100, 1000)] public int Count { get; set; }
+
+    /// <summary>Length, in chars, of every generated string value.</summary>
+    [Params(8, 40)] public int ValueLen { get; set; }
+
+    /// <summary>Per-character chance (0-100) that a generated char is one that needs escaping.</summary>
+    [Params(0, 50)] public int NeedsEscPercent { get; set; }
+
+    // Option toggles that materially affect Encode()
+
+    /// <summary>Selects ListFormat.Comma when true, ListFormat.Indices otherwise.</summary>
+    [Params(false, true)] public bool CommaLists { get; set; }
+
+    /// <summary>Forwarded unchanged to <c>EncodeOptions.EncodeValuesOnly</c>.</summary>
+    [Params(false, true)] public bool EncodeValuesOnly { get; set; }
+
+    /// <summary>Dot handling applied to keys; see <see cref="DotMode"/>.</summary>
+    [Params(DotMode.None, DotMode.AllowDots, DotMode.AllowDotsAndEncode)] public DotMode Dots { get; set; }
+
+    /// <summary>
+    /// Produces a string of <paramref name="len"/> chars made of 'x' filler and, with probability
+    /// <paramref name="escPercent"/> percent per position, a character that needs escaping.
+    /// </summary>
+    /// <param name="len">Length of the string to generate.</param>
+    /// <param name="escPercent">Chance (0-100) per position of emitting an escape-worthy char; values &lt;= 0 yield all 'x'.</param>
+    /// <param name="rnd">Random source; it is not consumed at all when <paramref name="escPercent"/> is &lt;= 0.</param>
+    /// <returns>The generated string.</returns>
+    private static string MakeValue(int len, int escPercent, Random rnd)
+    {
+        if (escPercent <= 0)
+        {
+            return new string('x', len);
+        }
+
+        var chars = new char[len];
+        for (var i = 0; i < len; i++)
+        {
+            // First draw: does this position get an escape-worthy character at all?
+            if (rnd.Next(0, 100) >= escPercent)
+            {
+                chars[i] = 'x';
+                continue;
+            }
+
+            // Second draw (only for escaped positions): mix of characters that typically require escaping.
+            chars[i] = rnd.Next(0, 4) switch
+            {
+                0 => ' ', // space -> %20 or +
+                1 => '%', // percent -> %25
+                2 => '\u00E4', // non-ASCII -> UTF-8 percent-encoded
+                _ => ',' // comma (should be encoded inside list items)
+            };
+        }
+
+        return new string(chars);
+    }
+
+    // Populated by Setup() before any benchmark runs; default! silences the nullable-initialization warning.
+    private object _data = default!;
+    private EncodeOptions _options = default!;
+
+    /// <summary>
+    /// Builds the input graph and the <see cref="EncodeOptions"/> for the current parameter combination.
+    /// </summary>
+    [GlobalSetup]
+    public void Setup()
+    {
+        // Fixed seed keeps the generated data reproducible from run to run.
+        var rnd = new Random(12345);
+
+        // Build a realistic object graph to exercise the encoder:
+        // - list under key "a" (affected by ListFormat)
+        // - dotted key under nested dictionary (affected by EncodeDotInKeys)
+        // - a date and a boolean for primitive branches
+        var list = Enumerable.Range(0, Count)
+            .Select(_ => (object?)MakeValue(ValueLen, NeedsEscPercent, rnd))
+            .ToList();
+
+        _data = new Dictionary<string, object?>
+        {
+            ["a"] = list,
+            ["a_empty"] = new List<object?>(),
+            ["a_nested"] = new List<object?> { new List<object?> { MakeValue(ValueLen, NeedsEscPercent, rnd) } },
+            ["b"] = new Dictionary<string, object?>
+            {
+                ["x.y"] = MakeValue(ValueLen, NeedsEscPercent, rnd),
+                ["inner"] = new Dictionary<string, object?>
+                {
+                    ["z"] = MakeValue(ValueLen, NeedsEscPercent, rnd)
+                }
+            },
+            ["c"] = new DateTimeOffset(2024, 1, 2, 3, 4, 5, TimeSpan.Zero),
+            ["d"] = true
+        };
+
+        _options = new EncodeOptions
+        {
+            ListFormat = CommaLists ? ListFormat.Comma : ListFormat.Indices,
+            EncodeValuesOnly = EncodeValuesOnly,
+            AllowDots = Dots != DotMode.None,
+            EncodeDotInKeys = Dots == DotMode.AllowDotsAndEncode,
+            // Leave other toggles at defaults to mirror common usage.
+        };
+    }
+
+    /// <summary>Measures the public <c>Qs.Encode</c> entry point against the prepared graph and options.</summary>
+    [Benchmark]
+    public string Encode_Public() => Qs.Encode(_data, _options);
+}
\ No newline at end of file
diff --git a/benchmarks/QsNet.Benchmarks/UtilsEncodeBenchmarks.cs b/benchmarks/QsNet.Benchmarks/UtilsEncodeBenchmarks.cs
new file mode 100644
index 0000000..3227278
--- /dev/null
+++ b/benchmarks/QsNet.Benchmarks/UtilsEncodeBenchmarks.cs
@@ -0,0 +1,76 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Jobs;
+using BenchmarkDotNet.Order;
+using QsNet.Enums;
+using QsNet.Models;
+using QsNet.Internal;
+
+/// <summary>
+/// Micro-benchmark for <c>QsNet.Internal.Utils.Encode</c> across input length, format, encoding and data shape.
+/// </summary>
+[MemoryDiagnoser]
+public class UtilsEncodeBenchmarks
+{
+    /// <summary>Exact length, in chars, of the generated input; 0 yields an empty string.</summary>
+    [Params(0, 8, 40, 512, 4096)]
+    public int Len { get; set; }
+
+    /// <summary>Format handed to the encoder.</summary>
+    [Params(Format.Rfc3986, Format.Rfc1738)]
+    public Format Fmt { get; set; }
+
+    /// <summary>Encoding under test: "Latin1" selects ISO-8859-1, any other value UTF-8 without a BOM.</summary>
+    [Params("UTF8", "Latin1")]
+    public string EncName { get; set; } = "UTF8";
+
+    /// <summary>Workload shape: names the seed text that is repeated to build the input.</summary>
+    [Params("ascii-safe", "utf8-mixed", "latin1-fallback", "reserved-heavy")]
+    public string DataKind { get; set; } = "ascii-safe";
+
+    // Both are assigned in Setup(); default! only silences the nullable-initialization warning.
+    private Encoding _enc = default!;
+    private string _input = default!;
+
+    /// <summary>
+    /// Resolves the encoding and builds an input of exactly <see cref="Len"/> chars from the selected seed.
+    /// </summary>
+    [GlobalSetup]
+    public void Setup()
+    {
+        _enc = EncName == "Latin1" ? Encoding.GetEncoding("ISO-8859-1") : new UTF8Encoding(false);
+
+        // note: () included to exercise RFC1738 paren allowance
+        var asciiSafeBase = "abcDEF-_.~0123456789() ";
+        var utfMixedBase = "Café 北京 – ☕️ 😀 ";
+        var latin1Fallback = "Café – € àèìòù "; // '–' and '€' are not in ISO-8859-1 -> numeric-entity fallback
+        var reservedHeavy = "name=obj[a]&b=c d/%[]()+=";
+
+        var seed = DataKind switch
+        {
+            "ascii-safe" => asciiSafeBase,
+            "utf8-mixed" => utfMixedBase,
+            "latin1-fallback" => latin1Fallback,
+            "reserved-heavy" => reservedHeavy,
+            _ => asciiSafeBase // unknown kinds fall back to the ASCII seed
+        };
+
+        // Repeat the seed ceil(Len / seed.Length) times (at least once), then trim to exactly Len chars.
+        var repeats = Math.Max(1, (Len + seed.Length - 1) / seed.Length);
+        _input = string.Concat(Enumerable.Repeat(seed, repeats)).Substring(0, Len);
+    }
+
+    /// <summary>Baseline: the library encoder with the selected encoding and format.</summary>
+    [Benchmark(Baseline = true)]
+    public string Encode() => QsNet.Internal.Utils.Encode(_input, _enc, Fmt);
+
+    /// <summary>
+    /// Orientation-only reference (different semantics for spaces/legacy, but useful for perf smell tests).
+    /// Does not depend on <see cref="EncName"/> or <see cref="Fmt"/>.
+    /// </summary>
+    [Benchmark]
+    public string UriEscape() => Uri.EscapeDataString(_input);
+}
\ No newline at end of file