From 0560cce98dc3ad61790cee7e05c0e631c374d018 Mon Sep 17 00:00:00 2001 From: nick evans Date: Mon, 24 Nov 2025 12:16:33 -0500 Subject: [PATCH] =?UTF-8?q?=E2=9A=A1=20Faster=20`SequenceSet#normalize`=20?= =?UTF-8?q?when=20frozen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Calling `SequenceSet#normalize` on a frozen set can be more than 4x faster, by simply re-parsing `@string` and scanning its elements, rather than fully generating a new string and comparing it with `@string`. ``` normal reparse and check: 20449.2 i/s generate and compare: 20267.2 i/s - 1.01x slower v0.5.12: 3090.2 i/s - 6.62x slower frozen and normal generate and compare: 19328485.2 i/s reparse and check: 17455122.3 i/s - 1.11x slower v0.5.12: 3730.0 i/s - 5181.95x slower unsorted reparse and check: 16936.2 i/s generate and compare: 16872.9 i/s - 1.00x slower v0.5.12: 2583.6 i/s - 6.56x slower abnormal generate and compare: 17610.8 i/s reparse and check: 16596.1 i/s - 1.06x slower v0.5.12: 2560.3 i/s - 6.88x slower frozen unsorted reparse and check: 10089.5 i/s v0.5.12: 2333.7 i/s - 4.32x slower generate and compare: 2093.1 i/s - 4.82x slower frozen abnormal reparse and check: 10392.1 i/s v0.5.12: 2354.5 i/s - 4.41x slower generate and compare: 2124.3 i/s - 4.89x slower ``` Please note that these results do vary based on benchmark settings, e.g: size of the sequence set. --- benchmarks/sequence_set-normalize.yml | 30 +++++++++++++++++++++++++-- lib/net/imap/sequence_set.rb | 4 +++- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/benchmarks/sequence_set-normalize.yml b/benchmarks/sequence_set-normalize.yml index 7755381b..af7d35ed 100644 --- a/benchmarks/sequence_set-normalize.yml +++ b/benchmarks/sequence_set-normalize.yml @@ -42,7 +42,7 @@ prelude: | end end - def init_unsorted_sets(...) + def make_unsorted_sets(...) init_sets(...) .each do |seqset| entries = shuffle_entries(seqset) @@ -51,6 +51,10 @@ prelude: | seqset.append entry end end + end + + def init_unsorted_sets(...) + make_unsorted_sets(...) .tap do maybe_profile("seqset-normalize-unsorted") end end @@ -68,14 +72,30 @@ prelude: | .join(",") end - def init_abnormal_sets(...) + def make_abnormal_sets(...) init_sets(...) .each do |seqset| seqset.string = abnormal_form(seqset) end + end + + def init_abnormal_sets(...) + make_abnormal_sets(...) .tap do maybe_profile("seqset-normalize-abnormal") end end + def init_frozen_unsorted_sets(...) + make_unsorted_sets(...) + .map(&:freeze) + .tap do maybe_profile("seqset-normalize-frozen-unsorted") end + end + + def init_frozen_abnormal_sets(...) + make_abnormal_sets(...) + .map(&:freeze) + .tap do maybe_profile("seqset-normalize-frozen-abnormal") end + end + # warmup (esp. for JIT) WARMUP_RUNS.times do init_sets(count: 20, set_size: 100, max: 120).each do |set| @@ -96,6 +116,12 @@ benchmark: - name: "abnormal" prelude: $sets = init_abnormal_sets script: $sets.sample.normalize + - name: "frozen unsorted" + prelude: $sets = init_frozen_unsorted_sets + script: $sets.sample.normalize + - name: "frozen abnormal" + prelude: $sets = init_frozen_abnormal_sets + script: $sets.sample.normalize contexts: # n.b: can't use anything newer as the baseline: it's over 500x faster! diff --git a/lib/net/imap/sequence_set.rb b/lib/net/imap/sequence_set.rb index 15fa6847..cdabb4b7 100644 --- a/lib/net/imap/sequence_set.rb +++ b/lib/net/imap/sequence_set.rb @@ -1708,7 +1708,7 @@ def xor!(other) # # Related: #normalize!, #normalized_string def normalize - return self if frozen? && (@string.nil? || @string == normalized_string) + return self if frozen? && (@string.nil? || normal_string?(@string)) remain_frozen dup.normalize! end @@ -1908,6 +1908,8 @@ def each_parsed_entry(str) str&.split(",", -1) do |entry| yield parse_string_entry(entry) end end + def normal_string?(str) normalized_entries? each_parsed_entry str end + def normalized_entries?(entries) max = nil entries.each do |first, last|