Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 59 additions & 14 deletions benchmarks/sequence_set-new.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,17 @@ prelude: |
N_RAND = 100

# Returns an Array of +n+ random integers, each drawn from +min+..+max+
# (inclusive).  +max+ defaults to 1.25 * n (truncated).  The +min+ keyword is
# honored as the lower bound of the range instead of being silently ignored.
def rand_nums(n, min: 1, max: (n * 1.25).to_i) = Array.new(n) { rand(min..max) }
def rand_entries(...) = SeqSet[rand_nums(...)].elements.shuffle
def rand_string(...) = SeqSet[rand_nums(...)].string.split(?,).shuffle.join(?,)
def rand_entries(...) = SeqSet[rand_nums(...)].elements
def rand_string(...) = SeqSet[rand_nums(...)].string

# Randomizes the entry order of every benchmark input, in place.
#
# Array inputs are replaced by a shuffled copy.  String inputs are split on
# ",", shuffled, and re-joined with ",".  Any other kind of input raises
# NoMatchingPatternError.  Returns +inputs+ itself.
def shuffle(inputs)
  inputs.map! { |input|
    case input
    when Array  then input.shuffle
    when String then input.split(",").shuffle.join(",")
    else raise NoMatchingPatternError, input.inspect
    end
  }
end

def build_string_inputs(n, n_rand, **)
Array.new(n_rand) { rand_string(n, **) }
Expand All @@ -35,51 +44,83 @@ prelude: |

benchmark:

- name: n=10 ints
- name: n= 10 ints (sorted)
prelude: inputs = build_int_inputs 10, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=10 string
- name: n= 10 string (sorted)
prelude: inputs = build_string_inputs 10, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=100 ints
- name: n= 10 ints (shuffled)
prelude: inputs = build_int_inputs 10, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 10 string (shuffled)
prelude: inputs = build_string_inputs 10, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 100 ints (sorted)
prelude: inputs = build_int_inputs 100, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=100 string
- name: n= 100 string (sorted)
prelude: inputs = build_string_inputs 100, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=1000 ints
- name: n= 100 ints (shuffled)
prelude: inputs = build_int_inputs 100, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 100 string (shuffled)
prelude: inputs = build_string_inputs 100, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 1,000 ints (sorted)
prelude: inputs = build_int_inputs 1000, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=1000 string
- name: n= 1,000 string (sorted)
prelude: inputs = build_string_inputs 1000, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=10,000 ints
- name: n= 1,000 ints (shuffled)
prelude: inputs = build_int_inputs 1000, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 1,000 string (shuffled)
prelude: inputs = build_string_inputs 1000, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 10,000 ints (sorted)
prelude: inputs = build_int_inputs 10_000, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=10,000 string
- name: n= 10,000 string (sorted)
prelude: inputs = build_string_inputs 10_000, N_RAND
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n=100,000 ints
- name: n= 10,000 ints (shuffled)
prelude: inputs = build_int_inputs 10_000, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

- name: n= 10,000 string (shuffled)
prelude: inputs = build_string_inputs 10_000, N_RAND and shuffle inputs
script: SeqSet[inputs[i = (i+1) % N_RAND]]

# Only N_RAND / 2 inputs are built for this size, so the index must wrap at
# the same bound; wrapping at N_RAND would index past the end of `inputs`.
- name: n=100,000 ints (sorted)
  prelude: inputs = build_int_inputs 100_000, N_RAND / 2
  script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]

- name: n=100,000 string
- name: n=100,000 string (sorted)
prelude: inputs = build_string_inputs 100_000, N_RAND / 2
script: SeqSet[inputs[i = (i+1) % (N_RAND / 2)]]

# - name: n=1,000,000 ints
# - name: n=1,000,000 ints
# prelude: inputs = build_int_inputs 1_000_000
# script: SeqSet[inputs[i = (i+1) % N_RAND]]

# - name: n=10,000,000 ints
# - name: n=10,000,000 ints
# prelude: inputs = build_int_inputs 10_000_000
# script: SeqSet[inputs[i = (i+1) % N_RAND]]

Expand All @@ -89,6 +130,10 @@ contexts:
$LOAD_PATH.unshift "./lib"
$allowed_to_profile = true # only profile local code
require: false
- name: v0.5.12
gems:
net-imap: 0.5.12
require: false
- name: v0.5.9
gems:
net-imap: 0.5.9
Expand Down
4 changes: 4 additions & 0 deletions benchmarks/sequence_set-normalize.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ benchmark:

contexts:
# n.b: can't use anything newer as the baseline: it's over 500x faster!
- name: v0.5.12
gems:
net-imap: 0.5.12
require: false
- name: v0.5.9
gems:
net-imap: 0.5.9
Expand Down
59 changes: 44 additions & 15 deletions lib/net/imap/sequence_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ class IMAP
# #entries and #elements are identical. Use #append to preserve #entries
# order while modifying a set.
#
# Non-normalized sets store both representations of the set, which can more
# than double memory usage. Very large sequence sets should avoid
# denormalizing methods (such as #append) unless order is significant.
#
# == Using <tt>*</tt>
#
# \IMAP sequence sets may contain a special value <tt>"*"</tt>, which
Expand Down Expand Up @@ -586,7 +590,7 @@ def valid_string
# the set is updated the string will be normalized.
#
# Related: #valid_string, #normalized_string, #to_s, #inspect
def string; @string ||= normalized_string if valid? end
def string; @string || normalized_string if valid? end

# Returns an array with #normalized_string when valid and an empty array
# otherwise.
Expand All @@ -605,13 +609,18 @@ def string=(input)
clear
elsif (str = String.try_convert(input))
modifying! # short-circuit before parsing the string
tuples = str_to_tuples str
@tuples, @string = [], -str
tuples_add tuples
entries = each_parsed_entry(str).to_a
clear
if normalized_entries?(entries)
@tuples.replace entries.map!(&:minmax)
else
tuples_add entries.map!(&:minmax)
@string = -str
end
else
raise ArgumentError, "expected a string or nil, got #{input.class}"
end
str
input
end

# Returns the \IMAP +sequence-set+ string representation, or an empty
Expand All @@ -624,7 +633,6 @@ def to_s; string || "" end
# Freezes and returns the set. A frozen SequenceSet is Ractor-safe.
def freeze
return self if frozen?
string
@tuples.each(&:freeze).freeze
super
end
Expand Down Expand Up @@ -971,7 +979,10 @@ def add(element)
# set = Net::IMAP::SequenceSet.new("2,1,9:10")
# set.append(11..12) # => Net::IMAP::SequenceSet("2,1,9:12")
#
# See SequenceSet@Ordered+and+Normalized+sets.
# Non-normalized sets store the string <em>in addition to</em> an internal
# normalized uint32 set representation. This can more than double memory
# usage, so large sets should avoid using #append unless preserving order
# is required. See SequenceSet@Ordered+and+Normalized+sets.
#
# Related: #add, #merge, #union
def append(entry)
Expand Down Expand Up @@ -1685,20 +1696,20 @@ def xor!(other)
merge(other).subtract(both)
end

# Returns a new SequenceSet with a normalized string representation.
# Returns a SequenceSet with a normalized string representation: entries
# have been sorted, deduplicated, and coalesced, and all entries
# are in normal form. Returns +self+ for frozen normalized sets, and a
# normalized duplicate otherwise.
#
# The returned set's #string is sorted and deduplicated. Adjacent or
# overlapping elements will be merged into a single larger range.
# See SequenceSet@Ordered+and+Normalized+sets.
#
# Net::IMAP::SequenceSet["1:5,3:7,10:9,10:11"].normalize
# #=> Net::IMAP::SequenceSet["1:7,9:11"]
#
# Related: #normalize!, #normalized_string
def normalize
str = normalized_string
return self if frozen? && str == string
remain_frozen dup.instance_exec { @string = str&.-@; self }
return self if frozen? && (@string.nil? || @string == normalized_string)
remain_frozen dup.normalize!
end

# Resets #string to be sorted, deduplicated, and coalesced. Returns
Expand Down Expand Up @@ -1884,9 +1895,27 @@ def export_string_entries(entries)

# Formats a tuple as an entry string.  Duplicate members are dropped first, so
# a tuple whose members are equal is written as a single value; each remaining
# member is converted with from_tuple_int and the parts are joined with ":".
def tuple_to_str(tuple)
  parts = tuple.uniq.map { |int| from_tuple_int(int) }
  parts.join(":")
end
# Splits +str+ on "," and converts every piece with str_to_tuple.  The -1
# limit keeps trailing empty pieces, so they are passed to str_to_tuple
# instead of being dropped by the split.
def str_to_tuples(str)
  str.split(",", -1).map! { |entry| str_to_tuple(entry) }
end
def str_to_tuple(str)
def str_to_tuple(str) parse_string_entry(str).minmax end

# Parses a single sequence-set entry string into an array of tuple ints.
#
# The entry is split on the first ":" only, and each part is converted with
# to_tuple_int.  The result is [num] or [num, num], in the order written;
# unlike str_to_tuple, it is not passed through +minmax+.
#
# Raises DataFormatError when +str+ is empty.
def parse_string_entry(str)
  raise DataFormatError, "invalid sequence set string" if str.empty?
  # Parse exactly once; callers that need an ordered tuple apply minmax.
  str.split(":", 2).map! { to_tuple_int _1 }
end

# Yields each comma-separated entry of +str+ after it has been parsed by
# parse_string_entry, in the order the entries appear in the string.
# Nothing is yielded when +str+ is nil.  Returns an Enumerator when called
# without a block.
def each_parsed_entry(str)
  if block_given?
    str&.split(",", -1) { |raw| yield parse_string_entry(raw) }
  else
    enum_for(:each_parsed_entry, str)
  end
end

# Returns true when +entries+ (arrays shaped [first] or [first, last]) are
# already in normalized order, and false otherwise.  Every entry must satisfy:
#
# * a two-member entry has +last+ strictly greater than +first+
#   (rejects "1:1" and "2:1");
# * +first+ is at least two more than the end of the preceding entry
#   (rejects "2,1", "1,1" and the adjacent "1,2").
#
# An empty list is considered normalized.
def normalized_entries?(entries)
  prev_end = nil
  entries.all? do |lower, upper|
    next false if upper && upper <= lower
    next false if prev_end && lower <= prev_end + 1
    prev_end = upper || lower
    true
  end
end

# Looks up a candidate range with range_gte_to(min) and reports whether that
# range covers min..max.  Returns nil when range_gte_to finds no candidate.
def include_tuple?((lo, hi))
  candidate = range_gte_to(lo)
  candidate&.cover?(lo..hi)
end
Expand Down