Skip to content

Commit

Permalink
[rubygems/rubygems] Aggressively optimize allocations in SafeMarshal
Browse files Browse the repository at this point in the history
Reduces allocations in a bundle install --full-index by an order of magnitude

Main wins are (a) getting rid of exessive string allocations for exception message stack

(b) Avoiding hash allocations caused by kwargs for #initialize

(c) avoid using unpack to do bit math, its easy enough to do by hand

(d) special case the most common elements so they can be read without an allocation

(e) avoid string allocations every time a symbol->string lookup is done by using symbol#name

rubygems/rubygems@7d2ee51402
  • Loading branch information
segiddins authored and matzbot committed Sep 21, 2023
1 parent a49d17a commit 0a423d4
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 89 deletions.
30 changes: 15 additions & 15 deletions lib/rubygems/safe_marshal/elements.rb
Expand Up @@ -7,14 +7,14 @@ class Element
end

class Symbol < Element
def initialize(name:)
def initialize(name)
@name = name
end
attr_reader :name
end

class UserDefined < Element
def initialize(name:, binary_string:)
def initialize(name, binary_string)
@name = name
@binary_string = binary_string
end
Expand All @@ -23,7 +23,7 @@ def initialize(name:, binary_string:)
end

class UserMarshal < Element
def initialize(name:, data:)
def initialize(name, data)
@name = name
@data = data
end
Expand All @@ -32,40 +32,40 @@ def initialize(name:, data:)
end

class String < Element
def initialize(str:)
def initialize(str)
@str = str
end

attr_reader :str
end

class Hash < Element
def initialize(pairs:)
def initialize(pairs)
@pairs = pairs
end

attr_reader :pairs
end

class HashWithDefaultValue < Hash
def initialize(default:, **kwargs)
super(**kwargs)
def initialize(pairs, default)
super(pairs)
@default = default
end

attr_reader :default
end

class Array < Element
def initialize(elements:)
def initialize(elements)
@elements = elements
end

attr_reader :elements
end

class Integer < Element
def initialize(int:)
def initialize(int)
@int = int
end

Expand All @@ -86,7 +86,7 @@ def initialize
end

class WithIvars < Element
def initialize(object:,ivars:)
def initialize(object, ivars)
@object = object
@ivars = ivars
end
Expand All @@ -95,7 +95,7 @@ def initialize(object:,ivars:)
end

class Object < Element
def initialize(name:)
def initialize(name)
@name = name
end
attr_reader :name
Expand All @@ -106,28 +106,28 @@ class Nil < Element
end

class ObjectLink < Element
def initialize(offset:)
def initialize(offset)
@offset = offset
end
attr_reader :offset
end

class SymbolLink < Element
def initialize(offset:)
def initialize(offset)
@offset = offset
end
attr_reader :offset
end

class Float < Element
def initialize(string:)
def initialize(string)
@string = string
end
attr_reader :string
end

class Bignum < Element # rubocop:disable Lint/UnifiedInteger
def initialize(sign:, data:)
def initialize(sign, data)
@sign = sign
@data = data
end
Expand Down
158 changes: 117 additions & 41 deletions lib/rubygems/safe_marshal/reader.rb
Expand Up @@ -49,19 +49,19 @@ def read_integer
when 0x00
0
when 0x01
@io.read(1).unpack1("C")
read_byte
when 0x02
@io.read(2).unpack1("S<")
read_byte | (read_byte << 8)
when 0x03
(@io.read(3) + "\0").unpack1("L<")
read_byte | (read_byte << 8) | (read_byte << 16)
when 0x04
@io.read(4).unpack1("L<")
read_byte | (read_byte << 8) | (read_byte << 16) | (read_byte << 24)
when 0xFC
@io.read(4).unpack1("L<") | -0x100000000
read_byte | (read_byte << 8) | (read_byte << 16) | (read_byte << 24) | -0x100000000
when 0xFD
(@io.read(3) + "\0").unpack1("L<") | -0x1000000
read_byte | (read_byte << 8) | (read_byte << 16) | -0x1000000
when 0xFE
@io.read(2).unpack1("s<") | -0x10000
read_byte | (read_byte << 8) | -0x10000
when 0xFF
read_byte | -0x100
else
Expand All @@ -88,31 +88,51 @@ def read_element
when 85 then read_user_marshal # ?U
when 91 then read_array # ?[
when 102 then read_float # ?f
when 105 then Elements::Integer.new int: read_integer # ?i
when 105 then Elements::Integer.new(read_integer) # ?i
when 108 then read_bignum # ?l
when 111 then read_object # ?o
when 117 then read_user_defined # ?u
when 123 then read_hash # ?{
when 125 then read_hash_with_default_value # ?}
when "e".ord then read_extended_object
when "c".ord then read_class
when "m".ord then read_module
when "M".ord then read_class_or_module
when "d".ord then read_data
when "/".ord then read_regexp
when "S".ord then read_struct
when "C".ord then read_user_class
when 101 then read_extended_object # ?e
when 99 then read_class # ?c
when 109 then read_module # ?m
when 77 then read_class_or_module # ?M
when 100 then read_data # ?d
when 47 then read_regexp # ?/
when 83 then read_struct # ?S
when 67 then read_user_class # ?C
else
raise Error, "Unknown marshal type discriminator #{type.chr.inspect} (#{type})"
end
end

STRING_E_SYMBOL = Elements::Symbol.new("E").freeze
private_constant :STRING_E_SYMBOL

def read_symbol
Elements::Symbol.new name: @io.read(read_integer)
len = read_integer
if len == 1
byte = read_byte
if byte == 69 # ?E
STRING_E_SYMBOL
else
Elements::Symbol.new(byte.chr)
end
else
name = -@io.read(len)
Elements::Symbol.new(name)
end
end

EMPTY_STRING = Elements::String.new("".b.freeze).freeze
private_constant :EMPTY_STRING

def read_string
Elements::String.new(str: @io.read(read_integer))
length = read_integer
return EMPTY_STRING if length == 0
str = @io.read(length)
Elements::String.new(str)
end

def read_true
Expand All @@ -124,67 +144,123 @@ def read_false
end

def read_user_defined
Elements::UserDefined.new(name: read_element, binary_string: @io.read(read_integer))
name = read_element
binary_string = @io.read(read_integer)
Elements::UserDefined.new(name, binary_string)
end

EMPTY_ARRAY = Elements::Array.new([].freeze).freeze
private_constant :EMPTY_ARRAY

def read_array
Elements::Array.new(elements: Array.new(read_integer) do |_i|
read_element
end)
length = read_integer
return EMPTY_ARRAY if length == 0
elements = Array.new(length) do
read_element
end
Elements::Array.new(elements)
end

def read_object_with_ivars
Elements::WithIvars.new(object: read_element, ivars:
Array.new(read_integer) do
[read_element, read_element]
end)
object = read_element
ivars = Array.new(read_integer) do
[read_element, read_element]
end
Elements::WithIvars.new(object, ivars)
end

def read_symbol_link
Elements::SymbolLink.new offset: read_integer
offset = read_integer
Elements::SymbolLink.new(offset)
end

def read_user_marshal
Elements::UserMarshal.new(name: read_element, data: read_element)
end
name = read_element
data = read_element
Elements::UserMarshal.new(name, data)
end

# profiling bundle install --full-index shows that
# offset 6 is by far the most common object link,
# so we special case it to avoid allocating a new
# object a third of the time.
# the following are all the object links that
# appear more than 10000 times in my profiling

OBJECT_LINKS = {
6 => Elements::ObjectLink.new(6).freeze,
30 => Elements::ObjectLink.new(30).freeze,
81 => Elements::ObjectLink.new(81).freeze,
34 => Elements::ObjectLink.new(34).freeze,
38 => Elements::ObjectLink.new(38).freeze,
50 => Elements::ObjectLink.new(50).freeze,
91 => Elements::ObjectLink.new(91).freeze,
42 => Elements::ObjectLink.new(42).freeze,
46 => Elements::ObjectLink.new(46).freeze,
150 => Elements::ObjectLink.new(150).freeze,
100 => Elements::ObjectLink.new(100).freeze,
104 => Elements::ObjectLink.new(104).freeze,
108 => Elements::ObjectLink.new(108).freeze,
242 => Elements::ObjectLink.new(242).freeze,
246 => Elements::ObjectLink.new(246).freeze,
139 => Elements::ObjectLink.new(139).freeze,
143 => Elements::ObjectLink.new(143).freeze,
114 => Elements::ObjectLink.new(114).freeze,
308 => Elements::ObjectLink.new(308).freeze,
200 => Elements::ObjectLink.new(200).freeze,
54 => Elements::ObjectLink.new(54).freeze,
62 => Elements::ObjectLink.new(62).freeze,
1_286_245 => Elements::ObjectLink.new(1_286_245).freeze,
}.freeze
private_constant :OBJECT_LINKS

def read_object_link
Elements::ObjectLink.new(offset: read_integer)
offset = read_integer
OBJECT_LINKS[offset] || Elements::ObjectLink.new(offset)
end

EMPTY_HASH = Elements::Hash.new([].freeze).freeze
private_constant :EMPTY_HASH

def read_hash
pairs = Array.new(read_integer) do
length = read_integer
return EMPTY_HASH if length == 0
pairs = Array.new(length) do
[read_element, read_element]
end
Elements::Hash.new(pairs: pairs)
Elements::Hash.new(pairs)
end

def read_hash_with_default_value
pairs = Array.new(read_integer) do
[read_element, read_element]
end
Elements::HashWithDefaultValue.new(pairs: pairs, default: read_element)
default = read_element
Elements::HashWithDefaultValue.new(pairs, default)
end

def read_object
Elements::WithIvars.new(
object: Elements::Object.new(name: read_element),
ivars: Array.new(read_integer) do
[read_element, read_element]
end
)
name = read_element
object = Elements::Object.new(name)
ivars = Array.new(read_integer) do
[read_element, read_element]
end
Elements::WithIvars.new(object, ivars)
end

def read_nil
Elements::Nil::NIL
end

def read_float
Elements::Float.new string: @io.read(read_integer)
string = @io.read(read_integer)
Elements::Float.new(string)
end

def read_bignum
Elements::Bignum.new(sign: read_byte, data: @io.read(read_integer * 2))
sign = read_byte
data = @io.read(read_integer * 2)
Elements::Bignum.new(sign, data)
end

def read_extended_object
Expand Down

0 comments on commit 0a423d4

Please sign in to comment.