From 505715ddf17e004d184c0b71afb40a31e2e8c98e Mon Sep 17 00:00:00 2001 From: Samuel Giddins Date: Mon, 18 Sep 2023 18:51:15 -0700 Subject: [PATCH] [rubygems/rubygems] Fewer allocations in gem installation For now, on a small rails app I have hanging around: ``` ==> memprof.after.txt <== Total allocated: 872.51 MB (465330 objects) Total retained: 40.48 kB (326 objects) ==> memprof.before.txt <== Total allocated: 890.79 MB (1494026 objects) Total retained: 40.40 kB (328 objects) ``` Not a huge difference in memory usage, but it's a drastic improvement in total number of allocations. Additionally, this will pay huge dividends once https://github.com/ruby/zlib/pull/61 is merged, as it will allow us to completely avoid allocations in the repeated calls to readpartial, which currently accounts for most of the memory usage shown above. https://github.com/rubygems/rubygems/commit/f78d45d927 --- lib/rubygems/installer.rb | 6 ++--- lib/rubygems/package.rb | 24 ++++++++++--------- lib/rubygems/package/tar_header.rb | 38 +++++++++++++++++------------- 3 files changed, 37 insertions(+), 31 deletions(-) diff --git a/lib/rubygems/installer.rb b/lib/rubygems/installer.rb index 45405a805f5fb0..0c3c28e9af5ea3 100644 --- a/lib/rubygems/installer.rb +++ b/lib/rubygems/installer.rb @@ -224,11 +224,11 @@ def check_executable_overwrite(filename) # :nodoc: File.open generated_bin, "rb" do |io| line = io.gets - shebang = /^#!.*ruby/ + shebang = /^#!.*ruby/o # TruffleRuby uses a bash prelude in default launchers if load_relative_enabled? || RUBY_ENGINE == "truffleruby" - until line.nil? || line =~ shebang do + until line.nil? || shebang.match?(line) do line = io.gets end end @@ -239,7 +239,7 @@ def check_executable_overwrite(filename) # :nodoc: # TODO: detect a specially formatted comment instead of trying # to find a string inside Ruby code. - next unless io.gets.to_s.include?("This file was generated by RubyGems") + next unless io.gets&.include?("This file was generated by RubyGems") ruby_executable = true existing = io.read.slice(/ diff --git a/lib/rubygems/package.rb b/lib/rubygems/package.rb index 68517cd4c0cd89..f3c1cb2895d516 100644 --- a/lib/rubygems/package.rb +++ b/lib/rubygems/package.rb @@ -357,18 +357,21 @@ def contents def digest(entry) # :nodoc: algorithms = if @checksums - @checksums.keys - else - [Gem::Security::DIGEST_NAME].compact + @checksums.to_h {|algorithm, _| [algorithm, Gem::Security.create_digest(algorithm)] } + elsif Gem::Security::DIGEST_NAME + { Gem::Security::DIGEST_NAME => Gem::Security.create_digest(Gem::Security::DIGEST_NAME) } end - algorithms.each do |algorithm| - digester = Gem::Security.create_digest(algorithm) - - digester << entry.readpartial(16_384) until entry.eof? + return @digests if algorithms.nil? || algorithms.empty? - entry.rewind + buf = String.new(capacity: 16_384, encoding: Encoding::BINARY) + until entry.eof? + entry.readpartial(16_384, buf) + algorithms.each_value {|digester| digester << buf } + end + entry.rewind + algorithms.each do |algorithm, digester| @digests[algorithm][entry.full_name] = digester end @@ -437,8 +440,6 @@ def extract_tar_gz(io, destination_dir, pattern = "*") # :nodoc: FileUtils.rm_rf destination - mkdir_options = {} - mkdir_options[:mode] = dir_mode ? 0o755 : (entry.header.mode if entry.directory?) mkdir = if entry.directory? destination @@ -447,7 +448,7 @@ def extract_tar_gz(io, destination_dir, pattern = "*") # :nodoc: end unless directories.include?(mkdir) - FileUtils.mkdir_p mkdir, **mkdir_options + FileUtils.mkdir_p mkdir, mode: dir_mode ? 0o755 : (entry.header.mode if entry.directory?) directories << mkdir end @@ -707,6 +708,7 @@ def verify_files(gem) def verify_gz(entry) # :nodoc: Zlib::GzipReader.wrap entry do |gzio| + # TODO: read into a buffer once zlib supports it gzio.read 16_384 until gzio.eof? # gzip checksum verification end rescue Zlib::GzipFile::Error => e diff --git a/lib/rubygems/package/tar_header.rb b/lib/rubygems/package/tar_header.rb index 7deafa994f496d..087f13f6c9e117 100644 --- a/lib/rubygems/package/tar_header.rb +++ b/lib/rubygems/package/tar_header.rb @@ -127,7 +127,8 @@ def self.from(stream) end def self.strict_oct(str) - return str.strip.oct if /\A[0-7]*\z/.match?(str.strip) + str.strip! + return str.oct if /\A[0-7]*\z/.match?(str) raise ArgumentError, "#{str.inspect} is not an octal string" end @@ -137,7 +138,8 @@ def self.oct_or_256based(str) # \ff flags a negative 256-based number # In case we have a match, parse it as a signed binary value # in big-endian order, except that the high-order bit is ignored. - return str.unpack("N2").last if /\A[\x80\xff]/n.match?(str) + + return str.unpack1("@4N") if /\A[\x80\xff]/n.match?(str) strict_oct(str) end @@ -149,21 +151,23 @@ def initialize(vals) raise ArgumentError, ":name, :size, :prefix and :mode required" end - vals[:uid] ||= 0 - vals[:gid] ||= 0 - vals[:mtime] ||= 0 - vals[:checksum] ||= "" - vals[:typeflag] = "0" if vals[:typeflag].nil? || vals[:typeflag].empty? - vals[:magic] ||= "ustar" - vals[:version] ||= "00" - vals[:uname] ||= "wheel" - vals[:gname] ||= "wheel" - vals[:devmajor] ||= 0 - vals[:devminor] ||= 0 - - FIELDS.each do |name| - instance_variable_set "@#{name}", vals[name] - end + @checksum = vals[:checksum] || "" + @devmajor = vals[:devmajor] || 0 + @devminor = vals[:devminor] || 0 + @gid = vals[:gid] || 0 + @gname = vals[:gname] || "wheel" + @linkname = vals[:linkname] + @magic = vals[:magic] || "ustar" + @mode = vals[:mode] + @mtime = vals[:mtime] || 0 + @name = vals[:name] + @prefix = vals[:prefix] + @size = vals[:size] + @typeflag = vals[:typeflag] + @typeflag = "0" if @typeflag.nil? || @typeflag.empty? + @uid = vals[:uid] || 0 + @uname = vals[:uname] || "wheel" + @version = vals[:version] || "00" @empty = vals[:empty] end