Skip to content

Commit

Permalink
Write gem entry checksums to a single entry
Browse files Browse the repository at this point in the history
Previously each entry in the gem got its own extra checksum entry
(data.tar.gz -> data.tar.gz.sum), for a cost of 2N files per gem.
Now all checksums are written to a single checksums.yaml.gz, the last
entry in the gem, for a cost of N + 1 files per gem.

This reduces gem file size, especially if new files are added to the gem
format (a gem is a tar file, so the cost is at least 1024 bytes per file).

Additionally, the checksums are stored as a YAML document containing the
digest type, file name and digest value, which allows easier parsing.
The previous format used a tab-separated list of digest algorithms and
digest values per line.
  • Loading branch information
drbrain committed Jul 31, 2012
1 parent 82c9110 commit 3f2e059
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 70 deletions.
99 changes: 76 additions & 23 deletions lib/rubygems/package.rb
Expand Up @@ -44,6 +44,8 @@ class TooLongFileName < Error; end

class TarInvalidError < Error; end

attr_accessor :build_time # :nodoc:

##
# The files in this package. This is not the contents of the gem, just the
# files in the top-level container.
Expand Down Expand Up @@ -100,20 +102,41 @@ def initialize gem # :notnew:
@security_policy = nil
@spec = nil
@signer = nil
@checksums = {}
@build_time = Time.now
end

##
# Adds a checksum for each entry in the gem to checksums.yaml.gz.

def add_checksums tar
  # Regroup the recorded digests so the YAML document maps
  # digest algorithm name => { entry name => hex digest }.
  grouped = Hash.new { |table, algorithm| table[algorithm] = {} }

  @checksums.each_pair do |entry_name, entry_digest|
    grouped[entry_digest.name].store entry_name, entry_digest.hexdigest
  end

  # The document is gzipped and added to +tar+ as a single signed entry.
  tar.add_file_signed('checksums.yaml.gz', 0444, @signer) do |tar_io|
    gzip_to(tar_io) { |gz| YAML.dump grouped, gz }
  end
end

##
# Adds the files listed in the package's Gem::Specification to data.tar.gz
# and adds this file to the +tar+.

def add_contents tar # :nodoc:
tar.add_file_signed 'data.tar.gz', 0444, @signer do |io|
Zlib::GzipWriter.wrap io do |gz_io|
digest = tar.add_file_signed 'data.tar.gz', 0444, @signer do |io|
gzip_to io do |gz_io|
Gem::Package::TarWriter.new gz_io do |data_tar|
add_files data_tar
end
end
end

@checksums['data.tar.gz'] = digest
end

##
Expand All @@ -135,37 +158,32 @@ def add_files tar # :nodoc:
# Adds the package's Gem::Specification to the +tar+ file

def add_metadata tar # :nodoc:
metadata = @spec.to_yaml
metadata_gz = Gem.gzip metadata

tar.add_file_signed 'metadata.gz', 0444, @signer do |io|
io.write metadata_gz
digest = tar.add_file_signed 'metadata.gz', 0444, @signer do |io|
gzip_to io do |gz_io|
gz_io.mtime = @build_time
gz_io.write @spec.to_yaml
end
end

@checksums['metadata.gz'] = digest
end

##
# Builds this package based on the specification set by #spec=

def build(skip_validation=false)
def build skip_validation = false
require 'rubygems/security'

@spec.validate unless skip_validation
@spec.mark_version

if @spec.signing_key then
@signer = Gem::Security::Signer.new @spec.signing_key, @spec.cert_chain
@spec.signing_key = nil
@spec.cert_chain = @signer.cert_chain.map { |cert| cert.to_s }
else
@signer = Gem::Security::Signer.new nil, nil
@spec.cert_chain = @signer.cert_chain.map { |cert| cert.to_pem } if
@signer.cert_chain
end
setup_signer

open @gem, 'wb' do |gem_io|
Gem::Package::TarWriter.new gem_io do |gem|
add_metadata gem
add_contents gem
add_checksums gem
end
end

Expand Down Expand Up @@ -268,6 +286,21 @@ def extract_tar_gz io, destination_dir # :nodoc:
end
end

##
# Gzips content written to +gz_io+ to +io+.
#--
# Also sets the gzip modification time to the package build time to ease
# testing.

def gzip_to io # :yields: gz_io
  # GzipWriter.wrap performs the cleanup: it closes the writer (and with it
  # +io+) after the block, but only if the stream is still open.  A block
  # that already closed or finished the writer therefore no longer triggers
  # a "closed gzip stream" error, and a failure while constructing the
  # writer is not masked by calling #close on nil.
  Zlib::GzipWriter.wrap io do |gz_io|
    # Stamp the gzip header with the package build time instead of the
    # current time.
    gz_io.mtime = @build_time

    # The block's value is passed through as the return value.
    yield gz_io
  end
end

##
# Returns the full path for installing +filename+.
#
Expand Down Expand Up @@ -316,6 +349,22 @@ def open_tar_gz io # :nodoc:
end
end

##
# Prepares the gem for signing and checksum generation. If a signing
# certificate and key are not present only checksum generation is set up.

def setup_signer
  signing_key = @spec.signing_key

  if signing_key
    # Build the signer from the spec's key and chain, then clear the key on
    # the spec and store the signer's chain back as strings.
    @signer = Gem::Security::Signer.new signing_key, @spec.cert_chain
    @spec.signing_key = nil
    @spec.cert_chain = @signer.cert_chain.map(&:to_s)
  else
    # No key on the spec: build a signer from nil key/chain and record its
    # certificate chain (as PEM) only if it has one.
    @signer = Gem::Security::Signer.new nil, nil
    chain = @signer.cert_chain
    @spec.cert_chain = chain.map(&:to_pem) if chain
  end
end

##
# The spec for this gem.
#
Expand Down Expand Up @@ -345,7 +394,7 @@ def verify

digests = {}
signatures = {}
checksums = {}
checksums = nil

open @gem, 'rb' do |io|
reader = Gem::Package::TarReader.new io
Expand All @@ -358,8 +407,11 @@ def verify
when /\.sig$/ then
signatures[$`] = entry.read if @security_policy
next
when /\.sum$/ then
checksums[$`] = entry.read
when 'checksums.yaml.gz' then
Zlib::GzipReader.wrap entry do |io|
checksums = YAML.load io.read
end

next
else
digests[file_name] = digest entry
Expand Down Expand Up @@ -411,11 +463,12 @@ def verify_gz entry # :nodoc:
# cryptographically secure. Missing checksums are ignored.

def verify_checksums digests, checksums # :nodoc:
checksums.sort.each do |name, checksum|
return unless checksums

checksums['SHA1'].sort.each do |name, checksum|
digest = digests[name]
checksum =~ /#{digest.name}\t(.*)/

unless digest.hexdigest == $1 then
unless digest.hexdigest == checksum then
raise Gem::Package::FormatError.new("checksum mismatch for #{name}",
@gem)
end
Expand Down
10 changes: 4 additions & 6 deletions lib/rubygems/package/tar_writer.rb
Expand Up @@ -146,12 +146,6 @@ def add_file_digest name, mode, digest_algorithm # :yields: io
end
end

checksum = "#{digest.name}\t#{digest.hexdigest}\n"

add_file_simple "#{name}.sum", 0444, checksum.length do |io|
io.write checksum
end

digest
end

Expand All @@ -160,6 +154,8 @@ def add_file_digest name, mode, digest_algorithm # :yields: io
# the file. The +signer+ is used to compute a digest using its
# digest_algorithm per add_file_digest and to add a cryptographic signature
# in +name+.sig. If the signer has no key no signature file is added.
#
# Returns the digest.

def add_file_signed name, mode, signer
digest = add_file_digest name, mode, signer.digest_algorithm do |io|
Expand All @@ -171,6 +167,8 @@ def add_file_signed name, mode, signer
add_file_simple "#{name}.sig", 0444, signature.length do |io|
io.write signature
end if signature

digest
end

##
Expand Down
94 changes: 72 additions & 22 deletions test/rubygems/test_gem_package.rb
Expand Up @@ -30,6 +30,60 @@ def test_class_new_old_format
assert package.spec
end

def test_add_checksums
  # Build a one-file spec with a fixed date and rubygems version.
  spec = Gem::Specification.new 'build', '1'
  spec.summary = 'build'
  spec.authors = 'build'
  spec.files = ['lib/code.rb']
  spec.date = Time.at 0
  spec.rubygems_version = Gem::Version.new '0'

  FileUtils.mkdir 'lib'
  File.open('lib/code.rb', 'w') { |file| file.write '# lib/code.rb' }

  package = Gem::Package.new spec.file_name
  package.spec = spec
  package.build_time = 1 # 0 uses current time
  package.setup_signer

  # Write the three top-level entries into an in-memory tar.
  gem_io = StringIO.new

  Gem::Package::TarWriter.new gem_io do |gem|
    package.add_metadata gem
    package.add_contents gem
    package.add_checksums gem
  end

  gem_io.rewind

  # Pull the raw YAML back out of the checksums entry.
  checksums_yaml = nil

  Gem::Package::TarReader.new(gem_io).each_entry do |entry|
    next unless entry.full_name == 'checksums.yaml.gz'

    checksums_yaml = Zlib::GzipReader.wrap(entry) { |gz| gz.read }
  end

  expected = {
    'SHA1' => {
      'metadata.gz' => 'e22e0b3a9f30f2befd3822b6101df0952a0977b7',
      'data.tar.gz' => '05f3fec98096f5056407960c0a8a5291a9171657',
    },
  }

  assert_equal expected, YAML.load(checksums_yaml)
end

def test_add_files
spec = Gem::Specification.new
spec.files = 'lib/code.rb'
Expand Down Expand Up @@ -81,7 +135,7 @@ def test_build
reader = Gem::Package.new spec.file_name
assert_equal spec, reader.spec

assert_equal %w[metadata.gz metadata.gz.sum data.tar.gz data.tar.gz.sum],
assert_equal %w[metadata.gz data.tar.gz checksums.yaml.gz],
reader.files

assert_equal %w[lib/code.rb], reader.contents
Expand Down Expand Up @@ -118,8 +172,9 @@ def test_build_auto_signed

assert_equal [PUBLIC_CERT.to_pem], reader.spec.cert_chain

assert_equal %w[metadata.gz metadata.gz.sum metadata.gz.sig
data.tar.gz data.tar.gz.sum data.tar.gz.sig],
assert_equal %w[metadata.gz metadata.gz.sig
data.tar.gz data.tar.gz.sig
checksums.yaml.gz checksums.yaml.gz.sig],
reader.files

assert_equal %w[lib/code.rb], reader.contents
Expand Down Expand Up @@ -165,8 +220,9 @@ def test_build_signed

assert_equal spec, reader.spec

assert_equal %w[metadata.gz metadata.gz.sum metadata.gz.sig
data.tar.gz data.tar.gz.sum data.tar.gz.sig],
assert_equal %w[metadata.gz metadata.gz.sig
data.tar.gz data.tar.gz.sig
checksums.yaml.gz checksums.yaml.gz.sig],
reader.files

assert_equal %w[lib/code.rb], reader.contents
Expand Down Expand Up @@ -286,7 +342,7 @@ def test_verify
package.verify

assert_equal @spec, package.spec
assert_equal %w[data.tar.gz data.tar.gz.sum metadata.gz metadata.gz.sum],
assert_equal %w[checksums.yaml.gz data.tar.gz metadata.gz],
package.files.sort
end

Expand All @@ -306,26 +362,20 @@ def test_verify_checksum_bad
io.write metadata_gz
end

digest = OpenSSL::Digest::SHA1.new
digest << metadata_gz
digest << 'bogus'
checksum = "#{digest.name}\t#{digest.hexdigest}\n"

tar.add_file 'metadata.gz.sum', 0444 do |io|
io.write checksum
end

tar.add_file 'data.tar.gz', 0444 do |io|
io.write data_tgz
end

digest = OpenSSL::Digest::SHA1.new
digest << data_tgz
digest << 'bogus'
checksum = "#{digest.name}\t#{digest.hexdigest}\n"

tar.add_file 'data.tar.gz.sum', 0444 do |io|
io.write checksum
bogus_checksums = {
'SHA1' => {
'data.tar.gz' => 'bogus',
'metadata.gz' => 'bogus',
},
}
tar.add_file 'checksums.yaml.gz', 0444 do |io|
Zlib::GzipWriter.wrap io do |gz_io|
gz_io.write YAML.dump bogus_checksums
end
end
end

Expand Down

0 comments on commit 3f2e059

Please sign in to comment.