Permalink
Browse files

Write gem entry checksums to a single entry

Previously each entry in the gem got its own companion checksum entry
(data.tar.gz -> data.tar.gz.sum), for a cost of 2N files per gem.
Now all checksums are written to a single checksums.yaml.gz, the last
entry in the gem, for a cost of N + 1 files per gem.

This reduces gem file size, especially if new files are added to the gem
format (a gem is a tar file, so the cost is at least 1024 bytes per file).

Additionally, the checksums are stored as a YAML document that maps each
digest algorithm to the entry names and their hex digests, which is
easier to parse.  The previous format held a tab-separated digest
algorithm and digest value on each line.
  • Loading branch information...
drbrain committed Jul 31, 2012
1 parent 82c9110 commit 3f2e05972c85d4f4d9cd5e56e5b033bfb4d11b84
View
@@ -44,6 +44,8 @@ class TooLongFileName < Error; end
class TarInvalidError < Error; end
attr_accessor :build_time # :nodoc:
##
# The files in this package. This is not the contents of the gem, just the
# files in the top-level container.
@@ -100,20 +102,41 @@ def initialize gem # :notnew:
@security_policy = nil
@spec = nil
@signer = nil
@checksums = {}
@build_time = Time.now
end
##
# Writes checksums.yaml.gz to +tar+.  The entry is a gzipped YAML document
# mapping each digest algorithm name to a hash of entry name => hex digest,
# built from the digests collected in @checksums.
def add_checksums tar
  grouped = {}

  @checksums.each_pair do |entry_name, digest|
    per_algorithm = (grouped[digest.name] ||= {})
    per_algorithm[entry_name] = digest.hexdigest
  end

  tar.add_file_signed 'checksums.yaml.gz', 0444, @signer do |io|
    gzip_to(io) { |gz_io| YAML.dump grouped, gz_io }
  end
end
##
# Adds the files listed in the package's Gem::Specification to data.tar.gz
# and adds this file to the +tar+.  The digest returned by add_file_signed
# is stored in @checksums under 'data.tar.gz'.
def add_contents tar # :nodoc:
tar.add_file_signed 'data.tar.gz', 0444, @signer do |io|
Zlib::GzipWriter.wrap io do |gz_io|
digest = tar.add_file_signed 'data.tar.gz', 0444, @signer do |io|
gzip_to io do |gz_io|
Gem::Package::TarWriter.new gz_io do |data_tar|
add_files data_tar
end
end
end
# Keep the digest so add_checksums can write it to checksums.yaml.gz.
@checksums['data.tar.gz'] = digest
end
##
@@ -135,37 +158,32 @@ def add_files tar # :nodoc:
# Adds the package's Gem::Specification to the +tar+ file as metadata.gz
# and stores the digest returned by add_file_signed in @checksums.
def add_metadata tar # :nodoc:
metadata = @spec.to_yaml
metadata_gz = Gem.gzip metadata
tar.add_file_signed 'metadata.gz', 0444, @signer do |io|
io.write metadata_gz
digest = tar.add_file_signed 'metadata.gz', 0444, @signer do |io|
gzip_to io do |gz_io|
# NOTE(review): gzip_to already assigns @build_time to the writer's mtime
# before yielding, so this assignment looks redundant -- confirm.
gz_io.mtime = @build_time
gz_io.write @spec.to_yaml
end
end
# Keep the digest so add_checksums can write it to checksums.yaml.gz.
@checksums['metadata.gz'] = digest
end
##
# Builds this package based on the specification set by #spec=
def build(skip_validation=false)
def build skip_validation = false
require 'rubygems/security'
@spec.validate unless skip_validation
@spec.mark_version
if @spec.signing_key then
@signer = Gem::Security::Signer.new @spec.signing_key, @spec.cert_chain
@spec.signing_key = nil
@spec.cert_chain = @signer.cert_chain.map { |cert| cert.to_s }
else
@signer = Gem::Security::Signer.new nil, nil
@spec.cert_chain = @signer.cert_chain.map { |cert| cert.to_pem } if
@signer.cert_chain
end
setup_signer
open @gem, 'wb' do |gem_io|
Gem::Package::TarWriter.new gem_io do |gem|
add_metadata gem
add_contents gem
add_checksums gem
end
end
@@ -268,6 +286,21 @@ def extract_tar_gz io, destination_dir # :nodoc:
end
end
##
# Gzips content written to +gz_io+ to +io+.
#
# Yields a Zlib::GzipWriter wrapping +io+ and returns the value of the
# block.  The writer is closed once the block finishes, whether it returns
# normally or raises.
#--
# Also sets the gzip modification time to the package build time to ease
# testing.
def gzip_to io # :yields: gz_io
  gz_io = Zlib::GzipWriter.new io

  # The ensure only covers code that runs once the writer exists.  If
  # construction itself raises there is nothing to close, and calling
  # #close on nil would replace the real error with a NoMethodError.
  begin
    gz_io.mtime = @build_time
    yield gz_io
  ensure
    gz_io.close
  end
end
##
# Returns the full path for installing +filename+.
#
@@ -316,6 +349,22 @@ def open_tar_gz io # :nodoc:
end
end
##
# Creates the signer used for signing and checksum generation.  When the
# spec carries a signing key, the key is handed to the signer and cleared
# from the spec.  When it does not, a signer without key or certificates is
# created so that only checksum generation is set up.  The spec's cert_chain
# is then replaced with the signer's chain (in the key-less case only when
# the signer has one).
def setup_signer
  signing_key = @spec.signing_key

  if signing_key then
    @signer = Gem::Security::Signer.new signing_key, @spec.cert_chain
    @spec.signing_key = nil
    @spec.cert_chain = @signer.cert_chain.map(&:to_s)
  else
    @signer = Gem::Security::Signer.new nil, nil
    chain = @signer.cert_chain
    @spec.cert_chain = chain.map(&:to_pem) if chain
  end
end
##
# The spec for this gem.
#
@@ -345,7 +394,7 @@ def verify
digests = {}
signatures = {}
checksums = {}
checksums = nil
open @gem, 'rb' do |io|
reader = Gem::Package::TarReader.new io
@@ -358,8 +407,11 @@ def verify
when /\.sig$/ then
signatures[$`] = entry.read if @security_policy
next
when /\.sum$/ then
checksums[$`] = entry.read
when 'checksums.yaml.gz' then
Zlib::GzipReader.wrap entry do |io|
checksums = YAML.load io.read
end
next
else
digests[file_name] = digest entry
@@ -411,11 +463,12 @@ def verify_gz entry # :nodoc:
# cryptographically secure. Missing checksums are ignored.
def verify_checksums digests, checksums # :nodoc:
checksums.sort.each do |name, checksum|
return unless checksums
checksums['SHA1'].sort.each do |name, checksum|
digest = digests[name]
checksum =~ /#{digest.name}\t(.*)/
unless digest.hexdigest == $1 then
unless digest.hexdigest == checksum then
raise Gem::Package::FormatError.new("checksum mismatch for #{name}",
@gem)
end
@@ -146,12 +146,6 @@ def add_file_digest name, mode, digest_algorithm # :yields: io
end
end
checksum = "#{digest.name}\t#{digest.hexdigest}\n"
add_file_simple "#{name}.sum", 0444, checksum.length do |io|
io.write checksum
end
digest
end
@@ -160,6 +154,8 @@ def add_file_digest name, mode, digest_algorithm # :yields: io
# the file. The +signer+ is used to add a digest file using its
# digest_algorithm per add_file_digest and a cryptographic signature in
# +name+.sig. If the signer has no key only the checksum file is added.
#
# Returns the digest.
def add_file_signed name, mode, signer
digest = add_file_digest name, mode, signer.digest_algorithm do |io|
@@ -171,6 +167,8 @@ def add_file_signed name, mode, signer
add_file_simple "#{name}.sig", 0444, signature.length do |io|
io.write signature
end if signature
digest
end
##
@@ -30,6 +30,60 @@ def test_class_new_old_format
assert package.spec
end
# Builds a one-file gem into an in-memory tar and checks that add_checksums
# writes a checksums.yaml.gz entry mapping 'SHA1' to the hex digests of
# metadata.gz and data.tar.gz.
def test_add_checksums
gem_io = StringIO.new
spec = Gem::Specification.new 'build', '1'
spec.summary = 'build'
spec.authors = 'build'
spec.files = ['lib/code.rb']
# Presumably the date, rubygems_version and build_time are pinned so the
# generated entries are byte-stable and the digests below can be hard-coded.
spec.date = Time.at 0
spec.rubygems_version = Gem::Version.new '0'
FileUtils.mkdir 'lib'
open 'lib/code.rb', 'w' do |io|
io.write '# lib/code.rb'
end
package = Gem::Package.new spec.file_name
package.spec = spec
package.build_time = 1 # 0 uses current time
package.setup_signer
# Same sequence of add_* calls that Gem::Package#build performs.
Gem::Package::TarWriter.new gem_io do |gem|
package.add_metadata gem
package.add_contents gem
package.add_checksums gem
end
gem_io.rewind
reader = Gem::Package::TarReader.new gem_io
checksums = nil
reader.each_entry do |entry|
next unless entry.full_name == 'checksums.yaml.gz'
Zlib::GzipReader.wrap entry do |io|
checksums = io.read
# NOTE(review): this break leaves the GzipReader.wrap block, not
# each_entry; any remaining entries are skipped by the guard above.
break
end
end
checksums = YAML.load checksums
expected = {
'SHA1' => {
'metadata.gz' => 'e22e0b3a9f30f2befd3822b6101df0952a0977b7',
'data.tar.gz' => '05f3fec98096f5056407960c0a8a5291a9171657',
},
}
assert_equal expected, checksums
end
def test_add_files
spec = Gem::Specification.new
spec.files = 'lib/code.rb'
@@ -81,7 +135,7 @@ def test_build
reader = Gem::Package.new spec.file_name
assert_equal spec, reader.spec
assert_equal %w[metadata.gz metadata.gz.sum data.tar.gz data.tar.gz.sum],
assert_equal %w[metadata.gz data.tar.gz checksums.yaml.gz],
reader.files
assert_equal %w[lib/code.rb], reader.contents
@@ -118,8 +172,9 @@ def test_build_auto_signed
assert_equal [PUBLIC_CERT.to_pem], reader.spec.cert_chain
assert_equal %w[metadata.gz metadata.gz.sum metadata.gz.sig
data.tar.gz data.tar.gz.sum data.tar.gz.sig],
assert_equal %w[metadata.gz metadata.gz.sig
data.tar.gz data.tar.gz.sig
checksums.yaml.gz checksums.yaml.gz.sig],
reader.files
assert_equal %w[lib/code.rb], reader.contents
@@ -165,8 +220,9 @@ def test_build_signed
assert_equal spec, reader.spec
assert_equal %w[metadata.gz metadata.gz.sum metadata.gz.sig
data.tar.gz data.tar.gz.sum data.tar.gz.sig],
assert_equal %w[metadata.gz metadata.gz.sig
data.tar.gz data.tar.gz.sig
checksums.yaml.gz checksums.yaml.gz.sig],
reader.files
assert_equal %w[lib/code.rb], reader.contents
@@ -286,7 +342,7 @@ def test_verify
package.verify
assert_equal @spec, package.spec
assert_equal %w[data.tar.gz data.tar.gz.sum metadata.gz metadata.gz.sum],
assert_equal %w[checksums.yaml.gz data.tar.gz metadata.gz],
package.files.sort
end
@@ -306,26 +362,20 @@ def test_verify_checksum_bad
io.write metadata_gz
end
digest = OpenSSL::Digest::SHA1.new
digest << metadata_gz
digest << 'bogus'
checksum = "#{digest.name}\t#{digest.hexdigest}\n"
tar.add_file 'metadata.gz.sum', 0444 do |io|
io.write checksum
end
tar.add_file 'data.tar.gz', 0444 do |io|
io.write data_tgz
end
digest = OpenSSL::Digest::SHA1.new
digest << data_tgz
digest << 'bogus'
checksum = "#{digest.name}\t#{digest.hexdigest}\n"
tar.add_file 'data.tar.gz.sum', 0444 do |io|
io.write checksum
bogus_checksums = {
'SHA1' => {
'data.tar.gz' => 'bogus',
'metadata.gz' => 'bogus',
},
}
tar.add_file 'checksums.yaml.gz', 0444 do |io|
Zlib::GzipWriter.wrap io do |gz_io|
gz_io.write YAML.dump bogus_checksums
end
end
end
Oops, something went wrong.

0 comments on commit 3f2e059

Please sign in to comment.