Skip to content

Commit

Permalink
[rubygems/rubygems] Add TarReader::Entry#seek to seek within the tar …
Browse files Browse the repository at this point in the history
…file entry

TarReader#each previously implemented a partial version of seek.
This code moved to Entry#seek for use from TarReader#each.

Entry#close now returns nil instead of true, like IO#close.

Closing an Entry now seeks to the end of the Entry, seeking past
any remaining zero byte tar file padding and moving the io to the
correct position to read the next file in the archive.

Uses seek for Entry#rewind and #pos=, fixing the tar->gzip->tar nested
rewind that was broken before this change.

Add Entry.open that behaves more like File.open.

rubygems/rubygems@f5149565d5
  • Loading branch information
martinemde authored and matzbot committed Mar 7, 2023
1 parent 719a772 commit 85a1738
Show file tree
Hide file tree
Showing 6 changed files with 281 additions and 86 deletions.
28 changes: 0 additions & 28 deletions lib/rubygems/package/tar_reader.rb
Expand Up @@ -53,39 +53,11 @@ def close
# Iterates over the entries of the tar stream in @io, yielding each one to
# the block. Without a block an Enumerator for this method is returned.
# Iteration stops at end of input or at the first empty header.
def each
return enum_for __method__ unless block_given?

# decided once up front; individual seeks may still fail below
use_seek = @io.respond_to?(:seek)

until @io.eof? do
header = Gem::Package::TarHeader.from @io
return if header.empty?

entry = Gem::Package::TarReader::Entry.new header, @io
size = entry.header.size

yield entry

# bytes needed to round the entry's data up to a multiple of 512
skip = (512 - (size % 512)) % 512
# data the block left unread
pending = size - entry.bytes_read

if use_seek
begin
# avoid reading if the @io supports seeking
@io.seek pending, IO::SEEK_CUR
pending = 0
rescue Errno::EINVAL
# seek failed: pending keeps its value so the read loop below consumes it
end
end

# if seeking isn't supported or failed
while pending > 0 do
# NOTE(review): if @io.read returns nil (as IO#read does at EOF), `.size`
# raises NoMethodError instead of UnexpectedEOF -- confirm
bytes_read = @io.read([pending, 4096].min).size
# NOTE(review): raised whenever the io is at EOF after a read, even if that
# read satisfied everything still pending -- confirm this is intended
raise UnexpectedEOF if @io.eof?
pending -= bytes_read
end

@io.read skip # discard trailing zeros

# make sure nobody can use #read, #getc or #rewind anymore
entry.close
end
end
Expand Down
85 changes: 82 additions & 3 deletions lib/rubygems/package/tar_reader/entry.rb
Expand Up @@ -8,6 +8,20 @@
# Class for reading entries out of a tar file

class Gem::Package::TarReader::Entry
##
# Builds a tar entry for +header+ whose data is read from +io+.
#
# Without a block the new entry is returned and the caller is expected to
# close it. With a block the entry is passed to the block, closed once the
# block finishes (also when it raises), and the block's value is returned.

def self.open(header, io, &block)
  entry = new(header, io)

  if block
    begin
      block.call(entry)
    ensure
      entry.close
    end
  else
    entry
  end
end

##
# Header for this tar entry

Expand All @@ -21,6 +35,7 @@ def initialize(header, io)
@header = header
@io = io
@orig_pos = @io.pos
@end_pos = @orig_pos + @header.size
@read = 0
end

Expand All @@ -39,7 +54,14 @@ def bytes_read
# Closes the tar entry
#
# Moves the underlying io past the rest of the entry's data and its zero
# padding, then marks the entry closed. Always returns nil; closing an
# already closed entry does nothing.

def close
  unless closed?
    # Jump to the end of the entry's data in case it wasn't fully read
    seek(0, IO::SEEK_END)

    # discard trailing zeros that pad the data to a multiple of 512 bytes
    remainder = @header.size % 512
    @io.read(remainder.zero? ? 0 : 512 - remainder)

    @closed = true
  end

  nil
end

##
Expand Down Expand Up @@ -117,6 +139,14 @@ def pos
bytes_read
end

##
# Moves the read position to +new_pos+ bytes from the start of the entry
# by delegating to #seek with IO::SEEK_SET. Returns +new_pos+.

def pos=(new_pos)
  new_pos.tap { |target| seek(target, IO::SEEK_SET) }
end

##
# Returns the size recorded in this entry's tar header

def size
@header.size
end
Expand Down Expand Up @@ -157,13 +187,62 @@ def readpartial(maxlen = nil, outbuf = "".b)
outbuf
end

##
# Seeks to +offset+ bytes into the tar file entry
# +whence+ can be IO::SEEK_SET, IO::SEEK_CUR, or IO::SEEK_END
#
# The target is clamped so it never leaves the entry: positions before the
# start of the entry become the start, positions past its end become the end.
#
# When the underlying io cannot seek (or its seek raises Errno::EINVAL) the
# position is reached by reading and discarding bytes; moving backwards then
# requires rewinding the io and reading forward from its beginning.
#
# Returns 0. Raises ArgumentError for an unknown +whence+ and EOFError when
# the underlying io runs out of data before the target position is reached.

def seek(offset, whence = IO::SEEK_SET)
  check_closed

  new_pos =
    case whence
    when IO::SEEK_SET then @orig_pos + offset
    when IO::SEEK_CUR then @io.pos + offset
    when IO::SEEK_END then @end_pos + offset
    else
      raise ArgumentError, "invalid whence"
    end

  # never move outside of this entry's data
  if new_pos < @orig_pos
    new_pos = @orig_pos
  elsif new_pos > @end_pos
    new_pos = @end_pos
  end

  pending = new_pos - @io.pos

  if @io.respond_to?(:seek)
    begin
      # avoid reading if the @io supports seeking
      @io.seek new_pos, IO::SEEK_SET
      pending = 0
    rescue Errno::EINVAL
      # seek failed: pending is untouched, fall back to reading below
    end
  end

  # if seeking isn't supported or failed
  # negative seek requires that we rewind and read
  if pending < 0
    @io.rewind
    pending = new_pos
  end

  while pending > 0
    chunk = @io.read([pending, 4096].min)
    # A nil chunk means the io is exhausted while bytes are still owed.
    # Reaching EOF with a read that satisfied the request is not an error,
    # e.g. when the entry is the last thing in the io.
    raise EOFError, "end of file reached" if chunk.nil?
    pending -= chunk.size
  end

  @read = @io.pos - @orig_pos

  0
end

##
# Rewinds to the beginning of the tar file entry

def rewind
check_closed

@io.pos = @orig_pos
@read = 0
seek(0, IO::SEEK_SET)
end
end
63 changes: 49 additions & 14 deletions test/rubygems/package/tar_test_case.rb
Expand Up @@ -90,43 +90,52 @@ def header(type, fname, dname, length, mode, mtime, checksum = nil, linkname = "
ASCIIZ("wheel", 32), # char gname[32]; ASCIIZ
Z(to_oct(0, 7)), # char devmajor[8]; 0 padded, octal, null
Z(to_oct(0, 7)), # char devminor[8]; 0 padded, octal, null
ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
ASCIIZ(dname, 155), # char prefix[155]; ASCII + (Z unless filled)
]

h = arr.join
ret = h + "\0" * (512 - h.size)
ret = ASCIIZ(h, 512)
assert_equal(512, ret.size)
ret
end

def tar_dir_header(name, prefix, mode, mtime)
h = header("5", name, prefix, 0, mode, mtime)
def header_with_checksum(type, fname, dname, length, mode, mtime, linkname = "")
h = header(type, fname, dname, length, mode, mtime, nil, linkname)
checksum = calc_checksum(h)
header("5", name, prefix, 0, mode, mtime, checksum)
header(type, fname, dname, length, mode, mtime, checksum, linkname)
end

# Builds a checksummed header for a directory entry by delegating to
# header_with_checksum with type "5" and a length of 0.
def tar_dir_header(name, prefix, mode, mtime)
header_with_checksum("5", name, prefix, 0, mode, mtime)
end

def tar_file_header(fname, dname, mode, length, mtime)
h = header("0", fname, dname, length, mode, mtime)
checksum = calc_checksum(h)
header("0", fname, dname, length, mode, mtime, checksum)
header_with_checksum("0", fname, dname, length, mode, mtime)
end

def tar_symlink_header(fname, prefix, mode, mtime, linkname)
h = header("2", fname, prefix, 0, mode, mtime, nil, linkname)
checksum = calc_checksum(h)
header("2", fname, prefix, 0, mode, mtime, checksum, linkname)
def tar_symlink_header(fname, dname, mode, mtime, linkname)
header_with_checksum("2", fname, dname, 0, mode, mtime, linkname)
end

# Returns +content+ followed by enough NUL bytes to make its length in
# bytes a multiple of 512. Content that is already aligned (including the
# empty string) gets no padding.
#
# The padding is computed from the byte length, not the character count,
# so multibyte content is padded to the block boundary as well.
def tar_file_contents(content)
  pad = (512 - (content.bytesize % 512)) % 512
  content + "\0" * pad
end

# Formats +n+ in octal, zero-padded on the left to +pad_size+ characters.
def to_oct(n, pad_size)
  template = "%0#{pad_size}o"
  format(template, n)
end

def util_entry(tar)
io = TempIO.new tar
io = tar.respond_to?(:read) ? tar : TempIO.new(tar)

header = Gem::Package::TarHeader.from io

Gem::Package::TarReader::Entry.new header, io
Gem::Package::TarReader::Entry.open header, io
end

# Closes the io backing +entry+ by reaching into its @io instance variable
# and calling close! on it.
# NOTE(review): presumably the TempIO created by util_entry -- confirm that
# every io handed to util_entry responds to close!
def close_util_entry(entry)
entry.instance_variable_get(:@io).close!
end

def util_dir_entry
Expand All @@ -136,4 +145,30 @@ def util_dir_entry
# Returns the result of util_entry for a symlink header with name "foo",
# second argument "bar", mode 0, the current time as mtime and "link" as
# the link target.
def util_symlink_entry
util_entry tar_symlink_header("foo", "bar", 0, Time.now, "link")
end

# Writes a tar archive into an in-memory StringIO by handing +block+ to
# Gem::Package::TarWriter.new, then returns the StringIO rewound to the
# start.
def util_tar(&block)
  StringIO.new.tap do |buffer|
    Gem::Package::TarWriter.new(buffer, &block)
    buffer.rewind
  end
end

# Builds a tar archive with util_tar (forwarding +block+), passes its bytes
# through util_gzip and returns the result wrapped in a new StringIO.
def util_tar_gz(&block)
  tar_bytes = util_tar(&block).string
  StringIO.new(util_gzip(tar_bytes))
end

# Builds an outer tar (via util_tar) containing "data.tar.gz", whose
# content is the gzipped tar produced by util_tar_gz from +block+. When
# +spec+ is given, a "metadata.gz" file holding the gzipped YAML of the
# spec is added first.
def util_gem_data_tar(spec = nil, &block)
  packed_data = util_tar_gz(&block)

  util_tar do |tar|
    tar.add_file("metadata.gz", 0444) { |io| io.write util_gzip(spec.to_yaml) } if spec
    tar.add_file("data.tar.gz", 0644) { |io| io.write packed_data.string }
  end
end
end
25 changes: 0 additions & 25 deletions test/rubygems/test_gem_package.rb
Expand Up @@ -1187,29 +1187,4 @@ def test_contents_from_io

assert_equal %w[lib/code.rb], package.contents
end

# Yields a Gem::Package::TarWriter that writes into an in-memory StringIO
# and returns that StringIO rewound to the start.
def util_tar
  buffer = StringIO.new
  Gem::Package::TarWriter.new(buffer) { |writer| yield writer }
  buffer.rewind
  buffer
end

# Builds a tar archive with util_tar (forwarding +block+), gzips its bytes
# and returns the compressed data in a new StringIO.
def util_tar_gz(&block)
  archive = util_tar(&block)
  compressed = StringIO.new

  # can't wrap TarWriter because it seeks
  Zlib::GzipWriter.wrap(compressed) { |gz| gz.write archive.string }

  StringIO.new compressed.string
end
end
49 changes: 48 additions & 1 deletion test/rubygems/test_gem_package_tar_reader.rb
Expand Up @@ -56,12 +56,14 @@ def test_seek
io = TempIO.new tar

Gem::Package::TarReader.new io do |tar_reader|
tar_reader.seek "baz/bar" do |entry|
retval = tar_reader.seek "baz/bar" do |entry|
assert_kind_of Gem::Package::TarReader::Entry, entry

assert_equal "baz/bar", entry.full_name
entry.read
end

assert_equal "", retval
assert_equal 0, io.pos
end
ensure
Expand All @@ -84,4 +86,49 @@ def test_seek_missing
ensure
io.close!
end

# Reads a file out of a tar nested inside a gzip inside a tar: the outer
# archive is iterated with TarReader#each, the "data.tar.gz" entry is
# wrapped in a GzipReader, and the inner archive is iterated in turn.
def test_read_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
end

# number of inner entries whose content was checked
count = 0
Gem::Package::TarReader.new(gem_tar).each do |entry|
next unless entry.full_name == "data.tar.gz"

# the outer tar entry itself serves as the io for the gzip reader
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).each do |contents_entry|
assert_equal "# lib/code.rb", contents_entry.read
count += 1
end
end
end

assert_equal 1, count, "should have found one file"
end

# Uses TarReader#seek at both nesting levels (outer tar -> gzip -> inner
# tar) to locate "lib/foo.rb", the second of two files in the inner
# archive, and checks its content.
def test_seek_in_gem_data
gem_tar = util_gem_data_tar do |tar|
tar.add_file "lib/code.rb", 0444 do |io|
io.write "# lib/code.rb"
end
tar.add_file "lib/foo.rb", 0444 do |io|
io.write "# lib/foo.rb"
end
end

# number of times the innermost block ran
count = 0
Gem::Package::TarReader.new(gem_tar).seek("data.tar.gz") do |entry|
# the outer tar entry itself serves as the io for the gzip reader
Zlib::GzipReader.wrap entry do |gzio|
Gem::Package::TarReader.new(gzio).seek("lib/foo.rb") do |contents_entry|
assert_equal "# lib/foo.rb", contents_entry.read
count += 1
end
end
end

assert_equal 1, count, "should have found one file"
end
end

0 comments on commit 85a1738

Please sign in to comment.