Skip to content

Commit

Permalink
Changes to avoid looping indefinitely on a gzip file without a last member footer (#…)
Browse files Browse the repository at this point in the history
  • Loading branch information
joachimmetz committed Dec 2, 2018
1 parent 2fcecea commit 8e158d9
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 16 deletions.
6 changes: 5 additions & 1 deletion dfvfs/file_io/gzip_file_io.py
Expand Up @@ -200,10 +200,14 @@ def _Open(self, path_spec=None, mode='rb'):

self._gzip_file_object = resolver.Resolver.OpenFileObject(
path_spec.parent, resolver_context=self._resolver_context)
file_size = self._gzip_file_object.get_size()

self._gzip_file_object.seek(0, os.SEEK_SET)

uncompressed_data_offset = 0
next_member_offset = 0
while next_member_offset < self._gzip_file_object.get_size():

while next_member_offset < file_size:
member = gzipfile.GzipMember(
self._gzip_file_object, next_member_offset, uncompressed_data_offset)
uncompressed_data_offset = (
Expand Down
16 changes: 11 additions & 5 deletions dfvfs/lib/gzipfile.py
Expand Up @@ -182,6 +182,7 @@ def __init__(

self._LoadDataIntoCache(file_object, 0, read_all_data=True)

# TODO: gracefully handle missing footer.
self._ReadMemberFooter(file_object)

# Offset to the end of the member in the parent file object.
Expand All @@ -196,7 +197,7 @@ def _ReadMemberHeader(self, file_object):
Raises:
FileFormatError: if the member header cannot be read.
"""
file_offset = file_object.tell()
file_offset = file_object.get_offset()
member_header = self._ReadStructure(
file_object, file_offset, self._MEMBER_HEADER_SIZE,
self._MEMBER_HEADER, 'member header')
Expand All @@ -214,22 +215,22 @@ def _ReadMemberHeader(self, file_object):
self.operating_system = member_header.operating_system

if member_header.flags & self._FLAG_FEXTRA:
file_offset = file_object.tell()
file_offset = file_object.get_offset()
extra_field_data_size = self._ReadStructure(
file_object, file_offset, self._UINT16LE_SIZE,
self._UINT16LE, 'extra field data size')

file_object.seek(extra_field_data_size, os.SEEK_CUR)

if member_header.flags & self._FLAG_FNAME:
file_offset = file_object.tell()
file_offset = file_object.get_offset()
string_value = self._ReadString(
file_object, file_offset, self._CSTRING, 'original filename')

self.original_filename = string_value.rstrip('\x00')

if member_header.flags & self._FLAG_FCOMMENT:
file_offset = file_object.tell()
file_offset = file_object.get_offset()
string_value = self._ReadString(
file_object, file_offset, self._CSTRING, 'comment')

Expand All @@ -247,7 +248,7 @@ def _ReadMemberFooter(self, file_object):
Raises:
FileFormatError: if the member footer cannot be read.
"""
file_offset = file_object.tell()
file_offset = file_object.get_offset()
member_footer = self._ReadStructure(
file_object, file_offset, self._MEMBER_FOOTER_SIZE,
self._MEMBER_FOOTER, 'member footer')
Expand Down Expand Up @@ -354,6 +355,11 @@ def _LoadDataIntoCache(

while not self.IsCacheFull() or read_all_data:
decompressed_data = self._decompressor_state.Read(file_object)
# Note that decompressed_data will be empty if there is no data left
# to read and decompress.
if not decompressed_data:
break

decompressed_data_length = len(decompressed_data)
decompressed_end_offset = self._decompressor_state.uncompressed_offset
decompressed_start_offset = (
Expand Down
Binary file added test_data/corrupt1.gz
Binary file not shown.
42 changes: 32 additions & 10 deletions tests/file_io/gzip_file_io.py
Expand Up @@ -7,8 +7,9 @@
import unittest

from dfvfs.file_io import gzip_file_io
from dfvfs.path import gzip_path_spec
from dfvfs.path import os_path_spec
from dfvfs.lib import definitions
from dfvfs.lib import errors
from dfvfs.path import factory as path_spec_factory
from dfvfs.resolver import context

from tests import test_lib as shared_test_lib
Expand All @@ -17,14 +18,16 @@

@shared_test_lib.skipUnlessHasTestFile(['syslog.gz'])
class GzipFileTest(test_lib.SylogTestCase):
"""The unit test for a gzip file-like object."""
"""Tests a gzip file-like object."""

def setUp(self):
"""Sets up the needed objects used throughout the test."""
self._resolver_context = context.Context()
test_file = self._GetTestFilePath(['syslog.gz'])
path_spec = os_path_spec.OSPathSpec(location=test_file)
self._gzip_path_spec = gzip_path_spec.GzipPathSpec(parent=path_spec)
test_path = self._GetTestFilePath(['syslog.gz'])
test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=test_path)
self._gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=test_os_path_spec)

def testOpenClosePathSpec(self):
"""Test the open and close functionality using a path specification."""
Expand Down Expand Up @@ -58,13 +61,32 @@ def testRead(self):

file_object.close()

@shared_test_lib.skipUnlessHasTestFile(['corrupt1.gz'])
def testReadCorrupt(self):
"""Tests reading a file that is corrupt.

Opening the corrupt gzip file is expected to raise FileFormatError.
"""
# The corrupt gzip test file (corrupt1.gz) has no member footer.
# Build the path specification chain: the OS file is the parent and the
# gzip path specification is layered on top of it.
test_path = self._GetTestFilePath(['corrupt1.gz'])
test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=test_path)
test_gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=test_os_path_spec)

file_object = gzip_file_io.GzipFile(self._resolver_context)

# The error is expected during open() itself, before any read is attempted.
with self.assertRaises(errors.FileFormatError):
file_object.open(path_spec=test_gzip_path_spec)

@shared_test_lib.skipUnlessHasTestFile(['fsevents_000000000000b208'])
def testReadMultipleMembers(self):
"""Tests reading a file that contains multiple gzip members."""
test_file = self._GetTestFilePath(['fsevents_000000000000b208'])
parent_path_spec = os_path_spec.OSPathSpec(location=test_file)
path_spec = gzip_path_spec.GzipPathSpec(parent=parent_path_spec)
test_path = self._GetTestFilePath(['fsevents_000000000000b208'])
test_os_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_OS, location=test_path)
test_gzip_path_spec = path_spec_factory.Factory.NewPathSpec(
definitions.TYPE_INDICATOR_GZIP, parent=test_os_path_spec)

file_object = gzip_file_io.GzipFile(self._resolver_context)
file_object.open(path_spec=path_spec)
file_object.open(path_spec=test_gzip_path_spec)

self.assertEqual(file_object.uncompressed_data_size, 506631)
self.assertEqual(file_object.modification_times, [0, 0])
Expand Down

0 comments on commit 8e158d9

Please sign in to comment.