Skip to content

Commit

Permalink
Make utils.extract_tar work
Browse files Browse the repository at this point in the history
The same way as it was working with extractcode.

Signed-off-by: Philippe Ombredanne <pombredanne@nexb.com>
  • Loading branch information
pombredanne committed May 9, 2022
1 parent 5b61891 commit 63ed384
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 30 deletions.
8 changes: 4 additions & 4 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ v31.0.0

This is a major release with bug fixes and API changes.

- Remove dependency on extractcode. "extract_tar" has been removed too. Use
instead extract_tar_with_symlinks.
- Adopted the latest skeleton.
- Add new os tests
- Remove dependency on extractcode as images only need basic tar to extract.
- "utils.extract_tar" function now accepts a skip_symlinks argument
- Adopt the latest skeleton.
- Add new os-release tests


v30.0.0
Expand Down
12 changes: 7 additions & 5 deletions src/container_inspector/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -450,14 +450,15 @@ def get_installed_packages(self, packages_getter):
yield purl, package, layer

@staticmethod
def extract(archive_location, extracted_location, skip_symlinks=False):
    """
    Extract the image archive tarball at ``archive_location`` to
    ``extracted_location``.

    Skip symlinks and links if ``skip_symlinks`` is True. The default here is
    False, so links are requested to be kept unless the caller says otherwise.
    """
    utils.extract_tar(
        location=archive_location,
        target_dir=extracted_location,
        skip_symlinks=skip_symlinks,
    )

@staticmethod
Expand Down Expand Up @@ -1070,15 +1071,16 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.size:
self.size = os.path.getsize(self.archive_location)

def extract(self, extracted_location, skip_symlinks=True):
    """
    Extract this layer archive in the ``extracted_location`` directory and set
    this Layer ``extracted_location`` attribute to ``extracted_location``.

    Skip symlinks and links if ``skip_symlinks`` is True (the default).
    """
    # Record the target first: the attribute is set even if extraction fails.
    self.extracted_location = extracted_location
    utils.extract_tar(
        location=self.archive_location,
        target_dir=extracted_location,
        skip_symlinks=skip_symlinks,
    )

def get_resources(self, with_dir=False, walker=os.walk):
Expand Down
59 changes: 38 additions & 21 deletions src/container_inspector/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,40 +80,57 @@ def get_labels(config, container_config):
return dict(sorted(labels.items()))


def extract_tar(location, target_dir, skip_symlinks=True):
    """
    Extract a tar archive at ``location`` in the ``target_dir`` directory.

    Entries that are skipped and never extracted:

    - special device files (character device, block device, fifo) and sparse
      files;
    - entries with ``..`` anywhere in their name;
    - symlinks and hardlinks when ``skip_symlinks`` is True;
    - when ``skip_symlinks`` is False, links whose target cannot be found in
      the archive.

    Leading slashes are stripped from entry names before extraction.
    Do not preserve the permissions and owners.
    Skipped entries are only reported as debug log messages when TRACE is
    enabled: no exception is raised and no warning is issued for them.
    """
    import tarfile
    import traceback

    if TRACE:
        logger.debug(f'_extract_tar: {location} to {target_dir} skip_symlinks: {skip_symlinks}')

    fileutils.create_dir(target_dir)

    with tarfile.open(location) as tarball:
        # never extract character device, block and fifo files:
        # we extract dirs, files and (optionally) links only
        for tarinfo in tarball:
            if TRACE:
                logger.debug(f'_extract_tar: {tarinfo}')

            # TarInfo.isdev() is True for character devices, block devices and
            # fifos, so a single check covers all three.
            if tarinfo.isdev() or tarinfo.sparse:
                if TRACE:
                    logger.debug(f'_extract_tar: skipping unsupported {tarinfo} file type: block, chr, dev or sparse file')
                continue

            # Conservative check: any ".." in the name is treated as a
            # possibly problematic relative path and the entry is skipped.
            if '..' in tarinfo.name:
                if TRACE:
                    logger.debug(f'_extract_tar: skipping unsupported {tarinfo} with relative path')
                continue

            if tarinfo.islnk() or tarinfo.issym():
                if skip_symlinks:
                    if TRACE:
                        logger.debug(f'_extract_tar: skipping link with skip_symlinks: {skip_symlinks}: {tarinfo}')
                    continue

                try:
                    # NOTE(review): _find_link_target is a private tarfile API;
                    # it is looked up before the entry name is rewritten below.
                    target = tarball._find_link_target(tarinfo)
                except Exception:
                    # Best effort: a link whose target lookup fails is skipped.
                    if TRACE:
                        logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}: {traceback.format_exc()}')
                    continue

                if not target:
                    if TRACE:
                        logger.debug(f'_extract_tar: skipping link with missing target: {tarinfo}')
                    continue

            tarinfo.mode = 0o755
            # Make the entry name relative so that it lands under target_dir.
            tarinfo.name = tarinfo.name.lstrip('/')
            tarball.extract(member=tarinfo, path=target_dir, set_attrs=False)


def extract_tar_with_symlinks(location, target_dir):
    """
    Extract the tar archive at ``location`` in the ``target_dir`` directory by
    calling ``extract_tar`` with ``skip_symlinks=False`` and return its result.
    """
    return extract_tar(
        location=location,
        target_dir=target_dir,
        skip_symlinks=False,
    )


def lower_keys(mapping):
Expand Down

0 comments on commit 63ed384

Please sign in to comment.