Merge pull request #45 from nexB/extract-symlinks
Extract symlinks correctly
pombredanne committed Jul 30, 2022
2 parents b4c76b9 + 050c05b commit 3061713
Showing 22 changed files with 462 additions and 121 deletions.
18 changes: 18 additions & 0 deletions CHANGELOG.rst
@@ -1,6 +1,24 @@
Changelog
=========

v32.0.0
--------

This is a major release with bug fixes and an output change.

- We no longer support Python 3.6, only 3.7 and up.

- "utils.extract_tar" function now behaves correctly with links and return
either a list of error message strings (the previous default) but with updated
messages or a list of ExtractEvent to better track extraction errors and warnings.
The behavious is driven by the "as_events" argument.

- Everywhere extract is callable (Image, Layer) there is a new "skip_symlinks"
  argument defaulting to True. If True, symlinks and links are skipped during
  extraction. The "as_events" argument is also available in these places, and
  these functions now return a list (rather than nothing, as before).


v31.1.0
--------

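The changelog entries above describe the reworked "utils.extract_tar" and its new "as_events" and "skip_symlinks" arguments. A minimal usage sketch follows, assuming the function is importable as ``container_inspector.utils.extract_tar``; the import path and file paths are illustrative, not taken from this diff:

```python
# Minimal sketch, assuming extract_tar lives in container_inspector.utils
# (import path and paths below are illustrative).
from container_inspector import utils

# Default behavior: returns a list of warning/error message strings
# (empty when extraction went fine); symlinks and links are skipped here.
messages = utils.extract_tar(
    location="layer.tar",
    target_dir="/tmp/extracted-layer",
    skip_symlinks=True,
)
for message in messages:
    print(message)

# With as_events=True: returns a list of ExtractEvent objects instead,
# to better track extraction errors and warnings.
events = utils.extract_tar(
    location="layer.tar",
    target_dir="/tmp/extracted-layer",
    skip_symlinks=True,
    as_events=True,
)
for event in events:
    print(event)
```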
8 changes: 4 additions & 4 deletions azure-pipelines.yml
@@ -11,23 +11,23 @@ jobs:
parameters:
job_name: ubuntu18_cpython
image_name: ubuntu-18.04
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
python_versions: ['3.7', '3.8', '3.9', '3.10']
test_suites:
all: venv/bin/pytest -n 2 -vvs

- template: etc/ci/azure-posix.yml
parameters:
job_name: ubuntu20_cpython
image_name: ubuntu-20.04
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
python_versions: ['3.7', '3.8', '3.9', '3.10']
test_suites:
all: venv/bin/pytest -n 2 -vvs

- template: etc/ci/azure-posix.yml
parameters:
job_name: macos1015_cpython
image_name: macos-10.15
python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
python_versions: ['3.7', '3.8', '3.9', '3.10']
test_suites:
all: venv/bin/pytest -n 2 -vvs

@@ -43,7 +43,7 @@ jobs:
# parameters:
# job_name: win2019_cpython
# image_name: windows-2019
# python_versions: ['3.6', '3.7', '3.8', '3.9', '3.10']
# python_versions: ['3.7', '3.8', '3.9', '3.10']
# test_suites:
# all: venv\Scripts\pytest -n 2 -vvs
#
33 changes: 20 additions & 13 deletions requirements-dev.txt
@@ -1,24 +1,31 @@
aboutcode-toolkit==7.0.1
bleach==4.1.0
aboutcode-toolkit==7.0.2
black==22.6.0
bleach==5.0.1
build==0.7.0
commonmark==0.9.1
docutils==0.18.1
docutils==0.19
et-xmlfile==1.1.0
execnet==1.9.0
iniconfig==1.1.1
jeepney==0.7.1
keyring==23.4.1
openpyxl==3.0.9
isort==5.10.1
jeepney==0.8.0
keyring==23.7.0
mypy-extensions==0.4.3
openpyxl==3.0.10
pathspec==0.9.0
pep517==0.12.0
pkginfo==1.8.2
pkginfo==1.8.3
platformdirs==2.5.2
py==1.11.0
pytest==7.0.1
pytest==7.1.2
pytest-forked==1.4.0
pytest-xdist==2.5.0
readme-renderer==34.0
readme-renderer==35.0
requests-toolbelt==0.9.1
rfc3986==1.5.0
rich==12.3.0
rfc3986==2.0.0
rich==12.5.1
secretstorage==3.3.2
tomli==1.2.3
twine==3.8.0
tomli==2.0.1
tqdm==4.64.0
twine==4.0.1
typing_extensions==4.3.0
64 changes: 32 additions & 32 deletions requirements.txt
@@ -2,78 +2,78 @@ attrs==21.4.0
banal==1.0.6
beautifulsoup4==4.11.1
binaryornot==0.4.4
boolean.py==3.8
certifi==2021.10.8
cffi==1.15.0
chardet==4.0.0
charset-normalizer==2.0.12
click==8.0.4
colorama==0.4.4
commoncode==30.2.0
boolean.py==4.0
certifi==2022.6.15
cffi==1.15.1
chardet==5.0.0
charset-normalizer==2.1.0
click==8.1.3
colorama==0.4.5
commoncode==31.0.0b4
construct==2.10.68
cryptography==36.0.2
debian-inspector==30.0.0
cryptography==37.0.4
debian-inspector==31.0.0b1
dockerfile-parse==1.2.0
dparse2==0.6.1
extractcode==30.0.0
extractcode==31.0.0
extractcode-7z==16.5.210531
extractcode-libarchive==3.5.1.210531
fasteners==0.17.3
fingerprints==1.0.3
ftfy==6.0.3
ftfy==6.1.1
future==0.18.2
gemfileparser==0.8.0
html5lib==1.1
idna==3.3
importlib-metadata==4.8.3
importlib-metadata==4.12.0
inflection==0.5.1
intbitset==3.0.1
isodate==0.6.1
jaraco.functools==3.4.0
jaraco.functools==3.5.1
javaproperties==0.8.1
Jinja2==3.0.3
Jinja2==3.1.2
jsonstreams==0.6.0
license-expression==21.6.14
lxml==4.8.0
MarkupSafe==2.0.1
libfwsi-python==20220123
license-expression==30.0.0
lxml==4.9.1
MarkupSafe==2.1.1
more-itertools==8.13.0
normality==2.3.3
packagedcode-msitools==0.101.210706
packageurl-python==0.9.9
packageurl-python==0.10.0
packaging==21.3
parameter-expansion-patched==0.3.1
patch==1.16
pdfminer.six==20220506
pefile==2021.9.3
pdfminer.six==20220524
pefile==2022.5.30
pip-requirements-parser==31.2.0
pkginfo2==30.0.0
pluggy==1.0.0
plugincode==21.1.21
plugincode==31.0.0b1
ply==3.11
publicsuffix2==2.20191221
pyahocorasick==2.0.0b1
pycparser==2.21
pygmars==0.7.0
Pygments==2.12.0
pymaven-patch==0.3.0
pyparsing==3.0.8
pyparsing==3.0.9
pytz==2022.1
PyYAML==6.0
rdflib==5.0.0
regipy==2.2.2
requests==2.27.1
rdflib==6.2.0
regipy==3.0.2
requests==2.28.1
rpm-inspector-rpm==4.16.1.3.210404
saneyaml==0.5.2
six==1.16.0
soupsieve==2.3.1
soupsieve==2.3.2.post1
spdx-tools==0.7.0a3
text-unidecode==1.3
toml==0.10.2
typecode==21.6.1
typecode==30.0.0
typecode-libmagic==5.39.210531
urllib3==1.26.9
urllib3==1.26.11
urlpy==0.5
wcwidth==0.2.5
webencodings==0.5.1
xmltodict==0.12.0
zipp==3.6.0
xmltodict==0.13.0
zipp==3.8.1
2 changes: 1 addition & 1 deletion setup.cfg
@@ -54,7 +54,7 @@ zip_safe = false

setup_requires = setuptools_scm[toml] >= 4

python_requires = >=3.6.*
python_requires = >=3.7.*

install_requires =
click >= 6.7, !=7.0, !=8.0.3
48 changes: 36 additions & 12 deletions src/container_inspector/image.py
@@ -356,14 +356,23 @@ def bottom_layer(self):
"""
return self.layers[0]

def extract_layers(self, extracted_location):
def extract_layers(self, extracted_location, as_events=False, skip_symlinks=True):
"""
Extract all layer archives to the `extracted_location` directory.
Each layer is extracted to its own directory named after its `layer_id`.
Skip symlinks and links if ``skip_symlinks`` is True.
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
"""
all_events = []
for layer in self.layers:
exloc = os.path.join(extracted_location, layer.layer_id)
layer.extract(extracted_location=exloc)
events = layer.extract(
extracted_location=exloc,
skip_symlinks=skip_symlinks,
as_events=as_events,
)
all_events.extend(events)
return all_events

def get_layers_resources(self, with_dir=False):
"""
@@ -450,41 +459,53 @@ def get_installed_packages(self, packages_getter):
yield purl, package, layer

@staticmethod
def extract(archive_location, extracted_location, skip_symlinks=False):
def extract(archive_location, extracted_location, as_events=False, skip_symlinks=False):
"""
Extract the image archive tarball at ``archive_location`` to
``extracted_location``. Skip symlinks and links if ``skip_symlinks`` is True.
``extracted_location``.
Skip symlinks and links if ``skip_symlinks`` is True.
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
"""
utils.extract_tar(
return utils.extract_tar(
location=archive_location,
target_dir=extracted_location,
skip_symlinks=skip_symlinks,
as_events=as_events,
)

@staticmethod
def get_images_from_tarball(
archive_location,
extracted_location,
verify=True,
skip_symlinks=False,
):
"""
Return a list of Images found in the tarball at `archive_location` that
will be extracted to `extracted_location`. The tarball must be in the
Return a list of Images found in the tarball at ``archive_location`` that
will be extracted to ``extracted_location``. The tarball must be in the
format of a "docker save" command tarball.
If `verify` is True, perform extra checks on the config data and layers
If ``verify`` is True, perform extra checks on the config data and layers
checksums.
Skip symlinks and links if ``skip_symlinks`` is True.
Ignore the extraction events.
"""
if TRACE:
logger.debug(
f'get_images_from_tarball: {archive_location} , '
f'get_images_from_tarball: {archive_location} '
f'extracting to: {extracted_location}'
)

Image.extract(
# TODO: do not ignore extract events
_events = Image.extract(
archive_location=archive_location,
extracted_location=extracted_location,
skip_symlinks=skip_symlinks,
)
if TRACE:
logger.debug('get_images_from_tarball: events')
for e in _events:
logger.debug(str(e))

return Image.get_images_from_dir(
extracted_location=extracted_location,
@@ -1071,16 +1092,19 @@ def __attrs_post_init__(self, *args, **kwargs):
if not self.size:
self.size = os.path.getsize(self.archive_location)

def extract(self, extracted_location, skip_symlinks=True):
def extract(self, extracted_location, as_events=False, skip_symlinks=False):
"""
Extract this layer archive in the ``extracted_location`` directory and set
this Layer's ``extracted_location`` attribute to ``extracted_location``.
Skip symlinks and links if ``skip_symlinks`` is True.
Return a list of ExtractEvent if ``as_events`` is True or a list of message strings otherwise.
"""
self.extracted_location = extracted_location
utils.extract_tar(
return utils.extract_tar(
location=self.archive_location,
target_dir=extracted_location,
skip_symlinks=skip_symlinks,
as_events=as_events,
)

def get_resources(self, with_dir=False, walker=os.walk):
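Based on the updated ``Image`` and ``Layer`` signatures above, here is a sketch of how the new ``as_events`` and ``skip_symlinks`` arguments might be used together; the paths are illustrative and the import path is assumed from the ``src/container_inspector/image.py`` layout shown in this diff:

```python
# Sketch under the assumption that the package is importable as container_inspector.
from container_inspector.image import Image

# Build Image objects from a "docker save" style tarball; symlinks and links
# are skipped during the initial extraction and extraction events are ignored.
images = Image.get_images_from_tarball(
    archive_location="myimage.tar",
    extracted_location="/tmp/extracted-image",
    verify=True,
    skip_symlinks=True,
)

for image in images:
    # Extract every layer to its own directory; with as_events=True this
    # returns ExtractEvent objects, otherwise a list of message strings.
    events = image.extract_layers(
        extracted_location="/tmp/extracted-layers",
        as_events=True,
        skip_symlinks=True,
    )
    for event in events:
        print(event)
```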
15 changes: 12 additions & 3 deletions src/container_inspector/rootfs.py
@@ -31,14 +31,16 @@ class InconsistentLayersError(Exception):
pass


def rebuild_rootfs(img, target_dir):
def rebuild_rootfs(img, target_dir, skip_symlinks=True):
"""
Extract and merge or "squash" all layers of the ``img`` Image into a single
rootfs in ``target_dir``. Extraction is done in sequence from the bottom (root
or initial) layer to the top (or latest) layer and the "whiteouts"
unionfs/overlayfs procedure is applied at each step as per the OCI spec:
https://github.com/opencontainers/image-spec/blob/master/layer.md#whiteouts
Skip symlinks and links if ``skip_symlinks`` is True.
Return a list of deleted "whiteout" files.
Raise an Exception on errors.
@@ -73,8 +75,15 @@ def rebuild_rootfs(img, target_dir):
# 1. extract a layer to temp.
# Note that we are not preserving any special files or file permissions
extracted_loc = tempfile.mkdtemp('container_inspector-docker')
layer.extract(extracted_location=extracted_loc)
if TRACE: logger.debug(f' Extracted layer to: {extracted_loc}')
# TODO: do not ignore extract events
_events = layer.extract(
extracted_location=extracted_loc,
skip_symlinks=skip_symlinks,
)
if TRACE:
logger.debug(f' Extracted layer to: {extracted_loc} with skip_symlinks: {skip_symlinks}')
for ev in _events:
logger.debug(f' {ev}')

# 2. find whiteouts in that layer.
whiteouts = list(find_whiteouts(extracted_loc))
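A sketch of how ``rebuild_rootfs`` might be driven end to end with the new ``skip_symlinks`` argument; module paths are assumed from the ``src/container_inspector`` layout in this diff, and the tarball and target paths are placeholders:

```python
# Sketch, not a verbatim example from the project: paths are placeholders
# and error handling is omitted.
from container_inspector.image import Image
from container_inspector.rootfs import rebuild_rootfs

images = Image.get_images_from_tarball(
    archive_location="myimage.tar",
    extracted_location="/tmp/extracted-image",
)

for image in images:
    # Squash all layers into a single root filesystem, applying the OCI
    # whiteout procedure and skipping symlinks/links during extraction.
    deleted_whiteouts = rebuild_rootfs(
        img=image,
        target_dir="/tmp/rootfs",
        skip_symlinks=True,
    )
    print(f"Applied {len(deleted_whiteouts)} whiteouts")
```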
