Skip to content
This repository has been archived by the owner on Dec 7, 2022. It is now read-only.

Commit

Permalink
v2s2 to v2s1 converter class
Browse files Browse the repository at this point in the history
The converter takes a v2s2 manifest and a config layer, and produces
a signed v2s1 manifest.

closes #5635
https://pulp.plan.io/issues/4244
  • Loading branch information
mibanescu committed Oct 29, 2019
1 parent 17c9f15 commit c612b93
Show file tree
Hide file tree
Showing 5 changed files with 359 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGES/5635.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added v2s2 to v2s1 converter.
274 changes: 274 additions & 0 deletions pulp_docker/docker_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
#!/usr/bin/env python

import argparse
import base64
import binascii
import datetime
import ecdsa
import hashlib
import itertools
import json
import logging
import sys
from collections import namedtuple
from jwkest import jws, jwk, ecc

# Module-level logger; main() adjusts its level from the command line.
log = logging.getLogger(__name__)

# One entry of the converted image: the blob digest used as layer id, the
# uncompressed digest (None for throwaway layers) and the matching history
# record taken from the config layer.
FS_Layer = namedtuple("FS_Layer", ["layer_id", "uncompressed_digest", "history"])


def main():
    """
    Command line entry point for validation purposes.

    Reads the schema 2 manifest and its config layer from the files named on
    the command line, converts them with Converter_s2_to_s1 and prints the
    resulting manifest on standard output.
    """
    logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(levelname)s %(message)s')
    parser = argparse.ArgumentParser()
    parser.add_argument('--manifest', help='v2s2 manifest', required=True,
                        type=argparse.FileType())
    # The config layer is always parsed below, so it has to be mandatory;
    # leaving it optional made a missing option crash inside json.load().
    parser.add_argument('--config-layer', help='Config layer', required=True,
                        type=argparse.FileType())
    parser.add_argument('--namespace', help='Namespace', default='myself')
    parser.add_argument('--repository', help='Image name (repository)', default='dummy')
    parser.add_argument('--tag', help='Tag', default='latest')

    parser.add_argument('-v', '--verbose', action='count', default=0, help='Increase verbosity')

    args = parser.parse_args()
    # INFO is the baseline; two or more -v flags switch to DEBUG.
    logLevel = logging.INFO
    if args.verbose > 1:
        logLevel = logging.DEBUG
    log.setLevel(logLevel)

    # argparse opened both files already; make sure they get closed again.
    with args.manifest as manifest_file, args.config_layer as config_file:
        manifest = json.load(manifest_file)
        config_layer = json.load(config_file)

    converter = Converter_s2_to_s1(
        manifest, config_layer,
        namespace=args.namespace, repository=args.repository,
        tag=args.tag)
    manif_data = converter.convert()
    print(manif_data)


class Converter_s2_to_s1:
    """
    Converter class from schema 2 to schema 1.

    Initialize it with a manifest and a config layer JSON documents,
    and call convert() to obtain the signed manifest, as a JSON-encoded string.
    """

    # blobSum assigned to history entries that carry no filesystem layer of
    # their own (and used as a filler when the manifest runs out of layers).
    EMPTY_LAYER = "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"

    def __init__(self, manifest, config_layer, namespace=None, repository=None, tag=None):
        """
        Initializer needs a manifest and a config layer as JSON documents.

        namespace, repository and tag are only used to fill in the name and
        tag of the converted manifest; falsy values fall back to defaults.
        """
        self.namespace = namespace or "ignored"
        self.repository = repository or "test"
        self.tag = tag or "latest"
        self.manifest = manifest
        self.config_layer = config_layer
        # Both lists are (re)populated by compute_layers()
        self.fs_layers = []
        self.history = []

    def convert(self):
        """
        Convert manifest from schema 2 to schema 1.

        Returns the manifest as a JSON-encoded string. A manifest that is
        already schema 1 is returned re-serialized, without being signed;
        otherwise the converted manifest is signed before being returned.
        """
        if self.manifest.get("schemaVersion") == 1:
            log.info("Manifest is already schema 1")
            return _jsonDumps(self.manifest)
        log.info("Converting manifest to schema 1")
        name = "%s/%s" % (self.namespace, self.repository)
        self.compute_layers()
        manifest = dict(name=name, tag=self.tag, architecture=self.config_layer['architecture'],
                        schemaVersion=1, fsLayers=self.fs_layers, history=self.history)
        # NOTE(review): presumably load_key(ecc.P256) creates a new key on
        # every call - confirm against the jwkest documentation.
        key = jwk.ECKey().load_key(ecc.P256)
        key.kid = getKeyId(key)
        return sign(manifest, key)

    def compute_layers(self):
        """
        Compute layers to be present in the converted image.

        Empty (throwaway) layers will be created to store image metadata.
        The results are stored in self.fs_layers and self.history, both
        ordered from the topmost layer down to the base layer.
        """
        # Layers in v2s1 are in reverse order from v2s2
        fs_layers = self._compute_fs_layers()
        self.fs_layers = [dict(blobSum=layer.layer_id) for layer in fs_layers]
        # Compute v1 compatibility
        parent = None
        history_entries = self.history = []

        fs_layers_count = len(fs_layers)
        # Reverse list so we can compute parent/child properly
        fs_layers.reverse()
        for i, fs_layer in enumerate(fs_layers):
            layer_id = self._compute_layer_id(fs_layer.layer_id, fs_layer.uncompressed_digest, i)
            config = self._compute_v1_compatibility_config(
                layer_id, fs_layer, last_layer=(i == fs_layers_count - 1))
            if parent is not None:
                config['parent'] = parent
            parent = layer_id
            history_entries.append(dict(v1Compatibility=_jsonDumpsCompact(config)))
        # Reverse again for proper order
        history_entries.reverse()

    def _compute_fs_layers(self):
        """
        Utility function to return a list of FS_Layer objects.

        The list has one entry per history record of the config layer, from
        the newest record to the oldest. Records flagged as empty_layer, and
        records left over once the manifest has no more layers, are mapped to
        EMPTY_LAYER with no uncompressed digest.
        """
        layers = reversed(self.manifest['layers'])
        config_layer_history = reversed(self.config_layer['history'])
        diff_ids = reversed(self.config_layer['rootfs']['diff_ids'])

        def next_digests():
            # Compressed/uncompressed digests of the next real layer, or the
            # empty-layer placeholder once either list is exhausted.
            try:
                return next(layers)['digest'], next(diff_ids)
            except StopIteration:
                return self.EMPTY_LAYER, None

        fs_layers = []
        # Going through next_digests() here as well keeps a manifest without
        # any layer from leaking a bare StopIteration to the caller.
        curr_compressed_dig, curr_uncompressed_dig = next_digests()
        for curr_hist in config_layer_history:
            if curr_hist.get("empty_layer"):
                layer_id = self.EMPTY_LAYER
                uncompressed_dig = None
            else:
                layer_id = curr_compressed_dig
                uncompressed_dig = curr_uncompressed_dig
                curr_compressed_dig, curr_uncompressed_dig = next_digests()
            fs_layers.append(FS_Layer(layer_id, uncompressed_dig, curr_hist))
        return fs_layers

    def _compute_v1_compatibility_config(self, layer_id, fs_layer, last_layer=False):
        """
        Utility function to compute the v1 compatibility.

        Returns a dict describing the layer: the whole config layer (minus
        history and rootfs) for the last layer, or just the creation time and
        command for any other layer. The id is always set, and layers without
        an uncompressed digest are flagged as throwaway.
        """
        if last_layer:
            # The whole config layer becomes part of the v1compatibility
            # (minus history and rootfs)
            config = dict(self.config_layer)
            config.pop("history", None)
            config.pop("rootfs", None)
        else:
            # created_by is left out of the history record when it is empty,
            # so treat a missing key as an empty command instead of failing.
            config = dict(created=fs_layer.history['created'],
                          container_config=dict(Cmd=fs_layer.history.get('created_by', '')))
        if fs_layer.uncompressed_digest is None:
            config['throwaway'] = True
        config['id'] = layer_id
        return config

    @classmethod
    def _compute_layer_id(cls, compressed_dig, uncompressed_dig, layer_index):
        """
        We need to make up an image ID for each layer.
        We will digest:
        * the compressed digest of the layer
        * the uncompressed digest (if present; it will be missing for throw-away layers)
        * the zero-padded integer of the layer number
        The last one is added so we can get different image IDs for throw-away layers.

        Returns the hexadecimal SHA-256 digest of the concatenation.
        """
        dig = hashlib.sha256(compressed_dig.encode("ascii"))
        if uncompressed_dig:
            dig.update(uncompressed_dig.encode("ascii"))
        layer_count = "%06d" % layer_index
        dig.update(layer_count.encode("ascii"))
        return dig.hexdigest()


def _jsonDumps(data):
return json.dumps(data, indent=3, sort_keys=True, separators=(',', ': '))


def _jsonDumpsCompact(data):
return json.dumps(data, sort_keys=True, separators=(',', ':'))


def sign(data, key):
    """
    Sign the JSON document with an elliptic curve key.

    The document is serialized with _jsonDumps(), signed with ES256, and the
    resulting "signatures" block is spliced in just before the last two
    characters of the serialized text. Returns the signed document as a
    string.
    """
    document = _jsonDumps(data)
    # The final two characters are split off so that the signature block can
    # be inserted in front of them, keeping the formatting.
    head, tail = document[:-2], document[-2:]
    timestamp = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
    header = {"alg": "ES256", "jwk": key.serialize()}
    protected = {
        "formatLength": len(document) - 2,
        "formatTail": jws.b64encode_item(tail),
        "time": timestamp,
    }
    signer = jws.JWS(document, **header)
    compact = signer.sign_compact([key], protected=protected)
    # Compact form is "<protected>.<payload>.<signature>"; the payload part
    # is not needed here.
    protected_header, _payload, signature = compact.split(".")
    entry = {"header": header, "signature": signature, "protected": protected_header}
    # Drop the opening brace and the closing "\n}" of the serialized block
    signature_block = _jsonDumps({"signatures": [entry]})[1:-2]
    return ''.join([head, ',', signature_block, tail])


def validate_signature(signed_mf):
    """
    Validate the signature of a signed manifest.

    A signed manifest is a JSON document with a signatures attribute
    as the last element. signed_mf is that document as a string.

    Nothing is returned; NOTE(review): a bad signature is presumably reported
    by jws.JWS.verify_json() raising - confirm against jwkest.
    """
    # In order to validate the signature, we need the exact original payload
    # (the document without the signature). We cannot json.load the document
    # and get rid of the signature, the payload would likely end up
    # differently because of differences in field ordering and indentation.
    # So we need to strip the signature using plain string manipulation, and
    # add back a trailing }

    # Top-level keys are indented by 3 spaces in the documents produced by
    # _jsonDumps(). The separator has to include that whole indentation:
    # otherwise part of it stays at the end of the payload and the [:-2]
    # below removes spaces instead of the ",\n" (and a shorter separator
    # could also match a string value such as a tag named "signatures").
    indent = ' ' * 3

    # strip the signature block
    payload, sep, signatures = signed_mf.partition(indent + '"signatures"')
    # get rid of the trailing ,\n, and add \n}
    jw_payload = payload[:-2] + '\n}'
    # base64-encode and remove any trailing =
    jw_payload = base64.urlsafe_b64encode(jw_payload.encode('ascii')).decode('ascii').rstrip("=")
    # add payload as a json attribute, and then add the signatures back
    payload_attr = indent + '"payload": "{}",\n'.format(jw_payload)
    complete_msg = payload + payload_attr + sep + signatures
    _jws = jws.JWS()
    _jws.verify_json(complete_msg.encode('ascii'))


def getKeyId(key):
    """
    Build the key id of a key, in the format XXXX:YYYY:...

    The id is the base32 encoding of the first 30 bytes of the SHA-256 digest
    of the DER-encoded key, split into colon-separated groups of 4 characters.
    """
    digest = hashlib.sha256(toDer(key)).digest()
    encoded = base64.b32encode(digest[:30]).decode()
    groups = byN(encoded, 4)
    return ':'.join(groups)


def toDer(key):
    """
    Return the DER-encoded representation of the key.

    The result is a sequence made of the algorithm identifier (ecPublicKey
    plus the curve OID 1.2.840.10045.3.1.7) and a bit string holding the
    x and y coordinates of the key.
    """
    coord_size = key.curve.bytes
    x_bytes = number2string(key.x, coord_size)
    y_bytes = number2string(key.y, coord_size)
    # NOTE(review): the two leading bytes are presumably the bit string's
    # unused-bits count and the uncompressed-point marker - confirm.
    point = b"\x00\x04" + x_bytes + y_bytes
    algorithm = ecdsa.der.encode_sequence(
        ecdsa.keys.encoded_oid_ecPublicKey,
        ecdsa.der.encode_oid(1, 2, 840, 10045, 3, 1, 7))
    return ecdsa.der.encode_sequence(algorithm, ecdsa.der.encode_bitstring(point))


def byN(strobj, N):
    """
    Yield consecutive substrings of length N from string strobj.

    The last substring is shorter when the length of strobj is not a
    multiple of N; nothing is yielded for an empty string.
    """
    chars = iter(strobj)

    def take():
        # Next (up to) N characters; '' once the input is exhausted
        return ''.join(itertools.islice(chars, N))

    # iter(callable, sentinel) keeps calling take() until it returns ''
    yield from iter(take, '')


def number2string(num, order):
    """
    Convert the number to its big-endian byte string, zero-padded on the
    left to a total length of order bytes (2*order hex digits).
    """
    hex_digits = "%x" % num
    # Left-pad with zeros up to 2*order digits; longer values are kept as is
    missing = 2 * order - len(hex_digits)
    padded = "0" * missing + hex_digits
    return binascii.unhexlify(padded)


# Only run the command line entry point when executed as a script; the
# return value of main() becomes the exit status.
if __name__ == '__main__':
    raise SystemExit(main())
80 changes: 80 additions & 0 deletions pulp_docker/tests/functional/test_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from pulp_docker import docker_convert


class Test:
    """Checks for the schema 2 to schema 1 converter (Converter_s2_to_s1)."""

    def test_convert(self):
        """Convert a known manifest, then check its signature and layers."""
        converter = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
        docker_convert.validate_signature(converter.convert())

        throwaway = {"blobSum": converter.EMPTY_LAYER}
        expected_layers = (
            [{"blobSum": "sha256:layer1"}] + [throwaway] * 3 + [{"blobSum": "sha256:base"}]
        )
        assert expected_layers == converter.fs_layers

    def test_compute_layers(self):
        """Check the layers and the history computed for a known manifest."""
        converter = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
        converter.compute_layers()

        throwaway = {"blobSum": converter.EMPTY_LAYER}
        expected_layers = (
            [{"blobSum": "sha256:layer1"}] + [throwaway] * 3 + [{"blobSum": "sha256:base"}]
        )
        assert expected_layers == converter.fs_layers

        # One compact JSON document per layer, topmost layer first
        expected_history = [
            {'v1Compatibility': '{"architecture":"amd64","author":"Mihai Ibanescu <mihai.ibanescu@gmail.com>","config":{"Cmd":["/bin/bash"],"Hostname":"decafbad"},"container_config":{"Hostname":"decafbad","Tty":false},"created":"2019-09-05T21:28:52.173079282Z","docker_version":"1.13.1","id":"d7b329ed9d186ff20c25399e848116430ee9b6ae022cb9f3dc3406144ec3685d","parent":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446"}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) MAINTAINER Mihai Ibanescu <mihai.ibanescu@gmail.com>"},"created":"2019-09-05T21:28:43.305854958Z","id":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446","parent":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) CMD [\\"/bin/bash\\"]"},"created":"2018-03-06T00:48:12.679169547Z","id":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","parent":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) LABEL name=CentOS Base Image vendor=CentOS license=GPLv2 build-date=20180302"},"created":"2018-03-06T00:48:12.458578213Z","id":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","parent":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / "},"created":"2018-03-06T00:48:12.077095981Z","id":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a"}'},  # noqa
        ]
        assert expected_history == converter.history


# Minimal schema 2 manifest: two layers, listed from the base layer up.
MANIFEST = {
    "schemaVersion": 2,
    "layers": [
        {"digest": "sha256:base"},
        {"digest": "sha256:layer1"},
    ],
}

# Config layer matching MANIFEST: five history records (oldest first), of
# which only the first and the last one own a filesystem layer.
CONFIG_LAYER = {
    "architecture": "amd64",
    "author": "Mihai Ibanescu <mihai.ibanescu@gmail.com>",
    "config": {"Hostname": "decafbad", "Cmd": ["/bin/bash"]},
    "container_config": {"Hostname": "decafbad", "Tty": False},
    "created": "2019-09-05T21:28:52.173079282Z",
    "docker_version": "1.13.1",
    "history": [
        dict(
            created="2018-03-06T00:48:12.077095981Z",
            created_by="/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / ",
        ),
        dict(
            created="2018-03-06T00:48:12.458578213Z",
            created_by=(
                "/bin/sh -c #(nop) LABEL name=CentOS Base Image "
                "vendor=CentOS license=GPLv2 build-date=20180302"
            ),
            empty_layer=True,
        ),
        dict(
            created="2018-03-06T00:48:12.679169547Z",
            created_by="/bin/sh -c #(nop) CMD [\"/bin/bash\"]",
            empty_layer=True,
        ),
        dict(
            created="2019-09-05T21:28:43.305854958Z",
            author="Mihai Ibanescu <mihai.ibanescu@gmail.com>",
            created_by="/bin/sh -c #(nop) MAINTAINER Mihai Ibanescu <mihai.ibanescu@gmail.com>",
            empty_layer=True,
        ),
        dict(
            created="2019-09-05T21:28:52.173079282Z",
            author="Mihai Ibanescu <mihai.ibanescu@gmail.com>",
            created_by="/bin/sh -c touch /usr/share/dummy.txt",
        ),
    ],
    "rootfs": dict(
        type="layers",
        diff_ids=[
            "sha256:uncompressed_base",
            "sha256:uncompressed_layer1",
        ],
    ),
}
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

requirements = [
"pulpcore~=3.0rc7",
'ecdsa~=0.13.2',
'pyjwkest~=1.4.0',
]


Expand Down
2 changes: 2 additions & 0 deletions test_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ git+https://github.com/PulpQE/pulp-smash.git#egg=pulp-smash
# pin pydocstyle until https://gitlab.com/pycqa/flake8-docstrings/issues/36 is resolved
pydocstyle<4
pytest
ecdsa~=0.13.2
pyjwkest~=1.4.0

0 comments on commit c612b93

Please sign in to comment.