#!/usr/bin/env python
# NOTE(review): reconstructed from a git patch. This span of the patch adds:
#   * CHANGES/5635.feature -- changelog entry: "Added v2s2 to v2s1 converter."
#   * pulp_docker/docker_convert.py -- this module (continued in the next span).

import argparse
import base64
import binascii
import datetime
import hashlib
import itertools
import json
import logging
import sys
from collections import namedtuple

import ecdsa
from jwkest import jws, jwk, ecc

log = logging.getLogger(__name__)

# One filesystem layer: the compressed blob digest, the matching uncompressed
# diff_id (None for empty/throwaway layers), and the config-layer history
# entry that produced it.
FS_Layer = namedtuple("FS_Layer", "layer_id uncompressed_digest history")


def main():
    """
    Command line entry point for validation purposes.

    Reads a v2s2 manifest and its config layer, converts them to a signed
    v2s1 manifest and prints the result.
    """
    logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(levelname)s %(message)s')
    parser = argparse.ArgumentParser()
    # Fixed help text: the *input* manifest is schema 2 (v2s2); the converter
    # produces the v2s1 document.
    parser.add_argument('--manifest', help='v2s2 manifest', required=True,
                        type=argparse.FileType())
    # The config layer is unconditionally json.load()-ed below, so omitting it
    # used to crash with an obscure AttributeError on None; make it required
    # so argparse reports a proper usage error instead.
    parser.add_argument('--config-layer', help='Config layer', required=True,
                        type=argparse.FileType())
    parser.add_argument('--namespace', help='Namespace', default='myself')
    parser.add_argument('--repository', help='Image name (repository)', default='dummy')
    parser.add_argument('--tag', help='Tag', default='latest')

    parser.add_argument('-v', '--verbose', action='count', default=0, help='Increase verbosity')

    args = parser.parse_args()
    # -v gives INFO on this module's logger, -vv (or more) gives DEBUG.
    logLevel = logging.INFO
    if args.verbose > 1:
        logLevel = logging.DEBUG
    log.setLevel(logLevel)

    converter = Converter_s2_to_s1(
        json.load(args.manifest), json.load(args.config_layer),
        namespace=args.namespace, repository=args.repository,
        tag=args.tag)
    manif_data = converter.convert()
    print(manif_data)


class Converter_s2_to_s1:
    """
    Converter class from schema 2 to schema 1.

    Initialize it with a manifest and a config layer JSON documents,
    and call convert() to obtain the signed manifest, as a JSON-encoded
    string.
    """

    # Digest of the well-known empty gzipped tar blob, used for throwaway layers.
    EMPTY_LAYER = "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"

    def __init__(self, manifest, config_layer, namespace=None, repository=None, tag=None):
        """
        Initializer needs a manifest and a config layer as JSON documents.
        """
        self.namespace = namespace or "ignored"
        self.repository = repository or "test"
        self.tag = tag or "latest"
        self.manifest = manifest
        self.config_layer = config_layer
        self.fs_layers = []  # v2s1 fsLayers, most recent layer first
        self.history = []    # v2s1 history entries, parallel to fs_layers

    def convert(self):
        """
        Convert manifest from schema 2 to schema 1.

        Returns the signed v2s1 manifest as a JSON-encoded string. A manifest
        that is already schema 1 is returned re-serialized, unsigned.
        """
        if self.manifest.get("schemaVersion") == 1:
            log.info("Manifest is already schema 1")
            return _jsonDumps(self.manifest)
        log.info("Converting manifest to schema 1")
        name = "%s/%s" % (self.namespace, self.repository)
        self.compute_layers()
        manifest = dict(name=name, tag=self.tag, architecture=self.config_layer['architecture'],
                        schemaVersion=1, fsLayers=self.fs_layers, history=self.history)
        # Sign with a freshly generated NIST P-256 key (docker/libtrust style).
        key = jwk.ECKey().load_key(ecc.P256)
        key.kid = getKeyId(key)
        manifData = sign(manifest, key)
        return manifData

    def compute_layers(self):
        """
        Compute layers to be present in the converted image.

        Empty (throwaway) layers will be created to store image metadata.
        Fills in self.fs_layers and self.history.
        """
        # Layers in v2s1 are in reverse order from v2s2
        fs_layers = self._compute_fs_layers()
        # Use the namedtuple field instead of a bare positional index.
        self.fs_layers = [dict(blobSum=layer.layer_id) for layer in fs_layers]
        # Compute v1 compatibility
        parent = None
        history_entries = self.history = []

        fs_layers_count = len(fs_layers)
        # Reverse list so we can compute parent/child properly
        fs_layers.reverse()
        for i, fs_layer in enumerate(fs_layers):
            layer_id = self._compute_layer_id(fs_layer.layer_id, fs_layer.uncompressed_digest, i)
            config = self._compute_v1_compatibility_config(
                layer_id, fs_layer, last_layer=(i == fs_layers_count - 1))
            if parent is not None:
                config['parent'] = parent
            parent = layer_id
            history_entries.append(dict(v1Compatibility=_jsonDumpsCompact(config)))
        # Reverse again for proper order
        history_entries.reverse()

    def _compute_fs_layers(self):
        """Utility function to return a list of FS_Layer objects"""
        # Walk everything newest-first: history entries drive the iteration,
        # and the (compressed digest, diff_id) pair is only consumed for
        # non-empty history entries.
        layers = reversed(self.manifest['layers'])
        config_layer_history = reversed(self.config_layer['history'])
        diff_ids = reversed(self.config_layer['rootfs']['diff_ids'])
        fs_layers = []
        # NOTE(review): assumes at least one layer and one diff_id exist;
        # an empty manifest would raise StopIteration here -- confirm callers.
        curr_compressed_dig = next(layers)['digest']
        curr_uncompressed_dig = next(diff_ids)
        for curr_hist in config_layer_history:
            if curr_hist.get("empty_layer"):
                layer_id = self.EMPTY_LAYER
                uncompressed_dig = None
            else:
                layer_id = curr_compressed_dig
                uncompressed_dig = curr_uncompressed_dig
                try:
                    curr_compressed_dig = next(layers)['digest']
                    curr_uncompressed_dig = next(diff_ids)
                except StopIteration:
                    # Ran past the oldest real layer; pad with throwaway markers.
                    curr_compressed_dig = self.EMPTY_LAYER
                    curr_uncompressed_dig = None
            fs_layers.append(FS_Layer(layer_id, uncompressed_dig, curr_hist))
        return fs_layers

    def _compute_v1_compatibility_config(self, layer_id, fs_layer, last_layer=False):
        """Utility function to compute the v1 compatibility"""
        if last_layer:
            # The whole config layer becomes part of the v1compatibility
            # (minus history and rootfs)
            config = dict(self.config_layer)
            config.pop("history", None)
            config.pop("rootfs", None)
        else:
            config = dict(created=fs_layer.history['created'],
                          container_config=dict(Cmd=fs_layer.history['created_by']))
        if fs_layer.uncompressed_digest is None:
            config['throwaway'] = True
        config['id'] = layer_id
        return config

    @classmethod
    def _compute_layer_id(cls, compressed_dig, uncompressed_dig, layer_index):
        """
        We need to make up an image ID for each layer.
        We will digest:
        * the compressed digest of the layer
        * the uncompressed digest (if present; it will be missing for throw-away layers)
        * the zero-padded integer of the layer number
        The last one is added so we can get different image IDs for throw-away layers.
        """
        dig = hashlib.sha256(compressed_dig.encode("ascii"))
        if uncompressed_dig:
            dig.update(uncompressed_dig.encode("ascii"))
        layer_count = "%06d" % layer_index
        dig.update(layer_count.encode("ascii"))
        return dig.hexdigest()


def _jsonDumps(data):
    # Pretty-printed form used for the signed payload (3-space indent).
    return json.dumps(data, indent=3, sort_keys=True, separators=(',', ': '))


def _jsonDumpsCompact(data):
    # Compact form used for the embedded v1Compatibility strings.
    return json.dumps(data, sort_keys=True, separators=(',', ':'))


def sign(data, key):
    """
    Sign the JSON document with a elliptic curve key
    """
    jdata = _jsonDumps(data)
    # Naive UTC timestamp with an explicit 'Z' suffix.
    now = datetime.datetime.utcnow().replace(microsecond=0).isoformat() + 'Z'
    header = dict(alg="ES256", jwk=key.serialize())
    # The protected header records how much of the document is covered by the
    # signature (everything except the final "\n}") so a verifier can rebuild
    # the exact payload from the signed document.
    protected = dict(formatLength=len(jdata) - 2,
                     formatTail=jws.b64encode_item(jdata[-2:]),
                     time=now)
    _jws = jws.JWS(jdata, **header)
    protectedHeader, payload, signature = _jws.sign_compact([key], protected=protected).split(".")
    signatures = [dict(header=header, signature=signature, protected=protectedHeader)]
    jsig = _jsonDumps(dict(signatures=signatures))[1:-2]
    arr = [jdata[:-2], ',', jsig, jdata[-2:]]
    # Add the signature block at the end of the json string, keeping the
    # formatting
    jdata2 = ''.join(arr)
    return jdata2
def validate_signature(signed_mf):
    """
    Validate the signature of a signed manifest

    A signed manifest is a JSON document with a signature attribute
    as the last element. Raises (from jwkest) if verification fails.
    """
    # In order to validate the signature, we need the exact original payload
    # (the document without the signature). We cannot json.load the document
    # and get rid of the signature, the payload would likely end up
    # differently because of differences in field ordering and indentation.
    # So we need to strip the signature using plain string manipulation, and
    # add back a trailing }

    # strip the signature block
    payload, sep, signatures = signed_mf.partition(' "signatures"')
    # get rid of the trailing ,\n, and add \n}
    jw_payload = payload[:-2] + '\n}'
    # base64url-encode and remove any trailing = padding
    jw_payload = base64.urlsafe_b64encode(jw_payload.encode('ascii')).decode('ascii').rstrip("=")
    # add payload as a json attribute, and then add the signatures back
    complete_msg = payload + ' "payload": "{}",\n'.format(jw_payload) + sep + signatures
    _jws = jws.JWS()
    _jws.verify_json(complete_msg.encode('ascii'))


def getKeyId(key):
    """
    DER-encode the key and represent it in the format XXXX:YYYY:...

    The ID is the first 30 bytes of the SHA-256 of the DER encoding,
    base32-encoded and split into groups of four characters.
    """
    derRepr = toDer(key)
    shaRepr = hashlib.sha256(derRepr).digest()[:30]
    b32Repr = base64.b32encode(shaRepr).decode()
    return ':'.join(byN(b32Repr, 4))


def toDer(key):
    """Return the DER-encoded representation of the key"""
    # \x00 is the BIT STRING unused-bits octet; \x04 marks an uncompressed
    # EC point (x || y) -- presumably per SEC1; confirm against libtrust.
    point = b"\x00\x04" + number2string(key.x, key.curve.bytes) + \
        number2string(key.y, key.curve.bytes)
    der = ecdsa.der
    curveEncodedOid = der.encode_oid(1, 2, 840, 10045, 3, 1, 7)  # prime256v1
    return der.encode_sequence(
        der.encode_sequence(ecdsa.keys.encoded_oid_ecPublicKey, curveEncodedOid),
        der.encode_bitstring(point))


def byN(strobj, N):
    """
    Yield consecutive substrings of length N from string strobj

    The final substring may be shorter than N.
    """
    it = iter(strobj)
    while True:
        substr = ''.join(itertools.islice(it, N))
        if not substr:
            return
        yield substr


def number2string(num, order):
    """
    Return the big-endian byte representation of num, zero-padded (to the
    left) to a total length of `order` bytes.

    (The intermediate hex string has length 2*order; the docstring used to
    describe that instead of the returned bytes.)
    """
    # convert to hex
    nhex = "%x" % num
    # Zero-pad to the left so the unhexified result is `order` bytes long
    nhex = nhex.rjust(2 * order, '0')
    return binascii.unhexlify(nhex)


if __name__ == '__main__':
    sys.exit(main())


# ---------------------------------------------------------------------------
# pulp_docker/tests/functional/test_convert.py (new file in the same patch)
# ---------------------------------------------------------------------------
from pulp_docker import docker_convert


class Test:
    """Converter_s2_to_s1 test class"""

    def test_convert(self):
        """Test schema converter on a known manifest"""
        cnv = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
        signed_mf = cnv.convert()
        # The signed document must verify against its own embedded signature.
        docker_convert.validate_signature(signed_mf)

        empty = dict(blobSum=cnv.EMPTY_LAYER)
        assert [dict(blobSum="sha256:layer1"), empty, empty, empty,
                dict(blobSum="sha256:base")] == cnv.fs_layers

    def test_compute_layers(self):
        """Test that computing the layers produces the expected data"""
        cnv = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
        cnv.compute_layers()
        empty = dict(blobSum=cnv.EMPTY_LAYER)
        assert [dict(blobSum="sha256:layer1"), empty, empty, empty,
                dict(blobSum="sha256:base")] == cnv.fs_layers
        assert [
            {'v1Compatibility': '{"architecture":"amd64","author":"Mihai Ibanescu ","config":{"Cmd":["/bin/bash"],"Hostname":"decafbad"},"container_config":{"Hostname":"decafbad","Tty":false},"created":"2019-09-05T21:28:52.173079282Z","docker_version":"1.13.1","id":"d7b329ed9d186ff20c25399e848116430ee9b6ae022cb9f3dc3406144ec3685d","parent":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446"}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) MAINTAINER Mihai Ibanescu "},"created":"2019-09-05T21:28:43.305854958Z","id":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446","parent":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) CMD [\\"/bin/bash\\"]"},"created":"2018-03-06T00:48:12.679169547Z","id":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","parent":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) LABEL name=CentOS Base Image vendor=CentOS license=GPLv2 build-date=20180302"},"created":"2018-03-06T00:48:12.458578213Z","id":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","parent":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a","throwaway":true}'},  # noqa
            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / "},"created":"2018-03-06T00:48:12.077095981Z","id":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a"}'},  # noqa
        ] == cnv.history


# Two-layer fixture manifest: a base layer and one layer on top.
MANIFEST = dict(schemaVersion=2, layers=[
    dict(digest="sha256:base"),
    dict(digest="sha256:layer1"),
])

# Matching config layer: five history entries, three of them empty_layer,
# and the two diff_ids matching MANIFEST's two blobs.
CONFIG_LAYER = dict(
    architecture="amd64",
    author="Mihai Ibanescu ",
    config=dict(Hostname="decafbad", Cmd=["/bin/bash"]),
    container_config=dict(Hostname="decafbad", Tty=False),
    created="2019-09-05T21:28:52.173079282Z",
    docker_version="1.13.1",
    history=[
        {
            "created": "2018-03-06T00:48:12.077095981Z",
            "created_by": "/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / "
        },
        {
            "created": "2018-03-06T00:48:12.458578213Z",
            "created_by": "/bin/sh -c #(nop) LABEL name=CentOS Base Image vendor=CentOS "
                          "license=GPLv2 build-date=20180302",
            "empty_layer": True
        },
        {
            "created": "2018-03-06T00:48:12.679169547Z",
            "created_by": "/bin/sh -c #(nop) CMD [\"/bin/bash\"]",
            "empty_layer": True
        },
        {
            "created": "2019-09-05T21:28:43.305854958Z",
            "author": "Mihai Ibanescu ",
            "created_by": "/bin/sh -c #(nop) MAINTAINER Mihai Ibanescu ",
            "empty_layer": True
        },
        {
            "created": "2019-09-05T21:28:52.173079282Z",
            "author": "Mihai Ibanescu ",
            "created_by": "/bin/sh -c touch /usr/share/dummy.txt"
        },
    ],
    rootfs={
        "type": "layers",
        "diff_ids": [
            "sha256:uncompressed_base",
            "sha256:uncompressed_layer1"
        ],
    },
)

# ---------------------------------------------------------------------------
# Remaining hunks in the same patch (other files, preserved verbatim here):
#
# setup.py -- add to `requirements` (after "pulpcore~=3.0rc7",):
#     'ecdsa~=0.13.2',
#     'pyjwkest~=1.4.0',
#
# test_requirements.txt -- append after "pytest":
#     ecdsa~=0.13.2
#     pyjwkest~=1.4.0
# ---------------------------------------------------------------------------