v2s2 to v2s1 converter class

The converter takes a v2s2 manifest and a config layer, and produces a signed v2s1 manifest. closes #5635 https://pulp.plan.io/issues/4244
pulp · Oct 29, 2019 · c612b93 · c612b93
1 parent 17c9f15
commit c612b93
Show file tree

Hide file tree

Showing 5 changed files with 359 additions and 0 deletions.
diff --git a/CHANGES/5635.feature b/CHANGES/5635.feature
@@ -0,0 +1 @@
+Added v2s2 to v2s1 converter.
diff --git a/pulp_docker/docker_convert.py b/pulp_docker/docker_convert.py
@@ -0,0 +1,274 @@
+#!/usr/bin/env python
+
+import argparse
+import base64
+import binascii
+import datetime
+import ecdsa
+import hashlib
+import itertools
+import json
+import logging
+import sys
+from collections import namedtuple
+from jwkest import jws, jwk, ecc
+
+# Module-level logger; main() sets its level from the -v/--verbose count.
+log = logging.getLogger(__name__)
+
+# One layer of the converted image, as built by
+# Converter_s2_to_s1._compute_fs_layers():
+#   layer_id            - blob digest of the layer (EMPTY_LAYER for throwaway layers)
+#   uncompressed_digest - matching rootfs diff_id, or None for throwaway layers
+#   history             - the config layer's history entry for this layer
+FS_Layer = namedtuple("FS_Layer", "layer_id uncompressed_digest history")
+
+
+def main():
+    """
+    Command line entry point for validation purposes.
+
+    Reads the manifest (and, for a schema 2 manifest, its config layer) from
+    the files named on the command line, converts the manifest to schema 1
+    and prints the resulting JSON document to standard output.
+    """
+    logging.basicConfig(level=logging.ERROR, format='%(asctime)s %(levelname)s %(message)s')
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--manifest', help='v2s2 manifest', required=True, type=argparse.FileType())
+    parser.add_argument('--config-layer', help='Config layer', type=argparse.FileType())
+    parser.add_argument('--namespace', help='Namespace', default='myself')
+    parser.add_argument('--repository', help='Image name (repository)', default='dummy')
+    parser.add_argument('--tag', help='Tag', default='latest')
+
+    parser.add_argument('-v', '--verbose', action='count', default=0, help='Increase verbosity')
+
+    args = parser.parse_args()
+    # INFO is the baseline; -v has to be given at least twice to reach DEBUG.
+    logLevel = logging.INFO
+    if args.verbose > 1:
+        logLevel = logging.DEBUG
+    log.setLevel(logLevel)
+
+    # Close the files argparse opened as soon as their content is parsed.
+    with args.manifest as manifest_file:
+        manifest = json.load(manifest_file)
+    config_layer = None
+    if args.config_layer is not None:
+        with args.config_layer as config_file:
+            config_layer = json.load(config_file)
+    elif manifest.get("schemaVersion") != 1:
+        # Only a manifest that is already schema 1 is passed through without
+        # looking at the config layer; report a usage error instead of
+        # crashing on a missing file object.
+        parser.error('--config-layer is required to convert a schema 2 manifest')
+
+    converter = Converter_s2_to_s1(
+        manifest, config_layer,
+        namespace=args.namespace, repository=args.repository,
+        tag=args.tag)
+    manif_data = converter.convert()
+    print(manif_data)
+
+
+class Converter_s2_to_s1:
+    """
+    Converter class from schema 2 to schema 1.
+
+    Initialize it with a manifest and a config layer JSON documents,
+    and call convert() to obtain the signed manifest, as a JSON-encoded string.
+    """
+
+    # Blob digest assigned to every throwaway (metadata-only) layer.
+    EMPTY_LAYER = "sha256:a3ed95caeb02ffe68cdd9fd84406680ae93d633cb16422d00e8a7c22955b46d4"
+
+    def __init__(self, manifest, config_layer, namespace=None, repository=None, tag=None):
+        """
+        Initializer needs a manifest and a config layer as JSON documents.
+
+        namespace, repository and tag only end up in the name/tag fields of
+        the converted manifest; falsy values fall back to placeholders.
+        """
+        self.namespace = namespace or "ignored"
+        self.repository = repository or "test"
+        self.tag = tag or "latest"
+        self.manifest = manifest
+        self.config_layer = config_layer
+        # Both lists are (re)populated by compute_layers()
+        self.fs_layers = []
+        self.history = []
+
+    def convert(self):
+        """
+        Convert manifest from schema 2 to schema 1.
+
+        Returns the manifest as a JSON-encoded string. A manifest that is
+        already schema 1 is only re-serialized (it is not signed here);
+        anything else is converted and signed.
+        """
+        if self.manifest.get("schemaVersion") == 1:
+            log.info("Manifest is already schema 1")
+            return _jsonDumps(self.manifest)
+        log.info("Converting manifest to schema 1")
+        name = "%s/%s" % (self.namespace, self.repository)
+        self.compute_layers()
+        manifest = dict(name=name, tag=self.tag, architecture=self.config_layer['architecture'],
+                        schemaVersion=1, fsLayers=self.fs_layers, history=self.history)
+        # NOTE(review): load_key() on a bare curve presumably generates a new
+        # key pair for every conversion - confirm against pyjwkest.
+        key = jwk.ECKey().load_key(ecc.P256)
+        key.kid = getKeyId(key)
+        return sign(manifest, key)
+
+    def compute_layers(self):
+        """
+        Compute layers to be present in the converted image.
+
+        Empty (throwaway) layers will be created to store image metadata.
+        Populates self.fs_layers and self.history, both ordered with the
+        topmost layer first.
+        """
+        # Layers in v2s1 are in reverse order from v2s2
+        fs_layers = self._compute_fs_layers()
+        self.fs_layers = [dict(blobSum=layer.layer_id) for layer in fs_layers]
+        # Compute v1 compatibility
+        self.history = history_entries = []
+        parent = None
+        last_index = len(fs_layers) - 1
+        # Walk from the base layer upwards so each layer can name its parent
+        for i, fs_layer in enumerate(reversed(fs_layers)):
+            layer_id = self._compute_layer_id(fs_layer.layer_id, fs_layer.uncompressed_digest, i)
+            config = self._compute_v1_compatibility_config(
+                layer_id, fs_layer, last_layer=(i == last_index))
+            if parent is not None:
+                config['parent'] = parent
+            parent = layer_id
+            history_entries.append(dict(v1Compatibility=_jsonDumpsCompact(config)))
+        # Reverse again for proper order
+        history_entries.reverse()
+
+    def _compute_fs_layers(self):
+        """
+        Utility function to return a list of FS_Layer objects.
+
+        One FS_Layer is produced per history entry of the config layer,
+        topmost entry first. Entries flagged "empty_layer", and entries left
+        over once the manifest layers (or diff_ids) run out, are mapped to
+        EMPTY_LAYER with no uncompressed digest.
+        """
+        # Pair every blob with its diff_id, topmost layer first. zip() stops
+        # at the shorter of the two lists.
+        remaining = zip(reversed(self.manifest['layers']),
+                        reversed(self.config_layer['rootfs']['diff_ids']))
+        fs_layers = []
+        for curr_hist in reversed(self.config_layer['history']):
+            layer_id, uncompressed_dig = self.EMPTY_LAYER, None
+            if not curr_hist.get("empty_layer"):
+                # A default instead of a bare next() so a manifest without
+                # (enough) layers yields throwaway layers rather than
+                # leaking StopIteration to the caller.
+                pair = next(remaining, None)
+                if pair is not None:
+                    layer_id, uncompressed_dig = pair[0]['digest'], pair[1]
+            fs_layers.append(FS_Layer(layer_id, uncompressed_dig, curr_hist))
+        return fs_layers
+
+    def _compute_v1_compatibility_config(self, layer_id, fs_layer, last_layer=False):
+        """
+        Utility function to compute the v1 compatibility.
+
+        Returns the dict that gets serialized as the layer's v1Compatibility
+        entry. The caller adds the "parent" key.
+        """
+        if last_layer:
+            # The whole config layer becomes part of the v1compatibility
+            # (minus history and rootfs)
+            config = dict(self.config_layer)
+            config.pop("history", None)
+            config.pop("rootfs", None)
+        else:
+            history = fs_layer.history
+            # "created" and "created_by" are optional in a history entry, so
+            # do not fail when they are absent.
+            config = dict(container_config=dict(Cmd=history.get('created_by', '')))
+            if 'created' in history:
+                config['created'] = history['created']
+        if fs_layer.uncompressed_digest is None:
+            config['throwaway'] = True
+        config['id'] = layer_id
+        return config
+
+    @classmethod
+    def _compute_layer_id(cls, compressed_dig, uncompressed_dig, layer_index):
+        """
+        We need to make up an image ID for each layer.
+        We will digest:
+        * the compressed digest of the layer
+        * the uncompressed digest (if present; it will be missing for throw-away layers)
+        * the zero-padded integer of the layer number
+        The last one is added so we can get different image IDs for throw-away layers.
+
+        Returns the hex-encoded SHA-256 digest of those three parts.
+        """
+        dig = hashlib.sha256(compressed_dig.encode("ascii"))
+        if uncompressed_dig:
+            dig.update(uncompressed_dig.encode("ascii"))
+        layer_count = "%06d" % layer_index
+        dig.update(layer_count.encode("ascii"))
+        return dig.hexdigest()
+
+
+def _jsonDumps(data):
+    """Serialize data as indented JSON (3 spaces per level, keys sorted)."""
+    options = dict(indent=3, sort_keys=True, separators=(',', ': '))
+    return json.dumps(data, **options)
+
+
+def _jsonDumpsCompact(data):
+    """Serialize data as single-line JSON with sorted keys and no extra whitespace."""
+    options = dict(sort_keys=True, separators=(',', ':'))
+    return json.dumps(data, **options)
+
+
+def sign(data, key):
+    """
+    Sign the JSON document with an elliptic curve key.
+
+    data is serialized with _jsonDumps(), signed with key, and returned as a
+    string in which a "signatures" attribute has been spliced in as the last
+    element of the document, leaving the formatting of the rest untouched.
+    """
+    jdata = _jsonDumps(data)
+    # utcnow() is deprecated; take an aware UTC timestamp and drop the tzinfo
+    # so isoformat() emits no offset before the literal 'Z' is appended.
+    utc_now = datetime.datetime.now(datetime.timezone.utc)
+    now = utc_now.replace(microsecond=0, tzinfo=None).isoformat() + 'Z'
+    header = dict(alg="ES256", jwk=key.serialize())
+    # formatLength/formatTail describe the payload without its closing "\n}"
+    # and that two-character tail, respectively.
+    protected = dict(formatLength=len(jdata) - 2,
+                     formatTail=jws.b64encode_item(jdata[-2:]),
+                     time=now)
+    _jws = jws.JWS(jdata, **header)
+    # The compact serialization is "<protected>.<payload>.<signature>"; the
+    # payload part is not needed here.
+    protectedHeader, _payload, signature = _jws.sign_compact(
+        [key], protected=protected).split(".")
+    signatures = [dict(header=header, signature=signature, protected=protectedHeader)]
+    # Drop the opening "{" and the closing "\n}" of the signatures document
+    jsig = _jsonDumps(dict(signatures=signatures))[1:-2]
+    # Add the signature block at the end of the json string, keeping the
+    # formatting
+    return ''.join([jdata[:-2], ',', jsig, jdata[-2:]])
+
+
+def validate_signature(signed_mf):
+    """
+    Validate the signature of a signed manifest
+
+    A signed manifest is a JSON document with a signature attribute
+    as the last element.
+    """
+    # Verification needs the exact bytes that were signed, i.e. the document
+    # as it looked before the signature block was appended. Parsing and
+    # re-serializing the JSON could change field order or indentation, so
+    # the signature block is cut off with plain string operations and the
+    # closing brace is put back by hand.
+
+    # Everything before the signature block, the marker itself, and the rest
+    body, marker, signature_block = signed_mf.partition('   "signatures"')
+    # Replace the trailing ",\n" of the body with the original "\n}" ending
+    unsigned_doc = body[:-2] + '\n}'
+    # URL-safe base64 without the "=" padding
+    encoded = base64.urlsafe_b64encode(unsigned_doc.encode('ascii'))
+    encoded = encoded.decode('ascii').rstrip("=")
+    # Insert the payload as a JSON attribute just before the signatures
+    payload_attr = '   "payload": "{}",\n'.format(encoded)
+    message = ''.join([body, payload_attr, marker, signature_block])
+    verifier = jws.JWS()
+    verifier.verify_json(message.encode('ascii'))
+
+
+def getKeyId(key):
+    """
+    Build the key ID: the first 30 bytes of the SHA-256 digest of the
+    DER-encoded key, base32-encoded and written in colon-separated groups of
+    four characters (XXXX:YYYY:...)
+    """
+    digest = hashlib.sha256(toDer(key)).digest()
+    encoded = base64.b32encode(digest[:30]).decode()
+    return ':'.join(byN(encoded, 4))
+
+
+def toDer(key):
+    """Return the DER-encoded representation of the key"""
+    der = ecdsa.der
+    coordinate_size = key.curve.bytes
+    # 0x04 marks an uncompressed point; it is followed by the fixed-width
+    # x and y coordinates.
+    # NOTE(review): the leading 0x00 is presumably the bit string's
+    # unused-bits octet - confirm against ecdsa's encode_bitstring().
+    point = b"\x00\x04"
+    point += number2string(key.x, coordinate_size)
+    point += number2string(key.y, coordinate_size)
+    # 1.2.840.10045.3.1.7 is the OID of the prime256v1 (P-256) curve
+    curve_oid = der.encode_oid(1, 2, 840, 10045, 3, 1, 7)
+    algorithm = der.encode_sequence(ecdsa.keys.encoded_oid_ecPublicKey, curve_oid)
+    return der.encode_sequence(algorithm, der.encode_bitstring(point))
+
+
+def byN(strobj, N):
+    """
+    Generate the consecutive chunks of N characters that make up strobj;
+    the final chunk is shorter when the length is not a multiple of N
+    """
+    chars = iter(strobj)
+
+    def next_chunk():
+        return ''.join(itertools.islice(chars, N))
+
+    # iter() with a sentinel stops at the first empty chunk
+    for chunk in iter(next_chunk, ''):
+        yield chunk
+
+
+def number2string(num, order):
+    """
+    Return num as a big-endian byte string, zero-padded on the left to at
+    least order bytes (that is, 2*order hex digits before decoding)
+    """
+    width = 2 * order
+    # Hex digits, left-padded with zeros up to the requested width
+    hex_digits = ("%x" % num).rjust(width, '0')
+    return binascii.unhexlify(hex_digits)
+
+
+# Run the converter when executed as a script; main() returns None, so the
+# exit status is 0 unless an exception or an argparse error ends the run.
+if __name__ == '__main__':
+    sys.exit(main())
diff --git a/pulp_docker/tests/functional/test_convert.py b/pulp_docker/tests/functional/test_convert.py
@@ -0,0 +1,80 @@
+from pulp_docker import docker_convert
+
+
+class Test:
+    """Converter_s2_to_s1 test class"""
+
+    def test_convert(self):
+        """Test schema converter on a known manifest"""
+        cnv = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
+        signed_mf = cnv.convert()
+        # validate_signature() returns nothing; the check relies on it raising
+        # when verification fails (presumably - confirm against jwkest).
+        docker_convert.validate_signature(signed_mf)
+
+        # fsLayers are listed topmost layer first; the three history entries
+        # flagged empty_layer map to EMPTY_LAYER.
+        empty = dict(blobSum=cnv.EMPTY_LAYER)
+        assert [dict(blobSum="sha256:layer1"), empty, empty, empty,
+                dict(blobSum="sha256:base")] == cnv.fs_layers
+
+    def test_compute_layers(self):
+        """Test that computing the layers produces the expected data"""
+        cnv = docker_convert.Converter_s2_to_s1(MANIFEST, CONFIG_LAYER)
+        cnv.compute_layers()
+        empty = dict(blobSum=cnv.EMPTY_LAYER)
+        assert [dict(blobSum="sha256:layer1"), empty, empty, empty,
+                dict(blobSum="sha256:base")] == cnv.fs_layers
+        # Expected v1Compatibility documents, topmost layer first. The first
+        # one carries the config layer fields, the three throwaway ones are
+        # flagged as such, and every entry but the last (base) names its parent.
+        assert [
+            {'v1Compatibility': '{"architecture":"amd64","author":"Mihai Ibanescu <mihai.ibanescu@gmail.com>","config":{"Cmd":["/bin/bash"],"Hostname":"decafbad"},"container_config":{"Hostname":"decafbad","Tty":false},"created":"2019-09-05T21:28:52.173079282Z","docker_version":"1.13.1","id":"d7b329ed9d186ff20c25399e848116430ee9b6ae022cb9f3dc3406144ec3685d","parent":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446"}'},  # noqa
+            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop)  MAINTAINER Mihai Ibanescu <mihai.ibanescu@gmail.com>"},"created":"2019-09-05T21:28:43.305854958Z","id":"6474547c15d178825c70a42efdc59a88c6e30d764d184b415f32484562803446","parent":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","throwaway":true}'},  # noqa
+            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop)  CMD [\\"/bin/bash\\"]"},"created":"2018-03-06T00:48:12.679169547Z","id":"5708420291e0a86d8dc08ec40b2c1b1799117c33fe85032b87227632f70c1018","parent":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","throwaway":true}'},  # noqa
+            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop)  LABEL name=CentOS Base Image vendor=CentOS license=GPLv2 build-date=20180302"},"created":"2018-03-06T00:48:12.458578213Z","id":"9e9220abceaf86f2ad7820ae8124d01223d8ec022b9a6cb8c99a8ae1747137ea","parent":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a","throwaway":true}'},  # noqa
+            {'v1Compatibility': '{"container_config":{"Cmd":"/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / "},"created":"2018-03-06T00:48:12.077095981Z","id":"cb48c1db9c0a1ede7c85c85351856fc3e40e750931295c8fac837c63b403586a"}'},  # noqa
+        ] == cnv.history
+
+
+# Minimal schema 2 manifest used by the tests: two layers, base layer first.
+MANIFEST = dict(schemaVersion=2, layers=[
+    dict(digest="sha256:base"),
+    dict(digest="sha256:layer1"),
+])
+
+# Matching config layer: five history entries (oldest first), three of them
+# flagged empty_layer, and one diff_id for each of the two MANIFEST layers.
+CONFIG_LAYER = dict(
+    architecture="amd64",
+    author="Mihai Ibanescu <mihai.ibanescu@gmail.com>",
+    config=dict(Hostname="decafbad", Cmd=["/bin/bash"]),
+    container_config=dict(Hostname="decafbad", Tty=False),
+    created="2019-09-05T21:28:52.173079282Z",
+    docker_version="1.13.1",
+    history=[
+        {
+            "created": "2018-03-06T00:48:12.077095981Z",
+            "created_by": "/bin/sh -c #(nop) ADD file:FILE_CHECKSUM in / "
+        },
+        {
+            "created": "2018-03-06T00:48:12.458578213Z",
+            "created_by": "/bin/sh -c #(nop)  LABEL name=CentOS Base Image vendor=CentOS "
+            "license=GPLv2 build-date=20180302",
+            "empty_layer": True
+        },
+        {
+            "created": "2018-03-06T00:48:12.679169547Z",
+            "created_by": "/bin/sh -c #(nop)  CMD [\"/bin/bash\"]",
+            "empty_layer": True
+        },
+        {
+            "created": "2019-09-05T21:28:43.305854958Z",
+            "author": "Mihai Ibanescu <mihai.ibanescu@gmail.com>",
+            "created_by": "/bin/sh -c #(nop)  MAINTAINER Mihai Ibanescu <mihai.ibanescu@gmail.com>",
+            "empty_layer": True
+        },
+        {
+            "created": "2019-09-05T21:28:52.173079282Z",
+            "author": "Mihai Ibanescu <mihai.ibanescu@gmail.com>",
+            "created_by": "/bin/sh -c touch /usr/share/dummy.txt"
+        },
+    ],
+    rootfs={
+        "type": "layers",
+        "diff_ids": [
+            "sha256:uncompressed_base",
+            "sha256:uncompressed_layer1"
+        ],
+    },
+)
diff --git a/setup.py b/setup.py
@@ -4,6 +4,8 @@
 
 requirements = [
     "pulpcore~=3.0rc7",
+    'ecdsa~=0.13.2',
+    'pyjwkest~=1.4.0',
 ]
 
 

diff --git a/test_requirements.txt b/test_requirements.txt
@@ -8,3 +8,5 @@ git+https://github.com/PulpQE/pulp-smash.git#egg=pulp-smash
 # pin pydocstyle until https://gitlab.com/pycqa/flake8-docstrings/issues/36 is resolved
 pydocstyle<4
 pytest
+ecdsa~=0.13.2
+pyjwkest~=1.4.0