Skip to content
This repository has been archived by the owner on Dec 7, 2022. It is now read-only.

Commit

Permalink
Create a new Blob model.
Browse files Browse the repository at this point in the history
This commit introduces a new Unit type called Blob, and converts the
Docker v2 sync code to use it instead of overloading the Image model
that is also used by v1 sync and publish. This will simplify the
concepts in Pulp and make the v2 distributor easier to write.
Conveniently, it also separates the locations of the Units on the
filesystem.

https://pulp.plan.io/issues/967

re #967
  • Loading branch information
Randy Barlow committed Aug 6, 2015
1 parent e069fa4 commit 6de0ca7
Show file tree
Hide file tree
Showing 7 changed files with 238 additions and 138 deletions.
81 changes: 68 additions & 13 deletions common/pulp_docker/common/models.py
Expand Up @@ -7,23 +7,72 @@
from pulp_docker.common import constants


class Blob(object):
    """
    This class is used to represent Docker v2 blobs.

    A Blob is identified solely by its digest: the digest is the only field in the unit key and is
    also used as the relative storage path.
    """
    TYPE_ID = 'docker_blob'

    def __init__(self, digest):
        """
        Initialize the Blob.

        :param digest: The blob's digest.
        :type  digest: basestring
        """
        self.digest = digest

    @property
    def unit_key(self):
        """
        Return the Blob's unit key.

        :return: unit key, a dictionary with the single key 'digest'
        :rtype:  dict
        """
        return {
            'digest': self.digest
        }

    @property
    def metadata(self):
        """
        A blob has no metadata, so return an empty dictionary.

        :return: Empty dictionary
        :rtype:  dict
        """
        return {}

    @property
    def relative_path(self):
        """
        Return the Blob's relative path for filesystem storage.

        :return: the relative path to where this Blob should live (the digest itself)
        :rtype:  basestring
        """
        return self.digest


class Image(object):
"""
This class is used to represent Docker v1 images and Docker v2 blobs.
This class is used to represent Docker v1 images.
"""
TYPE_ID = constants.IMAGE_TYPE_ID

def __init__(self, image_id, parent_id, size):
"""
:param image_id: For Docker v1 images, this field will store the image_id. For Docker v2
blobs, this field will store the blob's digest.
:type image_id: basestring
:param parent_id: parent's unique image ID
:type parent_id: basestring
:param size: size of the image in bytes, as reported by docker.
This can be None, because some very old docker images
do not contain it in their metadata.
:type size: int or NoneType
Initialize the Image.
:param image_id: The Image's id.
:type image_id: basestring
:param parent_id: parent's unique image ID
:type parent_id: basestring
:param size: size of the image in bytes, as reported by docker.
This can be None, because some very old docker images
do not contain it in their metadata.
:type size: int or NoneType
"""
self.image_id = image_id
self.parent_id = parent_id
Expand All @@ -32,6 +81,8 @@ def __init__(self, image_id, parent_id, size):
@property
def unit_key(self):
"""
Return the Image's unit key.
:return: unit key
:rtype: dict
"""
Expand All @@ -42,6 +93,8 @@ def unit_key(self):
@property
def relative_path(self):
"""
Return the Image's relative path for filesystem storage.
:return: the relative path to where this image's directory should live
:rtype: basestring
"""
Expand All @@ -50,6 +103,8 @@ def relative_path(self):
@property
def unit_metadata(self):
"""
Return the Image's Metadata.
:return: a subset of the complete docker metadata about this image,
including only what pulp_docker cares about
:rtype: dict
Expand Down Expand Up @@ -86,9 +141,9 @@ def __init__(self, digest, name, tag, architecture, fs_layers, history, schema_v
:param architecture: The host architecture on which the image is intended to run
:type architecture: basestring
:param fs_layers: A list of dictionaries. Each dictionary contains one key-value pair
that represents a layer of the image. The key is blobSum, and the
value is the digest of the referenced layer. See the documentation
referenced in the class docblock for more information.
that represents a layer (a Blob) of the image. The key is blobSum,
and the value is the digest of the referenced layer. See the
documentation referenced in the class docblock for more information.
:type fs_layers: list
:param history: This is a list of unstructured historical data for v1 compatibility.
Each member is a dictionary with a "v1Compatibility" key that indexes
Expand Down
51 changes: 51 additions & 0 deletions common/test/unit/test_models.py
Expand Up @@ -34,6 +34,57 @@ def test_metadata(self):
self.assertEqual(metadata.get('size'), 1024)


class TestBlob(unittest.TestCase):
    """
    Tests covering the Blob model: construction, its type id, and its derived properties.
    """
    # Every test exercises a Blob built from this same digest.
    DIGEST = 'sha256:5f70bf18a086007016e948b04aed3b82103a36bea41755b6cddfaf10ace3c6ef'

    def _make_blob(self):
        """
        Build a Blob from the shared test digest.
        """
        return models.Blob(self.DIGEST)

    def test___init__(self):
        """
        The constructor should store the digest it was given.
        """
        blob = self._make_blob()

        self.assertEqual(blob.digest, self.DIGEST)

    def test_type_id(self):
        """
        The TYPE_ID class attribute should be 'docker_blob'.
        """
        self.assertEqual(models.Blob.TYPE_ID, 'docker_blob')

    def test_unit_key(self):
        """
        The unit_key property should contain only the digest.
        """
        blob = self._make_blob()

        self.assertEqual(blob.unit_key, {'digest': self.DIGEST})

    def test_metadata(self):
        """
        The metadata property should be an empty dictionary.
        """
        blob = self._make_blob()

        self.assertEqual(blob.metadata, {})

    def test_relative_path(self):
        """
        The relative_path property should be the digest itself.
        """
        blob = self._make_blob()

        self.assertEqual(blob.relative_path, self.DIGEST)


class TestManifest(unittest.TestCase):
"""
This class contains tests for the Manifest class.
Expand Down
44 changes: 16 additions & 28 deletions plugins/pulp_docker/plugins/importers/sync.py
@@ -1,13 +1,10 @@
"""
This module contains the primary sync entry point. Most of the code in this module is for syncing
Docker v2 registries, but if the feed_url is determined not to be a v2 registry this module will
call the SyncStep found in pulp_docker.plugins.importers.v1_sync instead.
This module contains the primary sync entry point for Docker v2 registries.
"""
from gettext import gettext as _
import logging
import os
import shutil
import stat

from pulp.common.plugins import importer_constants
from pulp.plugins.util import nectar_config
Expand All @@ -23,9 +20,7 @@

class SyncStep(PluginStep):
"""
This PluginStep is the primary entry point into a repository sync against a Docker registry. It
will work for either v1 or v2 registries, though if the registry is determined to be a v1
registry it will simply create the old v1 SyncStep as its only child step.
This PluginStep is the primary entry point into a repository sync against a Docker v2 registry.
"""
# The sync will fail if these settings are not provided in the config
required_settings = (constants.CONFIG_KEY_UPSTREAM_NAME, importer_constants.KEY_FEED)
Expand All @@ -37,7 +32,7 @@ def __init__(self, repo=None, conduit=None, config=None,
required keys are present. It then constructs some needed items (such as a download config),
and determines whether the feed URL is a Docker v2 registry or not. If it is, it
instantiates child tasks that are appropriate for syncing a v2 registry, and if it is not it
instantiates the old v1 SyncStep as its only child step.
raises a NotImplementedError.
:param repo: repository to sync
:type repo: pulp.plugins.model.Repository
Expand All @@ -60,7 +55,7 @@ def __init__(self, repo=None, conduit=None, config=None,
upstream_name = config.get(constants.CONFIG_KEY_UPSTREAM_NAME)
url = config.get(importer_constants.KEY_FEED)
# The GetMetadataStep will set this to a list of dictionaries of the form
# {'image_id': digest}.
# {'digest': digest}.
self.available_units = []

# Create a Repository object to interact with.
Expand All @@ -74,8 +69,8 @@ def __init__(self, repo=None, conduit=None, config=None,
working_dir=working_dir)
self.add_child(self.step_get_metadata)
# save this step so its "units_to_download" attribute can be accessed later
self.step_get_local_units = GetLocalImagesStep(
constants.IMPORTER_TYPE_ID, constants.IMAGE_TYPE_ID, ['image_id'], self.working_dir)
self.step_get_local_units = GetLocalBlobsStep(
constants.IMPORTER_TYPE_ID, models.Blob.TYPE_ID, ['digest'], self.working_dir)
self.add_child(self.step_get_local_units)
self.add_child(
DownloadStep(
Expand All @@ -94,8 +89,8 @@ def generate_download_requests(self):
:rtype: types.GeneratorType
"""
for unit_key in self.step_get_local_units.units_to_download:
image_id = unit_key['image_id']
yield self.index_repository.create_blob_download_request(image_id,
digest = unit_key['digest']
yield self.index_repository.create_blob_download_request(digest,
self.get_working_dir())

def sync(self):
Expand Down Expand Up @@ -207,11 +202,11 @@ def process_main(self):
available_blobs.add(layer['blobSum'])

# Update the available units with the blobs we learned about
available_blobs = [{'image_id': d} for d in available_blobs]
available_blobs = [{'digest': d} for d in available_blobs]
self.parent.parent.available_units.extend(available_blobs)


class GetLocalImagesStep(GetLocalUnitsStep):
class GetLocalBlobsStep(GetLocalUnitsStep):
def _dict_to_unit(self, unit_dict):
"""
convert a unit dictionary (a flat dict that has all unit key, metadata,
Expand All @@ -231,8 +226,7 @@ def _dict_to_unit(self, unit_dict):
:return: a unit instance
:rtype: pulp.plugins.model.Unit
"""
model = models.Image(unit_dict['image_id'], unit_dict.get('parent_id'),
unit_dict.get('size'))
model = models.Blob(unit_dict['digest'])
return self.get_conduit().init_unit(model.TYPE_ID, model.unit_key, {},
model.relative_path)

Expand Down Expand Up @@ -295,15 +289,13 @@ def process_main(self):
_logger.debug('saving manifest %s' % model.digest)
self.get_conduit().save_unit(unit)

# Save the Images
# Save the Blobs
for unit_key in self.parent.step_get_local_units.units_to_download:
image_id = unit_key['image_id']
size = os.stat(os.path.join(self.working_dir, unit_key['image_id']))[stat.ST_SIZE]
model = models.Image(image_id, None, size)
unit = self.get_conduit().init_unit(model.TYPE_ID, model.unit_key, model.unit_metadata,
model = models.Blob(unit_key['digest'])
unit = self.get_conduit().init_unit(model.TYPE_ID, model.unit_key, model.metadata,
model.relative_path)
self._move_file(unit)
_logger.debug('saving Image %s' % image_id)
_logger.debug('saving Blob %s' % unit_key)
self.get_conduit().save_unit(unit)

def _move_file(self, unit):
Expand All @@ -314,9 +306,5 @@ def _move_file(self, unit):
:param unit: a pulp unit
:type unit: pulp.plugins.model.Unit
"""
if unit.type_id == models.Image.TYPE_ID:
filename = unit.unit_key['image_id']
elif unit.type_id == models.Manifest.TYPE_ID:
filename = unit.unit_key['digest']
_logger.debug('moving files in to place for Unit {}'.format(unit))
shutil.move(os.path.join(self.working_dir, filename), unit.storage_path)
shutil.move(os.path.join(self.working_dir, unit.unit_key['digest']), unit.storage_path)
29 changes: 26 additions & 3 deletions plugins/pulp_docker/plugins/importers/v1_sync.py
Expand Up @@ -10,12 +10,12 @@

from pulp.common.plugins import importer_constants
from pulp.plugins.util import nectar_config
from pulp.plugins.util.publish_step import PluginStep, DownloadStep
from pulp.plugins.util.publish_step import DownloadStep, GetLocalUnitsStep, PluginStep
from pulp.server.exceptions import MissingValue

from pulp_docker.common import constants, models
from pulp_docker.plugins import registry
from pulp_docker.plugins.importers import sync, tags
from pulp_docker.plugins.importers import tags


_logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -62,7 +62,7 @@ def __init__(self, repo=None, conduit=None, config=None,

self.add_child(GetMetadataStep(working_dir=working_dir))
# save this step so its "units_to_download" attribute can be accessed later
self.step_get_local_units = sync.GetLocalImagesStep(
self.step_get_local_units = GetLocalImagesStep(
constants.IMPORTER_TYPE_ID, constants.IMAGE_TYPE_ID, ['image_id'], working_dir)
self.add_child(self.step_get_local_units)
self.add_child(DownloadStep(constants.SYNC_STEP_DOWNLOAD,
Expand Down Expand Up @@ -219,6 +219,29 @@ def find_and_read_ancestry_file(image_id, parent_dir):
return json.load(ancestry_file)


class GetLocalImagesStep(GetLocalUnitsStep):
    def _dict_to_unit(self, unit_dict):
        """
        Turn a flat unit dictionary into a Unit object for a Docker v1 Image.

        The dictionary holds the unit key, metadata, and other keys all at its root level. An
        Image model is built from it so that the unit key and the storage path can be derived.

        An empty metadata dict is handed to the conduit on purpose: any keys in the returned
        unit's "metadata" would overwrite the values currently saved for the unit, and no changes
        are wanted here.

        :param unit_dict: a flat dictionary representing a unit in pulp; 'image_id' is required,
                          'parent_id' and 'size' are optional
        :type  unit_dict: dict

        :return: a unit instance
        :rtype:  pulp.plugins.model.Unit
        """
        image_id = unit_dict['image_id']
        parent_id = unit_dict.get('parent_id')
        size = unit_dict.get('size')
        image = models.Image(image_id, parent_id, size)

        conduit = self.get_conduit()
        return conduit.init_unit(image.TYPE_ID, image.unit_key, {}, image.relative_path)


class SaveUnits(PluginStep):
def __init__(self, working_dir):
"""
Expand Down

0 comments on commit 6de0ca7

Please sign in to comment.