diff --git a/CHANGES/5319.feature b/CHANGES/5319.feature new file mode 100644 index 00000000..ac341667 --- /dev/null +++ b/CHANGES/5319.feature @@ -0,0 +1 @@ +Migration plan resources are validated against MongoDB (i.e. that they exist). diff --git a/CHANGES/5450.feature b/CHANGES/5450.feature new file mode 100644 index 00000000..82657da7 --- /dev/null +++ b/CHANGES/5450.feature @@ -0,0 +1 @@ +Migration plans are respected. diff --git a/pulp_2to3_migration/app/json_schema.py b/pulp_2to3_migration/app/json_schema.py index a968c247..0881153d 100644 --- a/pulp_2to3_migration/app/json_schema.py +++ b/pulp_2to3_migration/app/json_schema.py @@ -23,7 +23,7 @@ "name": { "type": "string" }, - "pulp2_repository_id": { + "pulp2_importer_repository_id": { "type": "string" }, "repository_versions": { @@ -51,22 +51,9 @@ } } }, - "required": ["name", "repository_versions"], + "required": ["name", "pulp2_importer_repository_id", "repository_versions"], "additionalProperties": false, - "$comment": "pulp2_repository_id field should be specified so we know which importer to use when migrating multiple pulp2repos into repo versions", - "if": { - "properties": { - "repository_versions": { - "type": "array", - "minItems": 2 - } - } - }, - "then": { - "dependencies": { - "repository_versions": ["pulp2_repository_id"] - } - } + "$comment": "pulp2_importer_repository_id field should be specified so we know which importer to use when migrating multiple pulp2 repos into repo versions" } } }, diff --git a/pulp_2to3_migration/app/migration.py b/pulp_2to3_migration/app/migration.py index 4fa14644..b91c45d7 100644 --- a/pulp_2to3_migration/app/migration.py +++ b/pulp_2to3_migration/app/migration.py @@ -17,14 +17,15 @@ _logger = logging.getLogger(__name__) -async def migrate_content(plugins_to_migrate): +async def migrate_content(plan): """ A coroutine to initiate content migration for each plugin. 
Args: - plugins_to_migrate(list): List of plugins to migrate + plan (MigrationPlan): Migration Plan to use """ content_migration_coros = [] + plugins_to_migrate = plan.get_plugins() progress_data = dict(message='Migrating content to Pulp 3', code='migrating.content', total=0) with ProgressReport(**progress_data) as pb: @@ -45,7 +46,7 @@ async def migrate_content(plugins_to_migrate): pb.done = pb.total -async def migrate_repositories(): +async def migrate_repositories(plan): """ A coroutine to migrate pre-migrated repositories. """ @@ -74,15 +75,17 @@ async def migrate_repositories(): pb.save() -async def migrate_importers(plugins_to_migrate): +async def migrate_importers(plan): """ A coroutine to migrate pre-migrated importers. Args: - plugins_to_migrate(list): A list of plugins which are being migrated. + plan (MigrationPlan): Migration Plan to use. """ # gather all needed plugin importer migrators importer_migrators = {} + plugins_to_migrate = plan.get_plugins() + for plugin, plugin_migrator in PLUGIN_MIGRATORS.items(): if plugin not in plugins_to_migrate: continue diff --git a/pulp_2to3_migration/app/models/base.py b/pulp_2to3_migration/app/models/base.py index 03c00b18..7af3270c 100644 --- a/pulp_2to3_migration/app/models/base.py +++ b/pulp_2to3_migration/app/models/base.py @@ -2,6 +2,13 @@ from pulpcore.plugin.models import Model +from pulp_2to3_migration.pulp2 import connection +from pulp_2to3_migration.pulp2.base import ( + Distributor, + Importer, + Repository, +) + class MigrationPlan(Model): """ @@ -11,24 +18,145 @@ class MigrationPlan(Model): plan (models.JSONField): The migration plan in the JSON format """ plan = JSONField() + _real_plan = None + + @property + def plan_view(self): + """ + Window to view the validated migration plan data through. + """ + if not self._real_plan: + self._real_plan = _InternalMigrationPlan(self.plan) + + return self._real_plan + + def get_plugins(self): + """ + Return a list of pulp2 plugins to migrate. 
+ """ + return self.plan_view.plugins_to_migrate def get_repositories(self): """ Return a list of pulp2 repositories to migrate or empty list if all should be migrated. """ - # TODO: get pulp2 repositories from the self.plan - return [] + return self.plan_view.repositories_to_migrate + + def get_pulp3_repository_setup(self): + """ + Return a dict of pulp3 repositories to create and information about e.g. versions. + """ + return self.plan_view.repositories_to_create def get_importers(self): """ Return a list of pulp2 importers to migrate or empty list if all should be migrated. """ - # TODO: get pulp2 importers from the self.plan - return [] + return self.plan_view.importers_to_migrate def get_distributors(self): """ Return a list of pulp2 distributors to migrate or empty list if all should be migrated. """ - # TODO: get pulp2 distributors from the self.plan - return [] + return self.plan_view.distributors_to_migrate + + def get_missing_resources(self): + """ + Return a dict of any resources listed in the plan but missing from Pulp 2. + + Repositories and Importers are enumerated by repo_id, Distributors by distributor_id. + """ + ret = {} + if self.plan_view.missing_repositories: + ret['repositories'] = self.plan_view.missing_repositories + if self.plan_view.missing_importers: + ret['importers'] = self.plan_view.missing_importers + if self.plan_view.missing_distributors: + ret['distributors'] = self.plan_view.missing_distributors + return ret + + +class _InternalMigrationPlan: + def __init__(self, migration_plan): + self.migration_plan = migration_plan + + self.plugins_to_migrate = [] + self.importers_to_migrate = [] + self.distributors_to_migrate = [] + # pre-migration *just* needs these repos and nothing else + self.repositories_to_migrate = [] + + # a nested data structure with a format roughly matching the JSON schema. 
+ # dictionary where the key is the name of the pulp3 repo and the value is a dict + # of other information like repo_versions, importer to use, etc. + self.repositories_to_create = {} + + self.missing_importers = [] + self.missing_repositories = [] + self.missing_distributors = [] + + # Make sure we've initialized the MongoDB connection first + connection.initialize() + self._populate() + self._check_missing() + + def _populate(self): + for plugin_data in self.migration_plan['plugins']: + self.plugins_to_migrate.append(plugin_data['type']) + if plugin_data.get('repositories'): + self._parse_repository_data(plugin_data.get('repositories')) + # TODO: do something with protection + + def _parse_repository_data(self, repository_data): + for repository in repository_data: + name = repository['name'] + + _find_importer_repo = repository['pulp2_importer_repository_id'] + self.importers_to_migrate.append(_find_importer_repo) + + repository_versions = self._parse_repository_version_data( + repository.get('repository_versions', []) + ) + + repository_data = { + "pulp2_importer_repository_id": _find_importer_repo, + "versions": repository_versions + # TODO: do something with protection + } + + self.repositories_to_create[name] = repository_data + + def _parse_repository_version_data(self, repository_version_data): + repository_versions = [] + + for repository_version in repository_version_data: + pulp2_repository_id = repository_version['pulp2_repository_id'] + self.repositories_to_migrate.append(pulp2_repository_id) + repository_versions.append(pulp2_repository_id) + + distributor_ids = repository_version.get('distributor_ids', []) + self.distributors_to_migrate.extend(distributor_ids) + + return repository_versions + + def _check_missing(self): + importers = Importer.objects( + repo_id__in=self.importers_to_migrate).only('repo_id') + present = set(importer.repo_id for importer in importers) + expected = set(self.importers_to_migrate) + + self.missing_importers = list(expected 
- present) + + repositories = Repository.objects( + repo_id__in=self.repositories_to_migrate).only('repo_id') + present = set(repository.repo_id for repository in repositories) + expected = set(self.repositories_to_migrate) + + self.missing_repositories = list(expected - present) + + distributors = Distributor.objects( + distributor_id__in=self.distributors_to_migrate).only('distributor_id') + present = set(distributor.distributor_id for distributor in distributors) + expected = set(self.distributors_to_migrate) + + self.missing_distributors = list(expected - present) diff --git a/pulp_2to3_migration/app/plugin/content.py b/pulp_2to3_migration/app/plugin/content.py index f3abebfc..7564a987 100644 --- a/pulp_2to3_migration/app/plugin/content.py +++ b/pulp_2to3_migration/app/plugin/content.py @@ -46,6 +46,7 @@ class DeclarativeContentMigration: :class:`~pulpcore.plugin.stages.DeclarativeContent` object for each Content unit that should be migrated to Pulp 3. """ + def __init__(self, first_stage): """Initializes DeclarativeContentMigration.""" self.first_stage = first_stage @@ -89,6 +90,7 @@ class ContentMigrationFirstStage(Stage): Creates hard links (or copies) for Pulp 2 content and creates DeclarativeContent for content being migrated. """ + def __init__(self, migrator): """ Args: diff --git a/pulp_2to3_migration/app/pre_migration.py b/pulp_2to3_migration/app/pre_migration.py index e056f76f..ee6072e0 100644 --- a/pulp_2to3_migration/app/pre_migration.py +++ b/pulp_2to3_migration/app/pre_migration.py @@ -35,13 +35,14 @@ ContentModel = namedtuple('ContentModel', ['pulp2', 'pulp_2to3_detail']) -async def pre_migrate_all_content(plugins_to_migrate): +async def pre_migrate_all_content(plan): """ Pre-migrate all content for the specified plugins. Args: - plugins_to_migrate(list): List of Pulp 2 plugin names to migrate content for + plan (MigrationPlan): Migration Plan to use for migration. 
""" + plugins_to_migrate = plan.get_plugins() pre_migrators = [] # get all the content models for the migrating plugins @@ -329,7 +330,7 @@ async def pre_migrate_distributor(repo, distributors): Pre-migrate a pulp 2 distributor. Args: - repo(Pulp2Repository): A pre-migrated pulp 2 repository which importer should be migrated + repo(Pulp2Repository): A pre-migrated pulp 2 repository which distributor should be migrated distributors(list): A list of distributors which are expected to be migrated. If empty, all are migrated. """ diff --git a/pulp_2to3_migration/app/serializers.py b/pulp_2to3_migration/app/serializers.py index 3034fabd..d56e369a 100644 --- a/pulp_2to3_migration/app/serializers.py +++ b/pulp_2to3_migration/app/serializers.py @@ -15,7 +15,7 @@ IdentityField ) -from .json_schema import SCHEMA +from pulp_2to3_migration.app.json_schema import SCHEMA from .models import MigrationPlan, Pulp2Content @@ -85,9 +85,16 @@ class MigrationPlanRunSerializer(serializers.Serializer): """ A serializer for running a migration plan. """ + validate = serializers.BooleanField( + help_text=_('If ``True``, migration cannot happen without successful validation ' + 'of the Migration Plan'), + required=False, + default=False, + write_only=True + ) dry_run = serializers.BooleanField( help_text=_('If ``True``, performs validation of a Migration Plan only, no migration is ' - 'run. 
If ``False``, both validation and migration are run.'), + 'run.'), required=False, default=False, write_only=True diff --git a/pulp_2to3_migration/app/tasks/migrate.py b/pulp_2to3_migration/app/tasks/migrate.py index 6a69f299..40eff145 100644 --- a/pulp_2to3_migration/app/tasks/migrate.py +++ b/pulp_2to3_migration/app/tasks/migrate.py @@ -12,13 +12,14 @@ migrate_repositories, ) from pulp_2to3_migration.app.models import MigrationPlan +from pulp_2to3_migration.exceptions import PlanValidationError from pulp_2to3_migration.pulp2 import connection _logger = logging.getLogger(__name__) -def migrate_from_pulp2(migration_plan_pk, dry_run=False): +def migrate_from_pulp2(migration_plan_pk, validate=False, dry_run=False): """ Main task to migrate from Pulp 2 to Pulp 3. @@ -26,27 +27,31 @@ def migrate_from_pulp2(migration_plan_pk, dry_run=False): Args: migration_plan_pk (str): The migration plan PK. + validate (bool): If True, don't migrate unless validation is successful. dry_run (bool): If True, nothing is migrated, only validation happens. """ + # MongoDB connection initialization + connection.initialize() + plan = MigrationPlan.objects.get(pk=migration_plan_pk) + missing_resources = plan.get_missing_resources() + + if (validate or dry_run) and missing_resources: + raise PlanValidationError( + "Validation failed: resources missing {}".format(missing_resources) + ) + if dry_run: - _logger.debug('Running in a dry-run mode.') - # TODO: Migration Plan validation return - # MongoDB connection initialization - connection.initialize() - - # TODO: Migration Plan parsing and validation - # For now, the list of plugins to migrate is hard-coded. 
- plugins_to_migrate = ['iso'] + # TODO: if plan is empty for a plugin, only migrate downloaded content loop = asyncio.get_event_loop() loop.run_until_complete(pre_migrate_all_without_content(plan)) - loop.run_until_complete(migrate_repositories()) - loop.run_until_complete(migrate_importers(plugins_to_migrate)) - loop.run_until_complete(pre_migrate_all_content(plugins_to_migrate)) - loop.run_until_complete(migrate_content(plugins_to_migrate)) # without RemoteArtifacts yet -# loop.run_until_complete(create_repo_versions()) + loop.run_until_complete(migrate_repositories(plan)) + loop.run_until_complete(migrate_importers(plan)) + loop.run_until_complete(pre_migrate_all_content(plan)) + loop.run_until_complete(migrate_content(plan)) # without RemoteArtifacts yet +# loop.run_until_complete(create_repo_versions(plan)) # loop.run_until_complete(migrate_distributors(plugins_to_migrate)) loop.close() diff --git a/pulp_2to3_migration/app/viewsets.py b/pulp_2to3_migration/app/viewsets.py index ad6e69e0..52f4c51c 100644 --- a/pulp_2to3_migration/app/viewsets.py +++ b/pulp_2to3_migration/app/viewsets.py @@ -53,12 +53,14 @@ def run(self, request, pk): context={'request': request} ) serializer.is_valid(raise_exception=True) + validate = serializer.validated_data.get('validate', False) dry_run = serializer.validated_data.get('dry_run', False) result = enqueue_with_reservation( migrate_from_pulp2, [PULP_2TO3_MIGRATION_RESOURCE], kwargs={ 'migration_plan_pk': migration_plan.pk, + 'validate': validate, 'dry_run': dry_run } ) diff --git a/pulp_2to3_migration/exceptions.py b/pulp_2to3_migration/exceptions.py index ae8b7f67..6e195f86 100644 --- a/pulp_2to3_migration/exceptions.py +++ b/pulp_2to3_migration/exceptions.py @@ -8,6 +8,7 @@ class ConfigurationError(PulpException): Exception that is raised when a necessary configuration parameters are not specified, of a wrong type, or conflicting. 
""" + def __init__(self, msg): """ :param msg: error message specifying what exactly is out of place @@ -18,3 +19,12 @@ def __init__(self, msg): def __str__(self): return self.msg + + +class PlanValidationError(Exception): + """ + Exception to be thrown when validating the MigrationPlan. + + e.g. Repository specified does not exist. + """ + pass diff --git a/pulp_2to3_migration/pulp2/connection.py b/pulp_2to3_migration/pulp2/connection.py index 5151fc18..79121b3a 100644 --- a/pulp_2to3_migration/pulp2/connection.py +++ b/pulp_2to3_migration/pulp2/connection.py @@ -53,12 +53,8 @@ def initialize(name=None, seeds=None, max_pool_size=None, replica_set=None, max_ # We do not allow a second call to initialize(), as mongoengine.connect() will cache the last # initialized connection for all calls. Thus, any process that attempts to call initialize() - # again might alter which database all further queries are made against. By raising this - # Exception, we can ensure that only one database connection is established per process which - # will help us to ensure that the connection does not get overridden later. + # again might alter which database all further queries are made against. if _CONNECTION or _DATABASE: - _logger.warn(_("The database is already initialized. It should not be called more than " - "once.")) return try: connection_kwargs = {}