From fd43b1e775d3d1ac9d983c2789887aa08582fff2 Mon Sep 17 00:00:00 2001 From: Dennis Kliban Date: Thu, 21 Apr 2016 10:22:06 -0400 Subject: [PATCH] Adds RPM rsync distributor The RPM rsync distributor allows a user to publish an already published RPM repo to a remote server. closes #1759 https://pulp.plan.io/issues/1759 --- docs/tech-reference/index.rst | 1 + docs/tech-reference/rsync-distributor.rst | 126 ++- docs/user-guide/release-notes/2.10.x.rst | 4 + plugins/pulp_rpm/plugins/db/models.py | 9 + .../distributors/rsync/configuration.py | 286 ------- .../plugins/distributors/rsync/distributor.py | 133 +-- .../plugins/distributors/rsync/publish.py | 791 ++---------------- .../plugins/distributors/rsync/utils.py | 99 --- plugins/pulp_rpm/plugins/error_codes.py | 1 + plugins/setup.py | 5 +- pulp-rpm.spec | 1 + 11 files changed, 193 insertions(+), 1263 deletions(-) delete mode 100644 plugins/pulp_rpm/plugins/distributors/rsync/configuration.py delete mode 100644 plugins/pulp_rpm/plugins/distributors/rsync/utils.py diff --git a/docs/tech-reference/index.rst b/docs/tech-reference/index.rst index d73f35d48..7eec9ffa6 100644 --- a/docs/tech-reference/index.rst +++ b/docs/tech-reference/index.rst @@ -6,6 +6,7 @@ Technical Reference yum-plugins export-distributor + rsync-distributor iso-plugins rpm sort-index diff --git a/docs/tech-reference/rsync-distributor.rst b/docs/tech-reference/rsync-distributor.rst index 22779d265..413d07321 100644 --- a/docs/tech-reference/rsync-distributor.rst +++ b/docs/tech-reference/rsync-distributor.rst @@ -1,92 +1,78 @@ -================= -Rsync distributor -================= +===================== +RPM rsync Distributor +===================== Purpose: ======== -Rsync distributor should be used for syncing already published files into remote -machine. That means, rsync distributor isn't designed to work as standalone -distributor. - -How it works -============ -Rsync distributor publishes in fast-forward mode by default. 
It selects -new content since last_published timestamp, however it also check predistributor -publish history if there weren't publishes that were non-fast-forward or -failed. In that case, cdn_distributor publishes everything from scratch. -Publish process is diveded into multiple steps: -unit query, origin publish, content publish, extra data publish - -1. Unit query step fetchs units from database and creates realive symlinks to -units _storage_dir in //.relative - -2. origin publish syncs hard files from /var/lib/pulp/content/ to remote server -There can by multiple origin publish steps if there are more then one content -types to be synced. - -3. content publish syncs files //.relative to -remote server. Remote repo directory is calculated from ``remote_root`` and -relative_url. - -4. extra data publish happens only if there are extra data to be synced. For -example extra data for rpm repo are repodata/* files. - - -Supported types -============== -For now, rsync distributor can sync any content from rpm and docker repositories - -Requirements -============ -To work proprely, your repository needs to have associated distributor that -procude data output. For docker repositories, docker_web_distributor is needed -and for rpm repositories, yum_distributor is needed. For now, cdn_distributor -will use configuration from first distributor with desired distributor_type -mentioned above. +The RPM rsync distributor publishes RPM content to a remote server. The distributor uses rsync over +ssh to perform the file transfer. Configuration ============= -Here's example of cdn_distributor configuration - -{ - "handler_type": "rsync", - "remote": { - "auth_type": "publickey", - "login": "rcm-cron", - "key_path": "/etc/httpd/id_rsa", - "ssh_login": "rcm-cron", - "host": "pulp04.web.stage.ext.phx2.redhat.com", - "remote_root": "/mnt/cdn/cdn-stage", - "skip_repodata": false +Here's an example of rpm_rsync_distributor configuration: + +.. 
code-block:: json + + { + "distributor_id": "my_rpm_rsync_distributor", + "distributor_type_id": "rpm_rsync_distributor", + "distributor_config": { + "remote": { + "auth_type": "publickey", + "ssh_user": "foo", + "ssh_identity_file": "/home/user/.ssh/id_rsa", + "host": "192.168.121.1", + "root": "/home/foo/pulp_root_dir" + }, + "predistributor_id": "yum_distributor", + } } -} -``handler_type`` - for now rsync is only supported handler + +``predistributor_id`` + The id of the yum_distributor associated with the same repository. The publish history of this + yum_distributor determines if the publish will be incremental. + +The ``distributor_config`` contains a ``remote`` section made up of the following settings: ``auth_type`` - for now publickey is only supported + Two authentication methods are supported: ``publickey`` and ``password``. -``login`` - ssh login to remote machine +``ssh_user`` + ssh user for remote server -``key_path`` - path to public key on pulp machine that will be used as identity file for ssh +``ssh_identity_file`` + A path to the private key that will be used as identity file for ssh. When ``auth_type`` is + ``publickey`` this is a required config. The key has to be readable by user ``apache``. -``ssh_login`` - artifact from old ages, can be probably removed. Isn't used anymore +``ssh_password`` + Password for ``ssh_user`` on remote server. Password is required when ``auth_type`` is 'password'. ``host`` - remote hostname + The hostname of the remote server. -``remote_root`` - remote root directory with all the data. (content + published_contnet) +``root`` + Absolute path to the remote root directory where all the data (content and published content) + lives. Remote equivalent to ``/var/lib/pulp``. The repository's relative url is appended to the + ``root`` to determine the location of published repository. Optional configuration ---------------------- ``skip_fast_forward`` - if true, rsync distribtor will sync all content of repository. 
+ If true, the rsync distributor will publish all of the content of the repository. If false + (default), the publish is incremental when the predistributor's last publish was incremental. + +``content_units_only`` + If true, the distributor will publish content units only (e.g. ``/var/lib/pulp/content``). The + symlinks of a published repository will not be rsynced. + +``skip_repodata`` + If true, repodata will be omitted from the publish. + +``delete`` + If true, ``--delete`` is appended to the rsync command for symlinks and repodata so that any old files no longer present in + the local published directory are removed from the remote server. -``origin_only`` - if true, rsync distribtor will sync hard content (e.g. /var/lib/pulp/content) +``remote_units_path`` + The relative path from the ``root`` where unit files should live. Defaults to ``content/units``. diff --git a/docs/user-guide/release-notes/2.10.x.rst b/docs/user-guide/release-notes/2.10.x.rst index cde952136..1af1c43bd 100644 --- a/docs/user-guide/release-notes/2.10.x.rst +++ b/docs/user-guide/release-notes/2.10.x.rst @@ -11,4 +11,8 @@ New Features * The yum distributor and export distributor now can use the configured checksum type for updateinfo.xml generation which may differ from the configured checksum type for all repo metadata. It can be specified via pulp-admin repo "create" and "update" commands. + * Full re-sync can be done now using ``--force-full`` option. + +* The RPM rsync distributor has been added. The new distributor can be used to publish repositories + published by yum_distributor to a remote server. 
diff --git a/plugins/pulp_rpm/plugins/db/models.py b/plugins/pulp_rpm/plugins/db/models.py index 1767d2080..ebcdb536c 100644 --- a/plugins/pulp_rpm/plugins/db/models.py +++ b/plugins/pulp_rpm/plugins/db/models.py @@ -880,6 +880,15 @@ def _update_location(package_element, filename): location_element = package_element.find('location') location_element.set('href', filename) + def get_symlink_name(self): + """ + Provides the name that should be used when creating a symlink. + + :return: file name as it appears in a published repository + :rtype: str + """ + return self.filename + class RPM(RpmBase): # TODO add docstring to this class diff --git a/plugins/pulp_rpm/plugins/distributors/rsync/configuration.py b/plugins/pulp_rpm/plugins/distributors/rsync/configuration.py deleted file mode 100644 index 342768ae5..000000000 --- a/plugins/pulp_rpm/plugins/distributors/rsync/configuration.py +++ /dev/null @@ -1,286 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2013 Red Hat, Inc. -# -# This software is licensed to you under the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the License -# (GPLv2) or (at your option) any later version. -# There is NO WARRANTY for this software, express or implied, including the -# implied warranties of MERCHANTABILITY, NON-INFRINGEMENT, or FITNESS FOR A -# PARTICULAR PURPOSE. -# You should have received a copy of GPLv2 along with this software; -# if not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt - -import os -import urlparse - -#from pulp.server.managers.repo.distributor import RepoDistributorManager -from pulp.server.db.model import Distributor -import logging - -LOG_PREFIX_NAME="pulp.plugins" -def getLogger(name): - log_name = LOG_PREFIX_NAME + "." 
+ name - return logging.getLogger(log_name) -_LOG = getLogger(__name__) - -REMOTE_CONTENT_BASE = os.path.join("content", "origin") - - -class OneOfValidation(object): - def __init__(self, values): - self.values = values - - def __call__(self, value): - if value in self.values: - return (True, None) - else: - return (False, self._err(value)) - - def _err(self, value): - values_str = ", ".join(self.values) - return "{value} is not in allowed values: {allowed}" % (value, values_str) - -class NonEmptyValidation(object): - def __call__(self, value): - if value: - return (True, None) - else: - return (False, self._err(value)) - - def _err(self, value): - return "attribute cannot be empty" - -class TypeValidation(object): - def __init__(self, allowed_types): - self.allowed_types = allowed_types - - def __call__(self, value): - if [True for _type in self.allowed_types if issubclass(type(value), _type)]: - return (True, None) - else: - return (False, self._err(value)) - - def _err(self, value): - str_types = ", ".join(self.allowed_types) - return "type of atribute(%s) is not in allowed types: %s" % (type(value), str_types) - - -# These information are used to locate the repo diretory previously generated by -# YumHTTPDistributor. For the same repo, their values should be the same as set -# for YumHTTPDistributor. -REQUIRED_CONFIG_KEYS = ( - 'http', - 'https', - 'handler_type', # type: str. Copy mechanism. Options: rsync, scp - 'remote', # type: {}. Configurations of CDN publish. 
- ) - -CDN_MANDATORY_KEYS = {"auth_type": [OneOfValidation(["publickey", "password"])], - "key_path": [TypeValidation([basestring]), NonEmptyValidation()], - "login": [TypeValidation([basestring]), NonEmptyValidation()], - "ssh_login": [TypeValidation([basestring]), NonEmptyValidation()], - "host": [TypeValidation([basestring]), NonEmptyValidation()], - "root": [TypeValidation([basestring]), NonEmptyValidation()]} - -CDN_OPTIONAL_KEYS = ["relative_dir"] - -''' -A sample rh_cdn_distributor.conf can look like this: - -{ - 'http': false, - 'https': true, - 'handler_type': 'rsync', - - 'remote': { - 'auth_type': 'publickey', - 'key_path': '/full/path/to/key', - 'login': 'cdn_login_name', - 'ssh_login': 'ssh_login_name', - 'host': 'hostname.domain', - 'root': '/var/lib/cdn/', - 'relative_dir': 'repo_name_or_other_bits_for_path' - } -} -''' - -# The following two configs are copied from the yum_distributor on branch 'pulp-2.3' -# There is better way to do this by sharing these configs with yum_distributor -# instead of duplicating it. This can be done when branch 'pulp-2.4' is present. -HTTP_PUBLISH_DIR = "/var/lib/pulp/published/yum/http/repos" -HTTPS_PUBLISH_DIR = "/var/lib/pulp/published/yum/https/repos" -DOCKER_V1_PUBLISH_DIR = '/var/lib/pulp/published/docker/v1/web' -DOCKER_V2_PUBLISH_DIR = '/var/lib/pulp/published/docker/v2/web' - -DOCKER_UNIT_TYPES = ["docker_image"] -DOCKER_V2_UNIT_TYPES = ["docker_manifest","docker_blob"] -RPM_UNIT_TYPES = ["rpm"] - -def validate_config(repo, config, config_conduit): - """ - Validate the prospective configuration instance for the given repository. 
- - :param repo: repository to validate the config for - :type repo: pulp.plugins.model.Repository - :param config: configuration instance to validate - :type config: pulp.plugins.config.PluginCallConfiguration - :param config_conduit: conduit providing access to relevant Pulp functionality - :type config_conduit: pulp.plugins.conduits.repo_config.RepoConfigConduit - :return: tuple of (bool, str) stating that the configuration is valid or not and why - :rtype: tuple of (bool, str or None) - """ - _config = config.flatten() # now config is {} - err_list = [] - crit_fail = False - if not "cdn" in _config or ("cdn" in _config and not isinstance(_config["cdn"], dict)): - err_list.append("'cdn' dict missing in distributor's configuration") - crit_fail = True - if not crit_fail: - missing_attr_tmpl = "'%s' missing in 'cdn' section of distributor's configuration" - for attr, validations in CDN_MANDATORY_KEYS.iteritems(): - if attr not in _config["cdn"]: - err_list.append(missing_attr_tmpl % attr) - continue - for validation in validations: - succeed, _err = validation(_config["cdn"][attr]) - if not succeed: - err_list.append(err) - if err_list: - return (False, "\n".join(err_list)) - else: - return (True, None) - - -def get_https_publish_dir(config): - """ - @param config - @type pulp.server.content.plugins.config.PluginCallConfiguration - """ - if config: - publish_dir = config.get("https_publish_dir") - if publish_dir: - _LOG.info("Override HTTPS publish directory from passed in config value to: %s" % (publish_dir)) - return publish_dir - return HTTPS_PUBLISH_DIR - - -def get_http_publish_dir(config=None): - """ - @param config - @type pulp.server.content.plugins.config.PluginCallConfiguration - """ - if config: - publish_dir = config.get("http_publish_dir") - if publish_dir: - _LOG.info("Override HTTP publish directory from passed in config value to: %s" % (publish_dir)) - return publish_dir - return HTTP_PUBLISH_DIR - - -def get_docker_publish_dir(config, version): - 
""" - @param config - @type pulp.server.content.plugins.config.PluginCallConfiguration - """ - from pulp_docker.common import constants as pulp_docker_constants - return os.path.join(config.get(pulp_docker_constants.CONFIG_KEY_DOCKER_PUBLISH_DIRECTORY, - pulp_docker_constants.CONFIG_VALUE_DOCKER_PUBLISH_DIRECTORY), - version) - - -def get_docker_web_dir(config, version, repo): - """ - @param config - @type pulp.server.content.plugins.config.PluginCallConfiguration - """ - return os.path.join(get_docker_publish_dir(config, version), 'web', - repo.id) - - -def get_docker_master_dir(config, version, repo): - """ - @param config - @type pulp.server.content.plugins.config.PluginCallConfiguration - """ - return os.path.join(get_docker_publish_dir(config, version), - 'master', repo.id) - - -def get_repo_relative_path(repo, config, **kwargs): - repo_type = repo.notes.get('_repo-type') - if repo_type == 'rpm-repo': - relative_url = repo.notes.get("relative_url", "") - if relative_url: - return relative_url - else: - return repo.id - elif repo_type == 'docker-repo': - from pulp_docker.plugins.distributors.configuration import get_redirect_url - distributor = Distributor.objects.get_or_404(repo_id=repo.id, - distributor_type_id="docker_distributor_web") - - url = get_redirect_url(distributor["config"], repo, kwargs["docker_version"]) - parsed = urlparse.urlparse(url) - if "stage" in parsed.netloc or "qa" in parsed.netloc: - return parsed.path - elif "access" in parsed.netloc: - return parsed.path.replace("/webassets/docker/", "") - else: - return repo.id - else: - return config.get("relative_url") - -def get_remote_content_base(config): - remote_content_base = config.get("relative_url") - if not remote_content_base: - return REMOTE_CONTENT_BASE - return remote_content_base - - -#def get_repo_dir(repo, config): -# """ -# Get the dir of repodata directory generated by YumHTTPDistributor. 
-# -# :param repo: repository to get dir for -# :type repo: pulp.plugins.model.Repository -# :param config: configuration instance to validate -# :type config: pulp.plugins.config.PluginCallConfiguration -# :return: str the directory of repo -# :rtype: str or None -# """ -# d_config = config.flatten() -# -# repo_base_dir = None -# repo_type = repo.notes.get('_repo-type') -# if repo_type == 'rpm-repo': -# if d_config.get('http') is True: -# repo_base_dir = get_http_publish_dir(config) -# else: -# repo_base_dir = get_https_publish_dir(config) -# elif repo_type == 'docker-repo': -# repo_base_dir = get_docker_publish_dir(config) -# repo_rel_dir = get_repo_relative_path(repo, config) -# -# return (repo_base_dir, repo_rel_dir) - -def get_include_repodata(repo, config): - """ - Return whether or not repodata should be included in the rsync - - :param repo: repository to get dir for - :type repo: pulp.plugins.model.Repository - :param config: configuration instance to validate - :type config: pulp.plugins.config.PluginCallConfiguration - :return: whether to include the repodata in the rsync - :rtype: boolean - """ - try: - if repo.notes.get("rsync_repodata") is not None: - return repo.notes.get("rsync_repodata").lower() == 'true' - - d_config = config.flatten() - return str(d_config.get('cdn', {}).get('skip_repodata', 'True')).lower() != 'true' - except: - return False - diff --git a/plugins/pulp_rpm/plugins/distributors/rsync/distributor.py b/plugins/pulp_rpm/plugins/distributors/rsync/distributor.py index c56426b11..34f119339 100644 --- a/plugins/pulp_rpm/plugins/distributors/rsync/distributor.py +++ b/plugins/pulp_rpm/plugins/distributors/rsync/distributor.py @@ -1,66 +1,38 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2013 Red Hat, Inc. -# -# This software is licensed to you under the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the License -# (GPLv2) or (at your option) any later version. 
-# There is NO WARRANTY for this software, express or implied, including the -# implied warranties of MERCHANTABILITY, NON-INFRINGEMENT, or FITNESS FOR A -# PARTICULAR PURPOSE. -# You should have received a copy of GPLv2 along with this software; -# if not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt +from gettext import gettext as _ +import logging from pulp.common.config import read_json_config from pulp.plugins.distributor import Distributor -from . import configuration, publish +from pulp.plugins.rsync import configuration -# -- global constants ---------------------------------------------------------- +from pulp_rpm.plugins.distributors.rsync import publish +from pulp_rpm.common.ids import (TYPE_ID_DISTRO, TYPE_ID_DRPM, TYPE_ID_ERRATA, TYPE_ID_PKG_GROUP, + TYPE_ID_PKG_CATEGORY, TYPE_ID_RPM, TYPE_ID_SRPM, + TYPE_ID_YUM_REPO_METADATA_FILE) -TYPES = [] +_LOG = logging.getLogger(__name__) +TYPE_ID_DISTRIBUTOR_RPM_RSYNC = 'rpm_rsync_distributor' +CONF_FILE_PATH = 'server/plugins.conf.d/%s.json' % TYPE_ID_DISTRIBUTOR_RPM_RSYNC -try: - from pulp_rpm.common.ids import (TYPE_ID_DISTRO, TYPE_ID_DRPM, TYPE_ID_ERRATA, TYPE_ID_PKG_GROUP, - TYPE_ID_PKG_CATEGORY, TYPE_ID_RPM, TYPE_ID_SRPM, TYPE_ID_DISTRIBUTOR_YUM, - TYPE_ID_YUM_REPO_METADATA_FILE) - TYPES.extend([TYPE_ID_DISTRO, TYPE_ID_DRPM, TYPE_ID_ERRATA, TYPE_ID_PKG_GROUP, - TYPE_ID_PKG_CATEGORY, TYPE_ID_RPM, - TYPE_ID_SRPM, TYPE_ID_DISTRIBUTOR_YUM, - TYPE_ID_YUM_REPO_METADATA_FILE]) -except ImportError: - pass +DISTRIBUTOR_DISPLAY_NAME = 'RPM Rsync Distributor' -try: - from pulp_docker.common.constants import (IMAGE_TYPE_ID) - TYPES.extend([IMAGE_TYPE_ID]) -except ImportError: - pass - - - -_LOG = configuration.getLogger(__name__) - -TYPE_ID_DISTRIBUTOR_RH_CDN = 'cdn_distributor' -CONF_FILE_PATH = 'server/plugins.conf.d/%s.json' % TYPE_ID_DISTRIBUTOR_RH_CDN - -DISTRIBUTOR_DISPLAY_NAME = 'RH CDN Distributor' - -# -- entry point --------------------------------------------------------------- def 
entry_point(): config = read_json_config(CONF_FILE_PATH) - return RHCDNDistributor, config + return RPMRsyncDistributor, config -# -- distributor --------------------------------------------------------------- -class RHCDNDistributor(Distributor): +class RPMRsyncDistributor(Distributor): """ - Distributor class for publishing repo directory to RH CDN + Distributor class for publishing RPM repo to remote server. + + :ivar canceled: if true, task has been canceled + :ivar _publisher: instance of RPMRsyncPublisher """ def __init__(self): - super(RHCDNDistributor, self).__init__() + super(RPMRsyncDistributor, self).__init__() self.canceled = False self._publisher = None @@ -73,9 +45,11 @@ def metadata(cls): :return: description of the distributor's capabilities :rtype: dict """ - return {'id': TYPE_ID_DISTRIBUTOR_RH_CDN, + return {'id': TYPE_ID_DISTRIBUTOR_RPM_RSYNC, 'display_name': DISTRIBUTOR_DISPLAY_NAME, - 'types': TYPES} + 'types': [TYPE_ID_RPM, TYPE_ID_SRPM, TYPE_ID_DRPM, TYPE_ID_ERRATA, + TYPE_ID_PKG_GROUP, TYPE_ID_PKG_CATEGORY, TYPE_ID_DISTRO, + TYPE_ID_YUM_REPO_METADATA_FILE]} # -- repo lifecycle methods ------------------------------------------------ @@ -100,35 +74,10 @@ def validate_config(self, repo, config, config_conduit): :return: tuple of (bool, str) to describe the result :rtype: tuple """ - _LOG.debug('Validating yum repository configuration: %s' % repo.id) + _LOG.debug(_('Validating yum repository configuration: %(repoid)s') % {'repoid': repo.id}) return configuration.validate_config(repo, config, config_conduit) - def distributor_added(self, repo, config): - """ - Called upon the successful addition of a distributor of this type to a - repository. 
- - :param repo: metadata describing the repository - :type repo: pulp.plugins.model.Repository - - :param config: plugin configuration - :type config: pulp.plugins.config.PluginCallConfiguration - """ - pass - - def distributor_removed(self, repo, config): - """ - Called when a distributor of this type is removed from a repository. - - :param repo: metadata describing the repository - :type repo: pulp.plugins.model.Repository - - :param config: plugin configuration - :type config: pulp.plugins.config.PluginCallConfiguration - """ - pass - # -- actions --------------------------------------------------------------- def publish_repo(self, repo, publish_conduit, config): @@ -147,47 +96,19 @@ def publish_repo(self, repo, publish_conduit, config): :return: report describing the publish run :rtype: pulp.plugins.model.PublishReport """ - _LOG.debug('Publishing yum repository: %s' % repo.id) + _LOG.debug(_('Publishing yum repository: %(repoid)s') % {'repoid': repo.id}) - self._publisher = publish.Publisher(repo, publish_conduit, config, TYPE_ID_DISTRIBUTOR_RH_CDN) + self._publisher = publish.RPMRsyncPublisher(repo, publish_conduit, config, + TYPE_ID_DISTRIBUTOR_RPM_RSYNC) return self._publisher.publish() - #return {} def cancel_publish_repo(self): """ Call cancellation control hook. - :param call_request: call request for the call to cancel - :type call_request: pulp.server.dispatch.call.CallRequest - :param call_report: call report for the call to cancel - :type call_report: pulp.server.dispatch.call.CallReport """ - _LOG.debug('Canceling publishing repo to RH CDN') + _LOG.debug(_('Canceling publishing repo to remote server')) self.canceled = True if self._publisher is not None: self._publisher.cancel() - - def create_consumer_payload(self, repo, config, binding_config): - """ - Called when a consumer binds to a repository using this distributor. - This call should return a dictionary describing all data the consumer - will need to access the repository. 
- - :param repo: metadata describing the repository - :type repo: pulp.plugins.model.Repository - - :param config: plugin configuration - :type config: pulp.plugins.config.PluginCallConfiguration - - :param binding_config: configuration applicable only for the specific - consumer the payload is generated for; this will be None - if there are no specific options for the consumer in question - :type binding_config: object or None - - :return: dictionary of relevant data - :rtype: dict - """ - return {} - - diff --git a/plugins/pulp_rpm/plugins/distributors/rsync/publish.py b/plugins/pulp_rpm/plugins/distributors/rsync/publish.py index 7441d31fd..e1f074200 100644 --- a/plugins/pulp_rpm/plugins/distributors/rsync/publish.py +++ b/plugins/pulp_rpm/plugins/distributors/rsync/publish.py @@ -1,720 +1,111 @@ -# -*- coding: utf-8 -*- -# -# Copyright © 2013 Red Hat, Inc. -# -# This software is licensed to you under the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the License -# (GPLv2) or (at your option) any later version. -# There is NO WARRANTY for this software, express or implied, including the -# implied warranties of MERCHANTABILITY, NON-INFRINGEMENT, or FITNESS FOR A -# PARTICULAR PURPOSE. 
-# You should have received a copy of GPLv2 along with this software; -# if not, see http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt - -from gettext import gettext as _ -import logging import os -import tempfile -import time -import functools -from pulp.common import dateutils -from pulp.server import config as pulp_config -from pulp.server.exceptions import PulpCodedException -from pulp.plugins.util.publish_step import UnitPublishStep, PublishStep, CopyDirectoryStep -from pulp.server.db.model.criteria import UnitAssociationCriteria -from pulp.server.db.model.repository import RepoPublishResult +from pulp.plugins.rsync.publish import Publisher, RSyncPublishStep +from pulp.plugins.util.publish_step import RSyncFastForwardUnitPublishStep from pulp.server.db.model import Distributor +from pulp.server.exceptions import PulpCodedException -import pulp_rpm.common.constants as constants - -from . import configuration -from .utils import CopySelectedStep, Lazy, run, common_path - -#import kobo.shortcuts - -ASSOCIATED_UNIT_DATE_KEYWORD = "created" -LOG_PREFIX_NAME="pulp.plugins" - -def getLogger(name): - log_name = LOG_PREFIX_NAME + "." 
+ name - return logging.getLogger(log_name) - -_LOG = getLogger(__name__) -STORAGE_PATH = storage_dir = pulp_config.config.get('server', 'storage_dir') - -class UnknownRepositoryType(Exception): - def __init__(self, repo_type): - self.repo_type = repo_type - - def __str__(self): - return "RSync distributor can't work with %s repo type" % self.repo_type - - -def get_unit_types(repo_type): - if repo_type == "rpm-repo": - return ["rpm", "srpm"] - elif repo_type == "docker-repo": - return ["docker_image"] - - -def get_unit_fields(repo_type): - if repo_type == "rpm-repo": - return ["relativepath", "cdn_path", "_storage_path", "name", "version", - "release", "epoch", "arch", "checksumtype", "checksum", "signature"] - elif repo_type == "docker-repo": - return ["relativepath", "_storage_path", "image_id", "digest"] - - -def get_extra_sync_data(repo, docker_version=None): - if repo.notes["_repo-type"] == "rpm-repo": - return ["repodata"] - elif repo.notes["_repo-type"] == "docker-repo": - if docker_version == "v2": - return ["tags/list", "%s.json" % (repo.id)] - else: - return [] - - -def get_exclude(repo_type): - if repo_type == "rpm-repo": - return [".*", "repodata.old"] - elif repo_type == "docker-repo": - return [] - - -def get_origin_rel_path(unit, config): - """ - return relative path of content unit in remote fs - - :param unit: Pulp content unit - :type unit: Unit - :param configuration: configuration of distributor - :type configuration: dict - """ - - STORAGE_PATH = storage_dir = pulp_config.config.get('server', 'storage_dir') - content_base = os.path.join(STORAGE_PATH, "content", unit.metadata["_content_type_id"]) - rel_unit_path = os.path.relpath(unit.storage_path, content_base) - remote_content_base = configuration.get_remote_content_base(config) - remote_base = os.path.join(remote_content_base, unit.metadata["_content_type_id"]) - return os.path.join(remote_base, rel_unit_path) - - -def make_link_unit(repo_type, unit, extra_dst_path, working_dir, - 
remote_repo_path, remote_root, configuration): - """ - This method creates symlink in working directory, pointing to remote - content directory, so they can be sync to remote server - - :param repo_type: type of repository - :type repo_type: str - :param unit: Pulp content unit - :type unit: Unit - :param extra_dst_path: ending path for processes units - (for example 'Packages' for rpm) - :type extra_dst_path: str - :param working_dir: Working directory - :type working_dir: str - :param remote_repo_path: relative repo destination path on remote server - :type remote_repo_path: str - :param remote_root: remote destination root directory - :type remote_root: str - :param configuration: configuration of distributor - :type configuration: dict - """ - - out_path = "" - if unit.type_id == "rpm": - filename = unit.metadata["relativepath"] - extra_src_path = [".relative"] - elif unit.type_id == "docker_image": - filename = unit.unit_key["image_id"] - extra_src_path = [".relative"] - elif unit.type_id == "docker_blob": - filename = unit.unit_key["digest"] - extra_src_path = [".relative", "blobs"] - out_path = "blobs/" - elif unit.type_id == "docker_manifest": - filename = unit.unit_key["digest"] - extra_src_path = [".relative", "manifests"] - out_path = "manifests/" - if not os.path.exists(os.path.join(working_dir, extra_dst_path, *extra_src_path)): - os.makedirs(os.path.join(working_dir, extra_dst_path, *extra_src_path)) - - origin_path = get_origin_rel_path(unit, configuration) - - dest = os.path.join(working_dir, *([extra_dst_path] + extra_src_path + [filename])) - link_source = os.path.relpath(os.path.join(remote_root, origin_path), - os.path.join(remote_root, remote_repo_path.lstrip("/"), out_path, extra_dst_path)) - - _LOG.debug("LN %s -> %s " % (link_source, dest)) - if os.path.islink(dest): - os.remove(dest) - - os.symlink(link_source, dest) - return os.path.join(*([extra_dst_path] + extra_path + [filename])) - - -class RSyncPublishStep(PublishStep): - _CMD = 
"rsync" - _AZQ = "-rtKOzi" # recursive, symlinks, timestamps, keep dir links, omit dir times, compress, itemize - - def __init__(self, step_type, file_list, src_prefix, dest_prefix, - working_dir=None, distributor_type=None, config=None, - fast_forward=None, exclude=[], delete=False, links=False): - self.description = _('Rsync files to remote destination') - super(PublishStep, self).__init__(step_type,working_dir=working_dir, - #distributor_type=distributor_type, - config=config) - self.file_list = file_list - self.fast_forward = fast_forward - self.exclude = [] - self.delete = delete - self.src_prefix = src_prefix - self.dest_prefix = dest_prefix - self.links = links - - def remote_mkdir(self, path): - _LOG.debug("remote_mkdir: %s" % (path)) - remote = self.get_config().flatten()["remote"] - if not path or path == '/': - return (True, "path is empty") - if remote['auth_type'] == 'local': - # XXX - return () - remote_root = remote['root'].rstrip("/") - path = path.rstrip("/") - - if self.remote_dir_exists(path): - return (True, "%s exists" % path) - is_ok, output = self.remote_mkdir(os.path.dirname(path)) - if self.remote_dir_exists(path): - return (True, "%s exists" % path) - #_LOG.info("%s is_ok %s" % (path, is_ok)) - if not is_ok: - return (is_ok, output) - _LOG.info("Making directory on remote server: %s/%s" % (remote_root, path)) - args = self.make_ssh_cmd() + [remote["host"],'mkdir -vp %s/%s' % (remote_root, path)] - is_ok, output = self.call(args) - if not is_ok: - # check for race condition - if self.remote_dir_exists(path): - return (True, "%s exists" % path) - return (is_ok, output) - - def remote_dir_exists(self, path): - remote = self.get_config().flatten()["remote"] - if remote['auth_type'] == 'local': - # XXX - return () - remote_root = remote['root'].rstrip("/") - if 'akamai' in remote['host']: - args = self.make_ssh_cmd() + [remote["host"], 'ls %s/%s' % (remote_root, path)] - else: - args = self.make_ssh_cmd() + [remote["host"], 'stat -c "%%F" 
%s/%s' % (remote_root, path)] - is_ok, output = self.call(args, include_args_in_output=False) - output = [ line.strip() for line in output.splitlines() ] - # Filter out expected warnings - for substring in ("list of known hosts", - "Inappropriate ioctl for device", - "Shared connection to" ): - output = [ line for line in output if substring not in line ] - #_LOG.info("dir_exists path is_ok output: %s %s %s" % (path, is_ok, output)) - if is_ok and ("directory" in output or os.path.basename(path) in output): - return True - return False - - def make_ssh_cmd(self, args=None, for_rsync=False): - user = self.get_config().flatten()["remote"]['ssh_login'] - key = self.get_config().flatten()["remote"]['key_path'] # must be an abs path - - # -e 'ssh -l l_name -i key_path' - # use shared ssh connection for other threads - cmd = ['ssh', - '-l', user, - '-i', key, - '-o', 'StrictHostKeyChecking no', - '-o', 'UserKnownHostsFile /dev/null', - '-S', '/tmp/rsync_distributor-%r@%h:%p', - '-o', 'ControlMaster auto', - '-o', 'ControlPersist 10'] - if not for_rsync: - # -t -t is necessary for Akamai - cmd.extend(['-t']) - if args: - cmd += args - return cmd - - def make_authentication(self): - """ - Used when rsync uses SSH as the remote shell. - - :return: str. e.g., '-e ssh -l ssh_login_user -i /key_path' - :rtype: str - """ - if self.get_config().flatten()["remote"]['auth_type'] == 'local': - return () - ssh_parts = [] - for arg in self.make_ssh_cmd(for_rsync=True): - if " " in arg: - ssh_parts.append('"%s"' % arg) - else: - ssh_parts.append(arg) - - return ['-e', " ".join(ssh_parts)] - - def make_full_path(self, relative_path): - return os.path.join(self.get_config().flatten()["remote"]['root'], - relative_path) - - def make_destination(self, relative_path): - """ - Parse from self.config information - to make up a hostname and remote path used - for rsync command. - - :return: str of the combination of user, host, and dir. 
- :rtype: str - """ - if self.get_config().flatten()["remote"]['auth_type'] == 'local': - return self.get_config().flatten()["remote"]['root'] - relative_path = relative_path.lstrip("/") - - user = self.get_config().flatten()["remote"]['login'] - host = self.get_config().flatten()["remote"]['host'] - remote_root = self.get_config().flatten()["remote"]['root'] - - return '%s@%s:%s' % (user, host, os.path.join(remote_root, relative_path)) - - def is_skipped(self): - return False - - def call(self, args, include_args_in_output=True): - """ - Basically a wrapper to call kobo.shortcuts.run. - If ssh_exchange_identification or max-concurrent-connections - exceptions are thrown by ssh, up to 10 retries follows. - """ - - for t in xrange(10): - rv, out = run(cmd=args, can_fail=True) - if not ( rv and ("ssh_exchange_identification:" in out or "max-concurrent-connections=25" in out) ): - break - _LOG.info("Connections limit reached, trying once again in thirty seconds.") - time.sleep(30) - if include_args_in_output: - message = "%s\n%s" % (args, out) - else: - message = out - return (rv == 0, message) - - def make_rsync_args(self, files_from, source_prefix, dest_prefix, exclude=None): - args = [self._CMD, self._AZQ, - "--files-from", files_from, "--relative"] - if exclude: - for x in exclude: - args.extend(["--exclude", x]) - args.extend(self.make_authentication()) - if self.delete: - args.append("--delete") - if self.links: - args.append("--links") - else: - args.append("--copy-links") - args.append(source_prefix) - args.append(self.make_destination(dest_prefix)) - return args - - def rsync(self, dest=None, include_repodata=False): - if not self.file_list: - return (True, "Nothing to sync") - output = "" - _, tmp_path1 = tempfile.mkstemp(prefix="tmp_rsync_distributor") - open(tmp_path1, 'w').write("\n".join(sorted(self.file_list))) - # copy files here, not symlinks - exclude = self.exclude + get_exclude(self.parent.repo_type) - - (is_successful, this_output) = 
self.remote_mkdir(self.dest_prefix) - if not is_successful: - _LOG.error("Cannot create directory %s: %s" % (self.dest_prefix, this_output)) - return (is_successful, this_output) - output += this_output - rsync_args = self.make_rsync_args(tmp_path1, str(self.src_prefix), - self.dest_prefix, exclude) - - (is_successful, this_output) = self.call(rsync_args) - _LOG.info(this_output) - if not is_successful: - _LOG.error(this_output) - return (is_successful, this_output) - output += this_output - return (is_successful, output) - - def process_main(self): - (successful, output) = self.rsync(include_repodata=configuration.get_include_repodata(self.parent.repo, self.config)) - if not successful: - raise PulpCodedException(message=output) - - -class RSyncFastForwardUnitPublishStep(UnitPublishStep): - - def __init__(self, step_type, link_list, original_list, - unit_types=None, repo_type=None, - working_dir=None, distributor_type=None, config=None, - association_filters=None, relative_symlinks=None, - remote_repo_path=None): - self.description = _('Filtering units') - """ - Set the default parent, step_type and units_type for the the - publish step. 
- - :param step_type: The id of the step this processes - :type step_type: str - :param link_list: share list object across publish steps which - holds information of file links that will be synced - :type link_list: list - :param original_list: share list object across publish steps which - holds information of link origins that will be synced - :type original_list: list - :param unit_type: The type of unit this step processes - :type unit_type: str or list of str - """ - - self.link_list = link_list - self.original_list = original_list - self.cdn_list = [] - self.repo_type = repo_type - self.remote_repo_path = remote_repo_path - - super(RSyncFastForwardUnitPublishStep, self).__init__(step_type, - association_filters=association_filters, - #config=config, - unit_type=unit_types) - self.unit_fields = get_unit_fields(repo_type) - self.content_types = set() - - def get_unit_generator(self): - """ - This method returns a generator for the unit_type specified on the PublishStep. - The units created by this generator will be iterated over by the process_unit method. 
- - :return: generator of units - :rtype: GeneratorTyp of Units - """ - - types_to_query = list(set(self.unit_type).difference(self.skip_list)) - criteria = UnitAssociationCriteria(type_ids=types_to_query, - association_filters=self.association_filters, - unit_fields=self.unit_fields) - return self.get_conduit().get_units(criteria, as_generator=True) - - def is_skipped(self): - return False +from pulp_rpm.common import ids +from pulp_rpm.plugins.db import models +from pulp_rpm.plugins.distributors.yum import configuration as yum_config +from pulp_rpm.plugins import error_codes - def process_unit(self, unit): - self.content_types.add(unit.metadata["_content_type_id"]) - # symlink - self.link_list.append(make_link_unit(self.parent.repo_type, unit, - #self.parent.remote_path, - self.parent.extra_path, - self.get_working_dir(), - self.remote_repo_path, - self.get_config().get("remote")["root"]), - self.get_config()) - # cdn - rpm repos only - if "cdn_path" in unit.metadata: - self.cdn_list.append(unit.metadata["cdn_path"]) - # orignal file - self.original_list.append(unit.storage_path) - #_LOG.error("unit storage path %s" % (unit.storage_path,)) - def _process_block(self, item=None): - """ - This block is called for the main processing loop - """ - package_unit_generator = self.get_unit_generator() - count = 0 - for package_unit in package_unit_generator: - if self.canceled: - return - count += 1 - self.process_unit(package_unit) - self.progress_successes += 1 - if count % 50 == 0: - self.report_progress() +class RPMRsyncPublisher(Publisher): - all_content_types = self.get_config().get("content_types",[]) - all_content_types.extend(self.content_types) - self.get_config().override_config["content_types"] = list(set(all_content_types)) - cdn_rel_prefix = common_path(self.cdn_list) - self.get_config().override_config["cdn_rel_prefix"] = cdn_rel_prefix + REPO_CONTENT_TYPES = [ids.TYPE_ID_RPM, ids.TYPE_ID_DRPM, ids.TYPE_ID_SRPM] + REPO_CONTENT_MODELS = [models.RPM, 
models.SRPM, models.DRPM] -class Publisher(PublishStep): - """ - RSync publisher class that is responsible for the actual publishing - of a yum repository to remote machine. - """ + UNIT_FIELDS = ["_storage_path", "filename"] - def create_date_range_filter(self, config): + def _get_predistributor(self): """ - Create a date filter based on start and end issue dates specified in the repo config. The returned - filter is a dictionary which can be used directly in a mongo query. - - :param config: plugin configuration instance; the proposed repo configuration is found within - :type config: pulp.plugins.config.PluginCallConfiguration - - :return: date filter dict with issued date ranges in mongo query format - :rtype: {} + Returns the distributor that is configured as predistributor. """ - start_date = config.get(constants.START_DATE_KEYWORD) - end_date = config.get(constants.END_DATE_KEYWORD) - date_filter = None - if start_date and not isinstance(start_date, basestring): - start_date_str = dateutils.format_iso8601_datetime(start_date) - else: - start_date_str = start_date - if end_date and not isinstance(end_date, basestring): - end_date_str = dateutils.format_iso8601_datetime(end_date) + predistributor = self.get_config().flatten().get("predistributor_id", None) + if predistributor: + return Distributor.objects.get_or_404(repo_id=self.repo.id, + distributor_id=predistributor) else: - end_date_str = end_date - - if start_date and end_date: - date_filter = {ASSOCIATED_UNIT_DATE_KEYWORD: {"$gte": start_date_str, - "$lte": end_date_str}} - elif start_date: - date_filter = {ASSOCIATED_UNIT_DATE_KEYWORD: {"$gte": start_date_str}} - elif end_date: - date_filter = {ASSOCIATED_UNIT_DATE_KEYWORD: {"$lte": end_date_str}} - return date_filter + raise PulpCodedException(error_code=error_codes.RPM1010) def _get_root_publish_dir(self): - if self.repo_type == "rpm-repo": - if self.predistributor["config"].get("https",False): - return 
configuration.get_https_publish_dir(self.get_config()) - else: - return configuration.get_http_publish_dir(self.get_config()) - - def _get_extra_path(self): - if self.repo_type == "rpm-repo": - return self.predistributor["config"].get("relative_rpm_path", "") - else: - return "" - - def _get_predistributor(self): - """ Returns distributor which repo has to be published in before - publish in rsync distributor, content generator. """ - if self.repo_type == "rpm-repo": - dist_type = "yum_distributor" - elif self.repo_type == "docker-repo": - dist_type = "docker_distributor_web" - else: - raise UnknownRepositoryType(self.repo_type) - #distributors = RepoDistributorManager().get_distributors(self.repo.id) - dist = Distributor.objects.get_or_404(repo_id=self.repo.id, - distributor_type_id=dist_type) - return dist.distributor_id - #for distributor in distributors: - # if distributor["distributor_type_id"] == dist_type: - # return distributor["distributor_id"] - - def get_repo_repo_path(self): - if "relative_url" in self.repo.notes and self.repo.notes["relative_url"]: - return self.repo.notes["relative_url"] - else: - return self.repo.id - - def get_repo_content_types(self, repo_type): - if repo_type == "rpm-repo": - return configuration.RPM_UNIT_TYPES - elif repo_type == "docker-repo": - types = configuration.DOCKER_V2_UNIT_TYPES[:] - types.extend(configuration.DOCKER_UNIT_TYPES) - return sorted(set(types)) + Returns the publish directory path for the predistributor - def get_master_dirs(self): - dirs = [] - if self.repo_type == "rpm-repo": - for x in self.get_repo_content_types(self.repo_type): - repo_relative_path = configuration.get_repo_relative_path(self.repo, self.config) - dirs.append(os.path.realpath(os.path.join(self._get_root_publish_dir(), - repo_relative_path))) - elif self.repo_type == "docker-repo": - for _type in self.get_repo_content_types(self.repo_type): - if _type in configuration.DOCKER_V2_UNIT_TYPES: - docker_version = "v2" - else: - docker_version = 
"v1" - master_dir = configuration.get_docker_master_dir(self.config, docker_version, self.repo) - dirs.append(master_dir) #os.path.realpath(os.path.join(self._get_root_publish_dir() - return dirs - - def get_origin_dest_prefix(self): - if self.get_config().get("cdn_rel_prefix"): - origin_dest_prefix = self.get_config().get("cdn_rel_prefix") - elif len(self.get_config().get("content_types")) >= 2: - origin_dest_prefix = os.path.join("content", "origin", "units") - else: - origin_dest_prefix = os.path.join("content", "origin", "units", self.get_config().get("content_types")[0]) - return origin_dest_prefix - - def get_original_list(self): - original_list = reduce(lambda x,y: x+y, self.original_lists.values())[:] - original_rel_prefix = common_path(original_list) - for p in range(len(original_list)): - original_list[p] = os.path.relpath(original_list[p], original_rel_prefix) - return original_list - - def get_original_prefix(self): - original_list = reduce(lambda x,y: x+y, self.original_lists.values()) - return common_path(original_list) - - def get_link_list(self, master, docker_version): - link_rel_prefix = common_path(self.link_lists[master]) - link_list = self.link_lists[master][:] - for p in range(len(link_list)): - link_list[p] = os.path.relpath(link_list[p], link_rel_prefix) - return link_list - - def get_link_prefix(self, master_dir, docker_version): - return common_path(self.link_lists[master_dir]) - - def __init__(self, repo, publish_conduit, config, distributor_type, - association_filters=None, relative_path=None, - relative_symlinks=True, **kwargs): - """ - :param repo: Pulp managed Yum repository - :type repo: pulp.plugins.model.Repository - :param publish_conduit: Conduit providing access to relative Pulp functionality - :type publish_conduit: pulp.plugins.conduits.repo_publish.RepoPublishConduit - :param config: Pulp configuration for the distributor - :type config: pulp.plugins.config.PluginCallConfiguration + :return: path to the publish directory of 
the predistributor
+        :rtype: str
         """
-
-        super(Publisher, self).__init__("Repository publish", repo,
-                                        publish_conduit, config,
-                                        distributor_type=distributor_type,
-                                        working_dir="",#os.path.join(repo.working_dir,
-                                        )
-
-        distributor = Distributor.objects.get_or_404(repo_id=self.repo.id,
-                                                     distributor_id=publish_conduit.distributor_id)
-        self.last_published = distributor["last_publish"]
-        self.last_deleted = repo.last_unit_removed
-        self.repo_type = repo.notes["_repo-type"]
-        self.predistributor = Distributor.objects.get_or_404(repo_id=self.repo.id,
-                                                             distributor_id=self._get_predistributor())
-        if self.last_published and not isinstance(self.last_published, basestring):
-            string_date = dateutils.format_iso8601_datetime(self.last_published)
-        elif self.last_published:
-            string_date = self.last_published
+        if self.predistributor["config"].get("https", False):
+            return yum_config.get_https_publish_dir(self.get_config())
         else:
-            string_date = None
-
-        search_params = {'repo_id': repo.id,
-                         'distributor_id': self.predistributor["id"],
-                         'started' : {"$gte":string_date}}
-        self.predist_history = RepoPublishResult.get_collection().find(search_params)
-        self.remote_path = self.get_remote_repo_path()
-
-        if not self.is_fastforward():
-            date_filter = None
-        else:
-            range_criteria = {constants.START_DATE_KEYWORD: self.last_published,
-                              constants.END_DATE_KEYWORD: self.predistributor["last_publish"]}
-            date_filter = self.create_date_range_filter(range_criteria)
-
-        self.extra_path = self._get_extra_path()
-        self.original_lists = {}
-        self.link_lists = {}
-        remote_repo_path = configuration.get_repo_relative_path(self.repo, self.config,
-                                                                docker_version="v2")
-        if self.repo_type == "docker-repo":
-            web_dir = configuration.get_docker_web_dir(self.config, "v2", self.repo)
-            if os.path.exists(web_dir):
-                self.add_child(CopyDirectoryStep(web_dir, self.get_working_dir()))
-        for master_dir, ctypes in zip(self.get_master_dirs(),
-                                      self.get_repo_content_types(self.repo_type)):
-
self.original_lists.setdefault(master_dir, []) - self.link_lists.setdefault(master_dir, []) - gen_step = RSyncFastForwardUnitPublishStep("Unit query step (%s)" % ctypes, - self.link_lists[master_dir], - self.original_lists[master_dir], - repo_type=self.repo_type, - unit_types=ctypes, - distributor_type=distributor_type, - association_filters=date_filter, - working_dir=master_dir, - relative_symlinks=relative_symlinks, - remote_repo_path=remote_repo_path) - self.add_child(gen_step) - - - origin_dest_prefix = Lazy(str, self.get_origin_dest_prefix) - self.original_list = Lazy(list, self.get_original_list) - origin_src_prefix = Lazy(str, self.get_original_prefix) - - self.add_child(RSyncPublishStep("Rsync step (origin)", self.original_list, - origin_src_prefix, origin_dest_prefix, - fast_forward=self.is_fastforward(), - distributor_type=distributor_type, - #working_dir="",#working_dir, - config=config)) - - if self.repo_type == "rpm-repo": - src_prefix = self.get_working_dir() - dest_prefix = configuration.get_repo_relative_path(self.repo, self.config) - master_dir = self.get_master_dirs()[0] - self.add_child(CopySelectedStep(master_dir, self.get_working_dir()), - globs=["repodata"]) - self.add_child(RSyncPublishStep("Rsync step (content)", - Lazy(list, lambda: self.link_lists[master_dir] + get_extra_sync_data(self.repo)), - src_prefix, dest_prefix, - fast_forward=self.is_fastforward(), - distributor_type=distributor_type, - working_dir="",#working_dir, - config=config, links=True, - delete=not self.is_fastforward())) - elif self.repo_type == "docker-repo": - src_prefix = self.get_working_dir() - for master_dir, ctypes in zip(self.get_master_dirs(), - self.get_repo_content_types(self.repo_type)): - if ctypes in configuration.DOCKER_V2_UNIT_TYPES: - docker_version = "v2" - else: - docker_version = "v1" - step = RSyncPublishStep("Rsync step (content)", - Lazy(list, functools.partial(self.get_link_list, master_dir, docker_version)), - Lazy(str, functools.partial(lambda 
src_p, master_d, docker_v: os.path.join(src_prefix, self.get_link_prefix(master_d, docker_v)), - src_prefix, master_dir, docker_version)), - configuration.get_repo_relative_path(self.repo, self.config, docker_version=docker_version), - fast_forward=self.is_fastforward(), - distributor_type=distributor_type, - config=config, links=True, - delete=not self.is_fastforward()) - self.add_child(step) - if get_extra_sync_data(self.repo, docker_version): - step = RSyncPublishStep("Rsync step (content)", - Lazy(list, lambda:get_extra_sync_data(self.repo, docker_version) if self.link_lists[master_dir] else []), - src_prefix, - configuration.get_repo_relative_path(self.repo, self.config, docker_version=docker_version), - fast_forward=self.is_fastforward(), - distributor_type=distributor_type, + return yum_config.get_http_publish_dir(self.get_config()) + + def _add_necesary_steps(self, date_filter=None, config=None): + """ + This method adds all the steps that are needed to accomplish an RPM rsync publish. 
This + includes: + + Unit Query Step - selects units associated with the repo based on the date_filter and + creates relative symlinks + Rsync Step (origin) - rsyncs units discovered in previous step to the remote server + Rsync Step (repodata) - rsyncs repodata from master publish directory to remote server + Rsync Step (content) - rsyncs symlinks from working directory to remote server + + :param date_filter: Q object with start and/or end dates, or None if start and end dates + are not provided + :type date_filter: mongoengine.Q or types.NoneType + :param config: distributor configuration + :type config: pulp.plugins.config.PluginCallConfiguration + :return: None + """ + + predistributor = self._get_predistributor() + + remote_repo_path = yum_config.get_repo_relative_path(self.repo.repo_obj, + predistributor.config) + + # Find all the units associated with repo before last publish with predistributor + unit_types = ', '.join(RPMRsyncPublisher.REPO_CONTENT_TYPES) + gen_step = RSyncFastForwardUnitPublishStep("Unit query step (%s)" % unit_types, + RPMRsyncPublisher.REPO_CONTENT_MODELS, + repo=self.repo, + repo_content_unit_q=date_filter, + remote_repo_path=remote_repo_path, + published_unit_path=[], + unit_fields=RPMRsyncPublisher.UNIT_FIELDS) + self.add_child(gen_step) + + dest_content_units_dir = self.get_units_directory_dest_path() + src_content_units_dir = self.get_units_src_path() + + # Rsync content units + self.add_child(RSyncPublishStep("Rsync step (origin)", self.content_unit_file_list, + src_content_units_dir, dest_content_units_dir, + config=config, exclude=[".*", "repodata.old"])) + + # Stop here if distributor is only supposed to publish actual content + if self.get_config().flatten().get("content_units_only"): + return + + master_dir = self.get_master_directory() + + # Rsync symlinks to the remote server + self.add_child(RSyncPublishStep("Rsync step (content)", + self.symlink_list, self.symlink_src, + remote_repo_path, + config=config, links=True, 
exclude=["repodata"], + delete=self.config.get("delete"))) + + repodata_file_list = os.listdir(os.path.join(master_dir, 'repodata')) + + # Only rsync repodata if distributor is configured to do so + if not self.get_config().get('skip_repodata'): + self.add_child(RSyncPublishStep("Rsync step (repodata)", + repodata_file_list, + "%s/" % os.path.join(master_dir, 'repodata'), + "%s/" % os.path.join(remote_repo_path, 'repodata'), + exclude=[".*", "repodata.old"], config=config, links=True, - delete=not self.is_fastforward()) - self.add_child(step) - - def is_fastforward(self): - skip_fast_forward = False - for entry in self.predist_history: - skip_fast_forward |= entry.get("distributor_config",{}).get("skip_fast_forward", False) - if self.last_published: - last_published = self.last_published #dateutils.parse_iso8601_datetime(self.last_published) - else: - last_published = None - return last_published\ - and ((self.last_deleted and last_published > self.last_deleted) or not self.last_deleted)\ - and not (skip_fast_forward | self.get_config().get("skip_fast_forward", False)) - + delete=self.config.get("delete"))) diff --git a/plugins/pulp_rpm/plugins/distributors/rsync/utils.py b/plugins/pulp_rpm/plugins/distributors/rsync/utils.py deleted file mode 100644 index ff4bc0853..000000000 --- a/plugins/pulp_rpm/plugins/distributors/rsync/utils.py +++ /dev/null @@ -1,99 +0,0 @@ -from gettext import gettext as _ -import glob -import os -import shutil -import subprocess - -from pulp.plugins.util.publish_step import PublishStep -from pulp.server.util import copytree - -class CopySelectedStep(PublishStep): - """ - Copy a directory from another directory - - :param source_dir: The directory to copy - :type source_dir: str - :param target_dir: Fully qualified name of the final location to copy to - :type target_dir: str - :param step_type: The id of the step, so that this step can be used with custom names. 
- :type step_type: str - :param delete_before_copy: Whether or not the contents of the target_dir should be cleared - before copying from source_dir - :type delete_before_copy: bool - :param preserve_symlinks: Whether or not the symlinks in the original source directory should - be copied as symlinks or as the content of the linked files. - Defaults to False. - :type preserve_symlinks: bool - """ - def __init__(self, source_dir, target_dir, step_type=None, delete_before_copy=True, - globs=["*"]): - step_type = step_type if step_type else reporting_constants.PUBLISH_STEP_TAR - super(CopySelectedStep, self).__init__(step_type) - self.source_dir = source_dir - self.target_dir = target_dir - self.delete_before_copy = delete_before_copy - self.description = _('Copying selected files') - self.globs = globs - - def process_main(self): - """ - Copy one directory to another. - """ - if self.delete_before_copy: - shutil.rmtree(self.target_dir, ignore_errors=True) - files = [] - for glob_expr in self.globs: - matched = glob.glob(os.path.join(self.source_dir, glob_expr)) - files.extend([os.path.join(self.source_dir, fn) for fn in matched]) - for f in files: - if os.path.isdir(f): - copytree(os.path.join(f), - os.path.join(self.target_dir, os.path.basename(f))) - else: - shutil.copy(f, self.target_dir) - -class Lazy(object): - overwrite = ("__str__",) - def __new__(cls, _type, caller, *args, **kwargs): - lazy = super(Lazy, cls).__new__(cls, *args, **kwargs) - lazy.caller = caller - lazy._type = _type - lazy.real_object = None - #setattr(cls, '_invoke_method', invoke_method) - - for (name, member) in inspect.getmembers(_type): - if (hasattr(cls, name) and name not in lazy.overwrite)\ - or not inspect.ismethoddescriptor(member): - continue - setattr(cls, name, lazy._partialmethod('_invoke_method', name)) - #setattr(cls, name, functools.partial(cls._invoke_method, name)) - return lazy - - def _invoke_method(self, method_name, *args, **keywords): - if not self.real_object: - 
self.real_object = self._type(self.caller()) - method = getattr(self.real_object, method_name) - return method(*args, **keywords) - - def _partialmethod(cls, method, *args, **kw): - def call(obj, *more_args, **more_kw): - call_kw = kw.copy() - call_kw.update(more_kw) - return getattr(obj, method)(*(args+more_args), **call_kw) - return call - -def run(cmd, stdin=None, stdout=None, stderr=None, cwd=None, env=None, bufsize=None): - ret = subprocess.Popen(cmd, stdin=stdin, stdout=stdout, stderr=stderr, env=env, - bufsize=bufsize) - for x in ("stdin", "stdout", "stderr"): - locals()[x] = subprocess.PIPE if locals()[x] else None - - ret.wait() - return (ret.returncode, ret.stdout.read()) - - -def common_path(dirs): - _common_path = os.path.commonprefix(dirs) - if not _common_path.endswith("/"): - _common_path = os.path.dirname(_common_path) - return _common_path diff --git a/plugins/pulp_rpm/plugins/error_codes.py b/plugins/pulp_rpm/plugins/error_codes.py index 3d54f7afd..bba6f3674 100644 --- a/plugins/pulp_rpm/plugins/error_codes.py +++ b/plugins/pulp_rpm/plugins/error_codes.py @@ -24,3 +24,4 @@ ['checksumtype']) RPM1009 = Error("RPM1009", _('Checksum type "%(checksumtype)s" is not supported.'), ['checksumtype']) +RPM1010 = Error("RPM1010", _('RPMRsyncDistributor requires a predistributor to be configured.'), []) diff --git a/plugins/setup.py b/plugins/setup.py index 8f7a14297..5d3c5dc0a 100755 --- a/plugins/setup.py +++ b/plugins/setup.py @@ -25,8 +25,9 @@ 'distributor = pulp_rpm.plugins.distributors.yum.distributor:entry_point', 'ExportDistributor = pulp_rpm.plugins.distributors.export_distributor.distributor:' 'entry_point', - 'IsoDistributor = pulp_rpm.plugins.distributors.iso_distributor.distributor:entry_point', - 'RsyncDistributor = pulp_rpm.plugins.distributors.rsync.distributor.distributor:entry_point' + 'IsoDistributor = pulp_rpm.plugins.distributors.iso_distributor.distributor:' + 'entry_point', + 'RsyncDistributor = 
pulp_rpm.plugins.distributors.rsync.distributor:entry_point' ], 'pulp.group_distributors': [ 'rpm_export = pulp_rpm.plugins.distributors.export_distributor.groupdistributor:' diff --git a/pulp-rpm.spec b/pulp-rpm.spec index 749f9fd13..3ba14da64 100644 --- a/pulp-rpm.spec +++ b/pulp-rpm.spec @@ -163,6 +163,7 @@ Requires: genisoimage Requires: m2crypto Requires: python-lxml Requires: repoview +Requires: rsync %description plugins Provides a collection of platform plugins that extend the Pulp platform