Skip to content

Commit

Permalink
Merge pull request #80 from asmacdo/pulp-to-pulp
Browse files Browse the repository at this point in the history
Publish metadata to allow pulp to pulp sync
  • Loading branch information
asmacdo committed Jun 16, 2016
2 parents c89e85b + 5dde068 commit af9f83b
Show file tree
Hide file tree
Showing 4 changed files with 164 additions and 42 deletions.
2 changes: 2 additions & 0 deletions plugins/etc/httpd/conf.d/pulp_python.conf
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,6 @@ Alias /pulp/python /var/www/pub/python/

<Directory /var/www/pub/python>
Options FollowSymLinks Indexes
DirectoryIndex index.html index.json
</Directory>

97 changes: 84 additions & 13 deletions plugins/pulp_python/plugins/distributors/steps.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
import collections
from gettext import gettext as _
import itertools
import json
import logging
import os
from xml.etree import cElementTree as ElementTree

import pkg_resources
from pulp.plugins.util.publish_step import AtomicDirectoryPublishStep, PluginStep
from pulp.server.controllers import repository as repo_controller

Expand Down Expand Up @@ -33,7 +36,7 @@ def process_main(self):
"""
Publish all the python files themselves by creating the symlinks to the storage paths.
"""
for name, packages in _get_packages(self.get_conduit().repo_id).items():
for name, packages in _get_projects(self.get_conduit().repo_id).items():
for package in packages:
relative_path = _get_package_path(name, package['filename'])
symlink_path = os.path.join(self.parent.web_working_dir, relative_path)
Expand Down Expand Up @@ -62,10 +65,11 @@ def process_main(self):
"""
# Make the simple/ directory and put the correct index.html in it
simple_path = os.path.join(self.parent.web_working_dir, 'simple')
api_path = os.path.join(self.parent.web_working_dir, 'pypi')
os.makedirs(simple_path)
simple_index_path = os.path.join(simple_path, 'index.html')

packages = _get_packages(self.get_conduit().repo_id)
projects = _get_projects(self.get_conduit().repo_id)

with open(simple_index_path, 'w') as index:
html = ElementTree.Element('html')
Expand All @@ -76,14 +80,22 @@ def process_main(self):
body = ElementTree.SubElement(html, 'body')
# For each package, we need to make a reference in index.html and also make a directory
# with its own index.html for the package
for name, packages in packages.items():
for name, packages in projects.items():
element = ElementTree.SubElement(body, 'a', {'href': name})
element.text = name
ElementTree.SubElement(body, 'br')
PublishMetadataStep._create_package_index(name, simple_path, packages)

index.write(ElementTree.tostring(html, 'utf8'))

for name, packages in projects.items():
project_metadata_path = os.path.join(api_path, name, 'json')
os.makedirs(project_metadata_path)
project_index_metadata_path = os.path.join(project_metadata_path, 'index.json')
with open(project_index_metadata_path, 'w') as meta_json:
data = PublishMetadataStep._create_metadata(name, packages)
meta_json.write(json.dumps(data))

@staticmethod
def _create_package_index(name, simple_path, packages):
"""
Expand Down Expand Up @@ -121,6 +133,55 @@ def _create_package_index(name, simple_path, packages):

package_index.write(ElementTree.tostring(html, 'utf8'))

@staticmethod
def _create_metadata(name, packages):
    """
    Generate json metadata for the project and its packages.

    The structure of the data is designed to mimic the json api of PyPI. The data will be
    for a single Python project (e.g. SciPy). The inner dictionary 'info' specifies details of
    the project that should be applicable for all packages. The inner dictionary 'releases'
    contains keys for each version and the value is a list of dictionaries, each representing
    the metadata for a single package of that version.

    More information on the PyPI API can be found here: https://wiki.python.org/moin/PyPIJSON

    :param name:     Name of the project
    :type  name:     basestring
    :param packages: metadata for each package of the project
    :type  packages: list of dicts
    :return:         metadata for all packages of the project
    :rtype:          dict
    """
    info = {'name': name}
    releases = collections.defaultdict(list)
    # Sentinel: the first package always populates 'info'. The explicit None check below
    # avoids relying on `parsed_version > None`, which is not supported by newer
    # pkg_resources/packaging version objects.
    latest_version = None

    for package in packages:

        # The 'info' dict applies to all packages in this project; populate it from the
        # latest release seen so far.
        version = package['version']
        parsed_version = pkg_resources.parse_version(version)
        if latest_version is None or parsed_version > latest_version:
            # Assign plain strings. (Trailing commas here would store one-element
            # tuples, corrupting the serialized JSON metadata.)
            info['author'] = package['author']
            info['summary'] = package['summary']
            latest_version = parsed_version

        # Relative download URL with a checksum fragment, mirroring PyPI's link format.
        href = '../../../%s#%s=%s' % (_get_package_path(name, package['filename']),
                                      package['checksum_type'], package['checksum'])

        # package data is specific to an individual file
        package_data = {
            'filename': package['filename'],
            'packagetype': package['packagetype'],
            'url': href,
            'md5_digest': package['md5_digest'],
        }
        releases[version].append(package_data)

    return {'info': info, 'releases': releases}


class PythonPublisher(PluginStep):
"""
Expand Down Expand Up @@ -171,9 +232,9 @@ def _get_package_path(name, filename):
return os.path.join('packages', 'source', name[0], name, filename)


def _get_packages(repo_id):
def _get_projects(repo_id):
"""
Build and return a data structure of the available packages. The keys each index a list of
Build and return a data structure of the available projects. The keys each index a list of
dictionaries. The inner dictionaries are of the form
{'version': VERSION, 'filename': FILENAME, 'checksum': MD5SUM, 'checksum_type': TYPE,
'storage_path': PATH}
Expand All @@ -183,15 +244,25 @@ def _get_packages(repo_id):
:return: A dictionary of all the packages in the repo to be published
:rtype: dict
"""
fields = ('filename', 'url', 'packagetype', 'md5_digest', '_checksum', '_checksum_type',
'version', 'name', 'author', '_storage_path')
packages = {}
fields = ('version', 'filename', '_checksum', '_checksum_type', 'name', '_storage_path')
unit_querysets = repo_controller.get_unit_model_querysets(repo_id, models.Package)
unit_querysets = (q.only(*fields) for q in unit_querysets)
for p in itertools.chain(*unit_querysets):
packages.setdefault(p.name, []).append(
{'version': p.version,
'filename': p.filename,
'checksum': p._checksum,
'checksum_type': p._checksum_type,
'storage_path': p.storage_path})

for pac in itertools.chain(*unit_querysets):
packages.setdefault(pac.name, []).append({
'filename': pac.filename,
'name': pac.name,
'url': pac.url,
'packagetype': pac.packagetype,
'md5_digest': pac.md5_digest,
'checksum_type': pac._checksum_type,
'version': pac.version,
'author': pac.author,
'summary': pac.summary,
'checksum': pac._checksum,
'storage_path': pac.storage_path,
})

return packages
2 changes: 1 addition & 1 deletion plugins/pulp_python/plugins/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ class Package(FileContentUnit):
:type name: basestring
:ivar packagetype: format of python package, ex bdist_wheel, sdist
:type packagetype: basestring
:ivar url: url that the package can be downloaded from
:ivar url: url that is the source of bits for this package
:type url: basestring
:ivar version: Contains the distribution's version number. This field must be in the format
specified in PEP 386.
Expand Down
105 changes: 77 additions & 28 deletions plugins/test/unit/plugins/distributors/test_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,40 +14,86 @@


_PACKAGES = [
models.Package(name='nectar', version='1.2.0', filename='nectar-1.2.0.tar.gz',
_checksum='abcde', _checksum_type='made_up',
_storage_path='/path/to/nectar-1.2.0.tar.gz'),
models.Package(name='nectar', version='1.3.1', filename='nectar-1.3.1.tar.gz',
_checksum='fghij', _checksum_type='made_up',
_storage_path='/path/to/nectar-1.3.1.tar.gz'),
models.Package(name='pulp_python_plugins', version='0.0.0',
filename='pulp_python_plugins-0.0.0.tar.gz',
_checksum='klmno', _checksum_type='made_up',
_storage_path='/path/to/pulp_python_plugins-0.0.0.tar.gz'),
models.Package(
name='nectar',
packagetype='sdist',
version='1.2.0',
author='me',
summary='does stuff',
md5_digest='abcde',
filename='nectar-1.2.0.tar.gz',
_checksum='abcde',
_checksum_type='made_up',
url='some/url',
_storage_path='/path/to/nectar-1.2.0.tar.gz'
),
models.Package(
name='nectar',
packagetype='sdist',
version='1.3.1',
summary='does stuff',
author='me',
filename='nectar-1.3.1.tar.gz',
md5_digest='fghij',
_checksum='fghij',
_checksum_type='made_up',
url='some/url',
_storage_path='/path/to/nectar-1.3.1.tar.gz'),
models.Package(
name='pulp_python_plugins',
packagetype='sdist',
summary='does stuff',
author='me',
version='0.0.0',
filename='pulp_python_plugins-0.0.0.tar.gz',
md5_digest='klmno',
_checksum='klmno',
_checksum_type='made_up',
url='some/url',
_storage_path='/path/to/pulp_python_plugins-0.0.0.tar.gz'
),
]


_GET_PACKAGES_RETURN = {
_GET_PROJECTS_RETURN = {
'nectar': [
{
'name': 'nectar',
'url': 'some/url',
'packagetype': 'sdist',
'summary': 'does stuff',
'author': 'me',
'version': '1.2.0',
'filename': 'nectar-1.2.0.tar.gz',
'md5_digest': 'abcde',
'checksum': 'abcde',
'checksum_type': 'made_up',
'storage_path': '/path/to/nectar-1.2.0.tar.gz',
},
{
'name': 'nectar',
'url': 'some/url',
'packagetype': 'sdist',
'summary': 'does stuff',
'author': 'me',
'version': '1.3.1',
'filename': 'nectar-1.3.1.tar.gz',
'md5_digest': 'fghij',
'checksum': 'fghij',
'checksum_type': 'made_up',
'storage_path': '/path/to/nectar-1.3.1.tar.gz',
},
],
'pulp_python_plugins': [
{
'name': 'pulp_python_plugins',
'url': 'some/url',
'packagetype': 'sdist',
'summary': 'does stuff',
'author': 'me',
'version': '0.0.0',
'filename': 'pulp_python_plugins-0.0.0.tar.gz',
'md5_digest': 'klmno',
'checksum': 'klmno',
'checksum_type': 'made_up',
'storage_path': '/path/to/pulp_python_plugins-0.0.0.tar.gz',
Expand All @@ -72,11 +118,11 @@ def test___init__(self, super___init__):
self.assertEqual(step.redirect_context, None)
self.assertEqual(step.description, _('Publishing Python Content.'))

@mock.patch('pulp_python.plugins.distributors.steps._get_packages', spec_set=True)
@mock.patch('pulp_python.plugins.distributors.steps._get_projects', spec_set=True)
@mock.patch('pulp_python.plugins.distributors.steps.os.makedirs')
@mock.patch('pulp_python.plugins.distributors.steps.os.path.exists')
@mock.patch('pulp_python.plugins.distributors.steps.os.symlink')
def test_process_main(self, symlink, exists, makedirs, mock_get_packages):
def test_process_main(self, symlink, exists, makedirs, mock_get_projects):
"""
Assert correct operation from the process_main() method with our _GET_UNITS_RETURN data.
"""
Expand All @@ -95,7 +141,7 @@ def mock_exists(path):
exists.side_effect = mock_exists

step = steps.PublishContentStep()
mock_get_packages.return_value = _GET_PACKAGES_RETURN
mock_get_projects.return_value = _GET_PROJECTS_RETURN
conduit = mock.MagicMock()
step.get_conduit = mock.MagicMock(return_value=conduit)
step.parent = mock.MagicMock()
Expand All @@ -104,7 +150,7 @@ def mock_exists(path):
step.process_main()

step.get_conduit.assert_called_once_with()
mock_get_packages.assert_called_once_with(conduit.repo_id)
mock_get_projects.assert_called_once_with(conduit.repo_id)
# os.path.exists should have been called once for each Unit. It also gets called for a lot
# of locale stuff, so we'll need to filter those out.
pulp_exists_calls = [c for c in exists.mock_calls if 'locale' not in c[1][0]]
Expand Down Expand Up @@ -145,16 +191,16 @@ def test___init__(self, super___init__):
self.assertEqual(step.description, _('Publishing Python Metadata.'))

@mock.patch('__builtin__.open', autospec=True)
@mock.patch('pulp_python.plugins.distributors.steps._get_packages', spec_set=True)
@mock.patch('pulp_python.plugins.distributors.steps._get_projects', spec_set=True)
@mock.patch('pulp_python.plugins.distributors.steps.os.makedirs')
@mock.patch('pulp_python.plugins.distributors.steps.PublishMetadataStep._create_package_index')
def test_process_main(self, _create_package_index, makedirs, mock_get_packages, mock_open):
def test_process_main(self, _create_package_index, makedirs, mock_get_projects, mock_open):
"""
Assert all the correct calls from process_main().
"""
step = steps.PublishMetadataStep()
conduit = mock.MagicMock()
mock_get_packages.return_value = _GET_PACKAGES_RETURN
mock_get_projects.return_value = _GET_PROJECTS_RETURN
step.get_conduit = mock.MagicMock(return_value=conduit)
step.parent = mock.MagicMock()
step.parent.web_working_dir = '/some/path/'
Expand All @@ -163,14 +209,17 @@ def test_process_main(self, _create_package_index, makedirs, mock_get_packages,

# Assert correct usage of various mocked items
step.get_conduit.assert_called_once_with()
mock_get_packages.assert_called_once_with(conduit.repo_id)
makedirs.assert_called_once_with(os.path.join(step.parent.web_working_dir, 'simple'))
mock_open.assert_called_once_with(
os.path.join(step.parent.web_working_dir, 'simple', 'index.html'), 'w')
mock_get_projects.assert_called_once_with(conduit.repo_id)
makedirs.assert_has_calls([
mock.call(os.path.join(step.parent.web_working_dir, 'simple')),
mock.call(os.path.join(step.parent.web_working_dir, 'pypi', 'pulp_python_plugins',
'json')),
mock.call(os.path.join(step.parent.web_working_dir, 'pypi', 'nectar', 'json')),
])

# Assert that the two calls to _create_package_index for each package name are correct
self.assertEqual(_create_package_index.call_count, 2)
expected_packages_by_name = steps._get_packages(conduit)
expected_packages_by_name = steps._get_projects(conduit)
for call in _create_package_index.mock_calls:
expected_packages = expected_packages_by_name[call[1][0]]
self.assertEqual(call[1][1], os.path.join(step.parent.web_working_dir, 'simple'))
Expand Down Expand Up @@ -357,19 +406,19 @@ def test__get_package_path(self):

class TestGetPackages(unittest.TestCase):
"""
This class contains tests for the _get_packages() function.
This class contains tests for the _get_projects() function.
"""
@mock.patch('pulp.server.controllers.repository.get_unit_model_querysets', spec_set=True)
def test__get_packages(self, mock_get_querysets):
def test__get_projects(self, mock_get_querysets):
"""
Assert the correct return value from _get_packages() with the _GET_UNITS_RETURN data set.
Assert the correct return value from _get_projects() with the _GET_UNITS_RETURN data set.
"""
qs = mock.MagicMock()
qs.only.return_value = _PACKAGES
mock_get_querysets.return_value = [qs]

packages = steps._get_packages('repo1')
packages = steps._get_projects('repo1')

expected_packages = _GET_PACKAGES_RETURN
expected_packages = _GET_PROJECTS_RETURN

self.assertEqual(packages, expected_packages)

0 comments on commit af9f83b

Please sign in to comment.