Skip to content

Commit

Permalink
Implemented PyPi json API
Browse files Browse the repository at this point in the history
Pulp now supports PyPI's JSON API through the new content endpoint '/pypi/{package-name}/json'. This enables basic syncing from other Pulp instances. Package classifiers are now also included in Python content.

fixes: #2886
https://pulp.plan.io/issues/2886
fixes: #3627
https://pulp.plan.io/issues/3627
  • Loading branch information
gerrod3 committed Aug 13, 2020
1 parent 7277344 commit 819af76
Show file tree
Hide file tree
Showing 7 changed files with 438 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGES/2886.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implemented PyPI's JSON API at the content endpoint '/pypi/{package-name}/json'. Pulp can now perform basic syncing from other Pulp Python instances.
1 change: 1 addition & 0 deletions CHANGES/3627.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Sync now includes a Python package's classifiers in the content unit.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 2.2.3 on 2020-08-12 22:25

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``python_version`` text field to the ``pythonpackagecontent`` model."""

    # This migration must be applied after the app's initial migration.
    dependencies = [
        ('python', '0001_initial'),
    ]

    operations = [
        migrations.AddField(
            model_name='pythonpackagecontent',
            name='python_version',
            # The empty-string default is used to populate rows that already exist
            # when the column is added.
            field=models.TextField(default=''),
            # The default above is only for this migration; it is not kept as part
            # of the field definition afterwards.
            preserve_default=False,
        ),
    ]
40 changes: 40 additions & 0 deletions pulp_python/app/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from logging import getLogger

from aiohttp.web import json_response
from django.contrib.postgres.fields import JSONField
from django.db import models

Expand All @@ -11,6 +12,9 @@
Repository
)

from pathlib import PurePath
from .utils import python_content_to_json, PYPI_LAST_SERIAL, PYPI_SERIAL_CONSTANT

log = getLogger(__name__)


Expand All @@ -33,6 +37,41 @@ class PythonDistribution(PublicationDistribution):

TYPE = 'python'

def content_handler(self, path):
    """
    Handler to serve the PyPI JSON metadata of a package in this Distribution.

    Two request shapes are recognised, both anchored at the start of ``path``:
    ``pypi/<name>/json`` (latest release) and ``pypi/<name>/<version>/json``
    (a specific release).

    Args:
        path (str): The path being requested
    Returns:
        None if there is no content to be served at path (the path has a
        different shape, or the published repository version holds no package
        with that name). Otherwise a aiohttp.web_response.Response with the
        JSON metadata and the serial header set.
    Raises:
        Any exception raised while the JSON body is built is propagated
        unchanged to the caller.
    """
    pure_path = PurePath(path)
    # Drop a leading root component so "/pypi/foo/json" is treated like "pypi/foo/json".
    parts = pure_path.parts[1:] if pure_path.anchor else pure_path.parts

    # Compare the components explicitly. PurePath.match() with a relative pattern
    # matches from the right, which would accept "x/pypi/foo/json" while the
    # positional indexes below would then pick the wrong components.
    name = None
    version = None
    if len(parts) == 4 and parts[0] == "pypi" and parts[3] == "json":
        name, version = parts[1], parts[2]
    elif len(parts) == 3 and parts[0] == "pypi" and parts[2] == "json":
        name = parts[1]

    if not name:
        return None

    package_content = PythonPackageContent.objects.filter(
        pk__in=self.publication.repository_version.content,
        name__iexact=name
    )
    # An unknown package is "nothing to serve", not a server error.
    if not package_content.exists():
        return None

    # TODO Change this value to the Repo's serial value when implemented
    headers = {PYPI_LAST_SERIAL: str(PYPI_SERIAL_CONSTANT)}
    body = python_content_to_json(self.base_path, package_content, version=version)
    return json_response(body, headers=headers)

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

Expand All @@ -54,6 +93,7 @@ class PythonPackageContent(Content):
name = models.TextField()
version = models.TextField()
# Optional metadata
python_version = models.TextField()
metadata_version = models.TextField()
summary = models.TextField()
description = models.TextField()
Expand Down
145 changes: 145 additions & 0 deletions pulp_python/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import json
from django.conf import settings
from packaging.version import parse

# Name of the HTTP response header used to report the serial value.
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
# Fixed placeholder value for that header.
# NOTE(review): presumably temporary until a real per-repository serial exists - confirm.
PYPI_SERIAL_CONSTANT = 1000000000

def parse_project_metadata(project):
"""
Expand Down Expand Up @@ -33,6 +37,7 @@ def parse_project_metadata(project):
package['provides_dist'] = json.dumps(project.get('provides_dist', []))
package['obsoletes_dist'] = json.dumps(project.get('obsoletes_dist', []))
package['requires_external'] = json.dumps(project.get('requires_external', []))
package['classifiers'] = json.dumps(project.get('classifiers', []))

return package

Expand Down Expand Up @@ -60,7 +65,147 @@ def parse_metadata(project, version, distribution):
package['version'] = version
package['url'] = distribution.get('url') or ""
package['sha256_digest'] = distribution.get('digests', {}).get('sha256') or ""
package['python_version'] = distribution.get('python_version') or ""

package.update(parse_project_metadata(project))

return package


def python_content_to_json(base_path, content_query, version=None):
    """
    Build the PyPI-style JSON metadata document for one package.

    Args:
        base_path (str): base path of the distribution, used to build download URLs
        content_query: QuerySet of PythonPackageContent belonging to the package
        version (str): optional version to describe instead of the latest one

    Returns:
        dict with the keys
            last_serial: int
            info: Dict
            releases: Dict
            urls: list of download information for the selected version
    """
    selected = latest_content_version(content_query, version)
    return {
        # For now the serial field isn't supported by Pulp
        "last_serial": 0,
        # The first selected content unit provides the package-level info
        "info": python_content_to_info(selected[0]),
        "releases": python_content_to_releases(content_query, base_path),
        "urls": python_content_to_urls(selected, base_path),
    }


def latest_content_version(content_query, version):
    """
    Select the content instances of one version of a package.

    Args:
        content_query: iterable (QuerySet) of PythonPackageContent
        version (str): version to select; when empty or None the latest version
            found in ``content_query`` is selected instead

    Returns:
        list of the content instances whose version equals the selected version.
        The list is empty only when no version was requested and
        ``content_query`` is empty.

    Raises:
        LookupError: a version was requested but no content has that version
    """
    if version:
        # Only exact matches count when a version is requested. Newer content
        # must never replace or join the result.
        wanted = parse(version)
        matches = [content for content in content_query if parse(content.version) == wanted]
        if not matches:
            raise LookupError("No content found for version {!r}".format(version))
        return matches

    newest = None
    latest_content = []
    for content in content_query:
        current = parse(content.version)  # parse once per content unit
        if newest is None or current > newest:
            newest = current
            latest_content = [content]
        elif current == newest:
            latest_content.append(content)
    return latest_content


def python_content_to_info(latest_content):
    """
    Build the "info" dictionary of the JSON metadata from one PythonPackageContent.

    The instance attributes are copied, bookkeeping and release-only fields are
    dropped, JSON-encoded fields are decoded and empty values become None.
    """
    # Would have been nice to use a serializer,
    # but I couldn't figure out how to pass in the request context
    excluded = {
        "_state",
        "pulp_id",
        "pulp_created",
        "pulp_last_updated",
        "pulp_type",
        "upstream_id",
        "content_ptr_id",
        # These three belong to the release entries
        "filename",
        "packagetype",
        "python_version",
        # Not seen on PyPI infos
        "metadata_version",
        "supported_platform",
    }
    json_encoded = {
        "requires_dist",
        "provides_dist",
        "obsoletes_dist",
        "requires_external",
        "classifiers",
    }

    info = {}
    for key, value in vars(latest_content).items():
        if key in excluded:
            continue
        if key in json_encoded:
            # Stored as a JSON string, exposed as the python equivalent
            info[key] = json.loads(value)
        else:
            # Any empty value is reported as None
            info[key] = value if value else None

    # this is deprecated so safe to set to -1
    info["downloads"] = {"last_day": -1, "last_month": -1, "last_week": -1}
    # fields missing: bugtrack_url, description_content_type, docs_url, package_url,
    # project_urls {Download, Homepage}, release_url, yanked, yanked_reason
    return info


def python_content_to_releases(content_query, base_path):
    """
    Group the download information of every content unit by version.

    Returns a dictionary whose keys are versions and whose values are lists with
    the download information of each content unit of that version.
    """
    by_version = {}
    for unit in content_query:
        if unit.version not in by_version:
            by_version[unit.version] = []
        by_version[unit.version].append(python_content_to_download_info(unit, base_path))
    return by_version


def python_content_to_urls(contents, base_path):
    """
    Return the download information of each content unit in contents, in order.
    """
    download_infos = []
    for unit in contents:
        download_infos.append(python_content_to_download_info(unit, base_path))
    return download_infos


def python_content_to_download_info(content, base_path):
    """
    Describe how one PythonPackageContent can be downloaded.

    Takes the content unit and the base path of the distribution and returns the
    dictionary of download information used by both Releases and Urls.
    """
    content_artifact = content.contentartifact_set.first()

    # Use the stored artifact when there is one, otherwise fall back to the first
    # remote artifact recorded for this content artifact.
    artifact = content_artifact.artifact
    if not artifact:
        from pulpcore.plugin import models
        artifact = models.RemoteArtifact.objects.filter(content_artifact=content_artifact).first()

    # <origin>/<content prefix>/<distribution base path>/<filename>
    url_segments = [
        settings.CONTENT_ORIGIN.strip("/"),
        settings.CONTENT_PATH_PREFIX.strip("/"),
        base_path.strip("/"),
        content.filename,
    ]
    url = "/".join(url_segments)

    return {
        "comment_text": "",
        "digests": {"md5": artifact.md5, "sha256": artifact.sha256},
        # Download counts are not tracked
        "downloads": -1,
        "filename": content.filename,
        "has_sig": False,
        "md5_digest": artifact.md5,
        "packagetype": content.packagetype,
        "python_version": content.python_version,
        "requires_python": content.requires_python or None,
        "size": artifact.size,
        "upload_time": str(artifact.pulp_created),
        "upload_time_iso_8601": str(artifact.pulp_created.isoformat()),
        "url": url,
        "yanked": False,
        "yanked_reason": None
    }
Loading

0 comments on commit 819af76

Please sign in to comment.