Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PyPI live json API #306

Merged
merged 1 commit into from
Sep 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/2886.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implemented PyPI's JSON API at the content endpoint '/pypi/{package-name}/json'. Pulp can now perform basic syncing from other Pulp Python instances.
1 change: 1 addition & 0 deletions CHANGES/3627.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Sync now includes a Python package's classifiers in the content unit.
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 2.2.3 on 2020-08-12 22:25

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the ``python_version`` text column to ``pythonpackagecontent``."""

    dependencies = [
        ("python", "0001_initial"),
    ]

    operations = [
        # The empty-string default only backfills rows that already exist;
        # with ``preserve_default=False`` it is not kept on the field itself.
        migrations.AddField(
            model_name="pythonpackagecontent",
            name="python_version",
            field=models.TextField(default=""),
            preserve_default=False,
        ),
    ]
36 changes: 36 additions & 0 deletions pulp_python/app/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from logging import getLogger

from aiohttp.web import json_response
from django.contrib.postgres.fields import JSONField
from django.db import models

Expand All @@ -11,6 +12,9 @@
Repository
)

from pathlib import PurePath
from .utils import python_content_to_json, PYPI_LAST_SERIAL, PYPI_SERIAL_CONSTANT

log = getLogger(__name__)


Expand All @@ -33,6 +37,37 @@ class PythonDistribution(PublicationDistribution):

TYPE = 'python'

def content_handler(self, path):
    """
    Handler to serve extra, non-Artifact content for this Distribution.

    Two request shapes are recognised:

    * ``pypi/<name>/json`` - metadata built from the latest version of
      the package found in the published repository version
    * ``pypi/<name>/<version>/json`` - metadata for one specific version

    Package names are matched case-insensitively.

    Note: this method is synchronous because that is how the
    ``content_handler()`` interface is declared in pulpcore.

    Args:
        path (str): The path being requested
    Returns:
        None if there is no content to be served at path. Otherwise a
        aiohttp.web_response.Response with the content.
    """
    path = PurePath(path)
    depth = len(path.parts)
    name = None
    version = None
    # PurePath.match() anchors relative patterns at the right-hand end of the
    # path, so "extra/pypi/foo/json" would also match "pypi/*/json" and the
    # positional lookups below would then pick the wrong components. Checking
    # the component count makes the pattern cover the whole path.
    if depth == 4 and path.match("pypi/*/*/json"):
        _, name, version, _ = path.parts
    elif depth == 3 and path.match("pypi/*/json"):
        name = path.parts[1]
    if not name:
        return None

    package_content = PythonPackageContent.objects.filter(
        pk__in=self.publication.repository_version.content,
        name__iexact=name
    )
    # TODO Change this value to the Repo's serial value when implemented
    headers = {PYPI_LAST_SERIAL: str(PYPI_SERIAL_CONSTANT)}
    json_body = python_content_to_json(self.base_path, package_content, version=version)
    if json_body:
        return json_response(json_body, headers=headers)

    return None

class Meta:
default_related_name = "%(app_label)s_%(model_name)s"

Expand All @@ -54,6 +89,7 @@ class PythonPackageContent(Content):
name = models.TextField()
version = models.TextField()
# Optional metadata
python_version = models.TextField()
metadata_version = models.TextField()
summary = models.TextField()
description = models.TextField()
Expand Down
134 changes: 134 additions & 0 deletions pulp_python/app/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
import json
from collections import defaultdict
from django.conf import settings
from packaging.version import parse

# Name of the HTTP response header that carries the serial.
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
# TODO: This serial constant is temporary until Python repositories implement serials.
PYPI_SERIAL_CONSTANT = 1000000000
Copy link
Member

@mdellweg mdellweg Sep 4, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I am missing the point, but hardcoding a serial number feels wrong.

edit: I saw the corresponding comment in that other file. Maybe add something here, too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment for that, thanks.



def parse_project_metadata(project):
Expand Down Expand Up @@ -33,6 +40,7 @@ def parse_project_metadata(project):
package['provides_dist'] = json.dumps(project.get('provides_dist', []))
package['obsoletes_dist'] = json.dumps(project.get('obsoletes_dist', []))
package['requires_external'] = json.dumps(project.get('requires_external', []))
package['classifiers'] = json.dumps(project.get('classifiers', []))

return package

Expand Down Expand Up @@ -60,7 +68,133 @@ def parse_metadata(project, version, distribution):
package['version'] = version
package['url'] = distribution.get('url') or ""
package['sha256_digest'] = distribution.get('digests', {}).get('sha256') or ""
package['python_version'] = distribution.get('python_version') or ""

package.update(parse_project_metadata(project))

return package


def python_content_to_json(base_path, content_query, version=None):
    """
    Build the PyPI-style JSON document for one package.

    https://www.python.org/dev/peps/pep-0566/

    The document has four top-level keys:
        last_serial: int, always 0 here
        info: dict describing the selected version
        releases: dict mapping every version to its list of files
        urls: list of files belonging to the selected version

    Args:
        base_path: base path of the distribution, used to build download URLs
        content_query: QuerySet of PythonPackageContent for a single package
        version: optional version to select; when omitted the latest one is used
    Returns:
        The metadata dictionary, or None when no content matches.
    """
    selected = latest_content_version(content_query, version)
    if not selected:
        return None
    return {
        # For now the serial field isn't supported by Pulp
        "last_serial": 0,
        "info": python_content_to_info(selected[0]),
        "releases": python_content_to_releases(content_query, base_path),
        "urls": python_content_to_urls(selected, base_path),
    }


def latest_content_version(content_query, version):
    """
    Pick the content instances that belong to a single version of a package.

    Versions are compared after parsing, so differently spelled but equal
    version strings are treated as the same release.

    Args:
        content_query: iterable (QuerySet) of PythonPackageContent
        version: version string to look for; if falsy, the latest version
            present in ``content_query`` is selected instead
    Returns:
        A list of the content instances with the selected version. The list is
        empty if ``content_query`` is empty or the requested version is absent.
    """
    if version:
        # An explicit request must never fall through to the "newest wins"
        # logic, otherwise a newer release would replace the requested one.
        wanted = parse(version)
        return [content for content in content_query if parse(content.version) == wanted]

    newest = None
    latest_content = []
    for content in content_query:
        candidate = parse(content.version)
        if newest is None or candidate > newest:
            newest = candidate
            latest_content = [content]
        elif candidate == newest:
            latest_content.append(content)

    return latest_content


def _decode_json_field(raw):
    """Decode a JSON-encoded metadata field, mapping empty values to None."""
    if not raw:
        # Covers None and "", which json.loads() would reject.
        return None
    if isinstance(raw, (str, bytes, bytearray)):
        return json.loads(raw) or None
    # NOTE(review): a non-string value is assumed to be already-decoded JSON;
    # confirm against the model's field type.
    return raw


def python_content_to_info(latest_content):
    """
    Takes in a PythonPackageContent instance and returns a dictionary of the Info fields.

    Empty values are reported as None. ``requires_dist`` and ``classifiers``
    are stored JSON-encoded and are decoded here. The ``downloads`` counters
    are fixed at -1.

    Fields missing: bugtrack_url, description_content_type, docs_url, package_url,
    project_urls {Download, Homepage}, release_url, yanked, yanked_reason
    """
    return {
        "name": latest_content.name,
        "version": latest_content.version,
        "summary": latest_content.summary or None,
        "description": latest_content.description or None,
        "keywords": latest_content.keywords or None,
        "home_page": latest_content.home_page or None,
        "downloads": {"last_day": -1, "last_month": -1, "last_week": -1},
        "download_url": latest_content.download_url or None,
        "author": latest_content.author or None,
        "author_email": latest_content.author_email or None,
        "maintainer": latest_content.maintainer or None,
        "maintainer_email": latest_content.maintainer_email or None,
        "license": latest_content.license or None,
        "requires_python": latest_content.requires_python or None,
        "project_url": latest_content.project_url or None,
        "platform": latest_content.platform or None,
        "requires_dist": _decode_json_field(latest_content.requires_dist),
        "classifiers": _decode_json_field(latest_content.classifiers),
    }


def python_content_to_releases(content_query, base_path):
    """
    Group the download information of every content unit by version.

    Args:
        content_query: QuerySet of PythonPackageContent
        base_path: base path of the distribution, used to build download URLs
    Returns:
        A dictionary whose keys are version strings and whose values are lists
        with one download-information dictionary per content unit of that version.
    """
    by_version = defaultdict(list)
    for item in content_query:
        download_info = python_content_to_download_info(item, base_path)
        by_version[item.version].append(download_info)
    return by_version


def python_content_to_urls(contents, base_path):
    """
    Build the list of download-information dictionaries for ``contents``.

    Args:
        contents: iterable of PythonPackageContent (the latest content)
        base_path: base path of the distribution, used to build download URLs
    Returns:
        A list with one download-information dictionary per item, in order.
    """
    return [
        python_content_to_download_info(item, base_path)
        for item in contents
    ]


def python_content_to_download_info(content, base_path):
    """
    Describe a single downloadable file of a package.

    The resulting dictionary is used for both the "releases" and the "urls"
    sections. Digests, size and upload times are read from the Artifact of the
    content's first ContentArtifact; if that ContentArtifact has no Artifact,
    its first RemoteArtifact is used instead.

    Args:
        content: a PythonPackageContent instance
        base_path: base path of the distribution serving the content
    Returns:
        A dictionary of download information. Download counters, signature and
        yank fields are fixed placeholder values.
    """
    content_artifact = content.contentartifact_set.first()
    artifact = content_artifact.artifact
    if not artifact:
        # Nothing stored for this ContentArtifact; fall back to the remote record.
        from pulpcore.plugin import models
        artifact = models.RemoteArtifact.objects.filter(content_artifact=content_artifact).first()

    # Join the URL pieces with single slashes regardless of how the settings
    # and the base path are written.
    url_parts = (
        settings.CONTENT_ORIGIN.strip("/"),
        settings.CONTENT_PATH_PREFIX.strip("/"),
        base_path.strip("/"),
        content.filename,
    )
    url = "/".join(url_parts)

    md5 = artifact.md5
    sha256 = artifact.sha256
    created = artifact.pulp_created
    return {
        "comment_text": "",
        "digests": {"md5": md5, "sha256": sha256},
        "downloads": -1,
        "filename": content.filename,
        "has_sig": False,
        "md5_digest": md5,
        "packagetype": content.packagetype,
        "python_version": content.python_version,
        "requires_python": content.requires_python or None,
        "size": artifact.size,
        "upload_time": str(created),
        "upload_time_iso_8601": str(created.isoformat()),
        "url": url,
        "yanked": False,
        "yanked_reason": None
    }
Loading