-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
PyPI live json API #306
PyPI live json API #306
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Implemented PyPI's JSON API at the content endpoint '/pypi/{package-name}/json'. Pulp can now perform basic syncing from other Pulp Python instances. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Sync now includes a Python package's classifiers in the content unit. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# Generated by Django 2.2.3 on 2020-08-12 22:25 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Add the 'python_version' text field to the 'pythonpackagecontent' model."""

    dependencies = [("python", "0001_initial")]

    operations = [
        migrations.AddField(
            model_name="pythonpackagecontent",
            name="python_version",
            field=models.TextField(default=""),
            # The empty-string default is only there to populate existing rows during
            # this migration; it is not kept as the field's permanent default.
            preserve_default=False,
        ),
    ]
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,11 @@ | ||
import json | ||
from collections import defaultdict | ||
from django.conf import settings | ||
from packaging.version import parse | ||
|
||
PYPI_LAST_SERIAL = "X-PYPI-LAST-SERIAL"
# TODO: This serial constant is temporary until Python repositories implement serials.
PYPI_SERIAL_CONSTANT = 1000000000
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe I am missing the point, but hardcoding a serial number feels wrong. Edit: I saw the corresponding comment in that other file. Maybe add something here, too? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Added a comment for that, thanks. |
||
|
||
|
||
def parse_project_metadata(project): | ||
|
@@ -33,6 +40,7 @@ def parse_project_metadata(project): | |
package['provides_dist'] = json.dumps(project.get('provides_dist', [])) | ||
package['obsoletes_dist'] = json.dumps(project.get('obsoletes_dist', [])) | ||
package['requires_external'] = json.dumps(project.get('requires_external', [])) | ||
package['classifiers'] = json.dumps(project.get('classifiers', [])) | ||
|
||
return package | ||
|
||
|
@@ -60,7 +68,133 @@ def parse_metadata(project, version, distribution): | |
package['version'] = version | ||
package['url'] = distribution.get('url') or "" | ||
package['sha256_digest'] = distribution.get('digests', {}).get('sha256') or "" | ||
package['python_version'] = distribution.get('python_version') or "" | ||
|
||
package.update(parse_project_metadata(project)) | ||
|
||
return package | ||
|
||
|
||
def python_content_to_json(base_path, content_query, version=None):
    """
    Build the PyPI-style JSON metadata document for the content in content_query.

    The content to describe is chosen by latest_content_version(): the units of
    'version' when it is given, otherwise the units of the latest version.

    The returned dictionary holds, in this order:
        last_serial: always 0
        info: python_content_to_info() of the first selected unit
        releases: python_content_to_releases() of the whole content_query
        urls: python_content_to_urls() of the selected units

    Returns None when no content was selected, e.g. 'version' is specified but not
    found within content_query.
    """
    selected = latest_content_version(content_query, version)
    if not selected:
        return None
    return {
        "last_serial": 0,  # For now the serial field isn't supported by Pulp
        "info": python_content_to_info(selected[0]),
        "releases": python_content_to_releases(content_query, base_path),
        "urls": python_content_to_urls(selected, base_path),
    }
|
||
|
||
def latest_content_version(content_query, version):
    """
    Select the content units that belong to one single version of a package.

    If 'version' is specified, only the units whose parsed version equals the parsed
    'version' are returned, and the result is an empty list if nothing matches.
    If 'version' is falsy, the units carrying the highest parsed version found in
    content_query are returned.

    Args:
        content_query: Iterable of content units exposing a 'version' string attribute.
        version: Version string to look for, or None/"" to select the latest version.

    Returns:
        list: The selected content units, in the order they were iterated.
    """
    if version:
        # Exact lookup: a newer version in the query must never replace the request,
        # otherwise callers cannot detect "version not found".
        wanted = parse(version)
        return [content for content in content_query if parse(content.version) == wanted]

    latest_version = None
    latest_content = []
    for content in content_query:
        candidate = parse(content.version)
        if latest_version is None or candidate > latest_version:
            # Strictly newer version: restart the collection with this unit.
            latest_version = candidate
            latest_content = [content]
        elif candidate == latest_version:
            # Another file (e.g. sdist and wheel) of the current latest version.
            latest_content.append(content)
    return latest_content
|
||
|
||
def python_content_to_info(latest_content):
    """
    Map one PythonPackageContent instance onto the "info" dictionary of the JSON API.

    Falsy text attributes are reported as None. 'requires_dist' and 'classifiers' are
    stored as JSON strings; they are decoded and reported as None when the decoded
    value is falsy. The download counters are fixed at -1.
    """
    def text_or_none(field):
        return getattr(latest_content, field) or None

    def decoded_or_none(field):
        return json.loads(getattr(latest_content, field)) or None

    info = {"name": latest_content.name, "version": latest_content.version}
    for field in ("summary", "description", "keywords", "home_page"):
        info[field] = text_or_none(field)
    info["downloads"] = {"last_day": -1, "last_month": -1, "last_week": -1}
    for field in (
        "download_url",
        "author",
        "author_email",
        "maintainer",
        "maintainer_email",
        "license",
        "requires_python",
        "project_url",
        "platform",
    ):
        info[field] = text_or_none(field)
    for field in ("requires_dist", "classifiers"):
        info[field] = decoded_or_none(field)
    # fields missing: bugtrack_url, description_content_type, docs_url, package_url,
    # project_urls {Download, Homepage}, release_url, yanked, yanked_reason
    return info
|
||
|
||
def python_content_to_releases(content_query, base_path):
    """
    Group the download information of every content unit by its version.

    Returns a defaultdict whose keys are version strings and whose values are lists
    with one python_content_to_download_info() dictionary per unit of that version.
    """
    by_version = defaultdict(list)
    for unit in content_query:
        files = by_version[unit.version]
        files.append(python_content_to_download_info(unit, base_path))
    return by_version
|
||
|
||
def python_content_to_urls(contents, base_path):
    """
    Return a list with the download information of each content unit in contents,
    in the same order as contents.
    """
    return [
        python_content_to_download_info(unit, base_path)
        for unit in contents
    ]
|
||
|
||
def python_content_to_download_info(content, base_path):
    """
    Describe one PythonPackageContent as a download-information dictionary.

    This dictionary is used for the entries of both Releases and Urls. Digests, size
    and creation time are read from the artifact of the content's first
    ContentArtifact; when that artifact is not set, the first RemoteArtifact that
    references the ContentArtifact is used instead.

    The url is built from settings.CONTENT_ORIGIN, settings.CONTENT_PATH_PREFIX and
    base_path (each stripped of leading/trailing slashes) plus the filename, joined
    with "/".
    """
    content_artifact = content.contentartifact_set.first()
    artifact = content_artifact.artifact
    if not artifact:
        # Nothing stored for this file: fall back to the RemoteArtifact record.
        from pulpcore.plugin import models
        remote_artifacts = models.RemoteArtifact.objects.filter(
            content_artifact=content_artifact
        )
        artifact = remote_artifacts.first()

    url_parts = (
        settings.CONTENT_ORIGIN.strip("/"),
        settings.CONTENT_PATH_PREFIX.strip("/"),
        base_path.strip("/"),
        content.filename,
    )
    created = artifact.pulp_created
    download_info = {
        "comment_text": "",
        "digests": {"md5": artifact.md5, "sha256": artifact.sha256},
        "downloads": -1,
        "filename": content.filename,
        "has_sig": False,
        "md5_digest": artifact.md5,
        "packagetype": content.packagetype,
        "python_version": content.python_version,
        "requires_python": content.requires_python or None,
        "size": artifact.size,
        "upload_time": str(created),
        "upload_time_iso_8601": str(created.isoformat()),
        "url": "/".join(url_parts),
        "yanked": False,
        "yanked_reason": None,
    }
    return download_info
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Someone more familiar with the content app / live APIs should take a look at this also, maybe Ina or Dennis.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is the correct use of the content_handler() interface. I read about how PurePath.match() works and this looks like a good use of it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Actually, I was wondering why the `content_handler` wasn't using an `async` interface, but it is declared like that in pulpcore. Not much you can do about it here.