diff --git a/docs/workflows/upload-publish.rst b/docs/workflows/upload-publish.rst index e5884e76515..9b2aa580b5d 100644 --- a/docs/workflows/upload-publish.rst +++ b/docs/workflows/upload-publish.rst @@ -1,2 +1,35 @@ Upload and Publish ================== + +Chunked Uploads +--------------- + +For large file uploads, Pulp supports uploading files in chunks. To begin uploading a file in +chunks, an initial PUT request must be sent to the ``/pulp/api/v3/uploads`` endpoint:: + + http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676' + +This returns an upload href (e.g. ``/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/``) that can +be used for subsequent chunks:: + + http --form PUT :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676' + +Once all chunks have been uploaded, a final POST request with the file md5 can be sent to complete the +upload:: + + http POST :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ md5=037a47d93670e64f2b1038e6f90e4cfd + +Then the artifact may be created with the upload href:: + + http POST :8000/pulp/api/v3/artifacts/ upload=/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ + +Note that after creating an artifact from an upload, the upload gets deleted and cannot be re-used. 
+ +Putting this all together, here is an example that uploads a 1.iso file in two chunks:: + + curl -O https://repos.fedorapeople.org/repos/pulp/pulp/fixtures/file-large/1.iso + split --bytes=6M 1.iso chunk + export UPLOAD=$(http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676' | jq -r '._href') + http --form PUT :8000$UPLOAD file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676' + http POST :8000$UPLOAD md5=037a47d93670e64f2b1038e6f90e4cfd + http POST :8000/pulp/api/v3/artifacts/ upload=$UPLOAD diff --git a/pulpcore/app/files.py b/pulpcore/app/files.py index 9e487e7b429..684f7597a1a 100644 --- a/pulpcore/app/files.py +++ b/pulpcore/app/files.py @@ -1,4 +1,5 @@ import hashlib +import os from django.core.files.uploadhandler import TemporaryFileUploadHandler from django.core.files.uploadedfile import TemporaryUploadedFile @@ -14,6 +15,27 @@ def __init__(self, name, content_type, size, charset, content_type_extra=None): self.hashers[hasher] = getattr(hashlib, hasher)() super().__init__(name, content_type, size, charset, content_type_extra) + @classmethod + def from_file(cls, file): + """ + Create a PulpTemporaryUploadedFile from a file system file + + The file is hashed in chunks so that large uploads are never read into memory all at once. + + Args: + file (File): a filesystem file exposing ``name``, ``size`` and ``chunks()`` + + Returns: + PulpTemporaryUploadedFile: instantiated instance from file + """ + name = os.path.basename(file.name) + instance = cls(name, '', file.size, '', '') + instance.file = file + for chunk in file.chunks(): + for hasher in hashlib.algorithms_guaranteed: + instance.hashers[hasher].update(chunk) + return instance + class HashingFileUploadHandler(TemporaryFileUploadHandler): """ diff --git a/pulpcore/app/migrations/0004_upload.py b/pulpcore/app/migrations/0004_upload.py new file mode 100644 index 00000000000..c8f30b4cbd2 --- /dev/null +++ b/pulpcore/app/migrations/0004_upload.py @@ -0,0 +1,34 @@ +# Generated by Django 2.1.7 on 2019-02-28 11:57 + +from django.conf import settings +from django.db import migrations, models +import 
django.db.models.deletion +import drf_chunked_upload.models +import uuid + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ('pulp_app', '0003_repositoryversioncontentdetails'), + ] + + operations = [ + migrations.CreateModel( + name='Upload', + fields=[ + ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)), + ('file', models.FileField(max_length=255, null=True, upload_to=drf_chunked_upload.models.generate_filename)), + ('filename', models.CharField(max_length=255)), + ('offset', models.BigIntegerField(default=0)), + ('created_at', models.DateTimeField(auto_now_add=True)), + ('status', models.PositiveSmallIntegerField(choices=[(1, 'Incomplete'), (2, 'Complete')], default=1)), + ('completed_at', models.DateTimeField(blank=True, null=True)), + ('user', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='upload', to=settings.AUTH_USER_MODEL)), + ], + options={ + 'abstract': False, + }, + ), + ] diff --git a/pulpcore/app/models/__init__.py b/pulpcore/app/models/__init__.py index c74706c96bf..e43e3c966be 100644 --- a/pulpcore/app/models/__init__.py +++ b/pulpcore/app/models/__init__.py @@ -1,7 +1,7 @@ # https://docs.djangoproject.com/en/dev/topics/db/models/#organizing-models-in-a-package from .base import Model, MasterModel # noqa -from .content import Artifact, Content, ContentArtifact, RemoteArtifact # noqa +from .content import Artifact, Content, ContentArtifact, RemoteArtifact, Upload # noqa from .generic import GenericRelationModel # noqa from .publication import ( # noqa ContentGuard, diff --git a/pulpcore/app/models/content.py b/pulpcore/app/models/content.py index 2b5a6018b19..99090399e56 100644 --- a/pulpcore/app/models/content.py +++ b/pulpcore/app/models/content.py @@ -6,6 +6,7 @@ from django.core import validators from django.db import IntegrityError, models, transaction from django.forms.models 
import model_to_dict +from drf_chunked_upload.models import ChunkedUpload from itertools import chain @@ -340,3 +341,7 @@ class RemoteArtifact(Model, QueryMixin): class Meta: unique_together = ('content_artifact', 'remote') + + +class Upload(ChunkedUpload): + pass diff --git a/pulpcore/app/serializers/__init__.py b/pulpcore/app/serializers/__init__.py index a8a923dcb23..8050716c236 100644 --- a/pulpcore/app/serializers/__init__.py +++ b/pulpcore/app/serializers/__init__.py @@ -25,6 +25,7 @@ NoArtifactContentSerializer, SingleArtifactContentSerializer, MultipleArtifactContentSerializer, + UploadSerializer, ) from .progress import ProgressReportSerializer # noqa from .publication import ( # noqa diff --git a/pulpcore/app/serializers/content.py b/pulpcore/app/serializers/content.py index c37c1c3801c..cafe04b6211 100644 --- a/pulpcore/app/serializers/content.py +++ b/pulpcore/app/serializers/content.py @@ -2,10 +2,11 @@ import hashlib from django.db import transaction +from drf_chunked_upload.serializers import ChunkedUploadSerializer from rest_framework import serializers from rest_framework.validators import UniqueValidator -from pulpcore.app import models +from pulpcore.app import models, files from pulpcore.app.serializers import base, fields @@ -98,7 +99,14 @@ class ArtifactSerializer(base.ModelSerializer): file = serializers.FileField( help_text=_("The stored file."), - required=True + required=False + ) + + upload = serializers.HyperlinkedRelatedField( + view_name="upload-detail", + write_only=True, + required=False, + queryset=models.Upload.objects.filter(status=models.Upload.COMPLETE) ) size = serializers.IntegerField( @@ -156,6 +164,15 @@ def validate(self, data): """ super().validate(data) + if ('file' not in data and 'upload' not in data) or \ + ('file' in data and 'upload' in data): + raise serializers.ValidationError(_("Either 'file' or 'upload' parameter must be " + "supplied but not both.")) + + if 'upload' in data: + self.upload = data.pop('upload') + 
data['file'] = files.PulpTemporaryUploadedFile.from_file(self.upload.file.file) + if 'size' in data: if data['file'].size != int(data['size']): raise serializers.ValidationError(_("The size did not match actual size of file.")) @@ -180,7 +197,42 @@ def validate(self, data): validator(digest) return data + def create(self, validated_data): + """ + Create the artifact and delete its associated upload (if there is one) + + Args: + validated_data (dict): Data to save to the database + + Returns: + The artifact instance returned by the parent serializer's ``create()`` + """ + artifact = super().create(validated_data) + # ``self.upload`` is only set by validate() when the artifact is created from an upload + upload = getattr(self, 'upload', None) + if upload is not None: + # creating an artifact will move the upload file so we need to delete the db record + upload.delete() + return artifact + class Meta: model = models.Artifact fields = base.ModelSerializer.Meta.fields + ('file', 'size', 'md5', 'sha1', 'sha224', - 'sha256', 'sha384', 'sha512') + 'sha256', 'sha384', 'sha512', 'upload') + + +class UploadSerializer(base.ModelSerializer): + """Serializer for chunked uploads.""" + viewname = 'uploads:upload-detail' + + _href = base.IdentityField( + view_name='upload-detail', + ) + + file = serializers.FileField( + write_only=True, + ) + + class Meta(ChunkedUploadSerializer.Meta): + model = models.Upload + fields = ('_href', 'file', 'offset', 'expires_at') diff --git a/pulpcore/app/settings.py b/pulpcore/app/settings.py index 9a59f31f4c0..8e43d6d36d3 100644 --- a/pulpcore/app/settings.py +++ b/pulpcore/app/settings.py @@ -63,6 +63,7 @@ 'django_filters', 'drf_yasg', 'rest_framework', + 'drf_chunked_upload', # pulp core app 'pulpcore.app', ] diff --git a/pulpcore/app/urls.py b/pulpcore/app/urls.py index ebb90e3841c..01aaf51a055 100644 --- a/pulpcore/app/urls.py +++ b/pulpcore/app/urls.py @@ -8,7 +8,7 @@ from pulpcore.app.apps import pulp_plugin_configs from pulpcore.app.openapigenerator import PulpOpenAPISchemaGenerator -from pulpcore.app.views import OrphansView, StatusView +from pulpcore.app.views import OrphansView, StatusView, UploadView from pulpcore.constants import API_ROOT import logging @@ -144,6 
+144,17 @@ def __repr__(self): name='schema-redoc') ) +# chunked upload routes: the named group ``pk`` identifies an existing upload +urlpatterns.append(url( + r'^{api_root}uploads/(?P<pk>.*)/$'.format(api_root=API_ROOT), + UploadView.as_view(), + name='upload-detail') +) +urlpatterns.append(url( + r'^{api_root}uploads/$'.format(api_root=API_ROOT), + UploadView.as_view()) +) + schema_view = get_schema_view( title='Pulp API', permission_classes=[permissions.AllowAny], diff --git a/pulpcore/app/views/__init__.py b/pulpcore/app/views/__init__.py index 86eedea5126..1487b884088 100644 --- a/pulpcore/app/views/__init__.py +++ b/pulpcore/app/views/__init__.py @@ -1,2 +1,3 @@ from .orphans import OrphansView # noqa from .status import StatusView # noqa +from .upload import UploadView # noqa diff --git a/pulpcore/app/views/upload.py b/pulpcore/app/views/upload.py new file mode 100644 index 00000000000..eee713522ae --- /dev/null +++ b/pulpcore/app/views/upload.py @@ -0,0 +1,10 @@ +from drf_chunked_upload.views import ChunkedUploadView +from pulpcore.app.models import Upload +from pulpcore.app.serializers import UploadSerializer + + +class UploadView(ChunkedUploadView): + """View for chunked uploads.""" + model = Upload + serializer_class = UploadSerializer + queryset = Upload.objects.all() diff --git a/pulpcore/app/viewsets/content.py b/pulpcore/app/viewsets/content.py index 50e2f74e53a..7c376c91f3a 100644 --- a/pulpcore/app/viewsets/content.py +++ b/pulpcore/app/viewsets/content.py @@ -2,7 +2,7 @@ from django.db import models from rest_framework import status, mixins -from rest_framework.parsers import MultiPartParser, FormParser +from rest_framework.parsers import FormParser, JSONParser, MultiPartParser from rest_framework.response import Response from pulpcore.app.models import Artifact, Content @@ -39,7 +39,7 @@ class ArtifactViewSet(NamedModelViewSet, queryset = Artifact.objects.all() serializer_class = ArtifactSerializer filterset_class = ArtifactFilter - parser_classes = (MultiPartParser, FormParser) + parser_classes = (MultiPartParser, FormParser, 
JSONParser) def destroy(self, request, pk): """ diff --git a/setup.py b/setup.py index 7aa821f100d..653cf39fa66 100755 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ 'redis<3.2.0', 'setuptools', 'dynaconf>=1.0.4', - 'whitenoise' + 'whitenoise', + 'drf-chunked-upload' ] setup(