Skip to content

Commit

Permalink
Add support for chunked uploads
Browse files Browse the repository at this point in the history
  • Loading branch information
David Davis committed Mar 4, 2019
1 parent 14f4be9 commit 895d6bd
Show file tree
Hide file tree
Showing 13 changed files with 170 additions and 8 deletions.
33 changes: 33 additions & 0 deletions docs/workflows/upload-publish.rst
Original file line number Diff line number Diff line change
@@ -1,2 +1,35 @@
Upload and Publish
==================

Chunked Uploads
---------------

For large file uploads, Pulp supports uploading files in chunks. To begin uploading a file in
chunks, an initial PUT request must be sent to the ``/pulp/api/v3/uploads/`` endpoint::

http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676'

This returns an upload href (e.g. ``/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/``) that can
be used for subsequent chunks::

http --form PUT :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676'

Once all chunks have been uploaded, a final POST request with the file's MD5 checksum can be sent to
complete the upload::

http POST :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ md5=037a47d93670e64f2b1038e6f90e4cfd

Then the artifact may be created with the upload href::

http POST :8000/pulp/api/v3/artifacts/ upload=/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/

Note that after creating an artifact from an upload, the upload gets deleted and cannot be re-used.

Putting this all together, here is an example that uploads a 1.iso file in two chunks::

curl -O https://repos.fedorapeople.org/repos/pulp/pulp/fixtures/file-large/1.iso
split --bytes=6M 1.iso chunk
export UPLOAD=$(http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676' | jq -r '._href')
http --form PUT :8000$UPLOAD file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676'
http POST :8000$UPLOAD md5=037a47d93670e64f2b1038e6f90e4cfd
http POST :8000/pulp/api/v3/artifacts/ upload=$UPLOAD
20 changes: 20 additions & 0 deletions pulpcore/app/files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import hashlib
import os
from django.core.files.uploadhandler import TemporaryFileUploadHandler
from django.core.files.uploadedfile import TemporaryUploadedFile

Expand All @@ -14,6 +15,25 @@ def __init__(self, name, content_type, size, charset, content_type_extra=None):
self.hashers[hasher] = getattr(hashlib, hasher)()
super().__init__(name, content_type, size, charset, content_type_extra)

@classmethod
def from_file(cls, file):
    """
    Create a PulpTemporaryUploadedFile from a file system file.

    The file's content is fed to the hashers chunk by chunk so that a large upload is never
    loaded into memory in one piece.

    Args:
        file (File): a filesystem file; it must expose ``name``, ``size`` and ``chunks()``

    Returns:
        PulpTemporaryUploadedFile: instantiated instance from file
    """
    name = os.path.basename(file.name)
    instance = cls(name, '', file.size, '', '')
    # Replace the temporary file created by the constructor with the existing file.
    instance.file = file
    # Stream the content rather than calling file.read(), which would buffer the whole file.
    for chunk in file.chunks():
        # Update exactly the hashers the constructor registered instead of re-deriving the
        # algorithm list, so the two can never get out of sync.
        for hasher in instance.hashers.values():
            hasher.update(chunk)
    return instance


class HashingFileUploadHandler(TemporaryFileUploadHandler):
"""
Expand Down
34 changes: 34 additions & 0 deletions pulpcore/app/migrations/0004_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 2.1.7 on 2019-02-28 11:57

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import drf_chunked_upload.models
import uuid


class Migration(migrations.Migration):
    """Create the ``Upload`` model used to track chunked file uploads."""

    # Needs the (swappable) user model for the ``user`` foreign key and must run after the
    # previous pulp_app migration.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('pulp_app', '0003_repositoryversioncontentdetails'),
    ]

    operations = [
        migrations.CreateModel(
            name='Upload',
            # Auto-generated field list; the file name generator comes from drf_chunked_upload.
            fields=[
                ('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
                ('file', models.FileField(max_length=255, null=True, upload_to=drf_chunked_upload.models.generate_filename)),
                ('filename', models.CharField(max_length=255)),
                ('offset', models.BigIntegerField(default=0)),
                ('created_at', models.DateTimeField(auto_now_add=True)),
                ('status', models.PositiveSmallIntegerField(choices=[(1, 'Incomplete'), (2, 'Complete')], default=1)),
                ('completed_at', models.DateTimeField(blank=True, null=True)),
                # Deleting a user cascades to that user's uploads.
                ('user', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='upload', to=settings.AUTH_USER_MODEL)),
            ],
            options={
                'abstract': False,
            },
        ),
    ]
2 changes: 1 addition & 1 deletion pulpcore/app/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# https://docs.djangoproject.com/en/dev/topics/db/models/#organizing-models-in-a-package

from .base import Model, MasterModel # noqa
from .content import Artifact, Content, ContentArtifact, RemoteArtifact # noqa
from .content import Artifact, Content, ContentArtifact, RemoteArtifact, Upload # noqa
from .generic import GenericRelationModel # noqa
from .publication import ( # noqa
ContentGuard,
Expand Down
5 changes: 5 additions & 0 deletions pulpcore/app/models/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.core import validators
from django.db import IntegrityError, models, transaction
from django.forms.models import model_to_dict
from drf_chunked_upload.models import ChunkedUpload

from itertools import chain

Expand Down Expand Up @@ -340,3 +341,7 @@ class RemoteArtifact(Model, QueryMixin):

class Meta:
unique_together = ('content_artifact', 'remote')


class Upload(ChunkedUpload):
    """
    A chunked upload.

    Everything is inherited unchanged from drf_chunked_upload's ``ChunkedUpload``; this
    subclass adds no fields or behavior of its own.
    """
1 change: 1 addition & 0 deletions pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
NoArtifactContentSerializer,
SingleArtifactContentSerializer,
MultipleArtifactContentSerializer,
UploadSerializer,
)
from .progress import ProgressReportSerializer # noqa
from .publication import ( # noqa
Expand Down
52 changes: 49 additions & 3 deletions pulpcore/app/serializers/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import hashlib

from django.db import transaction
from drf_chunked_upload.serializers import ChunkedUploadSerializer
from rest_framework import serializers
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app import models, files
from pulpcore.app.serializers import base, fields


Expand Down Expand Up @@ -98,7 +99,14 @@ class ArtifactSerializer(base.ModelSerializer):

file = serializers.FileField(
help_text=_("The stored file."),
required=True
required=False
)

upload = serializers.HyperlinkedRelatedField(
view_name="upload-detail",
write_only=True,
required=False,
queryset=models.Upload.objects.filter(status=models.Upload.COMPLETE)
)

size = serializers.IntegerField(
Expand Down Expand Up @@ -156,6 +164,15 @@ def validate(self, data):
"""
super().validate(data)

if ('file' not in data and 'upload' not in data) or \
('file' in data and 'upload' in data):
raise serializers.ValidationError(_("Either 'file' or 'upload' parameter must be "
"supplied but not both."))

if 'upload' in data:
self.upload = data.pop('upload')
data['file'] = files.PulpTemporaryUploadedFile.from_file(self.upload.file.file)

if 'size' in data:
if data['file'].size != int(data['size']):
raise serializers.ValidationError(_("The size did not match actual size of file."))
Expand All @@ -180,7 +197,36 @@ def validate(self, data):
validator(digest)
return data

def create(self, validated_data):
    """
    Create the artifact and delete its associated upload (if there is one).

    Args:
        validated_data (dict): Data to save to the database

    Returns:
        The newly created artifact.
    """
    artifact = super().create(validated_data)
    # validate() only sets self.upload when the request supplied 'upload'; an artifact
    # created from a plain 'file' has no upload record to clean up.
    upload = getattr(self, 'upload', None)
    if upload is not None:
        # creating an artifact will move the upload file so we need to delete the db record
        upload.delete()
    return artifact

class Meta:
model = models.Artifact
fields = base.ModelSerializer.Meta.fields + ('file', 'size', 'md5', 'sha1', 'sha224',
'sha256', 'sha384', 'sha512')
'sha256', 'sha384', 'sha512', 'upload')


class UploadSerializer(base.ModelSerializer):
    """
    Serializer for chunked uploads.

    Exposes the upload's href, its current offset and its expiry; the uploaded file content
    itself is accepted on write but never rendered back.
    """
    viewname = 'uploads:upload-detail'

    _href = base.IdentityField(view_name='upload-detail')
    file = serializers.FileField(write_only=True)

    class Meta(ChunkedUploadSerializer.Meta):
        model = models.Upload
        fields = (
            '_href',
            'file',
            'offset',
            'expires_at',
        )
1 change: 1 addition & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
'django_filters',
'drf_yasg',
'rest_framework',
'drf_chunked_upload',
# pulp core app
'pulpcore.app',
]
Expand Down
12 changes: 11 additions & 1 deletion pulpcore/app/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pulpcore.app.apps import pulp_plugin_configs
from pulpcore.app.openapigenerator import PulpOpenAPISchemaGenerator
from pulpcore.app.views import OrphansView, StatusView
from pulpcore.app.views import OrphansView, StatusView, UploadView
from pulpcore.constants import API_ROOT

import logging
Expand Down Expand Up @@ -144,6 +144,16 @@ def __repr__(self):
name='schema-redoc')
)

# Detail route: operate on a single, already-started upload identified by its primary key.
# The pk segment must be non-empty and must not contain '/', so paths such as 'uploads//'
# or 'uploads/a/b/' are no longer routed to the view with a bogus pk.
urlpatterns.append(url(
    r'^{api_root}uploads/(?P<pk>[^/]+)/$'.format(api_root=API_ROOT),
    UploadView.as_view(),
    name='upload-detail')
)
# Collection route: receives the initial request that starts a new upload.
urlpatterns.append(url(
    r'^{api_root}uploads/$'.format(api_root=API_ROOT),
    UploadView.as_view())
)

schema_view = get_schema_view(
title='Pulp API',
permission_classes=[permissions.AllowAny],
Expand Down
1 change: 1 addition & 0 deletions pulpcore/app/views/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .orphans import OrphansView # noqa
from .status import StatusView # noqa
from .upload import UploadView # noqa
10 changes: 10 additions & 0 deletions pulpcore/app/views/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from drf_chunked_upload.views import ChunkedUploadView
from pulpcore.app.models import Upload
from pulpcore.app.serializers import UploadSerializer


class UploadView(ChunkedUploadView):
    """
    View for chunked uploads.

    Binds drf_chunked_upload's generic ``ChunkedUploadView`` to Pulp's ``Upload`` model and
    its serializer.
    """
    queryset = Upload.objects.all()
    model = Upload
    serializer_class = UploadSerializer
4 changes: 2 additions & 2 deletions pulpcore/app/viewsets/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from django.db import models
from rest_framework import status, mixins
from rest_framework.parsers import MultiPartParser, FormParser
from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.response import Response

from pulpcore.app.models import Artifact, Content
Expand Down Expand Up @@ -39,7 +39,7 @@ class ArtifactViewSet(NamedModelViewSet,
queryset = Artifact.objects.all()
serializer_class = ArtifactSerializer
filterset_class = ArtifactFilter
parser_classes = (MultiPartParser, FormParser)
parser_classes = (MultiPartParser, FormParser, JSONParser)

def destroy(self, request, pk):
"""
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
'redis<3.2.0',
'setuptools',
'dynaconf>=1.0.4',
'whitenoise'
'whitenoise',
'drf-chunked-upload'
]

setup(
Expand Down

0 comments on commit 895d6bd

Please sign in to comment.