Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for chunked uploads #18

Merged
merged 1 commit into from
Mar 5, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions docs/workflows/upload-publish.rst
Original file line number Diff line number Diff line change
@@ -1,2 +1,35 @@
Upload and Publish
==================

Chunked Uploads
---------------

For large file uploads, Pulp supports uploading files in chunks. To begin uploading a file in
chunks, an initial PUT request must be sent to the ``/pulp/api/v3/uploads`` endpoint::

http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676'

This returns an upload href (e.g. ``/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/``) that can
be used for subsequent chunks::

http --form PUT :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676'

Once all chunks have been uploaded, a final POST request with the file md5 can be sent to complete the
upload::

http POST :8000/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ md5=037a47d93670e64f2b1038e6f90e4cfd

Then the artifact may be created with the upload href::

http POST :8000/pulp/api/v3/artifacts/ upload=/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/

Note that after creating an artifact from an upload, the upload gets deleted and cannot be re-used.

Putting this all together, here is an example that uploads a 1.iso file in two chunks::

curl -O https://repos.fedorapeople.org/repos/pulp/pulp/fixtures/file-large/1.iso
split --bytes=6M 1.iso chunk
export UPLOAD=$(http --form PUT :8000/pulp/api/v3/uploads/ file@./chunkaa 'Content-Range:bytes 0-6291455/32095676' | jq -r '._href')
http --form PUT :8000$UPLOAD file@./chunkab 'Content-Range:bytes 6291456-10485759/32095676'
http POST :8000$UPLOAD md5=037a47d93670e64f2b1038e6f90e4cfd
http POST :8000/pulp/api/v3/artifacts/ upload=$UPLOAD
20 changes: 20 additions & 0 deletions pulpcore/app/files.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import hashlib
import os
from django.core.files.uploadhandler import TemporaryFileUploadHandler
from django.core.files.uploadedfile import TemporaryUploadedFile

Expand All @@ -14,6 +15,25 @@ def __init__(self, name, content_type, size, charset, content_type_extra=None):
self.hashers[hasher] = getattr(hashlib, hasher)()
super().__init__(name, content_type, size, charset, content_type_extra)

@classmethod
def from_file(cls, file, chunk_size=64 * 1024):
    """
    Create a PulpTemporaryUploadedFile from a file system file

    The file is consumed in pieces of at most ``chunk_size`` bytes so that a large
    file is never held in memory in full while its digests are being computed.
    Reading starts at the file's current position and leaves it at end-of-file.

    Args:
        file (File): a filesystem file; must provide ``name``, ``size`` and ``read()``
        chunk_size (int): maximum number of bytes to read per iteration

    Returns:
        PulpTemporaryUploadedFile: instantiated instance from file
    """
    name = os.path.basename(file.name)
    instance = cls(name, '', file.size, '', '')
    # Back the new instance with the given file.
    instance.file = file
    while True:
        chunk = file.read(chunk_size)
        if not chunk:
            break
        # Update exactly the hashers this instance carries, whatever set that is.
        for hasher in instance.hashers.values():
            hasher.update(chunk)
    return instance


class HashingFileUploadHandler(TemporaryFileUploadHandler):
"""
Expand Down
34 changes: 34 additions & 0 deletions pulpcore/app/migrations/0004_upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Generated by Django 2.1.7 on 2019-02-28 11:57

from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import drf_chunked_upload.models
import uuid


# Schema migration that creates the ``Upload`` model.
class Migration(migrations.Migration):

# Depends on the swappable user model and on pulp_app's 0003 migration.
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
('pulp_app', '0003_repositoryversioncontentdetails'),
]

operations = [
migrations.CreateModel(
name='Upload',
fields=[
# UUID primary key, generated with uuid.uuid4 by default.
('id', models.UUIDField(default=uuid.uuid4, editable=False, primary_key=True, serialize=False)),
# Nullable file field; its upload_to callable is drf_chunked_upload's generate_filename.
('file', models.FileField(max_length=255, null=True, upload_to=drf_chunked_upload.models.generate_filename)),
('filename', models.CharField(max_length=255)),
('offset', models.BigIntegerField(default=0)),
('created_at', models.DateTimeField(auto_now_add=True)),
# 1 = Incomplete (the default), 2 = Complete.
('status', models.PositiveSmallIntegerField(choices=[(1, 'Incomplete'), (2, 'Complete')], default=1)),
('completed_at', models.DateTimeField(blank=True, null=True)),
# Foreign key to the configured user model; CASCADE deletes uploads with their user.
('user', models.ForeignKey(editable=False, on_delete=django.db.models.deletion.CASCADE, related_name='upload', to=settings.AUTH_USER_MODEL)),
],
options={
'abstract': False,
},
),
]
2 changes: 1 addition & 1 deletion pulpcore/app/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# https://docs.djangoproject.com/en/dev/topics/db/models/#organizing-models-in-a-package

from .base import Model, MasterModel # noqa
from .content import Artifact, Content, ContentArtifact, RemoteArtifact # noqa
from .content import Artifact, Content, ContentArtifact, RemoteArtifact, Upload # noqa
from .generic import GenericRelationModel # noqa
from .publication import ( # noqa
ContentGuard,
Expand Down
5 changes: 5 additions & 0 deletions pulpcore/app/models/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from django.core import validators
from django.db import IntegrityError, models, transaction
from django.forms.models import model_to_dict
from drf_chunked_upload.models import ChunkedUpload

from itertools import chain

Expand Down Expand Up @@ -340,3 +341,7 @@ class RemoteArtifact(Model, QueryMixin):

class Meta:
unique_together = ('content_artifact', 'remote')


class Upload(ChunkedUpload):
    """Chunked upload model; adds nothing on top of ``ChunkedUpload``."""
1 change: 1 addition & 0 deletions pulpcore/app/serializers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
NoArtifactContentSerializer,
SingleArtifactContentSerializer,
MultipleArtifactContentSerializer,
UploadSerializer,
)
from .progress import ProgressReportSerializer # noqa
from .publication import ( # noqa
Expand Down
53 changes: 50 additions & 3 deletions pulpcore/app/serializers/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
import hashlib

from django.db import transaction
from drf_chunked_upload.serializers import ChunkedUploadSerializer
from rest_framework import serializers
from rest_framework.validators import UniqueValidator

from pulpcore.app import models
from pulpcore.app import models, files
from pulpcore.app.serializers import base, fields


Expand Down Expand Up @@ -98,7 +99,14 @@ class ArtifactSerializer(base.ModelSerializer):

file = serializers.FileField(
help_text=_("The stored file."),
required=True
required=False
)

upload = serializers.HyperlinkedRelatedField(
view_name="upload-detail",
write_only=True,
required=False,
queryset=models.Upload.objects.filter(status=models.Upload.COMPLETE)
)

size = serializers.IntegerField(
Expand Down Expand Up @@ -156,6 +164,15 @@ def validate(self, data):
"""
super().validate(data)

if ('file' not in data and 'upload' not in data) or \
('file' in data and 'upload' in data):
raise serializers.ValidationError(_("Either 'file' or 'upload' parameter must be "
"supplied but not both."))

if 'upload' in data:
self.upload = data.pop('upload')
data['file'] = files.PulpTemporaryUploadedFile.from_file(self.upload.file.file)

if 'size' in data:
if data['file'].size != int(data['size']):
raise serializers.ValidationError(_("The size did not match actual size of file."))
Expand All @@ -180,7 +197,37 @@ def validate(self, data):
validator(digest)
return data

def create(self, validated_data):
"""
Create the artifact and delete its associated upload (if there is one)

Args:
validated_data (dict): Data to save to the database

Returns:
The artifact returned by the parent serializer's ``create``
"""
artifact = super().create(validated_data)
# ``self.upload`` is only assigned by ``validate`` when the request supplied an
# 'upload', so its presence tells us whether there is an upload record to remove.
if hasattr(self, 'upload'):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would think that the upload attribute is always there. However it's value is None or HyperlinkedRelatedField. Is my understanding correct? In that case I would check if self.upload is None or not.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the upload attribute gets optionally created in the validate method. See line 173 above.

# creating an artifact will move the upload file so we need to delete the db record
self.upload.delete()
return artifact

class Meta:
model = models.Artifact
fields = base.ModelSerializer.Meta.fields + ('file', 'size', 'md5', 'sha1', 'sha224',
'sha256', 'sha384', 'sha512')
'sha256', 'sha384', 'sha512', 'upload')


class UploadSerializer(base.ModelSerializer):
    """
    Serialize chunked ``Upload`` records.

    Exposes the upload's ``_href``, a write-only ``file`` field, and the
    ``offset`` and ``expires_at`` values.
    """

    viewname = 'uploads:upload-detail'

    _href = base.IdentityField(view_name='upload-detail')
    file = serializers.FileField(write_only=True)

    class Meta(ChunkedUploadSerializer.Meta):
        model = models.Upload
        fields = ('_href', 'file', 'offset', 'expires_at')
1 change: 1 addition & 0 deletions pulpcore/app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
'django_filters',
'drf_yasg',
'rest_framework',
'drf_chunked_upload',
# pulp core app
'pulpcore.app',
]
Expand Down
12 changes: 11 additions & 1 deletion pulpcore/app/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from pulpcore.app.apps import pulp_plugin_configs
from pulpcore.app.openapigenerator import PulpOpenAPISchemaGenerator
from pulpcore.app.views import OrphansView, StatusView
from pulpcore.app.views import OrphansView, StatusView, UploadView
from pulpcore.constants import API_ROOT

import logging
Expand Down Expand Up @@ -144,6 +144,16 @@ def __repr__(self):
name='schema-redoc')
)

# Chunked upload endpoints.
# Detail route: the primary key is captured as one path segment. ``[^/]+`` keeps the
# capture non-empty and slash-free, so ``uploads//`` and ``uploads/a/b/`` do not resolve
# to the detail view.
urlpatterns.append(url(
    r'^{api_root}uploads/(?P<pk>[^/]+)/$'.format(api_root=API_ROOT),
    UploadView.as_view(),
    name='upload-detail')
)
# Collection route (no primary key in the path).
urlpatterns.append(url(
    r'^{api_root}uploads/$'.format(api_root=API_ROOT),
    UploadView.as_view())
)

schema_view = get_schema_view(
title='Pulp API',
permission_classes=[permissions.AllowAny],
Expand Down
1 change: 1 addition & 0 deletions pulpcore/app/views/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .orphans import OrphansView # noqa
from .status import StatusView # noqa
from .upload import UploadView # noqa
10 changes: 10 additions & 0 deletions pulpcore/app/views/upload.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from drf_chunked_upload.views import ChunkedUploadView
from pulpcore.app.models import Upload
from pulpcore.app.serializers import UploadSerializer


class UploadView(ChunkedUploadView):
    """
    Chunked upload view bound to the ``Upload`` model and ``UploadSerializer``.
    """

    queryset = Upload.objects.all()
    serializer_class = UploadSerializer
    model = Upload
4 changes: 2 additions & 2 deletions pulpcore/app/viewsets/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from django.db import models
from rest_framework import status, mixins
from rest_framework.parsers import MultiPartParser, FormParser
from rest_framework.parsers import FormParser, JSONParser, MultiPartParser
from rest_framework.response import Response

from pulpcore.app.models import Artifact, Content
Expand Down Expand Up @@ -39,7 +39,7 @@ class ArtifactViewSet(NamedModelViewSet,
queryset = Artifact.objects.all()
serializer_class = ArtifactSerializer
filterset_class = ArtifactFilter
parser_classes = (MultiPartParser, FormParser)
parser_classes = (MultiPartParser, FormParser, JSONParser)

def destroy(self, request, pk):
"""
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
'redis<3.2.0',
'setuptools',
'dynaconf>=1.0.4',
'whitenoise'
'whitenoise',
'drf-chunked-upload'
]

setup(
Expand Down