Skip to content

Commit

Permalink
Verifying chunk checksum
Browse files Browse the repository at this point in the history
Enable sending checksum for chunk upload.

https://pulp.plan.io/issues/4982
closes #4982
  • Loading branch information
fao89 committed Jul 18, 2019
1 parent d739774 commit ce4a29f
Show file tree
Hide file tree
Showing 6 changed files with 91 additions and 25 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Expand Up @@ -30,6 +30,9 @@ tags
# Sublime
.codeintel

#VScode
.vscode/

# Rope
.ropeproject

Expand Down
4 changes: 4 additions & 0 deletions docs/workflows/upload-publish.rst
Expand Up @@ -16,6 +16,10 @@ be used for chunks. Chunks can be uploaded in any order or in parallel::
http --form PUT :24817/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunk2 'Content-Range:bytes 6291456-10485759/*'
http --form PUT :24817/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunk1 'Content-Range:bytes 0-6291455'

Note: You can optionally send the SHA-256 checksum of a chunk as a ``sha256`` argument; the chunk is rejected if the checksum does not match::

http --form PUT :24817/pulp/api/v3/uploads/a8b5a7f7-2f22-460d-ab20-d5616cb71cdd/ file@./chunk1 'Content-Range:bytes 0-6291455' sha256=7ffc86295de63e96006ce5ab379050628aa5d51f816267946c71906594e13870

Once all chunks have been uploaded, a final PUT request with the SHA-256 checksum of the whole file can be
sent to complete the upload::

Expand Down
10 changes: 8 additions & 2 deletions pulpcore/app/models/upload.py
Expand Up @@ -3,6 +3,7 @@

from django.core.files.base import ContentFile
from django.db import models
from rest_framework import serializers

from pulpcore.app.models import Model

Expand All @@ -22,7 +23,7 @@ class Upload(Model):
size = models.BigIntegerField()
completed = models.DateTimeField(null=True)

def append(self, chunk, offset):
def append(self, chunk, offset, sha256=None):
"""
Append a chunk to an upload.
Expand All @@ -33,9 +34,14 @@ def append(self, chunk, offset):
if not self.file:
self.file.save(os.path.join('upload', str(self.pk)), ContentFile(''))

chunk_read = chunk.read()
current_sha256 = hashlib.sha256(chunk_read).hexdigest()
if sha256 and sha256 != current_sha256:
raise serializers.ValidationError("Checksum does not match chunk upload.")

with self.file.open(mode='r+b') as file:
file.seek(offset)
file.write(chunk.read())
file.write(chunk_read)

self.chunks.create(offset=offset, size=len(chunk))

Expand Down
6 changes: 6 additions & 0 deletions pulpcore/app/serializers/upload.py
Expand Up @@ -30,6 +30,12 @@ class UploadChunkSerializer(serializers.Serializer):
help_text=_("A chunk of the uploaded file."),
)

sha256 = serializers.CharField(
help_text=_("The SHA-256 checksum of the chunk if available."),
required=False,
allow_null=True,
)


class UploadCommitSerializer(serializers.Serializer):
sha256 = serializers.CharField(
Expand Down
3 changes: 2 additions & 1 deletion pulpcore/app/viewsets/upload.py
Expand Up @@ -76,7 +76,8 @@ def update(self, request, pk=None):
if end > upload.size - 1:
raise serializers.ValidationError(_("End byte is greater than upload size."))

upload.append(chunk, start)
sha256 = request.data.get('sha256')
upload.append(chunk, start, sha256)

serializer = UploadSerializer(upload, context={'request': request})
return Response(serializer.data)
Expand Down
90 changes: 68 additions & 22 deletions pulpcore/tests/functional/api/test_upload.py
Expand Up @@ -28,6 +28,7 @@ class ChunkedUploadTestCase(unittest.TestCase):
* `Pulp #4197 <https://pulp.plan.io/issues/4197>`_
* `Pulp #5092 <https://pulp.plan.io/issues/5092>`_
* `Pulp #4982 <https://pulp.plan.io/issues/4982>`_
"""

@classmethod
Expand All @@ -42,10 +43,27 @@ def setUpClass(cls):
cls.file_sha256 = hashlib.sha256(cls.file).hexdigest()
cls.size_file = len(cls.file)

cls.first_chunk = http_get(FILE_CHUNKED_PART_1_URL)
cls.second_chunk = http_get(FILE_CHUNKED_PART_2_URL)
first_chunk = http_get(FILE_CHUNKED_PART_1_URL)
header_first_chunk = {
'Content-Range': 'bytes 0-{}/{}'.format(
len(first_chunk) - 1, cls.size_file
)
}

def test_create_artifact(self):
second_chunk = http_get(FILE_CHUNKED_PART_2_URL)
header_second_chunk = {
'Content-Range': 'bytes {}-{}/{}'.format(
len(first_chunk), cls.size_file - 1, cls.size_file
)
}

cls.chunked_data = [
[first_chunk, header_first_chunk],
[second_chunk, header_second_chunk],
]
shuffle(cls.chunked_data)

def test_create_artifact_without_checksum(self):
"""Test creation of artifact using upload of files in chunks."""

upload_request = self.upload_chunks()
Expand All @@ -59,6 +77,52 @@ def test_create_artifact(self):

self.assertEqual(artifact['sha256'], self.file_sha256, artifact)

def test_create_artifact_passing_checksum(self):
    """Create an artifact from a chunked upload, sending a checksum per chunk.

    Every chunk is PUT to the upload together with its own ``sha256``
    digest, the upload is committed with the digest of the whole file, and
    the artifact created from the upload must report that same digest.
    """
    upload = self.client.post(UPLOAD_PATH, {'size': self.size_file})
    upload_href = upload['_href']

    # Each entry of ``chunked_data`` is a ``[chunk_bytes, headers]`` pair.
    for chunk, headers in self.chunked_data:
        chunk_digest = hashlib.sha256(chunk).hexdigest()
        self.client.put(
            upload_href,
            data={'sha256': chunk_digest},
            files={'file': chunk},
            headers=headers,
        )

    # Finish the upload by committing it with the whole-file checksum.
    commit_url = urljoin(upload_href, 'commit/')
    self.client.put(commit_url, data={'sha256': self.file_sha256})

    created = self.client.post(ARTIFACTS_PATH, {'upload': upload_href})
    artifact = self.client.get(created['_href'])
    self.addCleanup(self.client.delete, artifact['_href'])

    self.assertEqual(artifact['sha256'], self.file_sha256, artifact)

def test_upload_chunk_wrong_checksum(self):
    """Verify that a chunk sent with a non-matching checksum is rejected.

    Each chunk is PUT to a newly created upload together with a bogus
    ``sha256`` value; the response for every chunk must be HTTP 400.
    """
    # Switch to the echo handler so the response object itself comes back
    # and its status code can be inspected. Put the previous handler back
    # when the test ends so the change cannot leak into other tests that
    # use the same client object and index responses as dicts.
    self.addCleanup(
        setattr, self.client, 'response_handler', self.client.response_handler
    )
    self.client.response_handler = api.echo_handler

    upload_request = self.client.post(
        UPLOAD_PATH, {'size': self.size_file}
    )
    upload_href = upload_request.json()['_href']
    # Remove the upload afterwards; no chunk is expected to be accepted.
    # Cleanups run last-in-first-out, so this runs before the handler is
    # restored.
    self.addCleanup(self.client.delete, upload_href)

    # Each entry of ``chunked_data`` is a ``[chunk_bytes, headers]`` pair.
    for chunk, headers in self.chunked_data:
        response = self.client.put(
            upload_href,
            data={'sha256': "WRONG CHECKSUM"},
            files={'file': chunk},
            headers=headers,
        )

        # Use a unittest assertion rather than a bare ``assert``: it is
        # not stripped under ``python -O`` and reports both values.
        self.assertEqual(response.status_code, 400)

def test_delete_upload(self):
"""Test a deletion of an upload using upload of files in chunks."""

Expand All @@ -77,29 +141,11 @@ def test_delete_upload(self):
self.cli_client.run(cmd, sudo=True)

def upload_chunks(self):
header_first_chunk = {
'Content-Range': 'bytes 0-{}/{}'.format(
len(self.first_chunk) - 1, self.size_file
)
}

header_second_chunk = {
'Content-Range': 'bytes {}-{}/{}'.format(
len(self.first_chunk), self.size_file - 1, self.size_file
)
}

chunked_data = [
[self.first_chunk, header_first_chunk],
[self.second_chunk, header_second_chunk],
]
shuffle(chunked_data)

upload_request = self.client.post(
UPLOAD_PATH, {'size': self.size_file}
)

for data in chunked_data:
for data in self.chunked_data:
self.client.put(
upload_request['_href'],
files={'file': data[0]},
Expand Down

0 comments on commit ce4a29f

Please sign in to comment.