From 6d7072d2f38cf3ead6bedc5e1b0b661b7b29a544 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 17 Jul 2019 14:58:32 -0400 Subject: [PATCH 01/11] Support uploading content to pulp Implemented basic uploading functionality, which could upload a single file to pulp. --- pubtools/pulplib/_impl/client/client.py | 87 ++++++++++++++++++++++++- pubtools/pulplib/_impl/client/poller.py | 1 - tests/client/test_client.py | 65 ++++++++++++++++++ 3 files changed, 151 insertions(+), 2 deletions(-) diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 22ad688c..1557fe47 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -56,6 +56,9 @@ class Client(object): _REQUEST_THREADS = int(os.environ.get("PUBTOOLS_PULPLIB_REQUEST_THREADS", "4")) _PAGE_SIZE = int(os.environ.get("PUBTOOLS_PULPLIB_PAGE_SIZE", "2000")) _TASK_THROTTLE = int(os.environ.get("PUBTOOLS_PULPLIB_TASK_THROTTLE", "200")) + _PULP_CHUNK_SIZE = int( + os.environ.get("PUBTOOLS_PULPLIB_PULP_CHUNK_SIZE", 1024 * 1024) + ) # Policy used when deciding whether to retry operations. # This is mainly provided here as a hook for autotests, so the policy can be @@ -179,6 +182,42 @@ def search_repository(self, criteria=None): response_f, lambda data: self._handle_page(Repository, search, data) ) + def upload_one_file( + self, + file_name, + repo_id, + type_id, + unit_key, + unit_metadata=None, + override_config=None, + ): + """Including four steps uploading a file: + 1. request upload id from pulp + 2. upload contents to pulp + 3. import the uploaded content to wanted repo + 4. delete the reuqest id + """ + upload_id = self._request_upload().result()["upload_id"] + offset = 0 + with open(file_name) as f: + data = f.read(self._PULP_CHUNK_SIZE) + LOG.info("Uploading %s to repo %s", file_name, repo_id) + while data: + self._do_upload(data, upload_id, offset).result() + offset += self._PULP_CHUNK_SIZE + data = f.read(self._PULP_CHUNK_SIZE) + + self._do_import( + repo_id, + upload_id, + unit_type_id=type_id, + unit_key=unit_key, + unit_metadata=unit_metadata, + override_config=override_config, + ).result() + + self._delete_upload_request(upload_id).result() + def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) @@ -290,7 +329,6 @@ def _do_search(self, resource_type, search): url = os.path.join(self._url, "pulp/api/v2/{0}/search/".format(resource_type)) LOG.debug("Submitting %s search: %s", url, search) - return self._request_executor.submit( self._do_request, method="POST", url=url, json=search ) @@ -302,3 +340,50 @@ def _delete_resource(self, resource_type, resource_id): LOG.debug("Queuing request to DELETE %s", url) return self._task_executor.submit(self._do_request, method="DELETE", url=url) + + def _request_upload(self): + url = os.path.join(self._url, "pulp/api/v2/content/uploads/") + + LOG.debug("Reuqesting upload id") + return self._request_executor.submit(self._do_request, method="POST", url=url) + + def _do_upload(self, data, upload_id, offset): + url = os.path.join( + self._url, "pulp/api/v2/contents/%s/%s" % (upload_id, offset) + ) + + return self._request_executor.submit( + self._do_request, method="PUT", url=url, json=data + ) + + def _do_import( + self, + repo_id, + upload_id, + unit_type_id, + unit_key, + unit_metadata=None, + override_config=None, + ): + url = os.path.join( + self._url, "pulp/api/v2/repositories/%s/actions/import_upload/" % repo_id + ) + + body = { + "unit_type_id": unit_type_id, + "upload_id": upload_id, + "unit_key": unit_key, + "override_config": override_config or {}, + "unit_metadata": unit_metadata or {}, + } + + LOG.debug("Importing contents to repo %s", repo_id) + return self._task_executor.submit( + self._do_request, method="POST", url=url, json=body + ) + + def _delete_upload_request(self, upload_id): + url = os.path.join(self._url, "pulp/api/v2/content/uploads/%s/" % upload_id) + + LOG.debug("Deleting upload request") + return self._request_executor.submit(self._do_request, method="DELETE", url=url) diff --git a/pubtools/pulplib/_impl/client/poller.py b/pubtools/pulplib/_impl/client/poller.py index a045327f..e37b0ace 100644 --- a/pubtools/pulplib/_impl/client/poller.py +++ b/pubtools/pulplib/_impl/client/poller.py @@ -147,7 +147,6 @@ def search_tasks(self, task_ids): def gather_descriptor_tasks(self, descriptors): descriptor_tasks = [] all_tasks = [] - for descriptor in descriptors: result = descriptor.result try: diff --git a/tests/client/test_client.py b/tests/client/test_client.py index f0e46a75..713b67c3 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -165,3 +165,68 @@ def test_get_missing(client, requests_mocker): # It should explain the problem assert "repo1 was not found" in str(error.value) + + +def test_upload_one_file(client, requests_mocker, tmpdir, caplog): + """test upload a file to a repo in pulp""" + logging.getLogger().setLevel(logging.INFO) + caplog.set_level(logging.INFO) + + client._PULP_CHUNK_SIZE = 20 + + request_body = { + "_href": "/pulp/api/v2/content/uploads/cfb1fed0-752b-439e-aa68-fba68eababa3/", + "upload_id": "cfb1fed0-752b-439e-aa68-fba68eababa3", + } + upload_id = request_body["upload_id"] + repo_id = "repo1" + import_report = { + "result": {}, + "error": {}, + "spawned_tasks": [ + { + "_href": "/pulp/api/v2/tasks/7744e2df-39b9-46f0-bb10-feffa2f7014b/", + "task_id": "7744e2df-39b9-46f0-bb10-feffa2f7014b", + } + ], + } + + tasks_report = [ + {"task_id": "7744e2df-39b9-46f0-bb10-feffa2f7014b", "state": "finished"} + ] + + somefile = tmpdir.join("some-file.txt") + somefile.write("I am the file to be uploaded") + + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/content/uploads/", json=request_body + ) + requests_mocker.put( + "https://pulp.example.com/pulp/api/v2/contents/%s/0" % upload_id, json=[] + ) + requests_mocker.put( + "https://pulp.example.com/pulp/api/v2/contents/%s/20" % upload_id, json=[] + ) + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/repositories/%s/actions/import_upload/" + % repo_id, + json=import_report, + ) + requests_mocker.delete( + "https://pulp.example.com/pulp/api/v2/content/uploads/%s/" % upload_id, json=[] + ) + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/tasks/search/", json=tasks_report + ) + + client.upload_one_file(str(somefile), repo_id, "txt", {}) + + messages = caplog.messages + lines = [m.splitlines()[0] for m in messages] + + # 6 reuqests should be made + assert requests_mocker.call_count == 6 + + # task's spwaned and completed + assert "Created Pulp task: 7744e2df-39b9-46f0-bb10-feffa2f7014b" in messages + assert "Pulp task completed: 7744e2df-39b9-46f0-bb10-feffa2f7014b" in messages From 8d77f7b5feb429ad74e6fb2a945c02835d2556ce Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Mon, 22 Jul 2019 15:11:04 -0400 Subject: [PATCH 02/11] Fix various problems raised by Rohan in last patch: 1. bump version and update CHANGELOG.md 2. move the upload api to FileRepository class 3. remove some unused arguments for importing to iso repo 4. upload/import file to Pulp is still blocking style, the api is just for import 1 file, if users want to import many files at the same time, should wrap it in threads 5. read file with binary mode 6. add verifications in test --- CHANGELOG.md | 5 + pubtools/pulplib/_impl/client/client.py | 79 +++++++--------- .../pulplib/_impl/model/repository/file.py | 53 +++++++++++ setup.py | 2 +- tests/client/test_client.py | 65 ------------- tests/repository/test_upload.py | 92 +++++++++++++++++++ 6 files changed, 182 insertions(+), 114 deletions(-) create mode 100644 tests/repository/test_upload.py diff --git a/CHANGELOG.md b/CHANGELOG.md index b8f518f9..ff528a25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - n/a +## [1.2.0] - 2019-07-21 + +###Added +- A new API `FileRepository.upload_file` to upload a file to Pulp repository + ## [1.1.0] - 2019-07-03 ### Added diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 1557fe47..4e583929 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -1,6 +1,7 @@ import os import logging import threading +import hashlib from functools import partial import requests @@ -56,9 +57,7 @@ class Client(object): _REQUEST_THREADS = int(os.environ.get("PUBTOOLS_PULPLIB_REQUEST_THREADS", "4")) _PAGE_SIZE = int(os.environ.get("PUBTOOLS_PULPLIB_PAGE_SIZE", "2000")) _TASK_THROTTLE = int(os.environ.get("PUBTOOLS_PULPLIB_TASK_THROTTLE", "200")) - _PULP_CHUNK_SIZE = int( - os.environ.get("PUBTOOLS_PULPLIB_PULP_CHUNK_SIZE", 1024 * 1024) - ) + _CHUNK_SIZE = int(os.environ.get("PUBTOOLS_PULPLIB_CHUNK_SIZE", 1024 * 1024)) # Policy used when deciding whether to retry operations. # This is mainly provided here as a hook for autotests, so the policy can be @@ -182,41 +181,35 @@ def search_repository(self, criteria=None): response_f, lambda data: self._handle_page(Repository, search, data) ) - def upload_one_file( - self, - file_name, - repo_id, - type_id, - unit_key, - unit_metadata=None, - override_config=None, - ): - """Including four steps uploading a file: - 1. request upload id from pulp - 2. upload contents to pulp - 3. import the uploaded content to wanted repo - 4. delete the reuqest id - """ - upload_id = self._request_upload().result()["upload_id"] + def _do_upload_file(self, upload_id, repo_id, filename): + tasks_f = f_return([]) offset = 0 - with open(file_name) as f: - data = f.read(self._PULP_CHUNK_SIZE) - LOG.info("Uploading %s to repo %s", file_name, repo_id) - while data: - self._do_upload(data, upload_id, offset).result() - offset += self._PULP_CHUNK_SIZE - data = f.read(self._PULP_CHUNK_SIZE) - self._do_import( - repo_id, - upload_id, - unit_type_id=type_id, - unit_key=unit_key, - unit_metadata=unit_metadata, - override_config=override_config, - ).result() + def _do_next_part_upload(accumulated_tasks, upload_data): + data, upload_id, offset = upload_data + upload_task_f = self._do_upload(data, upload_id, offset) + + return f_map( + upload_task_f, lambda upload_task: accumulated_tasks + upload_task + ) + + # calculate size and checksum of the file during upload + with open(filename, "rb") as f: + checksum = hashlib.sha256() + size = 0 + data = f.read(self._CHUNK_SIZE) + LOG.info("uploading %s to repo %s", filename, repo_id) + while data: + checksum.update(data) + size += len(data) + next_part_upload = partial( + _do_next_part_upload, upload_data=(data, upload_id, offset) + ) + tasks_f = f_flat_map(tasks_f, next_part_upload) + offset += self._CHUNK_SIZE + data = f.read(self._CHUNK_SIZE) - self._delete_upload_request(upload_id).result() + return tasks_f, checksum.hexdigest(), size def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) @@ -344,7 +337,7 @@ def _delete_resource(self, resource_type, resource_id): def _request_upload(self): url = os.path.join(self._url, "pulp/api/v2/content/uploads/") - LOG.debug("Reuqesting upload id") + LOG.debug("Requesting upload id") return self._request_executor.submit(self._do_request, method="POST", url=url) def _do_upload(self, data, upload_id, offset): @@ -353,18 +346,10 @@ def _do_upload(self, data, upload_id, offset): ) return self._request_executor.submit( - self._do_request, method="PUT", url=url, json=data + self._do_request, method="PUT", url=url, data=data ) - def _do_import( - self, - repo_id, - upload_id, - unit_type_id, - unit_key, - unit_metadata=None, - override_config=None, - ): + def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): url = os.path.join( self._url, "pulp/api/v2/repositories/%s/actions/import_upload/" % repo_id ) @@ -373,8 +358,6 @@ def _do_import( "unit_type_id": unit_type_id, "upload_id": upload_id, "unit_key": unit_key, - "override_config": override_config or {}, - "unit_metadata": unit_metadata or {}, } LOG.debug("Importing contents to repo %s", repo_id) diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 0926888a..d13234b2 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -1,5 +1,8 @@ +import os + from .base import Repository, repo_type from ..attr import pulp_attrib +from ..common import DetachedException from ... import compat_attr as attr @@ -20,3 +23,53 @@ class FileRepository(Repository): ) mutable_urls = attr.ib(default=attr.Factory(lambda: ["PULP_MANIFEST"]), type=list) + + def upload_file(self, filename, relative_url=None): + """Upload a file to this repository. + + The upload operation includes 4 steps: + 1. Request an upload id from pulp + 2. With the requested upload id, upload the file + 3. After the upload's done, import the uploaded file to repository + 4. Delete the upload request + + Args: + filename (str) + Path to a local file to upload. + + relative_url (str) + Path that should be used in remote repository + + if omitted, filename will be used. + Returns: + Future[:class:`~pubtools.pulplib.Task`] + A future which is resolved when publish succeeds. + + The future contains the task to delete upload request + + Raises: + DetachedException + If this instance is not attached to a Pulp client. + """ + if not self._client: + raise DetachedException() + + path, name = os.path.split(filename.rstrip("/")) + if not relative_url: + relative_url = path + + # request upload id and wait for it + upload_id = self._client._request_upload().result()["upload_id"] + + # caculate hash/size of the file and upload it to pulp + upload_fs, checksum, size = self._client._do_upload_file( + upload_id, self.id, filename + ) + + # we need to wait for upload then it can start importing + upload_fs.result() + + unit_key = {"name": name, "digest": checksum, "size": size} + self._client._do_import(self.id, upload_id, "iso", unit_key).result() + + return self._client._delete_upload_request(upload_id) diff --git a/setup.py b/setup.py index 7cc90ec7..f42baf6c 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_requirements(): setup( name="pubtools-pulplib", - version="1.1.0", + version="1.2.0", packages=find_packages(exclude=["tests"]), package_data={"pubtools.pulplib._impl.schema": ["*.yaml"]}, url="https://github.com/release-engineering/pubtools-pulplib", diff --git a/tests/client/test_client.py b/tests/client/test_client.py index 713b67c3..f0e46a75 100644 --- a/tests/client/test_client.py +++ b/tests/client/test_client.py @@ -165,68 +165,3 @@ def test_get_missing(client, requests_mocker): # It should explain the problem assert "repo1 was not found" in str(error.value) - - -def test_upload_one_file(client, requests_mocker, tmpdir, caplog): - """test upload a file to a repo in pulp""" - logging.getLogger().setLevel(logging.INFO) - caplog.set_level(logging.INFO) - - client._PULP_CHUNK_SIZE = 20 - - request_body = { - "_href": "/pulp/api/v2/content/uploads/cfb1fed0-752b-439e-aa68-fba68eababa3/", - "upload_id": "cfb1fed0-752b-439e-aa68-fba68eababa3", - } - upload_id = request_body["upload_id"] - repo_id = "repo1" - import_report = { - "result": {}, - "error": {}, - "spawned_tasks": [ - { - "_href": "/pulp/api/v2/tasks/7744e2df-39b9-46f0-bb10-feffa2f7014b/", - "task_id": "7744e2df-39b9-46f0-bb10-feffa2f7014b", - } - ], - } - - tasks_report = [ - {"task_id": "7744e2df-39b9-46f0-bb10-feffa2f7014b", "state": "finished"} - ] - - somefile = tmpdir.join("some-file.txt") - somefile.write("I am the file to be uploaded") - - requests_mocker.post( - "https://pulp.example.com/pulp/api/v2/content/uploads/", json=request_body - ) - requests_mocker.put( - "https://pulp.example.com/pulp/api/v2/contents/%s/0" % upload_id, json=[] - ) - requests_mocker.put( - "https://pulp.example.com/pulp/api/v2/contents/%s/20" % upload_id, json=[] - ) - requests_mocker.post( - "https://pulp.example.com/pulp/api/v2/repositories/%s/actions/import_upload/" - % repo_id, - json=import_report, - ) - requests_mocker.delete( - "https://pulp.example.com/pulp/api/v2/content/uploads/%s/" % upload_id, json=[] - ) - requests_mocker.post( - "https://pulp.example.com/pulp/api/v2/tasks/search/", json=tasks_report - ) - - client.upload_one_file(str(somefile), repo_id, "txt", {}) - - messages = caplog.messages - lines = [m.splitlines()[0] for m in messages] - - # 6 reuqests should be made - assert requests_mocker.call_count == 6 - - # task's spwaned and completed - assert "Created Pulp task: 7744e2df-39b9-46f0-bb10-feffa2f7014b" in messages - assert "Pulp task completed: 7744e2df-39b9-46f0-bb10-feffa2f7014b" in messages diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py new file mode 100644 index 00000000..5aa0bc66 --- /dev/null +++ b/tests/repository/test_upload.py @@ -0,0 +1,92 @@ +import logging +import pytest +import json + +from pubtools.pulplib import ( + Repository, + FileRepository, + Task, + DetachedException, + TaskFailedException, +) + + +def test_upload_detached(): + """publish raises if called on a detached repo""" + with pytest.raises(DetachedException): + FileRepository(id="some-repo").upload_file("some-file") + + +def test_upload_file(client, requests_mocker, tmpdir, caplog): + """test upload a file to a repo in pulp""" + + logging.getLogger().setLevel(logging.INFO) + caplog.set_level(logging.INFO) + + repo_id = "repo1" + repo = FileRepository(id=repo_id) + repo.__dict__["_client"] = client + + client._CHUNK_SIZE = 20 + + request_body = { + "_href": "/pulp/api/v2/content/uploads/cfb1fed0-752b-439e-aa68-fba68eababa3/", + "upload_id": "cfb1fed0-752b-439e-aa68-fba68eababa3", + } + upload_id = request_body["upload_id"] + + import_report = { + "result": {}, + "error": {}, + "spawned_tasks": [ + { + "_href": "/pulp/api/v2/tasks/7744e2df-39b9-46f0-bb10-feffa2f7014b/", + "task_id": "task1", + } + ], + } + + tasks_report = [{"task_id": "task1", "state": "finished"}] + + somefile = tmpdir.join("some-file.txt") + somefile.write(b"there is some binary data:\x00\x01\x02") + + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/content/uploads/", json=request_body + ) + requests_mocker.put( + "https://pulp.example.com/pulp/api/v2/contents/%s/0" % upload_id, json=[] + ) + requests_mocker.put( + "https://pulp.example.com/pulp/api/v2/contents/%s/20" % upload_id, json=[] + ) + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/repositories/%s/actions/import_upload/" + % repo_id, + json=import_report, + ) + requests_mocker.delete( + "https://pulp.example.com/pulp/api/v2/content/uploads/%s/" % upload_id, json=[] + ) + requests_mocker.post( + "https://pulp.example.com/pulp/api/v2/tasks/search/", json=tasks_report + ) + + # delete's done, return [] + assert repo.upload_file(str(somefile)).result() == [] + + # 4th call should be import, check if right unit_key's passed + import_request = requests_mocker.request_history[3].json() + import_unit_key = { + u"name": u"some-file.txt", + u"digest": u"fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", + u"size": 29, + } + assert import_request["unit_key"] == import_unit_key + messages = caplog.messages + + # 6 reuqests should be made + assert requests_mocker.call_count == 6 + # task's spwaned and completed + assert "Created Pulp task: task1" in messages + assert "Pulp task completed: task1" in messages From 19db7088cb0dfc31736ab2a5922fbc79e22bd902 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 24 Jul 2019 22:14:33 -0400 Subject: [PATCH 03/11] implement upload to non-blocking style various small fixes will add example and add more test cases if the idea here's OK --- CHANGELOG.md | 4 -- pubtools/pulplib/_impl/client/client.py | 50 +++++++--------- .../pulplib/_impl/model/repository/file.py | 57 +++++++++++-------- tests/repository/test_upload.py | 14 ++--- 4 files changed, 61 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ff528a25..fc7d7e26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,10 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -- n/a - -## [1.2.0] - 2019-07-21 - ###Added - A new API `FileRepository.upload_file` to upload a file to Pulp repository diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 4e583929..4de5423f 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -181,35 +181,25 @@ def search_repository(self, criteria=None): response_f, lambda data: self._handle_page(Repository, search, data) ) - def _do_upload_file(self, upload_id, repo_id, filename): - tasks_f = f_return([]) - offset = 0 - - def _do_next_part_upload(accumulated_tasks, upload_data): - data, upload_id, offset = upload_data - upload_task_f = self._do_upload(data, upload_id, offset) - - return f_map( - upload_task_f, lambda upload_task: accumulated_tasks + upload_task - ) - - # calculate size and checksum of the file during upload - with open(filename, "rb") as f: - checksum = hashlib.sha256() - size = 0 - data = f.read(self._CHUNK_SIZE) - LOG.info("uploading %s to repo %s", filename, repo_id) - while data: - checksum.update(data) - size += len(data) - next_part_upload = partial( - _do_next_part_upload, upload_data=(data, upload_id, offset) - ) - tasks_f = f_flat_map(tasks_f, next_part_upload) - offset += self._CHUNK_SIZE - data = f.read(self._CHUNK_SIZE) - - return tasks_f, checksum.hexdigest(), size + def _do_upload_file(self, upload_id, repo_id, file_obj): + upload_f = f_return() + + if isinstance(file_obj, str): + file_obj = open(file_obj, 'rb') + + checksum = hashlib.sha256() + size, offset = 0, 0 + data = file_obj.read(self._CHUNK_SIZE) + LOG.info("Uploading %s to repo %s", file_obj, repo_id) + while data: + checksum.update(data) + size += len(data) + upload_f = f_map(upload_f, lambda _: self._do_upload(data, upload_id, offset)) + offset += self._CHUNK_SIZE + data = file_obj.read(self._CHUNK_SIZE) + file_obj.close() + + return upload_f, checksum.hexdigest(), size def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) @@ -368,5 +358,5 @@ def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): def _delete_upload_request(self, upload_id): url = os.path.join(self._url, "pulp/api/v2/content/uploads/%s/" % upload_id) - LOG.debug("Deleting upload request") + LOG.debug("Deleting upload request %s", upload_id) return self._request_executor.submit(self._do_request, method="DELETE", url=url) diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index d13234b2..15421ffa 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -1,5 +1,7 @@ import os +from more_executors.futures import f_flat_map, f_map + from .base import Repository, repo_type from ..attr import pulp_attrib from ..common import DetachedException @@ -24,28 +26,24 @@ class FileRepository(Repository): mutable_urls = attr.ib(default=attr.Factory(lambda: ["PULP_MANIFEST"]), type=list) - def upload_file(self, filename, relative_url=None): + def upload_file(self, file_obj, relative_url=None): """Upload a file to this repository. - The upload operation includes 4 steps: - 1. Request an upload id from pulp - 2. With the requested upload id, upload the file - 3. After the upload's done, import the uploaded file to repository - 4. Delete the upload request - Args: - filename (str) - Path to a local file to upload. + file_obj (str, file object) + If it's a string, then it's the path of file to upload. + Else, it ought to be a file-like object relative_url (str) Path that should be used in remote repository - if omitted, filename will be used. + If omitted, filename will be used. Returns: Future[:class:`~pubtools.pulplib.Task`] - A future which is resolved when publish succeeds. + A future which is resolved when import succeeds. - The future contains the task to delete upload request + The future contains the task to import uploaded content + to repository Raises: DetachedException @@ -54,22 +52,35 @@ def upload_file(self, filename, relative_url=None): if not self._client: raise DetachedException() - path, name = os.path.split(filename.rstrip("/")) - if not relative_url: - relative_url = path + is_path = isinstance(file_obj, str) + if is_path: + if not relative_url: + relative_url = file_obj + elif relative_url.endwith('/'): + path, name = os.path.split(file_obj) + relative_url = os.path.join(relative_url, name) + elif not is_path and not relative_url: + msg = "Must provide relative_url if the file's not from disk" + LOG.exception(msg) + raise ValueError(msg) # request upload id and wait for it upload_id = self._client._request_upload().result()["upload_id"] - # caculate hash/size of the file and upload it to pulp - upload_fs, checksum, size = self._client._do_upload_file( - upload_id, self.id, filename + upload_complete_f, checksum, size = self._client._do_upload_file( + upload_id, self.id, file_obj ) - # we need to wait for upload then it can start importing - upload_fs.result() + unit_key = {"name": relative_url, "digest": checksum, "size": size} - unit_key = {"name": name, "digest": checksum, "size": size} - self._client._do_import(self.id, upload_id, "iso", unit_key).result() + import_complete_f = f_flat_map( + upload_complete_f, + lambda _: self._client._do_import(self.id, upload_id, "iso", unit_key) + ) + + f_map( + import_complete_f, + lambda _: self._client._delete_upload_request(upload_id) + ) - return self._client._delete_upload_request(upload_id) + return import_complete_f diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index 5aa0bc66..cd01d5dc 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -12,7 +12,7 @@ def test_upload_detached(): - """publish raises if called on a detached repo""" + """upload_file raises if called on a detached repo""" with pytest.raises(DetachedException): FileRepository(id="some-repo").upload_file("some-file") @@ -40,7 +40,7 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): "error": {}, "spawned_tasks": [ { - "_href": "/pulp/api/v2/tasks/7744e2df-39b9-46f0-bb10-feffa2f7014b/", + "_href": "/pulp/api/v2/tasks/task1/", "task_id": "task1", } ], @@ -72,21 +72,21 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): "https://pulp.example.com/pulp/api/v2/tasks/search/", json=tasks_report ) - # delete's done, return [] - assert repo.upload_file(str(somefile)).result() == [] + + assert repo.upload_file(str(somefile)).result() == [Task(id="task1", succeeded=True, completed=True)] # 4th call should be import, check if right unit_key's passed import_request = requests_mocker.request_history[3].json() import_unit_key = { - u"name": u"some-file.txt", + u"name": str(somefile), u"digest": u"fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", u"size": 29, } assert import_request["unit_key"] == import_unit_key messages = caplog.messages - # 6 reuqests should be made - assert requests_mocker.call_count == 6 + # 5 reuqests should be made + assert requests_mocker.call_count == 5 # task's spwaned and completed assert "Created Pulp task: task1" in messages assert "Pulp task completed: task1" in messages From 681c11752873da2d5008ea6f74d7f44664083fee Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Fri, 26 Jul 2019 11:13:43 -0400 Subject: [PATCH 04/11] Add example and fix bug in upload_file --- examples/upload-files | 76 +++++++++++++++++++ pubtools/pulplib/_impl/client/client.py | 10 +-- pubtools/pulplib/_impl/client/poller.py | 1 + .../pulplib/_impl/model/repository/file.py | 36 +++++---- tests/repository/test_upload.py | 52 +++++++++---- 5 files changed, 143 insertions(+), 32 deletions(-) create mode 100755 examples/upload-files diff --git a/examples/upload-files b/examples/upload-files new file mode 100755 index 00000000..6744c02b --- /dev/null +++ b/examples/upload-files @@ -0,0 +1,76 @@ +#!/usr/bin/env python +import os +import logging +from argparse import ArgumentParser + +from pubtools.pulplib import Criteria, Client + +log = logging.getLogger("upload") + + +def upload(client, path, repo_id): + crit = Criteria.with_id(repo_id) + repo = client.search_repository(crit).result().data[0] + + uploads = [] + if os.path.isdir(path): + for file in os.listdir(path): + if os.path.isfile(file): + file = os.path.join(path, file) + log.debug("Uploading %s to repo %s in threads", file, repo_id) + uploads.append(repo.upload_file(file)) + elif os.path.isfile(path): + log.debug("Uploading %s to repo %s", path, repo_id) + uploads.append(repo.upload_file(path)) + + for up in uploads: + up.result() + + log.info("Uploaded %s files to repository %s", len(uploads), repo_id) + + +def make_client(args): + auth = None + + if args.username: + password = args.password + if password is None: + password = os.environ.get("PULP_PASSWORD") + if not password: + log.warning("No password provided for %s", args.username) + auth = (args.username, args.password) + + return Client(args.url, auth=auth) + + +def main(): + log.setLevel(logging.INFO) + logging.basicConfig(format="%(message)s", level=logging.INFO) + + parser = ArgumentParser(description="Upload files to Repository") + parser.add_argument("--url", help="Pulp server URL") + parser.add_argument("--username", help="Pulp username") + parser.add_argument( + "--password", help="Pulp password (or set PULP_PASSWORD in env)" + ) + parser.add_argument("--debug", action="store_true") + parser.add_argument("--repo-id", action="store") + parser.add_argument("path", action="store", help="Path to a file or a directory") + + p = parser.parse_args() + + if not p.url: + parser.error("--url is required") + + if not p.repo_id: + parser.error("--repo-id is required") + + if p.debug: + logging.getLogger("pubtools.pulplib").setLevel(logging.DEBUG) + + client = make_client(p) + return upload(client, p.path, p.repo_id) + + +if __name__ == "__main__": + main() diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 4de5423f..b86ebd2c 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -6,7 +6,7 @@ import requests from more_executors import Executors -from more_executors.futures import f_map, f_flat_map, f_return +from more_executors.futures import f_map, f_flat_map, f_return, f_sequence from ..page import Page from ..criteria import Criteria @@ -182,10 +182,10 @@ def search_repository(self, criteria=None): ) def _do_upload_file(self, upload_id, repo_id, file_obj): - upload_f = f_return() + upload_fts = [] if isinstance(file_obj, str): - file_obj = open(file_obj, 'rb') + file_obj = open(file_obj, "rb") checksum = hashlib.sha256() size, offset = 0, 0 @@ -194,12 +194,12 @@ def _do_upload_file(self, upload_id, repo_id, file_obj): while data: checksum.update(data) size += len(data) - upload_f = f_map(upload_f, lambda _: self._do_upload(data, upload_id, offset)) + upload_fts.append(self._do_upload(data, upload_id, offset)) offset += self._CHUNK_SIZE data = file_obj.read(self._CHUNK_SIZE) file_obj.close() - return upload_f, checksum.hexdigest(), size + return f_sequence(upload_fts), checksum.hexdigest(), size def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) diff --git a/pubtools/pulplib/_impl/client/poller.py b/pubtools/pulplib/_impl/client/poller.py index e37b0ace..a045327f 100644 --- a/pubtools/pulplib/_impl/client/poller.py +++ b/pubtools/pulplib/_impl/client/poller.py @@ -147,6 +147,7 @@ def search_tasks(self, task_ids): def gather_descriptor_tasks(self, descriptors): descriptor_tasks = [] all_tasks = [] + for descriptor in descriptors: result = descriptor.result try: diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 15421ffa..b4fb4394 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -1,4 +1,5 @@ import os +import logging from more_executors.futures import f_flat_map, f_map @@ -8,6 +9,9 @@ from ... import compat_attr as attr +LOG = logging.getLogger("pubtools.pulplib") + + @repo_type("iso-repo") @attr.s(kw_only=True, frozen=True) class FileRepository(Repository): @@ -52,17 +56,7 @@ def upload_file(self, file_obj, relative_url=None): if not self._client: raise DetachedException() - is_path = isinstance(file_obj, str) - if is_path: - if not relative_url: - relative_url = file_obj - elif relative_url.endwith('/'): - path, name = os.path.split(file_obj) - relative_url = os.path.join(relative_url, name) - elif not is_path and not relative_url: - msg = "Must provide relative_url if the file's not from disk" - LOG.exception(msg) - raise ValueError(msg) + relative_url = self._get_relative_url(file_obj, relative_url) # request upload id and wait for it upload_id = self._client._request_upload().result()["upload_id"] @@ -75,12 +69,26 @@ def upload_file(self, file_obj, relative_url=None): import_complete_f = f_flat_map( upload_complete_f, - lambda _: self._client._do_import(self.id, upload_id, "iso", unit_key) + lambda _: self._client._do_import(self.id, upload_id, "iso", unit_key), ) f_map( - import_complete_f, - lambda _: self._client._delete_upload_request(upload_id) + import_complete_f, lambda _: self._client._delete_upload_request(upload_id) ) return import_complete_f + + def _get_relative_url(self, file_obj, relative_url): + is_path = isinstance(file_obj, str) + if is_path: + if not relative_url: + relative_url = file_obj + elif relative_url.endswith("/"): + _, name = os.path.split(file_obj) + relative_url = os.path.join(relative_url, name) + elif not is_path and (not relative_url or relative_url.endswith("/")): + msg = "Must provide complete relative_url if the file's not from disk" + LOG.exception(msg) + raise ValueError(msg) + + return relative_url diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index cd01d5dc..d26b0dd1 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -1,6 +1,7 @@ import logging import pytest import json +import io from pubtools.pulplib import ( Repository, @@ -38,12 +39,7 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): import_report = { "result": {}, "error": {}, - "spawned_tasks": [ - { - "_href": "/pulp/api/v2/tasks/task1/", - "task_id": "task1", - } - ], + "spawned_tasks": [{"_href": "/pulp/api/v2/tasks/task1/", "task_id": "task1"}], } tasks_report = [{"task_id": "task1", "state": "finished"}] @@ -65,15 +61,19 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): % repo_id, json=import_report, ) - requests_mocker.delete( - "https://pulp.example.com/pulp/api/v2/content/uploads/%s/" % upload_id, json=[] - ) requests_mocker.post( "https://pulp.example.com/pulp/api/v2/tasks/search/", json=tasks_report ) + requests_mocker.delete( + "https://pulp.example.com/pulp/api/v2/content/uploads/%s/" % upload_id, json=[] + ) + assert repo.upload_file(str(somefile)).result() == [ + Task(id="task1", succeeded=True, completed=True) + ] - assert repo.upload_file(str(somefile)).result() == [Task(id="task1", succeeded=True, completed=True)] + # it' possible the delete has been called while doing assertion + assert requests_mocker.call_count == 5 or 6 # 4th call should be import, check if right unit_key's passed import_request = requests_mocker.request_history[3].json() @@ -83,10 +83,36 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): u"size": 29, } assert import_request["unit_key"] == import_unit_key - messages = caplog.messages - # 5 reuqests should be made - assert requests_mocker.call_count == 5 + messages = caplog.messages # task's spwaned and completed assert "Created Pulp task: task1" in messages assert "Pulp task completed: task1" in messages + + +@pytest.mark.parametrize( + "relative_url,expected", + [ + ("some/path/", "some/path/some-file.txt"), + ("some/path/foo.txt", "some/path/foo.txt"), + ], +) +def test_get_relative_url(tmpdir, relative_url, expected): + somefile = tmpdir.join("some-file.txt") + repo = FileRepository(id="some-repo") + result = repo._get_relative_url(str(somefile), relative_url) + + assert result == expected + + +def test_get_relative_url_with_file_object(): + repo = FileRepository(id="some-repo") + file_obj = io.StringIO() + + with pytest.raises(ValueError): + repo._get_relative_url(file_obj, None) + + with pytest.raises(ValueError): + repo._get_relative_url(file_obj, "some/path/") + + assert repo._get_relative_url(file_obj, "path/foo.txt") == "path/foo.txt" From f5cdc2a66f10ed81be55c952a3dda12ea28ac417 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 30 Jul 2019 16:30:58 -0400 Subject: [PATCH 05/11] Include small changes per Rohan and implement upload in non-blocking style with upload by chunk in sequence --- CHANGELOG.md | 2 +- examples/upload-files | 11 ++--- pubtools/pulplib/_impl/client/client.py | 41 ++++++++++--------- .../pulplib/_impl/model/repository/file.py | 40 +++++++++++------- 4 files changed, 55 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc7d7e26..92d0689a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] -###Added +### Added - A new API `FileRepository.upload_file` to upload a file to Pulp repository ## [1.1.0] - 2019-07-03 diff --git a/examples/upload-files b/examples/upload-files index 6744c02b..689c8a53 100755 --- a/examples/upload-files +++ b/examples/upload-files @@ -3,14 +3,13 @@ import os import logging from argparse import ArgumentParser -from pubtools.pulplib import Criteria, Client +from pubtools.pulplib import Client log = logging.getLogger("upload") def upload(client, path, repo_id): - crit = Criteria.with_id(repo_id) - repo = client.search_repository(crit).result().data[0] + repo = client.get_repository(repo_id).result() uploads = [] if os.path.isdir(path): @@ -24,7 +23,8 @@ def upload(client, path, repo_id): uploads.append(repo.upload_file(path)) for up in uploads: - up.result() + result = up.result() + log.debug("Import task finished:\n", result) log.info("Uploaded %s files to repository %s", len(uploads), repo_id) @@ -54,7 +54,7 @@ def main(): "--password", help="Pulp password (or set PULP_PASSWORD in env)" ) parser.add_argument("--debug", action="store_true") - parser.add_argument("--repo-id", action="store") + parser.add_argument("repo-id", action="store") parser.add_argument("path", action="store", help="Path to a file or a directory") p = parser.parse_args() @@ -67,6 +67,7 @@ def main(): if p.debug: logging.getLogger("pubtools.pulplib").setLevel(logging.DEBUG) + log.setLevel(logging.DEBUG) client = make_client(p) return upload(client, p.path, p.repo_id) diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index b86ebd2c..72a20e59 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -2,11 +2,12 @@ import logging import threading import hashlib +import six from functools import partial import requests from more_executors import Executors -from more_executors.futures import f_map, f_flat_map, f_return, f_sequence +from more_executors.futures import f_map, f_flat_map, f_return from ..page import Page from ..criteria import Criteria @@ -181,25 +182,27 @@ def search_repository(self, criteria=None): response_f, lambda data: self._handle_page(Repository, search, data) ) - def _do_upload_file(self, upload_id, repo_id, file_obj): - upload_fts = [] - - if isinstance(file_obj, str): - file_obj = open(file_obj, "rb") - - checksum = hashlib.sha256() - size, offset = 0, 0 - data = file_obj.read(self._CHUNK_SIZE) - LOG.info("Uploading %s to repo %s", file_obj, repo_id) - while data: - checksum.update(data) - size += len(data) - upload_fts.append(self._do_upload(data, upload_id, offset)) - offset += self._CHUNK_SIZE + def _do_upload_file(self, upload_id, file_obj): + def do_next_upload(checksum, size): data = file_obj.read(self._CHUNK_SIZE) - file_obj.close() + if data: + checksum.update(data) + return f_flat_map( + self._do_upload(data, upload_id, size), + lambda _: do_next_upload(checksum, size + len(data)), + ) + else: + # nothing more to upload, return checksum and size + if is_string: + file_obj.close() + return f_return((checksum.hexdigest(), size)) + + is_string = isinstance(file_obj, six.string_types) + if is_string: + file_obj = open(file_obj, "rb") - return f_sequence(upload_fts), checksum.hexdigest(), size + LOG.info("Uploading %s to Pulp", file_obj) + return f_flat_map(f_return(), lambda _: do_next_upload(hashlib.sha256(), 0)) def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) @@ -350,7 +353,7 @@ def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): "unit_key": unit_key, } - LOG.debug("Importing contents to repo %s", repo_id) + LOG.debug("Importing contents to repo %s with %s", repo_id, upload_id) return self._task_executor.submit( self._do_request, method="POST", url=url, json=body ) diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index b4fb4394..1f5b937b 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -1,5 +1,6 @@ import os import logging +import six from more_executors.futures import f_flat_map, f_map @@ -36,22 +37,33 @@ def upload_file(self, file_obj, relative_url=None): Args: file_obj (str, file object) If it's a string, then it's the path of file to upload. - Else, it ought to be a file-like object + Else, it ought to be a file-like object, see: + https://docs.python.org/3/glossary.html#term-file-object relative_url (str) - Path that should be used in remote repository + Path that should be used in remote repository, can either + be a path to a directory or a path to a file, e.g: + - if relative_url is 'foo/bar' and file_obj has name 'f.txt', + the result remote path wll be 'foo/bar/f.txt'. + - if relative_url is 'foo/bar/f.txt', no matter what the + name of file_obj is, the remote path is 'foo/bar/f.txt'. + + If omitted, the local name of the file will be used. Or, + if file_obj is a file object without a `name` attribute, + passing `relative_url` is mandatory. - If omitted, filename will be used. Returns: Future[:class:`~pubtools.pulplib.Task`] A future which is resolved when import succeeds. The future contains the task to import uploaded content - to repository + to repository. Raises: DetachedException If this instance is not attached to a Pulp client. + + .. versionadded:: 1.2.0 """ if not self._client: raise DetachedException() @@ -61,15 +73,16 @@ def upload_file(self, file_obj, relative_url=None): # request upload id and wait for it upload_id = self._client._request_upload().result()["upload_id"] - upload_complete_f, checksum, size = self._client._do_upload_file( - upload_id, self.id, file_obj - ) - - unit_key = {"name": relative_url, "digest": checksum, "size": size} + upload_complete_f = self._client._do_upload_file(upload_id, file_obj) import_complete_f = f_flat_map( upload_complete_f, - lambda _: self._client._do_import(self.id, upload_id, "iso", unit_key), + lambda upload: self._client._do_import( + self.id, + upload_id, + "iso", + {"name": relative_url, "digest": upload[0], "size": upload[1]}, + ), ) f_map( @@ -79,7 +92,7 @@ def upload_file(self, file_obj, relative_url=None): return import_complete_f def _get_relative_url(self, file_obj, relative_url): - is_path = isinstance(file_obj, str) + is_path = isinstance(file_obj, six.string_types) if is_path: if not relative_url: relative_url = file_obj @@ -87,8 +100,7 @@ def _get_relative_url(self, file_obj, relative_url): _, name = os.path.split(file_obj) relative_url = os.path.join(relative_url, name) elif not is_path and (not relative_url or relative_url.endswith("/")): - msg = "Must provide complete relative_url if the file's not from disk" - LOG.exception(msg) - raise ValueError(msg) + msg = "%s is missing a name attribute and relative_url was not provided" + raise ValueError(msg % file_obj) return relative_url From 7dd742e7adcf4d63867bce6fdb39823cfb0e3dc2 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Wed, 31 Jul 2019 12:16:34 -0400 Subject: [PATCH 06/11] Small fixes from Rohan's suggestion and add fake content --- examples/upload-files | 9 ++- pubtools/pulplib/_impl/client/client.py | 24 ++++---- pubtools/pulplib/_impl/fake/client.py | 35 +++++++++++ pubtools/pulplib/_impl/fake/controller.py | 15 +++++ .../pulplib/_impl/model/repository/file.py | 19 +++--- tests/fake/test_fake_publish.py | 61 ++++++++++++++++++- 6 files changed, 138 insertions(+), 25 deletions(-) diff --git a/examples/upload-files b/examples/upload-files index 689c8a53..bcf0264e 100755 --- a/examples/upload-files +++ b/examples/upload-files @@ -24,7 +24,7 @@ def upload(client, path, repo_id): for up in uploads: result = up.result() - log.debug("Import task finished:\n", result) + log.debug("Import task finished:\n%s", result) log.info("Uploaded %s files to repository %s", len(uploads), repo_id) @@ -49,13 +49,13 @@ def main(): parser = ArgumentParser(description="Upload files to Repository") parser.add_argument("--url", help="Pulp server URL") + parser.add_argument("--repo-id", action="store") + parser.add_argument("--path", action="store", help="Path to a file or a directory") parser.add_argument("--username", help="Pulp username") parser.add_argument( "--password", help="Pulp password (or set PULP_PASSWORD in env)" ) parser.add_argument("--debug", action="store_true") - parser.add_argument("repo-id", action="store") - parser.add_argument("path", action="store", help="Path to a file or a directory") p = parser.parse_args() @@ -65,6 +65,9 @@ def main(): if not p.repo_id: parser.error("--repo-id is required") + if not p.path: + parser.error("--path is required") + if p.debug: logging.getLogger("pubtools.pulplib").setLevel(logging.DEBUG) log.setLevel(logging.DEBUG) diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index 72a20e59..d63674ac 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -2,7 +2,6 @@ import logging import threading import hashlib -import six from functools import partial import requests @@ -182,7 +181,7 @@ def search_repository(self, criteria=None): response_f, lambda data: self._handle_page(Repository, search, data) ) - def _do_upload_file(self, upload_id, file_obj): + def _do_upload_file(self, upload_id, file_obj, name): def do_next_upload(checksum, size): data = file_obj.read(self._CHUNK_SIZE) if data: @@ -191,18 +190,19 @@ def do_next_upload(checksum, size): self._do_upload(data, upload_id, size), lambda _: do_next_upload(checksum, size + len(data)), ) - else: - # nothing more to upload, return checksum and size - if is_string: - file_obj.close() - return f_return((checksum.hexdigest(), size)) - - is_string = isinstance(file_obj, six.string_types) - if is_string: + # nothing more to upload, return checksum and size + return f_return((checksum.hexdigest(), size)) + + is_file_object = "close" in dir(file_obj) + if not is_file_object: file_obj = open(file_obj, "rb") - LOG.info("Uploading %s to Pulp", file_obj) - return f_flat_map(f_return(), lambda _: do_next_upload(hashlib.sha256(), 0)) + LOG.info("Uploading %s to Pulp", name) + upload_f = f_flat_map(f_return(), lambda _: do_next_upload(hashlib.sha256(), 0)) + + if not is_file_object: + upload_f.add_done_callback(lambda _: file_obj.close()) + return upload_f def _publish_repository(self, repo, distributors_with_config): tasks_f = f_return([]) diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index 049e31a0..874a1162 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -1,6 +1,7 @@ import random import uuid import threading +import hashlib from collections import namedtuple @@ -13,6 +14,7 @@ from .match import match_object Publish = namedtuple("Publish", ["repository", "tasks"]) +Upload = namedtuple("Upload", ["repo_id", "tasks"]) class FakeClient(object): @@ -28,6 +30,7 @@ class FakeClient(object): def __init__(self): self._repositories = [] self._publish_history = [] + self._upload_history = [] self._lock = threading.RLock() self._uuidgen = random.Random() self._uuidgen.seed(0) @@ -79,6 +82,35 @@ def get_repository(self, repository_id): return f_return(data[0]) + def _do_upload_file(self, upload_id, file_obj, name): + # pylint: disable=unused-argument + out = f_return((hashlib.sha256().hexdigest(), random.randint(10, 1000))) + + if "close" not in dir(file_obj): + file_obj = open(file_obj, "rb") + out.add_done_callback(lambda _: file_obj.close()) + + return out + + def _request_upload(self): + upload_request = { + "_href": "/pulp/api/v2/content/uploads/%s/" % self._request_id(), + "upload_id": "%s" % self._request_id, + } + + return f_return(upload_request) + + def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): + # pylint: disable=unused-argument + task = Task(id=self._next_task_id(), completed=True, succeeded=True) + + self._upload_history.append(Upload(repo_id, [task])) + + return f_return([task]) + + def _delete_upload_request(self): + return f_return() # pragma: no cover + def _delete_resource(self, resource_type, resource_id): if resource_type == "repositories": return self._delete_repository(resource_id) @@ -128,3 +160,6 @@ def _next_task_id(self): with self._lock: next_raw_id = self._uuidgen.randint(0, 2 ** 128) return str(uuid.UUID(int=next_raw_id)) + + def _request_id(self): + return self._next_task_id() diff --git a/pubtools/pulplib/_impl/fake/controller.py b/pubtools/pulplib/_impl/fake/controller.py index 1d7a783d..40cd7c43 100644 --- a/pubtools/pulplib/_impl/fake/controller.py +++ b/pubtools/pulplib/_impl/fake/controller.py @@ -79,3 +79,18 @@ def publish_history(self): of this publish """ return self.client._publish_history[:] + + @property + def upload_history(self): + """A list of upload tasks triggered via this client. + + Each element of this list is a named tuple with the following attributes, + in order: + + ``repository``: + :class:`~pubtools.pulplib.Repository` for which upload was triggered + ``tasks``: + list of :class:`~pubtools.pulplib.Task` generated as a result + of this upload + """ + return self.client._upload_history[:] diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 1f5b937b..68e1aef7 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -1,6 +1,5 @@ import os import logging -import six from more_executors.futures import f_flat_map, f_map @@ -37,14 +36,15 @@ def upload_file(self, file_obj, relative_url=None): Args: file_obj (str, file object) If it's a string, then it's the path of file to upload. - Else, it ought to be a file-like object, see: - https://docs.python.org/3/glossary.html#term-file-object + Else, it ought to be a + `file-like object `_. + relative_url (str) Path that should be used in remote repository, can either be a path to a directory or a path to a file, e.g: - - if relative_url is 'foo/bar' and file_obj has name 'f.txt', - the result remote path wll be 'foo/bar/f.txt'. + - if relative_url is 'foo/bar/' and file_obj has name 'f.txt', + the resulting remote path wll be 'foo/bar/f.txt'. - if relative_url is 'foo/bar/f.txt', no matter what the name of file_obj is, the remote path is 'foo/bar/f.txt'. @@ -69,11 +69,12 @@ def upload_file(self, file_obj, relative_url=None): raise DetachedException() relative_url = self._get_relative_url(file_obj, relative_url) + name = os.path.basename(relative_url) # request upload id and wait for it upload_id = self._client._request_upload().result()["upload_id"] - upload_complete_f = self._client._do_upload_file(upload_id, file_obj) + upload_complete_f = self._client._do_upload_file(upload_id, file_obj, name) import_complete_f = f_flat_map( upload_complete_f, @@ -92,14 +93,14 @@ def upload_file(self, file_obj, relative_url=None): return import_complete_f def _get_relative_url(self, file_obj, relative_url): - is_path = isinstance(file_obj, six.string_types) - if is_path: + is_file_object = "close" in dir(file_obj) + if not is_file_object: if not relative_url: relative_url = file_obj elif relative_url.endswith("/"): _, name = os.path.split(file_obj) relative_url = os.path.join(relative_url, name) - elif not is_path and (not relative_url or relative_url.endswith("/")): + elif is_file_object and (not relative_url or relative_url.endswith("/")): msg = "%s is missing a name attribute and relative_url was not provided" raise ValueError(msg % file_obj) diff --git a/tests/fake/test_fake_publish.py b/tests/fake/test_fake_publish.py index 8f823fd8..ebf0aafa 100644 --- a/tests/fake/test_fake_publish.py +++ b/tests/fake/test_fake_publish.py @@ -1,4 +1,14 @@ -from pubtools.pulplib import FakeController, YumRepository, Distributor, PulpException +import sys + +import pytest + +from pubtools.pulplib import ( + FakeController, + YumRepository, + FileRepository, + Distributor, + PulpException, +) def test_can_publish(): @@ -63,3 +73,52 @@ def test_publish_absent_raises(): exception = repo_copy2.publish().exception() assert isinstance(exception, PulpException) assert "repo1 not found" in str(exception) + + +def test_can_upload(tmpdir): + """repo.upload_file() succeeds with fake client and populates upload_history.""" + controller = FakeController() + + controller.insert_repository(FileRepository(id="repo1")) + + client = controller.client + repo1 = client.get_repository("repo1").result() + + somefile = tmpdir.join("some-file.txt") + somefile.write(b"there is some binary data:\x00\x01\x02") + + upload_f = repo1.upload_file(str(somefile)) + + # The future should resolve successfully + tasks = upload_f.result() + + # It should have returned at least one successful task. + assert tasks + for task in tasks: + assert task.succeeded + + # The change should be reflected in the controller's publish history + history = controller.upload_history + + assert len(history) == 1 + assert history[0].repo_id == "repo1" + assert history[0].tasks == tasks + + +def test_upload_nonexistent_file_raises(): + """repo.upload_file() with nonexistent file fails with fake client""" + controller = FakeController() + + controller.insert_repository(FileRepository(id="repo1")) + + client = controller.client + repo1 = client.get_repository("repo1").result() + + # If file's not found, Python 2 raises IOError and Python 3 raises + # FileNotFoundError. The latter one is not defined in Python 2. + if sys.version_info < (3,): + exception = IOError + else: + exception = FileNotFoundError + with pytest.raises(exception): + upload_f = repo1.upload_file("nonexistent_file") From bcffbf0b431ff1d7f244f86a9d2e9f9bb9e94cef Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Thu, 1 Aug 2019 11:17:42 -0400 Subject: [PATCH 07/11] Small fixes according to Rohan's suggestions --- pubtools/pulplib/_impl/fake/client.py | 33 ++++++-- pubtools/pulplib/_impl/fake/controller.py | 8 +- .../pulplib/_impl/model/repository/file.py | 2 +- tests/fake/test_fake_publish.py | 61 +------------- tests/fake/test_fake_upload.py | 82 +++++++++++++++++++ tests/repository/test_upload.py | 7 +- 6 files changed, 119 insertions(+), 74 deletions(-) create mode 100644 tests/fake/test_fake_upload.py diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index 874a1162..66449cd3 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -6,7 +6,7 @@ from collections import namedtuple import six -from more_executors.futures import f_return, f_return_error +from more_executors.futures import f_return, f_return_error, f_flat_map from pubtools.pulplib import Page, PulpException, Criteria, Task from .. import compat_attr as attr @@ -14,7 +14,7 @@ from .match import match_object Publish = namedtuple("Publish", ["repository", "tasks"]) -Upload = namedtuple("Upload", ["repo_id", "tasks"]) +Upload = namedtuple("Upload", ["repository", "tasks", "unit_type_id", "unit_key"]) class FakeClient(object): @@ -84,10 +84,21 @@ def get_repository(self, repository_id): def _do_upload_file(self, upload_id, file_obj, name): # pylint: disable=unused-argument - out = f_return((hashlib.sha256().hexdigest(), random.randint(10, 1000))) - - if "close" not in dir(file_obj): + is_file_obj = "close" in dir(file_obj) + if not is_file_obj: file_obj = open(file_obj, "rb") + + def do_next_upload(checksum, size): + data = file_obj.read(1024 * 1024) + if data: + checksum.update(data) + size += len(data) + return do_next_upload(checksum, size) + return f_return((checksum.hexdigest(), size)) + + out = f_flat_map(f_return(), lambda _: do_next_upload(hashlib.sha256(), 0)) + + if not is_file_obj: out.add_done_callback(lambda _: file_obj.close()) return out @@ -102,15 +113,19 @@ def _request_upload(self): def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): # pylint: disable=unused-argument + repo_f = self.get_repository(repo_id) + if repo_f.exception(): + # Repo can't be found, let that exception propagate + return repo_f + + repo = repo_f.result() + task = Task(id=self._next_task_id(), completed=True, succeeded=True) - self._upload_history.append(Upload(repo_id, [task])) + self._upload_history.append(Upload(repo, [task], unit_type_id, unit_key)) return f_return([task]) - def _delete_upload_request(self): - return f_return() # pragma: no cover - def _delete_resource(self, resource_type, resource_id): if resource_type == "repositories": return self._delete_repository(resource_id) diff --git a/pubtools/pulplib/_impl/fake/controller.py b/pubtools/pulplib/_impl/fake/controller.py index 40cd7c43..7a54bde4 100644 --- a/pubtools/pulplib/_impl/fake/controller.py +++ b/pubtools/pulplib/_impl/fake/controller.py @@ -89,8 +89,12 @@ def upload_history(self): ``repository``: :class:`~pubtools.pulplib.Repository` for which upload was triggered - ``tasks``: - list of :class:`~pubtools.pulplib.Task` generated as a result + ``task``: + :class:`~pubtools.pulplib.Task` generated as a result of this upload + ``unit_type_id`` (str): + A string used to indicate the type of uploaded content + ``unit_key`` (dictionary): + A dictionary includes information about this upload """ return self.client._upload_history[:] diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 68e1aef7..9b852f39 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -53,7 +53,7 @@ def upload_file(self, file_obj, relative_url=None): passing `relative_url` is mandatory. Returns: - Future[:class:`~pubtools.pulplib.Task`] + Future[list of :class:`~pubtools.pulplib.Task`] A future which is resolved when import succeeds. The future contains the task to import uploaded content diff --git a/tests/fake/test_fake_publish.py b/tests/fake/test_fake_publish.py index ebf0aafa..8f823fd8 100644 --- a/tests/fake/test_fake_publish.py +++ b/tests/fake/test_fake_publish.py @@ -1,14 +1,4 @@ -import sys - -import pytest - -from pubtools.pulplib import ( - FakeController, - YumRepository, - FileRepository, - Distributor, - PulpException, -) +from pubtools.pulplib import FakeController, YumRepository, Distributor, PulpException def test_can_publish(): @@ -73,52 +63,3 @@ def test_publish_absent_raises(): exception = repo_copy2.publish().exception() assert isinstance(exception, PulpException) assert "repo1 not found" in str(exception) - - -def test_can_upload(tmpdir): - """repo.upload_file() succeeds with fake client and populates upload_history.""" - controller = FakeController() - - controller.insert_repository(FileRepository(id="repo1")) - - client = controller.client - repo1 = client.get_repository("repo1").result() - - somefile = tmpdir.join("some-file.txt") - somefile.write(b"there is some binary data:\x00\x01\x02") - - upload_f = repo1.upload_file(str(somefile)) - - # The future should resolve successfully - tasks = upload_f.result() - - # It should have returned at least one successful task. - assert tasks - for task in tasks: - assert task.succeeded - - # The change should be reflected in the controller's publish history - history = controller.upload_history - - assert len(history) == 1 - assert history[0].repo_id == "repo1" - assert history[0].tasks == tasks - - -def test_upload_nonexistent_file_raises(): - """repo.upload_file() with nonexistent file fails with fake client""" - controller = FakeController() - - controller.insert_repository(FileRepository(id="repo1")) - - client = controller.client - repo1 = client.get_repository("repo1").result() - - # If file's not found, Python 2 raises IOError and Python 3 raises - # FileNotFoundError. The latter one is not defined in Python 2. - if sys.version_info < (3,): - exception = IOError - else: - exception = FileNotFoundError - with pytest.raises(exception): - upload_f = repo1.upload_file("nonexistent_file") diff --git a/tests/fake/test_fake_upload.py b/tests/fake/test_fake_upload.py new file mode 100644 index 00000000..a502eebb --- /dev/null +++ b/tests/fake/test_fake_upload.py @@ -0,0 +1,82 @@ +import sys + +import pytest + +from pubtools.pulplib import FakeController, FileRepository, PulpException + + +def test_can_upload(tmpdir): + """repo.upload_file() succeeds with fake client and populates upload_history.""" + controller = FakeController() + + controller.insert_repository(FileRepository(id="repo1")) + + client = controller.client + repo1 = client.get_repository("repo1").result() + + somefile = tmpdir.join("some-file.txt") + somefile.write(b"there is some binary data:\x00\x01\x02") + + upload_f = repo1.upload_file(str(somefile)) + + # The future should resolve successfully + tasks = upload_f.result() + + # The task should be successful. + assert tasks[0].succeeded + + # The change should be reflected in the controller's upload history + history = controller.upload_history + unit_key = { + "name": str(somefile), + "digest": "fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", + "size": 29, + } + + assert len(history) == 1 + assert history[0].repository == repo1 + assert history[0].tasks == tasks + assert history[0].unit_type_id == "iso" + assert history[0].unit_key == unit_key + + +def test_upload_nonexistent_file_raises(): + """repo.upload_file() with nonexistent file fails with fake client""" + controller = FakeController() + + controller.insert_repository(FileRepository(id="repo1")) + + client = controller.client + repo1 = client.get_repository("repo1").result() + + # If file's not found, Python 2 raises IOError and Python 3 raises + # FileNotFoundError. The latter one is not defined in Python 2. + if sys.version_info < (3,): + exception = IOError + else: + exception = FileNotFoundError + with pytest.raises(exception): + upload_f = repo1.upload_file("nonexistent_file") + + +def test_upload_repo_absent_raises(tmpdir): + controller = FakeController() + + controller.insert_repository(FileRepository(id="repo1")) + + client = controller.client + repo1 = client.get_repository("repo1").result() + + somefile = tmpdir.join("some-file.txt") + somefile.write(b"there is some binary data:\x00\x01\x02") + + repo_copy1 = client.get_repository("repo1").result() + repo_copy2 = client.get_repository("repo1").result() + + # if repo's deleted + assert repo_copy1.delete().result() + + exception = repo_copy2.upload_file(str(somefile)).exception() + + assert isinstance(exception, PulpException) + assert "repo1 not found" in str(exception) diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index d26b0dd1..d6c1c9d8 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -1,4 +1,5 @@ import logging +import time import pytest import json import io @@ -72,8 +73,10 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): Task(id="task1", succeeded=True, completed=True) ] - # it' possible the delete has been called while doing assertion - assert requests_mocker.call_count == 5 or 6 + # sleep for .1 second to let the whole process finish. + time.sleep(0.1) + + assert requests_mocker.call_count == 6 # 4th call should be import, check if right unit_key's passed import_request = requests_mocker.request_history[3].json() From 49f6443c4eea4585dc0bb7e7cda4cd104d9b5067 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Fri, 2 Aug 2019 10:17:50 -0400 Subject: [PATCH 08/11] Expose name and sha256 instead of unit_type_id and unit_key in fake client --- pubtools/pulplib/_impl/fake/client.py | 6 ++++-- pubtools/pulplib/_impl/fake/controller.py | 8 ++++---- tests/fake/test_fake_upload.py | 10 +++------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index 66449cd3..b66b6786 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -14,7 +14,7 @@ from .match import match_object Publish = namedtuple("Publish", ["repository", "tasks"]) -Upload = namedtuple("Upload", ["repository", "tasks", "unit_type_id", "unit_key"]) +Upload = namedtuple("Upload", ["repository", "tasks", "name", "sha256"]) class FakeClient(object): @@ -122,7 +122,9 @@ def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): task = Task(id=self._next_task_id(), completed=True, succeeded=True) - self._upload_history.append(Upload(repo, [task], unit_type_id, unit_key)) + self._upload_history.append( + Upload(repo, [task], unit_key["name"], unit_key["digest"]) + ) return f_return([task]) diff --git a/pubtools/pulplib/_impl/fake/controller.py b/pubtools/pulplib/_impl/fake/controller.py index 7a54bde4..ad3c2068 100644 --- a/pubtools/pulplib/_impl/fake/controller.py +++ b/pubtools/pulplib/_impl/fake/controller.py @@ -92,9 +92,9 @@ def upload_history(self): ``task``: :class:`~pubtools.pulplib.Task` generated as a result of this upload - ``unit_type_id`` (str): - A string used to indicate the type of uploaded content - ``unit_key`` (dictionary): - A dictionary includes information about this upload + ``name`` (str): + the remote path used + ``sha256`` (str): + checksum of the file uploaded """ return self.client._upload_history[:] diff --git a/tests/fake/test_fake_upload.py b/tests/fake/test_fake_upload.py index a502eebb..1c5e024c 100644 --- a/tests/fake/test_fake_upload.py +++ b/tests/fake/test_fake_upload.py @@ -27,17 +27,13 @@ def test_can_upload(tmpdir): # The change should be reflected in the controller's upload history history = controller.upload_history - unit_key = { - "name": str(somefile), - "digest": "fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", - "size": 29, - } + digest = "fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb" assert len(history) == 1 assert history[0].repository == repo1 assert history[0].tasks == tasks - assert history[0].unit_type_id == "iso" - assert history[0].unit_key == unit_key + assert history[0].name == str(somefile) + assert history[0].sha256 == digest def test_upload_nonexistent_file_raises(): From 40299b1344c7b7107d1cbe34ba7bffaae164144c Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Mon, 5 Aug 2019 10:52:09 -0400 Subject: [PATCH 09/11] Small changes per Rohan's suggestions --- pubtools/pulplib/_impl/fake/client.py | 6 +++--- pubtools/pulplib/_impl/fake/controller.py | 4 ++-- tests/repository/test_upload.py | 19 ++++++++++++++----- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index b66b6786..21ebdeb6 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -105,8 +105,8 @@ def do_next_upload(checksum, size): def _request_upload(self): upload_request = { - "_href": "/pulp/api/v2/content/uploads/%s/" % self._request_id(), - "upload_id": "%s" % self._request_id, + "_href": "/pulp/api/v2/content/uploads/%s/" % self._next_request_id(), + "upload_id": "%s" % self._next_request_id(), } return f_return(upload_request) @@ -178,5 +178,5 @@ def _next_task_id(self): next_raw_id = self._uuidgen.randint(0, 2 ** 128) return str(uuid.UUID(int=next_raw_id)) - def _request_id(self): + def _next_request_id(self): return self._next_task_id() diff --git a/pubtools/pulplib/_impl/fake/controller.py b/pubtools/pulplib/_impl/fake/controller.py index ad3c2068..92ac42fa 100644 --- a/pubtools/pulplib/_impl/fake/controller.py +++ b/pubtools/pulplib/_impl/fake/controller.py @@ -89,8 +89,8 @@ def upload_history(self): ``repository``: :class:`~pubtools.pulplib.Repository` for which upload was triggered - ``task``: - :class:`~pubtools.pulplib.Task` generated as a result + ``tasks``: + list of :class:`~pubtools.pulplib.Task` generated as a result of this upload ``name`` (str): the remote path used diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index d6c1c9d8..c74749f2 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -73,10 +73,19 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): Task(id="task1", succeeded=True, completed=True) ] - # sleep for .1 second to let the whole process finish. - time.sleep(0.1) - - assert requests_mocker.call_count == 6 + # the 6th request call might not be done in time, try 1000 + # times with .01 sec sleep before next try. + for i in range(1000): + time.sleep(0.01) + try: + assert requests_mocker.call_count == 6 + except AssertionError: + if i != 999: + continue + else: + raise + else: + break # 4th call should be import, check if right unit_key's passed import_request = requests_mocker.request_history[3].json() @@ -88,7 +97,7 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): assert import_request["unit_key"] == import_unit_key messages = caplog.messages - # task's spwaned and completed + # task's spawned and completed assert "Created Pulp task: task1" in messages assert "Pulp task completed: task1" in messages From abe6c4be4d6ffcd1018acc7ec8176aca14001f92 Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Mon, 5 Aug 2019 12:59:59 -0400 Subject: [PATCH 10/11] Set relative_url to basename of local path if relative_url's not provided --- pubtools/pulplib/_impl/model/repository/file.py | 4 ++-- tests/fake/test_fake_upload.py | 2 +- tests/repository/test_upload.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 9b852f39..7610bb1c 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -95,10 +95,10 @@ def upload_file(self, file_obj, relative_url=None): def _get_relative_url(self, file_obj, relative_url): is_file_object = "close" in dir(file_obj) if not is_file_object: + name = os.path.basename(file_obj) if not relative_url: - relative_url = file_obj + relative_url = name elif relative_url.endswith("/"): - _, name = os.path.split(file_obj) relative_url = os.path.join(relative_url, name) elif is_file_object and (not relative_url or relative_url.endswith("/")): msg = "%s is missing a name attribute and relative_url was not provided" diff --git a/tests/fake/test_fake_upload.py b/tests/fake/test_fake_upload.py index 1c5e024c..8f12306e 100644 --- a/tests/fake/test_fake_upload.py +++ b/tests/fake/test_fake_upload.py @@ -32,7 +32,7 @@ def test_can_upload(tmpdir): assert len(history) == 1 assert history[0].repository == repo1 assert history[0].tasks == tasks - assert history[0].name == str(somefile) + assert history[0].name == somefile.basename assert history[0].sha256 == digest diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index c74749f2..55205f10 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -90,7 +90,7 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): # 4th call should be import, check if right unit_key's passed import_request = requests_mocker.request_history[3].json() import_unit_key = { - u"name": str(somefile), + u"name": somefile.basename, u"digest": u"fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", u"size": 29, } @@ -117,7 +117,7 @@ def test_get_relative_url(tmpdir, relative_url, expected): assert result == expected -def test_get_relative_url_with_file_object(): +def test_get_relative_url_with_file_object(tmpdir): repo = FileRepository(id="some-repo") file_obj = io.StringIO() From 370ecdb44e085fc8cdfd533a1caee8b014371c2e Mon Sep 17 00:00:00 2001 From: Jay Zhang Date: Tue, 6 Aug 2019 10:05:00 -0400 Subject: [PATCH 11/11] Fix bug in example/upload-files and import request - add verify option to upload-files - fix path error in upload-files - 'digest' should rather be 'checksum' in unit_key in import request - fix url error for upload --- examples/upload-files | 5 +++-- pubtools/pulplib/_impl/client/client.py | 4 ++-- pubtools/pulplib/_impl/fake/client.py | 2 +- pubtools/pulplib/_impl/model/repository/file.py | 2 +- tests/repository/test_upload.py | 8 +++++--- 5 files changed, 12 insertions(+), 9 deletions(-) diff --git a/examples/upload-files b/examples/upload-files index bcf0264e..2d6f8f7f 100755 --- a/examples/upload-files +++ b/examples/upload-files @@ -14,8 +14,8 @@ def upload(client, path, repo_id): uploads = [] if os.path.isdir(path): for file in os.listdir(path): + file = os.path.join(path, file) if os.path.isfile(file): - file = os.path.join(path, file) log.debug("Uploading %s to repo %s in threads", file, repo_id) uploads.append(repo.upload_file(file)) elif os.path.isfile(path): @@ -40,7 +40,7 @@ def make_client(args): log.warning("No password provided for %s", args.username) auth = (args.username, args.password) - return Client(args.url, auth=auth) + return Client(args.url, auth=auth, verify=not args.insecure) def main(): @@ -56,6 +56,7 @@ def main(): "--password", help="Pulp password (or set PULP_PASSWORD in env)" ) parser.add_argument("--debug", action="store_true") + parser.add_argument("--insecure", default=False, action="store_true") p = parser.parse_args() diff --git a/pubtools/pulplib/_impl/client/client.py b/pubtools/pulplib/_impl/client/client.py index d63674ac..5c65e616 100644 --- a/pubtools/pulplib/_impl/client/client.py +++ b/pubtools/pulplib/_impl/client/client.py @@ -335,7 +335,7 @@ def _request_upload(self): def _do_upload(self, data, upload_id, offset): url = os.path.join( - self._url, "pulp/api/v2/contents/%s/%s" % (upload_id, offset) + self._url, "pulp/api/v2/content/uploads/%s/%s/" % (upload_id, offset) ) return self._request_executor.submit( @@ -353,7 +353,7 @@ def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): "unit_key": unit_key, } - LOG.debug("Importing contents to repo %s with %s", repo_id, upload_id) + LOG.debug("Importing contents to repo %s with upload id %s", repo_id, upload_id) return self._task_executor.submit( self._do_request, method="POST", url=url, json=body ) diff --git a/pubtools/pulplib/_impl/fake/client.py b/pubtools/pulplib/_impl/fake/client.py index 21ebdeb6..34ab5755 100644 --- a/pubtools/pulplib/_impl/fake/client.py +++ b/pubtools/pulplib/_impl/fake/client.py @@ -123,7 +123,7 @@ def _do_import(self, repo_id, upload_id, unit_type_id, unit_key): task = Task(id=self._next_task_id(), completed=True, succeeded=True) self._upload_history.append( - Upload(repo, [task], unit_key["name"], unit_key["digest"]) + Upload(repo, [task], unit_key["name"], unit_key["checksum"]) ) return f_return([task]) diff --git a/pubtools/pulplib/_impl/model/repository/file.py b/pubtools/pulplib/_impl/model/repository/file.py index 7610bb1c..5b77ea61 100644 --- a/pubtools/pulplib/_impl/model/repository/file.py +++ b/pubtools/pulplib/_impl/model/repository/file.py @@ -82,7 +82,7 @@ def upload_file(self, file_obj, relative_url=None): self.id, upload_id, "iso", - {"name": relative_url, "digest": upload[0], "size": upload[1]}, + {"name": relative_url, "checksum": upload[0], "size": upload[1]}, ), ) diff --git a/tests/repository/test_upload.py b/tests/repository/test_upload.py index 55205f10..f4bfa66a 100644 --- a/tests/repository/test_upload.py +++ b/tests/repository/test_upload.py @@ -52,10 +52,12 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): "https://pulp.example.com/pulp/api/v2/content/uploads/", json=request_body ) requests_mocker.put( - "https://pulp.example.com/pulp/api/v2/contents/%s/0" % upload_id, json=[] + "https://pulp.example.com/pulp/api/v2/content/uploads/%s/0/" % upload_id, + json=[], ) requests_mocker.put( - "https://pulp.example.com/pulp/api/v2/contents/%s/20" % upload_id, json=[] + "https://pulp.example.com/pulp/api/v2/content/uploads/%s/20/" % upload_id, + json=[], ) requests_mocker.post( "https://pulp.example.com/pulp/api/v2/repositories/%s/actions/import_upload/" @@ -91,7 +93,7 @@ def test_upload_file(client, requests_mocker, tmpdir, caplog): import_request = requests_mocker.request_history[3].json() import_unit_key = { u"name": somefile.basename, - u"digest": u"fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", + u"checksum": u"fad3fc1e6d583b2003ec0a5273702ed8fcc2504271c87c40d9176467ebe218cb", u"size": 29, } assert import_request["unit_key"] == import_unit_key