From 95bb0cad5a5c88d9ebd02c71cd17f145f8dc9542 Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Sat, 18 Sep 2021 22:07:44 +0200 Subject: [PATCH 1/2] Expose the `fileuploads` API endpoint We had at least two independent re-implementations [1, 2] of file uploads within the last 4 months. And this was despite the fact that both projects already used TSC which would offer this functionality. Currently, the upload functionality in TSC is hard to discover as it is not exposed like all other REST functions. Instead of `server.fileuploads`, one has to first create an instance of the (undocumented) `Fileuploads` class. The upload functionality was probably not exposed because it is usually unnecessary: The uploaded files are usually part of publishing a workbook/datasource/... and the corresponding `datasources.publish` (and similar) already take care of the upload internally. However, TSC isn't always up-to-date with new REST APIs, and by exposing file uploads directly we can make sure to offer the best possible experience to users of TSC also in those transition periods. This commit: * turns the `Fileuploads` class into a normal endpoint class which is not tied to one upload (So far, `Fileuploads` was not stateless. Now it is) * adds the endpoint to `server`, such that file uploads are available as `server.fileuploads` * adjusts all other users to use `server.fileuploads` instead of constructing an ad hoc instance of the `Fileuploads` class Documentation will be added in a separate commit. 
[1] https://github.com/jharris126/tableau-data-update-api-samples/blob/41f51ae4d220de55caf63e91fe9eff5694b9456a/basic/basic_incremental_load.py#L23 [2] https://github.com/tableau/hyper-api-samples/blob/382e66481ec8339407cf9cfa5d41fcdcf3f6a0fb/Community-Supported/clouddb-extractor/tableau_restapi_helpers.py#L165 --- .../server/endpoint/__init__.py | 1 + .../server/endpoint/datasources_endpoint.py | 3 +- .../server/endpoint/fileuploads_endpoint.py | 36 ++++++++----------- .../server/endpoint/flows_endpoint.py | 3 +- .../server/endpoint/workbooks_endpoint.py | 3 +- tableauserverclient/server/server.py | 2 ++ test/test_fileuploads.py | 15 +++----- 7 files changed, 24 insertions(+), 39 deletions(-) diff --git a/tableauserverclient/server/endpoint/__init__.py b/tableauserverclient/server/endpoint/__init__.py index 8653c0254..29fe93299 100644 --- a/tableauserverclient/server/endpoint/__init__.py +++ b/tableauserverclient/server/endpoint/__init__.py @@ -5,6 +5,7 @@ from .databases_endpoint import Databases from .endpoint import Endpoint from .favorites_endpoint import Favorites +from .fileuploads_endpoint import Fileuploads from .flows_endpoint import Flows from .exceptions import ( ServerResponseError, diff --git a/tableauserverclient/server/endpoint/datasources_endpoint.py b/tableauserverclient/server/endpoint/datasources_endpoint.py index 7b80c2b2b..b67332f7d 100644 --- a/tableauserverclient/server/endpoint/datasources_endpoint.py +++ b/tableauserverclient/server/endpoint/datasources_endpoint.py @@ -2,7 +2,6 @@ from .exceptions import InternalServerError, MissingRequiredFieldError from .permissions_endpoint import _PermissionsEndpoint from .dqw_endpoint import _DataQualityWarningEndpoint -from .fileuploads_endpoint import Fileuploads from .resource_tagger import _ResourceTagger from .. 
import RequestFactory, DatasourceItem, PaginationItem, ConnectionItem from ..query import QuerySet @@ -244,7 +243,7 @@ def publish( # Determine if chunking is required (64MB is the limit for single upload method) if file_size >= FILESIZE_LIMIT: logger.info("Publishing {0} to server with chunking method (datasource over 64MB)".format(filename)) - upload_session_id = Fileuploads.upload_chunks(self.parent_srv, file) + upload_session_id = self.parent_srv.fileuploads.upload(file) url = "{0}&uploadSessionId={1}".format(url, upload_session_id) xml_request, content_type = RequestFactory.Datasource.publish_req_chunked( datasource_item, connection_credentials, connections diff --git a/tableauserverclient/server/endpoint/fileuploads_endpoint.py b/tableauserverclient/server/endpoint/fileuploads_endpoint.py index 05a3ce17c..046406c16 100644 --- a/tableauserverclient/server/endpoint/fileuploads_endpoint.py +++ b/tableauserverclient/server/endpoint/fileuploads_endpoint.py @@ -14,7 +14,6 @@ class Fileuploads(Endpoint): def __init__(self, parent_srv): super(Fileuploads, self).__init__(parent_srv) - self.upload_id = "" @property def baseurl(self): @@ -25,21 +24,18 @@ def initiate(self): url = self.baseurl server_response = self.post_request(url, "") fileupload_item = FileuploadItem.from_response(server_response.content, self.parent_srv.namespace) - self.upload_id = fileupload_item.upload_session_id - logger.info("Initiated file upload session (ID: {0})".format(self.upload_id)) - return self.upload_id + upload_id = fileupload_item.upload_session_id + logger.info("Initiated file upload session (ID: {0})".format(upload_id)) + return upload_id @api(version="2.0") - def append(self, xml_request, content_type): - if not self.upload_id: - error = "File upload session must be initiated first." 
- raise MissingRequiredFieldError(error) - url = "{0}/{1}".format(self.baseurl, self.upload_id) - server_response = self.put_request(url, xml_request, content_type) - logger.info("Uploading a chunk to session (ID: {0})".format(self.upload_id)) + def append(self, upload_id, data, content_type): + url = "{0}/{1}".format(self.baseurl, upload_id) + server_response = self.put_request(url, data, content_type) + logger.info("Uploading a chunk to session (ID: {0})".format(upload_id)) return FileuploadItem.from_response(server_response.content, self.parent_srv.namespace) - def read_chunks(self, file): + def _read_chunks(self, file): file_opened = False try: file_content = open(file, "rb") @@ -55,15 +51,11 @@ def read_chunks(self, file): break yield chunked_content - @classmethod - def upload_chunks(cls, parent_srv, file): - file_uploader = cls(parent_srv) - upload_id = file_uploader.initiate() - - chunks = file_uploader.read_chunks(file) - for chunk in chunks: - xml_request, content_type = RequestFactory.Fileupload.chunk_req(chunk) - fileupload_item = file_uploader.append(xml_request, content_type) + def upload(self, file): + upload_id = self.initiate() + for chunk in self._read_chunks(file): + request, content_type = RequestFactory.Fileupload.chunk_req(chunk) + fileupload_item = self.append(upload_id, request, content_type) logger.info("\tPublished {0}MB".format(fileupload_item.file_size)) - logger.info("\tCommitting file upload...") + logger.info("File upload finished (ID: {0})".format(upload_id)) return upload_id diff --git a/tableauserverclient/server/endpoint/flows_endpoint.py b/tableauserverclient/server/endpoint/flows_endpoint.py index 475166aad..eb2de4ac9 100644 --- a/tableauserverclient/server/endpoint/flows_endpoint.py +++ b/tableauserverclient/server/endpoint/flows_endpoint.py @@ -2,7 +2,6 @@ from .exceptions import InternalServerError, MissingRequiredFieldError from .permissions_endpoint import _PermissionsEndpoint from .dqw_endpoint import 
_DataQualityWarningEndpoint -from .fileuploads_endpoint import Fileuploads from .resource_tagger import _ResourceTagger from .. import RequestFactory, FlowItem, PaginationItem, ConnectionItem from ...filesys_helpers import to_filename, make_download_path @@ -169,7 +168,7 @@ def publish(self, flow_item, file_path, mode, connections=None): # Determine if chunking is required (64MB is the limit for single upload method) if os.path.getsize(file_path) >= FILESIZE_LIMIT: logger.info("Publishing {0} to server with chunking method (flow over 64MB)".format(filename)) - upload_session_id = Fileuploads.upload_chunks(self.parent_srv, file_path) + upload_session_id = self.parent_srv.fileuploads.upload(file_path) url = "{0}&uploadSessionId={1}".format(url, upload_session_id) xml_request, content_type = RequestFactory.Flow.publish_req_chunked(flow_item, connections) else: diff --git a/tableauserverclient/server/endpoint/workbooks_endpoint.py b/tableauserverclient/server/endpoint/workbooks_endpoint.py index df14674c6..a3f14c291 100644 --- a/tableauserverclient/server/endpoint/workbooks_endpoint.py +++ b/tableauserverclient/server/endpoint/workbooks_endpoint.py @@ -1,7 +1,6 @@ from .endpoint import QuerysetEndpoint, api, parameter_added_in from .exceptions import InternalServerError, MissingRequiredFieldError from .permissions_endpoint import _PermissionsEndpoint -from .fileuploads_endpoint import Fileuploads from .resource_tagger import _ResourceTagger from .. 
import RequestFactory, WorkbookItem, ConnectionItem, ViewItem, PaginationItem from ...models.job_item import JobItem @@ -344,7 +343,7 @@ def publish( # Determine if chunking is required (64MB is the limit for single upload method) if file_size >= FILESIZE_LIMIT: logger.info("Publishing {0} to server with chunking method (workbook over 64MB)".format(workbook_item.name)) - upload_session_id = Fileuploads.upload_chunks(self.parent_srv, file) + upload_session_id = self.parent_srv.fileuploads.upload(file) url = "{0}&uploadSessionId={1}".format(url, upload_session_id) conn_creds = connection_credentials xml_request, content_type = RequestFactory.Workbook.publish_req_chunked( diff --git a/tableauserverclient/server/server.py b/tableauserverclient/server/server.py index 057c98877..a20694a92 100644 --- a/tableauserverclient/server/server.py +++ b/tableauserverclient/server/server.py @@ -24,6 +24,7 @@ DataAccelerationReport, Favorites, DataAlerts, + Fileuploads, ) from .endpoint.exceptions import ( EndpointUnavailableError, @@ -82,6 +83,7 @@ def __init__(self, server_address, use_server_version=False): self.webhooks = Webhooks(self) self.data_acceleration_report = DataAccelerationReport(self) self.data_alerts = DataAlerts(self) + self.fileuploads = Fileuploads(self) self._namespace = Namespace() if use_server_version: diff --git a/test/test_fileuploads.py b/test/test_fileuploads.py index 9d115636f..51662e4a2 100644 --- a/test/test_fileuploads.py +++ b/test/test_fileuploads.py @@ -4,7 +4,6 @@ from ._utils import asset from tableauserverclient.server import Server -from tableauserverclient.server.endpoint.fileuploads_endpoint import Fileuploads TEST_ASSET_DIR = os.path.join(os.path.dirname(__file__), 'assets') FILEUPLOAD_INITIALIZE = os.path.join(TEST_ASSET_DIR, 'fileupload_initialize.xml') @@ -22,23 +21,18 @@ def setUp(self): self.baseurl = '{}/sites/{}/fileUploads'.format(self.server.baseurl, self.server.site_id) def test_read_chunks_file_path(self): - fileuploads = 
Fileuploads(self.server) - file_path = asset('SampleWB.twbx') - chunks = fileuploads.read_chunks(file_path) + chunks = self.server.fileuploads._read_chunks(file_path) for chunk in chunks: self.assertIsNotNone(chunk) def test_read_chunks_file_object(self): - fileuploads = Fileuploads(self.server) - with open(asset('SampleWB.twbx'), 'rb') as f: - chunks = fileuploads.read_chunks(f) + chunks = self.server.fileuploads._read_chunks(f) for chunk in chunks: self.assertIsNotNone(chunk) def test_upload_chunks_file_path(self): - fileuploads = Fileuploads(self.server) file_path = asset('SampleWB.twbx') upload_id = '7720:170fe6b1c1c7422dadff20f944d58a52-1:0' @@ -49,12 +43,11 @@ def test_upload_chunks_file_path(self): with requests_mock.mock() as m: m.post(self.baseurl, text=initialize_response_xml) m.put(self.baseurl + '/' + upload_id, text=append_response_xml) - actual = fileuploads.upload_chunks(self.server, file_path) + actual = self.server.fileuploads.upload(file_path) self.assertEqual(upload_id, actual) def test_upload_chunks_file_object(self): - fileuploads = Fileuploads(self.server) upload_id = '7720:170fe6b1c1c7422dadff20f944d58a52-1:0' with open(asset('SampleWB.twbx'), 'rb') as file_content: @@ -65,6 +58,6 @@ def test_upload_chunks_file_object(self): with requests_mock.mock() as m: m.post(self.baseurl, text=initialize_response_xml) m.put(self.baseurl + '/' + upload_id, text=append_response_xml) - actual = fileuploads.upload_chunks(self.server, file_content) + actual = self.server.fileuploads.upload(file_content) self.assertEqual(upload_id, actual) From 3abe4e94def4bc56fb188e04aa1e51fb71fcae1b Mon Sep 17 00:00:00 2001 From: Adrian Vogelsgesang Date: Thu, 23 Sep 2021 13:17:55 +0200 Subject: [PATCH 2/2] Make `Fileuploads._read_chunks` exception-safe --- .../server/endpoint/fileuploads_endpoint.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tableauserverclient/server/endpoint/fileuploads_endpoint.py 
b/tableauserverclient/server/endpoint/fileuploads_endpoint.py index 046406c16..b70cffbaa 100644 --- a/tableauserverclient/server/endpoint/fileuploads_endpoint.py +++ b/tableauserverclient/server/endpoint/fileuploads_endpoint.py @@ -43,13 +43,15 @@ def _read_chunks(self, file): except TypeError: file_content = file - while True: - chunked_content = file_content.read(CHUNK_SIZE) - if not chunked_content: - if file_opened: - file_content.close() - break - yield chunked_content + try: + while True: + chunked_content = file_content.read(CHUNK_SIZE) + if not chunked_content: + break + yield chunked_content + finally: + if file_opened: + file_content.close() def upload(self, file): upload_id = self.initiate()