From 0ae5e715134fe27c12b676ed6a08b7b17cfc17b7 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Sun, 11 Dec 2016 22:11:57 -0800 Subject: [PATCH 1/2] Workbook download now streams contents to file instead of loading all into memory --- tableauserverclient/server/endpoint/endpoint.py | 9 +++++---- .../server/endpoint/workbooks_endpoint.py | 7 +++++-- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tableauserverclient/server/endpoint/endpoint.py b/tableauserverclient/server/endpoint/endpoint.py index c90b91004..e29ab3d82 100644 --- a/tableauserverclient/server/endpoint/endpoint.py +++ b/tableauserverclient/server/endpoint/endpoint.py @@ -21,10 +21,11 @@ def _make_common_headers(auth_token, content_type): return headers - def _make_request(self, method, url, content=None, request_object=None, auth_token=None, content_type=None): + def _make_request(self, method, url, content=None, request_object=None, + auth_token=None, content_type=None, parameters=None): if request_object is not None: url = request_object.apply_query_params(url) - parameters = {} + parameters = parameters or {} parameters.update(self.parent_srv.http_options) parameters['headers'] = Endpoint._make_common_headers(auth_token, content_type) @@ -49,9 +50,9 @@ def _check_status(server_response): def get_unauthenticated_request(self, url, request_object=None): return self._make_request(self.parent_srv.session.get, url, request_object=request_object) - def get_request(self, url, request_object=None): + def get_request(self, url, request_object=None, parameters=None): return self._make_request(self.parent_srv.session.get, url, auth_token=self.parent_srv.auth_token, - request_object=request_object) + request_object=request_object, parameters=parameters) def delete_request(self, url): # We don't return anything for a delete diff --git a/tableauserverclient/server/endpoint/workbooks_endpoint.py b/tableauserverclient/server/endpoint/workbooks_endpoint.py index 6aabc6029..55a7b04cd 100644 --- a/tableauserverclient/server/endpoint/workbooks_endpoint.py +++ b/tableauserverclient/server/endpoint/workbooks_endpoint.py @@ -92,7 +92,7 @@ def download(self, workbook_id, filepath=None): error = "Workbook ID undefined." raise ValueError(error) url = "{0}/{1}/content".format(self.baseurl, workbook_id) - server_response = self.get_request(url) + server_response = self.get_request(url, parameters={"stream": True}) _, params = cgi.parse_header(server_response.headers['Content-Disposition']) filename = os.path.basename(params['filename']) if filepath is None: @@ -101,7 +101,10 @@ def download(self, workbook_id, filepath=None): filepath = os.path.join(filepath, filename) with open(filepath, 'wb') as f: - f.write(server_response.content) + chunk_size = 1024 * 1024 * 4 # 4MB + for chunk in server_response.iter_content(chunk_size): + f.write(chunk) + server_response.close() logger.info('Downloaded workbook to {0} (ID: {1})'.format(filepath, workbook_id)) return os.path.abspath(filepath) From 770630c1df255368efd057f185d984bf3cb5f1ec Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 12 Dec 2016 12:46:51 -0800 Subject: [PATCH 2/2] add datasources too --- .../server/endpoint/datasources_endpoint.py | 23 +++++++++------- .../server/endpoint/workbooks_endpoint.py | 26 +++++++++---------- 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/tableauserverclient/server/endpoint/datasources_endpoint.py b/tableauserverclient/server/endpoint/datasources_endpoint.py index e8e4e4bf6..af8efcd13 100644 --- a/tableauserverclient/server/endpoint/datasources_endpoint.py +++ b/tableauserverclient/server/endpoint/datasources_endpoint.py @@ -6,6 +6,7 @@ import logging import copy import cgi +from contextlib import closing # The maximum size of a file that can be published in a single request is 64MB FILESIZE_LIMIT = 1024 * 1024 * 64 # 64MB @@ -64,16 +65,18 @@ def download(self, datasource_id, filepath=None): error = "Datasource ID undefined." raise ValueError(error) url = "{0}/{1}/content".format(self.baseurl, datasource_id) - server_response = self.get_request(url) - _, params = cgi.parse_header(server_response.headers['Content-Disposition']) - filename = os.path.basename(params['filename']) - if filepath is None: - filepath = filename - elif os.path.isdir(filepath): - filepath = os.path.join(filepath, filename) - - with open(filepath, 'wb') as f: - f.write(server_response.content) + with closing(self.get_request(url, parameters={'stream': True})) as server_response: + _, params = cgi.parse_header(server_response.headers['Content-Disposition']) + filename = os.path.basename(params['filename']) + if filepath is None: + filepath = filename + elif os.path.isdir(filepath): + filepath = os.path.join(filepath, filename) + + with open(filepath, 'wb') as f: + for chunk in server_response.iter_content(1024): # 1KB + f.write(chunk) + logger.info('Downloaded datasource to {0} (ID: {1})'.format(filepath, datasource_id)) return os.path.abspath(filepath) diff --git a/tableauserverclient/server/endpoint/workbooks_endpoint.py b/tableauserverclient/server/endpoint/workbooks_endpoint.py index 55a7b04cd..eb185476e 100644 --- a/tableauserverclient/server/endpoint/workbooks_endpoint.py +++ b/tableauserverclient/server/endpoint/workbooks_endpoint.py @@ -7,6 +7,7 @@ import logging import copy import cgi +from contextlib import closing # The maximum size of a file that can be published in a single request is 64MB FILESIZE_LIMIT = 1024 * 1024 * 64 # 64MB @@ -92,19 +93,18 @@ def download(self, workbook_id, filepath=None): error = "Workbook ID undefined." raise ValueError(error) url = "{0}/{1}/content".format(self.baseurl, workbook_id) - server_response = self.get_request(url, parameters={"stream": True}) - _, params = cgi.parse_header(server_response.headers['Content-Disposition']) - filename = os.path.basename(params['filename']) - if filepath is None: - filepath = filename - elif os.path.isdir(filepath): - filepath = os.path.join(filepath, filename) - - with open(filepath, 'wb') as f: - chunk_size = 1024 * 1024 * 4 # 4MB - for chunk in server_response.iter_content(chunk_size): - f.write(chunk) - server_response.close() + + with closing(self.get_request(url, parameters={"stream": True})) as server_response: + _, params = cgi.parse_header(server_response.headers['Content-Disposition']) + filename = os.path.basename(params['filename']) + if filepath is None: + filepath = filename + elif os.path.isdir(filepath): + filepath = os.path.join(filepath, filename) + + with open(filepath, 'wb') as f: + for chunk in server_response.iter_content(1024): # 1KB + f.write(chunk) logger.info('Downloaded workbook to {0} (ID: {1})'.format(filepath, workbook_id)) return os.path.abspath(filepath)