# Punctuation that is kept (in addition to alphanumerics) when a string is
# turned into a file name.
ALLOWED_SPECIAL = (' ', '.', '_', '-')


def to_filename(string_to_sanitize, allowed_special=ALLOWED_SPECIAL):
    """Return ``string_to_sanitize`` with disallowed characters removed.

    A character is kept when ``str.isalnum()`` is true for it or when it is
    contained in ``allowed_special``; every other character (commas, path
    separators, quotes, ...) is dropped.  Characters are removed, not
    replaced, so the relative order of the surviving characters is unchanged.

    :param string_to_sanitize: the candidate name, e.g. the ``filename``
        value taken from a ``Content-Disposition`` header.
    :param allowed_special: container of extra single characters to keep.
        Defaults to ``ALLOWED_SPECIAL`` (space, dot, underscore, hyphen), which
        reproduces the behaviour of calling the function with one argument.
    :returns: the filtered string.  It may be empty, or consist only of dots
        (``'.'`` is allowed by default), when the input contains nothing
        else -- callers that need a usable file name must check for that.
    """
    # Feed the generator straight into join; no intermediate list is needed.
    return "".join(
        char for char in string_to_sanitize
        if char.isalnum() or char in allowed_special
    )
a/tableauserverclient/server/endpoint/datasources_endpoint.py b/tableauserverclient/server/endpoint/datasources_endpoint.py index 6ab275d3f..549173645 100644 --- a/tableauserverclient/server/endpoint/datasources_endpoint.py +++ b/tableauserverclient/server/endpoint/datasources_endpoint.py @@ -2,6 +2,7 @@ from .exceptions import MissingRequiredFieldError from .fileuploads_endpoint import Fileuploads from .. import RequestFactory, DatasourceItem, PaginationItem, ConnectionItem +from ...filesys_helpers import to_filename import os import logging import copy @@ -77,7 +78,7 @@ def download(self, datasource_id, filepath=None, no_extract=False): with closing(self.get_request(url, parameters={'stream': True})) as server_response: _, params = cgi.parse_header(server_response.headers['Content-Disposition']) - filename = os.path.basename(params['filename']) + filename = to_filename(os.path.basename(params['filename'])) if filepath is None: filepath = filename elif os.path.isdir(filepath): diff --git a/tableauserverclient/server/endpoint/workbooks_endpoint.py b/tableauserverclient/server/endpoint/workbooks_endpoint.py index 850df9f71..4d72f69d0 100644 --- a/tableauserverclient/server/endpoint/workbooks_endpoint.py +++ b/tableauserverclient/server/endpoint/workbooks_endpoint.py @@ -3,6 +3,7 @@ from .fileuploads_endpoint import Fileuploads from .. 
import RequestFactory, WorkbookItem, ConnectionItem, ViewItem, PaginationItem from ...models.tag_item import TagItem +from ...filesys_helpers import to_filename import os import logging import copy @@ -112,7 +113,7 @@ def download(self, workbook_id, filepath=None, no_extract=False): with closing(self.get_request(url, parameters={"stream": True})) as server_response: _, params = cgi.parse_header(server_response.headers['Content-Disposition']) - filename = os.path.basename(params['filename']) + filename = to_filename(os.path.basename(params['filename'])) if filepath is None: filepath = filename elif os.path.isdir(filepath): diff --git a/test/test_datasource.py b/test/test_datasource.py index ebf17cfe9..a2732dba8 100644 --- a/test/test_datasource.py +++ b/test/test_datasource.py @@ -145,6 +145,17 @@ def test_download(self): self.assertTrue(os.path.exists(file_path)) os.remove(file_path) + def test_download_sanitizes_name(self): + filename = "Name,With,Commas.tds" + disposition = 'name="tableau_workbook"; filename="{}"'.format(filename) + with requests_mock.mock() as m: + m.get(self.baseurl + '/1f951daf-4061-451a-9df1-69a8062664f2/content', + headers={'Content-Disposition': disposition}) + file_path = self.server.datasources.download('1f951daf-4061-451a-9df1-69a8062664f2') + self.assertEqual(os.path.basename(file_path), "NameWithCommas.tds") + self.assertTrue(os.path.exists(file_path)) + os.remove(file_path) + def test_download_extract_only(self): # Pretend we're 2.5 for 'extract_only' self.server.version = "2.5" diff --git a/test/test_workbook.py b/test/test_workbook.py index d276ecea1..0c5ecca1c 100644 --- a/test/test_workbook.py +++ b/test/test_workbook.py @@ -170,6 +170,17 @@ def test_download(self): self.assertTrue(os.path.exists(file_path)) os.remove(file_path) + def test_download_sanitizes_name(self): + filename = "Name,With,Commas.twbx" + disposition = 'name="tableau_workbook"; filename="{}"'.format(filename) + with requests_mock.mock() as m: + 
m.get(self.baseurl + '/1f951daf-4061-451a-9df1-69a8062664f2/content', + headers={'Content-Disposition': disposition}) + file_path = self.server.workbooks.download('1f951daf-4061-451a-9df1-69a8062664f2') + self.assertEqual(os.path.basename(file_path), "NameWithCommas.twbx") + self.assertTrue(os.path.exists(file_path)) + os.remove(file_path) + def test_download_extract_only(self): # Pretend we're 2.5 for 'extract_only' self.server.version = "2.5"