From d4fe1c3d39bd3381fae3a428f2868cfc93e91f37 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Sun, 30 Oct 2016 20:44:20 -0700 Subject: [PATCH 1/7] Initial Draft of Paging Getter --- tableauserverclient/server/__init__.py | 2 +- tableauserverclient/server/server.py | 39 ++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/tableauserverclient/server/__init__.py b/tableauserverclient/server/__init__.py index a8b78b7fc..4caa5d163 100644 --- a/tableauserverclient/server/__init__.py +++ b/tableauserverclient/server/__init__.py @@ -7,5 +7,5 @@ UserItem, ViewItem, WorkbookItem, NAMESPACE from .endpoint import Auth, Datasources, Endpoint, Groups, Projects, Schedules, \ Sites, Users, Views, Workbooks, ServerResponseError, MissingRequiredFieldError -from .server import Server +from .server import Server, Pager from .exceptions import NotSignedInError diff --git a/tableauserverclient/server/server.py b/tableauserverclient/server/server.py index 5e85063d2..88f86ae3f 100644 --- a/tableauserverclient/server/server.py +++ b/tableauserverclient/server/server.py @@ -1,8 +1,47 @@ from .exceptions import NotSignedInError from .endpoint import Sites, Views, Users, Groups, Workbooks, Datasources, Projects, Auth, Schedules, ServerInfo +from . import RequestOptions + import requests +class Pager(object): + """ This class returns a generator that will iterate over all of the results. + + server is the server object that will be used when calling the callback. It will be passed + to the callback on each iteration + + Callback is expected to take a server object and a request options and return two values, an array of results, + and the pagination item from the current call. This will be used to build subsequent requests. 
+ """ + + def __init__(self, fetcher, opts=None): + self._fetcher = fetcher.get + self._options = opts + + def __call__(self): + current_item_list, last_pagination_item = self._fetcher(self._options) + count = 0 + + while count < last_pagination_item.total_available: + if len(current_item_list) == 0: + current_item_list, last_pagination_item = self._load_next_page(current_item_list, last_pagination_item) + + yield current_item_list.pop(0) + count += 1 + + def __iter__(self): + return self() + + def _load_next_page(self, current_item_list, last_pagination_item): + next_page = last_pagination_item.page_number + 1 + opts = RequestOptions(pagenumber=next_page, pagesize=last_pagination_item.page_size) + if self._options is not None: + opts.sort, opts.filter = self._options.sort, self._options.filter + current_item_list, last_pagination_item = self._fetcher(opts) + return current_item_list, last_pagination_item + + class Server(object): class PublishMode: Append = 'Append' From e2fbec6fc93524f92127d343bdb3e29128c5312b Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 13:01:28 -0700 Subject: [PATCH 2/7] Address feedback on PR, add tests, and clean up some comments. 
--- tableauserverclient/__init__.py | 2 +- tableauserverclient/server/__init__.py | 3 +- tableauserverclient/server/server.py | 38 ----------- test/assets/workbook_get_page_1.xml | 11 +++ test/assets/workbook_get_page_2.xml | 14 ++++ test/assets/workbook_get_page_3.xml | 10 +++ test/test_pager.py | 92 ++++++++++++++++++++++++++ 7 files changed, 130 insertions(+), 40 deletions(-) create mode 100644 test/assets/workbook_get_page_1.xml create mode 100644 test/assets/workbook_get_page_2.xml create mode 100644 test/assets/workbook_get_page_3.xml create mode 100644 test/test_pager.py diff --git a/tableauserverclient/__init__.py b/tableauserverclient/__init__.py index 107b6b2d9..c7a628d83 100644 --- a/tableauserverclient/__init__.py +++ b/tableauserverclient/__init__.py @@ -4,7 +4,7 @@ SiteItem, TableauAuth, UserItem, ViewItem, WorkbookItem, UnpopulatedPropertyError, \ HourlyInterval, DailyInterval, WeeklyInterval, MonthlyInterval, IntervalItem from .server import RequestOptions, Filter, Sort, Server, ServerResponseError,\ - MissingRequiredFieldError, NotSignedInError + MissingRequiredFieldError, NotSignedInError, Pager __version__ = '0.0.1' __VERSION__ = __version__ diff --git a/tableauserverclient/server/__init__.py b/tableauserverclient/server/__init__.py index 4caa5d163..e74e3cea6 100644 --- a/tableauserverclient/server/__init__.py +++ b/tableauserverclient/server/__init__.py @@ -7,5 +7,6 @@ UserItem, ViewItem, WorkbookItem, NAMESPACE from .endpoint import Auth, Datasources, Endpoint, Groups, Projects, Schedules, \ Sites, Users, Views, Workbooks, ServerResponseError, MissingRequiredFieldError -from .server import Server, Pager +from .server import Server +from .pager import Pager from .exceptions import NotSignedInError diff --git a/tableauserverclient/server/server.py b/tableauserverclient/server/server.py index 88f86ae3f..2cb08a892 100644 --- a/tableauserverclient/server/server.py +++ b/tableauserverclient/server/server.py @@ -1,47 +1,9 @@ from .exceptions import 
NotSignedInError from .endpoint import Sites, Views, Users, Groups, Workbooks, Datasources, Projects, Auth, Schedules, ServerInfo -from . import RequestOptions import requests -class Pager(object): - """ This class returns a generator that will iterate over all of the results. - - server is the server object that will be used when calling the callback. It will be passed - to the callback on each iteration - - Callback is expected to take a server object and a request options and return two values, an array of results, - and the pagination item from the current call. This will be used to build subsequent requests. - """ - - def __init__(self, fetcher, opts=None): - self._fetcher = fetcher.get - self._options = opts - - def __call__(self): - current_item_list, last_pagination_item = self._fetcher(self._options) - count = 0 - - while count < last_pagination_item.total_available: - if len(current_item_list) == 0: - current_item_list, last_pagination_item = self._load_next_page(current_item_list, last_pagination_item) - - yield current_item_list.pop(0) - count += 1 - - def __iter__(self): - return self() - - def _load_next_page(self, current_item_list, last_pagination_item): - next_page = last_pagination_item.page_number + 1 - opts = RequestOptions(pagenumber=next_page, pagesize=last_pagination_item.page_size) - if self._options is not None: - opts.sort, opts.filter = self._options.sort, self._options.filter - current_item_list, last_pagination_item = self._fetcher(opts) - return current_item_list, last_pagination_item - - class Server(object): class PublishMode: Append = 'Append' diff --git a/test/assets/workbook_get_page_1.xml b/test/assets/workbook_get_page_1.xml new file mode 100644 index 000000000..a5dfdcf89 --- /dev/null +++ b/test/assets/workbook_get_page_1.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/test/assets/workbook_get_page_2.xml b/test/assets/workbook_get_page_2.xml new file mode 100644 index 000000000..456cc1bcf --- /dev/null +++ 
b/test/assets/workbook_get_page_2.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + diff --git a/test/assets/workbook_get_page_3.xml b/test/assets/workbook_get_page_3.xml new file mode 100644 index 000000000..e2fad1f2b --- /dev/null +++ b/test/assets/workbook_get_page_3.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/test/test_pager.py b/test/test_pager.py new file mode 100644 index 000000000..cd1c1b169 --- /dev/null +++ b/test/test_pager.py @@ -0,0 +1,92 @@ +import unittest +import os +import requests_mock +import tableauserverclient as TSC + +TEST_ASSET_DIR = os.path.join(os.path.dirname(__file__), 'assets') + +GET_XML_PAGE1 = os.path.join(TEST_ASSET_DIR, 'workbook_get_page_1.xml') +GET_XML_PAGE2 = os.path.join(TEST_ASSET_DIR, 'workbook_get_page_2.xml') +GET_XML_PAGE3 = os.path.join(TEST_ASSET_DIR, 'workbook_get_page_3.xml') + + + +class PagerTests(unittest.TestCase): + def setUp(self): + self.server = TSC.Server('http://test') + + # Fake sign in + self.server._site_id = 'dad65087-b08b-4603-af4e-2887b8aafc67' + self.server._auth_token = 'j80k54ll2lfMZ0tv97mlPvvSCRyD0DOM' + + self.baseurl = self.server.workbooks.baseurl + + def test_pager_with_no_options(self): + with open(GET_XML_PAGE1, 'rb') as f: + page_1 = f.read().decode('utf-8') + with open(GET_XML_PAGE2, 'rb') as f: + page_2 = f.read().decode('utf-8') + with open(GET_XML_PAGE3, 'rb') as f: + page_3 = f.read().decode('utf-8') + with requests_mock.mock() as m: + # Register Pager with default request options + m.get(self.baseurl, text=page_1) + + # Register Pager with some pages + m.get(self.baseurl + "?pageNumber=1&pageSize=1", text=page_1) + m.get(self.baseurl + "?pageNumber=2&pageSize=1", text=page_2) + m.get(self.baseurl + "?pageNumber=3&pageSize=1", text=page_3) + + # No options should get all 3 + workbooks = TSC.Pager(self.server.workbooks) + self.assertTrue(len(list(workbooks)) == 3) + + # Let's check that workbook items aren't duplicates + workbooks = TSC.Pager(self.server.workbooks) + wb1, wb2, 
wb3 = workbooks + self.assertEqual(wb1.name, 'Page1Workbook') + self.assertEqual(wb2.name, 'Page2Workbook') + self.assertEqual(wb3.name, 'Page3Workbook') + + + + def test_pager_with_options(self): + with open(GET_XML_PAGE1, 'rb') as f: + page_1 = f.read().decode('utf-8') + with open(GET_XML_PAGE2, 'rb') as f: + page_2 = f.read().decode('utf-8') + with open(GET_XML_PAGE3, 'rb') as f: + page_3 = f.read().decode('utf-8') + with requests_mock.mock() as m: + # Register Pager with default request options + m.get(self.baseurl, text=page_1) + + # Register Pager with some pages + m.get(self.baseurl + "?pageNumber=1&pageSize=1", text=page_1) + m.get(self.baseurl + "?pageNumber=2&pageSize=1", text=page_2) + m.get(self.baseurl + "?pageNumber=3&pageSize=1", text=page_3) + m.get(self.baseurl + "?pageNumber=1&pageSize=3", text=page_1) + + + # Starting on page 2 should get 2 out of 3 + opts = TSC.RequestOptions(2,1) + workbooks = TSC.Pager(self.server.workbooks, opts) + self.assertTrue(len(list(workbooks)) == 2) + + # Starting on 1 with pagesize of 3 should get all 3 + opts = TSC.RequestOptions(1,3) + workbooks = TSC.Pager(self.server.workbooks, opts) + self.assertTrue(len(list(workbooks)) == 3) + + # Starting on 3 with pagesize of 1 should get the last item + opts = TSC.RequestOptions(3,1) + workbooks = TSC.Pager(self.server.workbooks, opts) + self.assertTrue(len(list(workbooks)) == 1) + + # Starting on 3 with pagesize of 1 should get the last item + opts = TSC.RequestOptions(3,1) + workbooks = list(TSC.Pager(self.server.workbooks, opts)) + self.assertTrue(len(workbooks) == 1) + # Should have the last workbook + wb3 = workbooks.pop() + self.assertEqual(wb3.name, 'Page3Workbook') From e59860e46a50cd9191faefb3c8eb0d376914ef89 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 13:08:41 -0700 Subject: [PATCH 3/7] Fix tests, pep8 --- test/test_pager.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/test/test_pager.py 
b/test/test_pager.py index cd1c1b169..9c4632876 100644 --- a/test/test_pager.py +++ b/test/test_pager.py @@ -10,7 +10,6 @@ GET_XML_PAGE3 = os.path.join(TEST_ASSET_DIR, 'workbook_get_page_3.xml') - class PagerTests(unittest.TestCase): def setUp(self): self.server = TSC.Server('http://test') @@ -48,8 +47,6 @@ def test_pager_with_no_options(self): self.assertEqual(wb2.name, 'Page2Workbook') self.assertEqual(wb3.name, 'Page3Workbook') - - def test_pager_with_options(self): with open(GET_XML_PAGE1, 'rb') as f: page_1 = f.read().decode('utf-8') @@ -67,24 +64,25 @@ def test_pager_with_options(self): m.get(self.baseurl + "?pageNumber=3&pageSize=1", text=page_3) m.get(self.baseurl + "?pageNumber=1&pageSize=3", text=page_1) - # Starting on page 2 should get 2 out of 3 - opts = TSC.RequestOptions(2,1) - workbooks = TSC.Pager(self.server.workbooks, opts) - self.assertTrue(len(list(workbooks)) == 2) + opts = TSC.RequestOptions(2, 1) + workbooks = list(TSC.Pager(self.server.workbooks, opts)) + self.assertTrue(len(workbooks) == 2) + wb2, wb3 = workbooks + self.assertEqual(wb2.name, 'Page2Workbook') + self.assertEqual(wb3.name, 'Page3Workbook') # Starting on 1 with pagesize of 3 should get all 3 - opts = TSC.RequestOptions(1,3) - workbooks = TSC.Pager(self.server.workbooks, opts) - self.assertTrue(len(list(workbooks)) == 3) - - # Starting on 3 with pagesize of 1 should get the last item - opts = TSC.RequestOptions(3,1) - workbooks = TSC.Pager(self.server.workbooks, opts) - self.assertTrue(len(list(workbooks)) == 1) + opts = TSC.RequestOptions(1, 3) + workbooks = list(TSC.Pager(self.server.workbooks, opts)) + self.assertTrue(len(workbooks) == 3) + wb1, wb2, wb3 = workbooks + self.assertEqual(wb1.name, 'Page1Workbook') + self.assertEqual(wb2.name, 'Page2Workbook') + self.assertEqual(wb3.name, 'Page3Workbook') # Starting on 3 with pagesize of 1 should get the last item - opts = TSC.RequestOptions(3,1) + opts = TSC.RequestOptions(3, 1) workbooks = list(TSC.Pager(self.server.workbooks, 
opts)) self.assertTrue(len(workbooks) == 1) # Should have the last workbook From 6d15d32bc5b161181a2cf7733c50b548c1569b79 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 13:10:14 -0700 Subject: [PATCH 4/7] Wow, Monday --- tableauserverclient/server/pager.py | 43 +++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 tableauserverclient/server/pager.py diff --git a/tableauserverclient/server/pager.py b/tableauserverclient/server/pager.py new file mode 100644 index 000000000..eaad398af --- /dev/null +++ b/tableauserverclient/server/pager.py @@ -0,0 +1,43 @@ +from . import RequestOptions + + +class Pager(object): + """ + Generator that takes an endpoint with `.get` and lazily loads items from Server. + Supports all `RequestOptions` including starting on any page. + """ + + def __init__(self, endpoint, request_opts=None): + self._endpoint = endpoint.get + self._options = request_opts + + # If we have options we could be starting on any page, backfill the count + if self._options: + self._count = ((self._options.pagenumber - 1) * self._options.pagesize) + else: + self._count = 0 + + def __iter__(self): + # Fetch the first page + current_item_list, last_pagination_item = self._endpoint(self._options) + + # Get the rest on demand as a generator + while self._count < last_pagination_item.total_available: + if len(current_item_list) == 0: + current_item_list, last_pagination_item = self._load_next_page(last_pagination_item) + + try: + yield current_item_list.pop(0) + self._count += 1 + + except IndexError: + # The total count on Server changed while fetching exit gracefully + raise StopIteration + + def _load_next_page(self, last_pagination_item): + next_page = last_pagination_item.page_number + 1 + opts = RequestOptions(pagenumber=next_page, pagesize=last_pagination_item.page_size) + if self._options is not None: + opts.sort, opts.filter = self._options.sort, self._options.filter + current_item_list, last_pagination_item = 
self._endpoint(opts) + return current_item_list, last_pagination_item From 080634be3974392cc16c62cad7b4bb1dd85b5bc4 Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 14:02:28 -0700 Subject: [PATCH 5/7] Update Samples and feedback --- samples/pagination_sample.py | 54 ++++++++++++++---------------------- test/test_pager.py | 3 -- 2 files changed, 21 insertions(+), 36 deletions(-) diff --git a/samples/pagination_sample.py b/samples/pagination_sample.py index 7bc5b8dcc..2a5ba8c28 100644 --- a/samples/pagination_sample.py +++ b/samples/pagination_sample.py @@ -17,37 +17,6 @@ import tableauserverclient as TSC -class pagination_generator(object): - """ This class returns a generator that will iterate over all of the results. - - server is the server object that will be used when calling the callback. It will be passed - to the callback on each iteration - - Callback is expected to take a server object and a request options and return two values, an array of results, - and the pagination item from the current call. This will be used to build subsequent requests. 
- """ - - def __init__(self, fetch_more): - self._fetch_more = fetch_more - - def __call__(self): - current_item_list, last_pagination_item = self._fetch_more(None) # Prime the generator - count = 0 - - while count < last_pagination_item.total_available: - if len(current_item_list) == 0: - current_item_list, last_pagination_item = self._load_next_page(current_item_list, last_pagination_item) - - yield current_item_list.pop(0) - count += 1 - - def _load_next_page(self, current_item_list, last_pagination_item): - next_page = last_pagination_item.page_number + 1 - opts = TSC.RequestOptions(pagenumber=next_page, pagesize=last_pagination_item.page_size) - current_item_list, last_pagination_item = self._fetch_more(opts) - return current_item_list, last_pagination_item - - def main(): parser = argparse.ArgumentParser(description='Return a list of all of the workbooks on your server') @@ -70,10 +39,29 @@ def main(): server = TSC.Server(args.server) with server.auth.sign_in(tableau_auth): - generator = pagination_generator(server.workbooks.get) + + # Pager returns a generator that yields one item at a time fetching + # from Server only when necessary. Pager takes a server Endpoint as its + # first parameter. It will call 'get' on that endpoint. To get workbooks + # pass `server.workbooks`, to get users pass` server.users`, etc + # You can then loop over the generator to get the objects one at a time + # Here we print the workbook id for each workbook + print("Your server contains the following workbooks:\n") - for wb in generator(): + for wb in TSC.Pager(server.workbooks): print(wb.name) + # Pager can also be used in list comprehensions for compactness and easy + # filtering. 
Here we loop over the Pager and only keep workbooks where the + # name starts with the letter 'a' + # >>> [wb for wb in TSC.Pager(server.workbooks) if wb.name.startswith('a')] + + # Since Pager is a generator it follows the standard conventions and can + # be fed to a list if you really need all the workbooks in memory at once. + # If you need everything, it may be faster to use a larger page size + + # >>> request_options = TSC.RequestOptions(pagesize=1000) + # >>> all_workbooks = list(TSC.Pager(server.workbooks, request_options)) + if __name__ == '__main__': main() diff --git a/test/test_pager.py b/test/test_pager.py index 9c4632876..22b925474 100644 --- a/test/test_pager.py +++ b/test/test_pager.py @@ -55,9 +55,6 @@ def test_pager_with_options(self): with open(GET_XML_PAGE3, 'rb') as f: page_3 = f.read().decode('utf-8') with requests_mock.mock() as m: - # Register Pager with default request options - m.get(self.baseurl, text=page_1) - # Register Pager with some pages m.get(self.baseurl + "?pageNumber=1&pageSize=1", text=page_1) m.get(self.baseurl + "?pageNumber=2&pageSize=1", text=page_2) From 738e490759718d93eeea147471eb555e1f68c79b Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 14:08:47 -0700 Subject: [PATCH 6/7] final comment tweaks --- samples/pagination_sample.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/samples/pagination_sample.py b/samples/pagination_sample.py index 2a5ba8c28..882fc85ad 100644 --- a/samples/pagination_sample.py +++ b/samples/pagination_sample.py @@ -51,10 +51,13 @@ def main(): for wb in TSC.Pager(server.workbooks): print(wb.name) - # Pager can also be used in list comprehensions for compactness and easy - # filtering. 
Here we loop over the Pager and only keep workbooks where the - # name starts with the letter 'a' - # >>> [wb for wb in TSC.Pager(server.workbooks) if wb.name.startswith('a')] + # Pager can also be used in list comprehensions or generator expressions + # for compactness and easy filtering. Generator expressions will use less + # memory than list comprehensions. Consult the Python language documentation for + # best practices on which are best for your use case. Here we loop over the + # Pager and only keep workbooks where the name starts with the letter 'a' + # >>> [wb for wb in TSC.Pager(server.workbooks) if wb.name.startswith('a')] # List Comprehension + # >>> (wb for wb in TSC.Pager(server.workbooks) if wb.name.startswith('a')) # Generator Expression # Since Pager is a generator it follows the standard conventions and can # be fed to a list if you really need all the workbooks in memory at once. From 55d102cd12b1d88bde456e0d646013c85be1d47e Mon Sep 17 00:00:00 2001 From: T8y8 Date: Mon, 31 Oct 2016 14:20:31 -0700 Subject: [PATCH 7/7] final bit --- test/test_pager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/test/test_pager.py b/test/test_pager.py index 22b925474..e3cec1ce8 100644 --- a/test/test_pager.py +++ b/test/test_pager.py @@ -37,11 +37,10 @@ def test_pager_with_no_options(self): m.get(self.baseurl + "?pageNumber=3&pageSize=1", text=page_3) # No options should get all 3 - workbooks = TSC.Pager(self.server.workbooks) - self.assertTrue(len(list(workbooks)) == 3) + workbooks = list(TSC.Pager(self.server.workbooks)) + self.assertTrue(len(workbooks) == 3) # Let's check that workbook items aren't duplicates - workbooks = TSC.Pager(self.server.workbooks) wb1, wb2, wb3 = workbooks self.assertEqual(wb1.name, 'Page1Workbook') self.assertEqual(wb2.name, 'Page2Workbook') @@ -65,6 +64,8 @@ def test_pager_with_options(self): opts = TSC.RequestOptions(2, 1) workbooks = list(TSC.Pager(self.server.workbooks, opts)) 
self.assertTrue(len(workbooks) == 2) + + # Check that the workbooks are the 2 we think they should be wb2, wb3 = workbooks self.assertEqual(wb2.name, 'Page2Workbook') self.assertEqual(wb3.name, 'Page3Workbook')