From 1339d645ce58a2e1198b898b9549ba5917b1ff12 Mon Sep 17 00:00:00 2001 From: "John L. Villalovos" Date: Tue, 12 Apr 2022 06:24:51 -0700 Subject: [PATCH] feat: emit a warning when using a `list()` method returns max A common cause of issues filed and questions raised is that a user will call a `list()` method and only get 20 items. As this is the default maximum of items that will be returned from a `list()` method. To help with this we now emit a warning when the result from a `list()` method is greater-than or equal to 20 (or the specified `per_page` value) and the user is not using either `all=True`, `all=False`, `as_list=False`, or `page=X`. --- gitlab/client.py | 62 +++++++++++++-- tests/functional/api/test_gitlab.py | 45 +++++++++++ tests/unit/test_gitlab_http_methods.py | 102 ++++++++++++++++++++++++- 3 files changed, 199 insertions(+), 10 deletions(-) diff --git a/gitlab/client.py b/gitlab/client.py index c6ac0d179..73a0a5c92 100644 --- a/gitlab/client.py +++ b/gitlab/client.py @@ -24,6 +24,7 @@ import requests.utils from requests_toolbelt.multipart.encoder import MultipartEncoder # type: ignore +import gitlab import gitlab.config import gitlab.const import gitlab.exceptions @@ -37,6 +38,12 @@ RETRYABLE_TRANSIENT_ERROR_CODES = [500, 502, 503, 504] + list(range(520, 531)) +# https://docs.gitlab.com/ee/api/#offset-based-pagination +_PAGINATION_URL = ( + f"https://python-gitlab.readthedocs.io/en/v{gitlab.__version__}/" + f"api-usage.html#pagination" +) + class Gitlab: """Represents a GitLab server connection. @@ -826,20 +833,59 @@ def http_list( # In case we want to change the default behavior at some point as_list = True if as_list is None else as_list - get_all = kwargs.pop("all", False) + get_all = kwargs.pop("all", None) url = self._build_url(path) page = kwargs.get("page") - if get_all is True and as_list is True: - return list(GitlabList(self, url, query_data, **kwargs)) + if as_list is False: + # Generator requested + return GitlabList(self, url, query_data, **kwargs) - if page or as_list is True: - # pagination requested, we return a list - return list(GitlabList(self, url, query_data, get_next=False, **kwargs)) + if get_all is True: + return list(GitlabList(self, url, query_data, **kwargs)) - # No pagination, generator requested - return GitlabList(self, url, query_data, **kwargs) + # pagination requested, we return a list + gl_list = GitlabList(self, url, query_data, get_next=False, **kwargs) + items = list(gl_list) + + def should_emit_warning() -> bool: + # No warning is emitted if any of the following conditions apply: + # * `all=False` was set in the `list()` call. + # * `page` was set in the `list()` call. + # * GitLab did not return the `x-per-page` header. + # * Number of items received is less than per-page value. + # * Number of items received is >= total available. + if get_all is False: + return False + if page is not None: + return False + if gl_list.per_page is None: + return False + if len(items) < gl_list.per_page: + return False + if gl_list.total is not None and len(items) >= gl_list.total: + return False + return True + + if not should_emit_warning(): + return items + + # Warn the user that they are only going to retrieve `per_page` + # maximum items. This is a common cause of issues filed. + total_items = "many" if gl_list.total is None else gl_list.total + utils.warn( + message=( + f"Calling a `list()` method without specifying `all=True` or " + f"`as_list=False` will return a maximum of {gl_list.per_page} items. " + f"Your query returned {len(items)} of {total_items} items. See " + f"{_PAGINATION_URL} for more details. If this was done intentionally, " + f"then this warning can be supressed by adding the argument " + f"`all=False` to the `list()` call." + ), + category=UserWarning, + ) + return items def http_post( self, diff --git a/tests/functional/api/test_gitlab.py b/tests/functional/api/test_gitlab.py index 5c8cf854d..4684e433b 100644 --- a/tests/functional/api/test_gitlab.py +++ b/tests/functional/api/test_gitlab.py @@ -1,3 +1,5 @@ +import warnings + import pytest import gitlab @@ -181,3 +183,46 @@ def test_rate_limits(gl): settings.throttle_authenticated_api_enabled = False settings.save() [project.delete() for project in projects] + + +def test_list_default_warning(gl): + """When there are more than 20 items and use default `list()` then warning is + generated""" + with warnings.catch_warnings(record=True) as caught_warnings: + gl.gitlabciymls.list() + assert len(caught_warnings) == 1 + warning = caught_warnings[0] + assert isinstance(warning.message, UserWarning) + message = str(warning.message) + assert "python-gitlab.readthedocs.io" in message + assert __file__ == warning.filename + + +def test_list_page_nowarning(gl): + """Using `page=X` will disable the warning""" + with warnings.catch_warnings(record=True) as caught_warnings: + gl.gitlabciymls.list(page=1) + assert len(caught_warnings) == 0 + + +def test_list_all_false_nowarning(gl): + """Using `all=False` will disable the warning""" + with warnings.catch_warnings(record=True) as caught_warnings: + gl.gitlabciymls.list(all=False) + assert len(caught_warnings) == 0 + + +def test_list_all_true_nowarning(gl): + """Using `all=True` will disable the warning""" + with warnings.catch_warnings(record=True) as caught_warnings: + items = gl.gitlabciymls.list(all=True) + assert len(caught_warnings) == 0 + assert len(items) > 20 + + +def test_list_as_list_false_nowarning(gl): + """Using `as_list=False` will disable the warning""" + with warnings.catch_warnings(record=True) as caught_warnings: + items = gl.gitlabciymls.list(as_list=False) + assert len(caught_warnings) == 0 + assert len(list(items)) > 20 diff --git a/tests/unit/test_gitlab_http_methods.py b/tests/unit/test_gitlab_http_methods.py index ed962153b..8481aee82 100644 --- a/tests/unit/test_gitlab_http_methods.py +++ b/tests/unit/test_gitlab_http_methods.py @@ -1,3 +1,6 @@ +import copy +import warnings + import pytest import requests import responses @@ -425,13 +428,15 @@ def test_list_request(gl): match=MATCH_EMPTY_QUERY_PARAMS, ) - result = gl.http_list("/projects", as_list=True) + with warnings.catch_warnings(record=True) as caught_warnings: + result = gl.http_list("/projects", as_list=True) + assert len(caught_warnings) == 0 assert isinstance(result, list) assert len(result) == 1 result = gl.http_list("/projects", as_list=False) assert isinstance(result, GitlabList) - assert len(result) == 1 + assert len(list(result)) == 1 result = gl.http_list("/projects", all=True) assert isinstance(result, list) @@ -439,6 +444,99 @@ def test_list_request(gl): assert responses.assert_call_count(url, 3) is True +large_list_response = { + "method": responses.GET, + "url": "http://localhost/api/v4/projects", + "json": [ + {"name": "project01"}, + {"name": "project02"}, + {"name": "project03"}, + {"name": "project04"}, + {"name": "project05"}, + {"name": "project06"}, + {"name": "project07"}, + {"name": "project08"}, + {"name": "project09"}, + {"name": "project10"}, + {"name": "project11"}, + {"name": "project12"}, + {"name": "project13"}, + {"name": "project14"}, + {"name": "project15"}, + {"name": "project16"}, + {"name": "project17"}, + {"name": "project18"}, + {"name": "project19"}, + {"name": "project20"}, + ], + "headers": {"X-Total": "30", "x-per-page": "20"}, + "status": 200, + "match": MATCH_EMPTY_QUERY_PARAMS, +} + + +@responses.activate +def test_list_request_pagination_warning(gl): + responses.add(**large_list_response) + + with warnings.catch_warnings(record=True) as caught_warnings: + result = gl.http_list("/projects", as_list=True) + assert len(caught_warnings) == 1 + warning = caught_warnings[0] + assert isinstance(warning.message, UserWarning) + message = str(warning.message) + assert "Calling a `list()` method" in message + assert "python-gitlab.readthedocs.io" in message + assert __file__ == warning.filename + assert isinstance(result, list) + assert len(result) == 20 + assert len(responses.calls) == 1 + + +@responses.activate +def test_list_request_as_list_false_nowarning(gl): + responses.add(**large_list_response) + with warnings.catch_warnings(record=True) as caught_warnings: + result = gl.http_list("/projects", as_list=False) + assert len(caught_warnings) == 0 + assert isinstance(result, GitlabList) + assert len(list(result)) == 20 + assert len(responses.calls) == 1 + + +@responses.activate +def test_list_request_all_true_nowarning(gl): + responses.add(**large_list_response) + with warnings.catch_warnings(record=True) as caught_warnings: + result = gl.http_list("/projects", all=True) + assert len(caught_warnings) == 0 + assert isinstance(result, list) + assert len(result) == 20 + assert len(responses.calls) == 1 + + +@responses.activate +def test_list_request_all_false_nowarning(gl): + responses.add(**large_list_response) + with warnings.catch_warnings(record=True) as caught_warnings: + result = gl.http_list("/projects", all=False) + assert len(caught_warnings) == 0 + assert isinstance(result, list) + assert len(result) == 20 + assert len(responses.calls) == 1 + + +@responses.activate +def test_list_request_page_nowarning(gl): + response_dict = copy.deepcopy(large_list_response) + response_dict["match"] = [responses.matchers.query_param_matcher({"page": "1"})] + responses.add(**response_dict) + with warnings.catch_warnings(record=True) as caught_warnings: + gl.http_list("/projects", page=1) + assert len(caught_warnings) == 0 + assert len(responses.calls) == 1 + + @responses.activate def test_list_request_404(gl): url = "http://localhost/api/v4/not_there"