Skip to content

Commit

Permalink
A bit more cleanup
Browse files: browse the repository at this point in the history
  • Loading branch information
JWCook committed Sep 13, 2021
1 parent 9781cdc commit e0e7be9
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 26 deletions.
49 changes: 24 additions & 25 deletions pyinaturalist/paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,24 @@
logger = getLogger(__name__)


# TODO: support autocomplete pseudo-pagination?
class Paginator(Iterable, AsyncIterable, Generic[T]):
"""Class to handle pagination of API requests, with async support
Args:
request_function: API request function to paginate
model: Model class to use for results
method: Pagination method; either 'page', 'id', or 'autocomplete' (see below)
method: Pagination method; either 'page' or 'id' (see note below)
limit: Maximum number of results to fetch
per_page: Maximum number of results to fetch per page
kwargs: Original request parameters
Note on pagination by ID, from the iNaturalist documentation:
_'The large size of the observations index prevents us from supporting the page parameter when
retrieving records from large result sets. If you need to retrieve large numbers of records,
use the ``per_page`` and ``id_above`` or ``id_below`` parameters instead.'_
.. note::
Note on pagination by ID, from the iNaturalist documentation:
*The large size of the observations index prevents us from supporting the page parameter
when retrieving records from large result sets. If you need to retrieve large numbers of
records, use the ``per_page`` and ``id_above`` or ``id_below`` parameters instead.*
"""

Expand Down Expand Up @@ -72,8 +73,6 @@ def __init__(
# Set initial pagination params based on pagination method
self.kwargs.pop('page', None)
self.kwargs.pop('per_page', None)
# if self.method == 'autocomplete':
# return paginate_autocomplete(self.request_function, **self.kwargs)
if self.method == 'id':
self.kwargs['order_by'] = 'id'
self.kwargs['order'] = 'asc'
Expand Down Expand Up @@ -104,9 +103,8 @@ def count(self) -> int:
Either the total number of results, if the endpoint provides pagination info, or ``-1``
"""
if self.total_results is None:
count_response = self.request_function(
*self.request_args, **{**self.kwargs, 'per_page': 0}
)
kwargs = {**self.kwargs, 'per_page': 0}
count_response = self.request_function(*self.request_args, **kwargs)
self.total_results = int(count_response['total_results'])
return self.total_results

Expand All @@ -127,28 +125,29 @@ def next_page(self) -> List[ResponseResult]:

# Note: For id-based pagination, only the first page's 'total_results' is accurate
if self.total_results is None:
self.total_results = response.get('total_results', -1)
self.total_results = response.get('total_results', len(results))
self.results_fetched += len(results)
self._update_next_page_params(results)

# If this is the first of multiple requests, log the estimated time and number of requests
if self.results_fetched == len(results) and not self.exhausted:
self._estimate()
return results

# Set params for next request, if there are more results
# Some endpoints (like get_observation_fields) don't return total_results
# Also check page size, in case total_results is off (race condition, outdated index, etc.)
def _update_next_page_params(self, page_results):
"""Set params for next request, if there are more results. Also check page size, in case
total_results is off due to race condition, outdated index, etc.
"""
if (
(self.limit and self.results_fetched >= self.limit)
or (self.total_results and self.results_fetched >= self.total_results)
or len(results) == 0
or len(page_results) == 0
):
self.exhausted = True
elif self.method == 'id':
self.kwargs['id_above'] = page_results[-1]['id']
else:
if self.method == 'id':
self.kwargs['id_above'] = results[-1]['id']
else:
self.kwargs['page'] += 1

# If this is the first of multiple requests, log the estimated time and number of requests
if self.results_fetched == len(results) and not self.exhausted:
self._estimate()
return results
self.kwargs['page'] += 1

def _estimate(self):
"""Log the estimated total number of requests and rate-limiting delay, and show a warning if
Expand Down
4 changes: 3 additions & 1 deletion test/test_paginator.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ def test_count(requests_mock):

paginator = Paginator(get_observations, Observation, q='asdf')
assert paginator.count() == 50
assert paginator.total_results == 50

# Subsequent calls should use the previously saved value
assert paginator.count() == paginator.total_results == 50


def test_next_page__exhausted():
Expand Down

0 comments on commit e0e7be9

Please sign in to comment.