Skip to content

Commit

Permalink
Update logging package usage
Browse files Browse the repository at this point in the history
+ Log errors as `extra` fields on page request retries rather than logging
  upon exception construction
+ Add light logging for result limit and page bounds logic
+ Add logging example to README

Logging messages could be improved, and there may be an opportunity to
offer a better logging config. It seems the default `basicConfig` format
just doesn't output the `extra` fields.
  • Loading branch information
lukasschwab committed Apr 17, 2021
1 parent bb625a2 commit 347327a
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 39 deletions.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,19 @@ for result in big_slow_client.get(arxiv.Search(query="quantum")):
print(result.title)
```

#### Example: logging

To inspect this package's network behavior and API logic, configure an `INFO`-level logger.

```python
>>> import logging, arxiv
>>> logging.basicConfig(level=logging.INFO)
>>> paper = next(arxiv.Search(id_list=["1605.08386v1"]).get()) # Logs:
INFO:arxiv.arxiv:Requesting 100 results at offset 0
INFO:arxiv.arxiv:Requesting page of results
INFO:arxiv.arxiv:Got first page; 1 of inf results available
```

## Contributors

<a href="https://github.com/lukasschwab/arxiv.py/graphs/contributors">
Expand Down
30 changes: 20 additions & 10 deletions arxiv/arxiv.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,10 @@ def get(self, search: Search) -> Generator[Result, None, None]:
first_page = True
while offset < total_results:
page_size = min(self.page_size, search.max_results - offset)
logger.info("Requesting {} results at offset {}".format(
page_size,
offset,
))
page_url = self._format_url(search, offset, page_size)
feed = self._parse_feed(page_url, first_page)
if first_page:
Expand All @@ -318,12 +322,17 @@ def get(self, search: Search) -> Generator[Result, None, None]:
# bug is fixed, we can remove this conditional and always set
# `total_results = min(...)`.
if len(feed.entries) == 0:
logger.info("Got empty results; stopping generation")
total_results = 0
else:
total_results = min(
total_results,
int(feed.feed.opensearch_totalresults)
)
logger.info("Got first page; {} of {} results available".format(
total_results,
search.max_results
))
# Subsequent pages are not the first page.
first_page = False
# Update offset for next request: account for received results.
Expand Down Expand Up @@ -372,7 +381,12 @@ def _parse_feed(
# self.delay_seconds seconds have passed since last call. Fetch results.
err = None
for retry in range(self.num_retries):
logger.info("Requesting feed", extra={'retry': retry, 'url': url})
logger.info("Requesting page of results", extra={
'url': url,
'first_page': first_page,
'retry': retry,
'last_err': err.message if err is not None else None,
})
feed = feedparser.parse(url)
self._last_request_dt = datetime.now()
if feed.status != 200:
Expand All @@ -381,7 +395,8 @@ def _parse_feed(
err = UnexpectedEmptyPageError(url, retry)
else:
return feed
# Raise the last exception encountered.
# Feed was never returned in self.num_retries tries. Raise the last
# exception encountered.
raise err


Expand All @@ -390,10 +405,10 @@ class ArxivError(Exception):
"""The feed URL that could not be fetched."""
message: str
"""Message explaining what went wrong."""
def __init__(self, url, message, extra={}):
def __init__(self, url, message):
self.url = url
self.message = message
logger.warning(self.message, extra=extra)
# logger.info(self.message, extra=extra)
super().__init__(self.message)


Expand All @@ -408,11 +423,7 @@ class UnexpectedEmptyPageError(ArxivError):
def __init__(self, url: str, retry: int):
self.url = url
self.retry = retry
super().__init__(
url,
"Page of results was unexpectedly empty",
extra={'retry': self.retry, 'url': self.url}
)
super().__init__(url, "Page of results was unexpectedly empty")


class HTTPError(ArxivError):
Expand All @@ -428,5 +439,4 @@ def __init__(self, url: str, retry: int, status: int):
super().__init__(
url,
"Page request resulted in HTTP {}".format(self.status),
extra={'status': self.status, 'retry': self.retry, 'url': self.url}
)
Loading

0 comments on commit 347327a

Please sign in to comment.