Skip to content

Commit

Permalink
linkcheck: Fix conversion from UTC time to the UNIX epoch (#11649)
Browse files Browse the repository at this point in the history
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
  • Loading branch information
mitya57 and AA-Turner committed Aug 28, 2023
1 parent 1567281 commit 2f025a4
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 15 deletions.
4 changes: 3 additions & 1 deletion CHANGES
Expand Up @@ -6,6 +6,8 @@ Bugs fixed

* #11618: Fix a regression in the MoveModuleTargets transform,
introduced in #10478 (#9662).
* #11649: linkcheck: Fix conversions from UTC to UNIX time
for timezones west of London.

Release 7.2.3 (released Aug 23, 2023)
=====================================
Expand All @@ -24,7 +26,7 @@ Bugs fixed
when ``autodoc_preserve_defaults`` is ``True``.
* Restore support for string methods on path objects.
This is deprecated and will be removed in Sphinx 8.
Use :py:func`os.fspath` to convert :py:class:`~pathlib.Path` objects to strings,
Use :py:func:`os.fspath` to convert :py:class:`~pathlib.Path` objects to strings,
or :py:class:`~pathlib.Path`'s methods to work with path objects.

Release 7.2.2 (released Aug 17, 2023)
Expand Down
9 changes: 3 additions & 6 deletions sphinx/builders/linkcheck.py
Expand Up @@ -7,7 +7,6 @@
import re
import socket
import time
from email.utils import parsedate_tz
from html.parser import HTMLParser
from os import path
from queue import PriorityQueue, Queue
Expand All @@ -29,6 +28,7 @@
red,
turquoise,
)
from sphinx.util.http_date import rfc1123_to_epoch
from sphinx.util.nodes import get_node_line

if TYPE_CHECKING:
Expand Down Expand Up @@ -488,11 +488,8 @@ def limit_rate(self, response_url: str, retry_after: str) -> float | None:
except ValueError:
try:
# An HTTP-date: time of next attempt.
parsed = parsedate_tz(retry_after)
assert parsed is not None
# the 10th element is the GMT offset in seconds
next_check = time.mktime(parsed[:9]) - (parsed[9] or 0)
except (AssertionError, TypeError, ValueError):
next_check = rfc1123_to_epoch(retry_after)
except (ValueError, TypeError):
# TypeError: Invalid date format.
# ValueError: Invalid date, e.g. Oct 52th.
pass
Expand Down
29 changes: 24 additions & 5 deletions sphinx/util/http_date.py
Expand Up @@ -4,7 +4,12 @@
"""

import time
from email.utils import formatdate, parsedate
import warnings
from email.utils import formatdate, parsedate_tz

from sphinx.deprecation import RemovedInSphinx90Warning

_GMT_OFFSET = float(time.localtime().tm_gmtoff)


def epoch_to_rfc1123(epoch: float) -> str:
Expand All @@ -14,7 +19,21 @@ def epoch_to_rfc1123(epoch: float) -> str:

def rfc1123_to_epoch(rfc1123: str) -> float:
"""Return epoch offset from HTTP-date string."""
t = parsedate(rfc1123)
if t:
return time.mktime(t)
raise ValueError
t = parsedate_tz(rfc1123)
if t is None:
raise ValueError
if not rfc1123.endswith(" GMT"):
warnings.warn(
"HTTP-date string does not meet RFC 7231 requirements "
f"(must end with 'GMT'): {rfc1123!r}",
RemovedInSphinx90Warning, stacklevel=3,
)
epoch_secs = time.mktime(time.struct_time(t[:9])) + _GMT_OFFSET
if (gmt_offset := t[9]) != 0:
warnings.warn(
"HTTP-date string does not meet RFC 7231 requirements "
f"(must be GMT time): {rfc1123!r}",
RemovedInSphinx90Warning, stacklevel=3,
)
return epoch_secs - (gmt_offset or 0)
return epoch_secs
19 changes: 16 additions & 3 deletions tests/test_build_linkcheck.py
Expand Up @@ -5,6 +5,7 @@
import http.server
import json
import re
import sys
import textwrap
import time
import wsgiref.handlers
Expand All @@ -16,6 +17,7 @@
import pytest
from urllib3.poolmanager import PoolManager

import sphinx.util.http_date
from sphinx.builders.linkcheck import (
CheckRequest,
Hyperlink,
Expand Down Expand Up @@ -772,11 +774,22 @@ def test_too_many_requests_retry_after_int_delay(app, capsys, status):
)


@pytest.mark.parametrize('tz', [None, 'GMT', 'GMT+3', 'GMT-3'])
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_HTTP_date(app, capsys):
def test_too_many_requests_retry_after_HTTP_date(tz, app, monkeypatch, capsys):
retry_after = wsgiref.handlers.format_date_time(time.time())
with http_server(make_retry_after_handler([(429, retry_after), (200, None)])):
app.build()

with monkeypatch.context() as m:
if tz is not None:
m.setenv('TZ', tz)
if sys.platform != "win32":
time.tzset()
m.setattr(sphinx.util.http_date, '_GMT_OFFSET',
float(time.localtime().tm_gmtoff))

with http_server(make_retry_after_handler([(429, retry_after), (200, None)])):
app.build()

content = (app.outdir / 'output.json').read_text(encoding='utf8')
assert json.loads(content) == {
"filename": "index.rst",
Expand Down

0 comments on commit 2f025a4

Please sign in to comment.