Skip to content

Commit

Permalink
Implements HTTP retries
Browse files — browse the repository at this point in the history
  • Loading branch information
ndejong committed Feb 9, 2024
1 parent 6e931be commit 9affbb2
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 20 deletions.
7 changes: 4 additions & 3 deletions src/hibp_downloader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@

LOGGER_NAME = "hibp-downloader"
PWNEDPASSWORDS_API_URL = "https://api.pwnedpasswords.com"
HTTPX_TIMEOUT_SECONDS = 30
HTTP_TIMEOUT_DEFAULT = 30 # seconds
HTTP_MAX_RETRIES_DEFAULT = 5
LOCAL_CACHE_TTL_DEFAULT = 12 * 3600
MULTIPROCESSING_PROCESSES_DEFAULT = int(cpu_count() if cpu_count() else 4) # type: ignore[arg-type]
MULTIPROCESSING_PREFIXES_CHUNK_SIZE = 10
APPROX_GZIP_BYTES_PER_HASH = 20.674
MULTIPROCESSING_PREFIXES_CHUNK_SIZE_DEFAULT = 8
APPROX_GZIP_BYTES_PER_HASH = 20.674 # manually computed based on data-review
LOGGING_INFO_EVENT_MODULUS = 25

# encoding_type
Expand Down
39 changes: 34 additions & 5 deletions src/hibp_downloader/commands/hibp_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
APPROX_GZIP_BYTES_PER_HASH,
ENCODING_TYPE,
HELP_EPILOG_FOOTER,
HTTP_MAX_RETRIES_DEFAULT,
HTTP_TIMEOUT_DEFAULT,
LOCAL_CACHE_TTL_DEFAULT,
LOGGER_NAME,
LOGGING_INFO_EVENT_MODULUS,
MULTIPROCESSING_PREFIXES_CHUNK_SIZE,
MULTIPROCESSING_PREFIXES_CHUNK_SIZE_DEFAULT,
MULTIPROCESSING_PROCESSES_DEFAULT,
PWNEDPASSWORDS_API_URL,
app_context,
Expand Down Expand Up @@ -74,7 +76,7 @@ def main(
typer.Option(
help="Number of hash-prefixes to consume (asynchronously) per iteration per process",
),
] = MULTIPROCESSING_PREFIXES_CHUNK_SIZE,
] = MULTIPROCESSING_PREFIXES_CHUNK_SIZE_DEFAULT,
force: Annotated[
bool, typer.Option("--force", help="Same as setting --local_cache_ttl=0 and --ignore-etag")
] = False,
Expand All @@ -88,6 +90,14 @@ def main(
"prevents requesting the same data twice in this period"
),
] = LOCAL_CACHE_TTL_DEFAULT,
http_timeout: Annotated[
int,
typer.Option(help="HTTP timeout (seconds) per request"),
] = HTTP_TIMEOUT_DEFAULT,
http_max_retries: Annotated[
int,
typer.Option(help="Maximum number of HTTP request retries on request failure"),
] = HTTP_MAX_RETRIES_DEFAULT,
):
"""
Download new pwned password hash data from HIBP and update the local --data-path data storage path; use [bold cyan]download --help[/bold cyan] for more.
Expand Down Expand Up @@ -122,6 +132,9 @@ def main(
encoding_type=ENCODING_TYPE,
ignore_etag=ignore_etag,
local_cache_ttl=local_cache_ttl,
http_timeout=http_timeout,
http_max_retries=http_max_retries,
http_debug=False,
)

worker_processes = start_worker_processes(
Expand Down Expand Up @@ -193,6 +206,9 @@ async def pwnedpasswords_get_and_store_async(
data_path: Path,
metadata_path: Path,
encoding_type: str,
http_timeout: int,
http_max_retries: int,
http_debug: bool,
ignore_etag: bool,
local_cache_ttl: int,
worker_index: int,
Expand All @@ -202,6 +218,7 @@ async def pwnedpasswords_get_and_store_async(

logger_.debug(
f"{worker_index=} {prefix=} hash_type='{hash_type.value}' {encoding_type=} "
f"{http_timeout=} {http_max_retries=} {http_debug=}"
f"{ignore_etag=} {local_cache_ttl=} start_timestamp={str(start_timestamp)}"
)

Expand Down Expand Up @@ -234,7 +251,15 @@ async def pwnedpasswords_get_and_store_async(
etag = metadata_existing.etag

# download with etag setting
binary, metadata_latest = await pwnedpasswords_get(prefix, hash_type=hash_type, etag=etag, encoding=encoding_type)
binary, metadata_latest = await pwnedpasswords_get(
prefix,
hash_type=hash_type,
etag=etag,
encoding=encoding_type,
http_timeout=http_timeout,
http_max_retires=http_max_retries,
http_debug=http_debug,
)
metadata_latest.start_timestamp = start_timestamp

metadata = metadata_existing
Expand Down Expand Up @@ -269,14 +294,18 @@ async def pwnedpasswords_get(
hash_type: HashType,
etag: Union[str, None],
encoding: str,
httpx_debug: bool = False,
http_max_retires: int,
http_timeout: int,
http_debug: bool,
):
url = f"{PWNEDPASSWORDS_API_URL}/range/{prefix}"
if hash_type == HashType.ntlm:
url += "?mode=ntlm"

try:
response = await httpx_binary_response(url=url, etag=etag, encoding=encoding, debug=httpx_debug)
response = await httpx_binary_response(
url=url, etag=etag, encoding=encoding, debug=http_debug, max_retries=http_max_retires, timeout=http_timeout
)
except HibpDownloaderException:
return None, PrefixMetadata(prefix=prefix, data_source=PrefixMetadataDataSource.unknown_source_status)

Expand Down
20 changes: 15 additions & 5 deletions src/hibp_downloader/lib/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import httpx

from hibp_downloader import HTTPX_TIMEOUT_SECONDS, LOGGER_NAME, __title__, __version__
from hibp_downloader import LOGGER_NAME, __title__, __version__
from hibp_downloader.exceptions import HibpDownloaderException
from hibp_downloader.lib.logger import logger_get

Expand All @@ -24,7 +24,9 @@ async def httpx_debug_response(response):
)


async def httpx_binary_response(url, etag=None, method="GET", encoding="gzip", debug=False):
async def httpx_binary_response(
url, etag=None, method="GET", encoding="gzip", timeout=10, max_retries=3, __attempt=0, debug=False
):
event_hooks = {}
if debug:
event_hooks["request"] = [httpx_debug_request]
Expand All @@ -43,7 +45,7 @@ async def httpx_binary_response(url, etag=None, method="GET", encoding="gzip", d
httpx_client = {
"headers": headers,
"http2": True,
"timeout": HTTPX_TIMEOUT_SECONDS,
"timeout": timeout,
"follow_redirects": False,
"trust_env": False,
}
Expand All @@ -52,14 +54,22 @@ async def httpx_binary_response(url, etag=None, method="GET", encoding="gzip", d
httpx_client["event_hooks"] = event_hooks

if __TESTING_RANDOM_ERROR_INJECT_RATE and __TESTING_RANDOM_ERROR_INJECT_RATE > random.random():
url = url.replace("http", "broken")
url = url.replace("http", "BR0KEN")
logger.warning(f"Testing, creating broken URL {url}")

async with httpx.AsyncClient(**httpx_client) as client:
__attempt += 1
logger.debug(f"Request attempt {__attempt} of {max_retries} for {url!r}")
request = client.build_request(method=method, url=url)
try:
response = await client.send(request=request, stream=True)
except (httpx.ConnectError, httpx.RemoteProtocolError, httpx.HTTPError):
raise HibpDownloaderException(f"Unable to establish connection {url}")
logger.warning(f"Request [{__attempt} of {max_retries}] failed for {request.method!r} {url!r}")
if __attempt < max_retries:
return await httpx_binary_response(
url.replace("BR0KEN", "http"), etag, method, encoding, timeout, max_retries, __attempt, debug
)
raise HibpDownloaderException(f"Request failed after {__attempt} retries: {url!r}")

response.binary = b"".join([part async for part in response.aiter_raw()])

Expand Down
11 changes: 8 additions & 3 deletions src/hibp_downloader/models/worker_args.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dataclasses import asdict, dataclass, field
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Optional

Expand All @@ -11,8 +11,13 @@ class WorkerArgs:
data_path: Path
metadata_path: Path
encoding_type: str
ignore_etag: bool = field(default=False)
local_cache_ttl: int = field(default=(12 * 3600))

http_timeout: int
http_max_retries: int
http_debug: bool

ignore_etag: bool
local_cache_ttl: int
worker_index: Optional[int] = None

def as_dict(self):
Expand Down
8 changes: 4 additions & 4 deletions tests/test_02_exec/test_exec_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def test_exec_download():
"--data-path", f"/tmp/hibp-test/{prefix}",
"download",
"--first-hash", f"{prefix}0",
"--last-hash", f"{prefix}8"
"--last-hash", f"{prefix}f"
]
# fmt: on
stdout, stderr, rc = exec_command("hibp-downloader", args=args)
Expand All @@ -37,7 +37,7 @@ def test_exec_download_sha1():
"--data-path", f"/tmp/hibp-test/{prefix}",
"download",
"--first-hash", f"{prefix}0",
"--last-hash", f"{prefix}8",
"--last-hash", f"{prefix}f",
"--hash-type", hash_type,
]
# fmt: on
Expand All @@ -62,7 +62,7 @@ def test_exec_download_ntlm():
"--data-path", f"/tmp/hibp-test/{prefix}",
"download",
"--first-hash", f"{prefix}0",
"--last-hash", f"{prefix}8",
"--last-hash", f"{prefix}f",
"--hash-type", hash_type,
]
# fmt: on
Expand All @@ -87,7 +87,7 @@ def test_exec_download_w_metadata_path():
"--metadata-path", f"/tmp/hibp-test-metadata/{prefix}",
"download",
"--first-hash", f"{prefix}0",
"--last-hash", f"{prefix}8"
"--last-hash", f"{prefix}f"
]
# fmt: on
stdout, stderr, rc = exec_command("hibp-downloader", args=args)
Expand Down

0 comments on commit 9affbb2

Please sign in to comment.