Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ on:
type: string

permissions:
contents: write
contents: read
pull-requests: read

concurrency:
Expand Down Expand Up @@ -65,8 +65,10 @@ jobs:

- name: Validate release inputs
id: release
env:
RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
run: |
release_tag="${{ github.event.inputs.tag || github.ref_name }}"
release_tag="${RELEASE_TAG}"
if [[ ! "${release_tag}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
echo "::error title=Invalid release tag::Use a vMAJOR.MINOR.PATCH tag, got '${release_tag}'."
exit 1
Expand Down Expand Up @@ -214,6 +216,8 @@ jobs:
name: Publish GitHub release assets
needs: [validate, wheel]
runs-on: ubuntu-24.04
permissions:
contents: write

steps:
- name: Download release assets
Expand All @@ -226,8 +230,9 @@ jobs:
env:
GH_TOKEN: ${{ github.token }}
GH_REPO: ${{ github.repository }}
RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }}
run: |
release_tag="${{ github.event.inputs.tag || github.ref_name }}"
release_tag="${RELEASE_TAG}"
notes_path="$(find release-assets -name release-notes.md -print -quit)"
mapfile -t release_assets < <(find release-assets -type f ! -name release-notes.md | sort)

Expand Down
5 changes: 5 additions & 0 deletions src/src_py_lib/clients/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ def _normalize_github_url(github_url: str) -> str:
stripped = github_url.strip().rstrip("/")
if "://" not in stripped:
stripped = f"https://{stripped}"
split = urlsplit(stripped)
if split.scheme != "https":
raise ValueError(f"GitHub URL must be an https:// URL (got {split.scheme!r})")
if not split.hostname:
raise ValueError(f"could not parse hostname from GitHub URL {stripped!r}")
return stripped


Expand Down
4 changes: 2 additions & 2 deletions src/src_py_lib/clients/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from pathlib import Path
from typing import cast

from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse
from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse, log_safe_url
from src_py_lib.utils.json_types import JSONDict, JSONValue, json_dict, json_list, json_str
from src_py_lib.utils.logging import event

Expand Down Expand Up @@ -249,7 +249,7 @@ def _execute_once(
page_number=page_number,
page_size=_int_variable(variables, first_variable),
cursor_present=variables.get(after_variable) is not None,
url=self.url,
url=log_safe_url(self.url),
variable_names=sorted(variables),
query_bytes=len(query.encode("utf-8")),
) as fields:
Expand Down
9 changes: 8 additions & 1 deletion src/src_py_lib/clients/sourcegraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,9 @@ class SourcegraphClient:
`endpoint` should be the instance base URL, for example
`https://sourcegraph.example.com`.

Plain HTTP endpoints are rejected unless `allow_insecure_http=True` is set
for local development.

Set `trace=True` to ask Sourcegraph to retain traces for each GraphQL
request. Traced requests are available through `drain_traces()` and can be
fetched from the instance's Jaeger/debug endpoint with
Expand All @@ -192,12 +195,16 @@ class SourcegraphClient:
token: str
http: HTTPClient = field(default_factory=HTTPClient)
trace: bool = False
allow_insecure_http: bool = False
_traces: queue.Queue[SourcegraphTrace] = field(
default_factory=lambda: queue.Queue[SourcegraphTrace](), init=False, repr=False
)

def __post_init__(self) -> None:
self.endpoint = normalize_sourcegraph_endpoint(self.endpoint)
self.endpoint = normalize_sourcegraph_endpoint(
self.endpoint,
require_https=not self.allow_insecure_http,
)

def graphql(self, query: str, variables: Mapping[str, JSONValue] | None = None) -> JSONDict:
return self._client().execute(query, variables)
Expand Down
52 changes: 46 additions & 6 deletions src/src_py_lib/utils/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@
"secret",
"token",
)
SENSITIVE_URL_QUERY_FRAGMENTS: Final[tuple[str, ...]] = (
"access_token",
"api-key",
"api_key",
"authorization",
"code",
"credential",
"key",
"password",
"secret",
"signature",
"token",
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -150,7 +163,7 @@ def response(
"http_request",
level="debug",
method=method,
url=_safe_url(request_url),
url=log_safe_url(request_url),
attempt=attempt,
request_headers=_headers_for_log(request_headers),
request_bytes=len(body or b""),
Expand Down Expand Up @@ -179,7 +192,7 @@ def response(
if not self._should_retry(response.status_code, attempt):
raise HTTPClientError(
f"HTTP {response.status_code} for {method} "
f"{_safe_url(request_url)}: {body_text}",
f"{log_safe_url(request_url)}: {body_text}",
status_code=response.status_code,
body=body_text,
headers=dict(response.headers),
Expand All @@ -203,7 +216,7 @@ def response(
"timed out" if isinstance(exception, httpx.TimeoutException) else "failed"
)
raise HTTPClientError(
f"HTTP request {failure} for {method} {_safe_url(request_url)}: "
f"HTTP request {failure} for {method} {log_safe_url(request_url)}: "
f"{_exception_message(exception)}"
) from exception
record_http_retry()
Expand Down Expand Up @@ -246,7 +259,7 @@ def json_response(
), response
except json.JSONDecodeError as exception:
raise HTTPClientError(
f"Invalid JSON response from {method} {_safe_url(url)}"
f"Invalid JSON response from {method} {log_safe_url(url)}"
) from exception

def _should_retry(self, status_code: int | None, attempt: int) -> bool:
Expand Down Expand Up @@ -276,9 +289,31 @@ def _with_query(
return f"{url}{separator}{urllib.parse.urlencode(filtered)}"


def _safe_url(url: str) -> str:
def log_safe_url(url: str) -> str:
"""Return a URL safe to include in logs and exception messages."""
split = urllib.parse.urlsplit(url)
return urllib.parse.urlunsplit((split.scheme, split.netloc, split.path, split.query, ""))
return urllib.parse.urlunsplit(
(split.scheme, _safe_netloc(split.netloc), split.path, _safe_query(split.query), "")
)


def _safe_netloc(netloc: str) -> str:
return netloc.rsplit("@", 1)[-1]


def _safe_query(query: str) -> str:
if not query:
return ""
parts: list[str] = []
for part in query.split("&"):
key, separator, _value = part.partition("=")
if _is_sensitive_query_parameter(urllib.parse.unquote_plus(key)):
parts.append(f"{key}={REDACTED_HEADER_VALUE}")
elif separator:
parts.append(part)
else:
parts.append(key)
return "&".join(parts)


def _headers_for_log(headers: Mapping[str, str] | httpx.Headers) -> dict[str, str | list[str]]:
Expand Down Expand Up @@ -311,6 +346,11 @@ def _is_sensitive_header(name: str) -> bool:
return any(fragment in lowered for fragment in SENSITIVE_HEADER_FRAGMENTS)


def _is_sensitive_query_parameter(name: str) -> bool:
lowered = name.lower()
return any(fragment in lowered for fragment in SENSITIVE_URL_QUERY_FRAGMENTS)


def _response_http_version(response: httpx.Response) -> str | None:
version = response.extensions.get("http_version")
if isinstance(version, bytes):
Expand Down
16 changes: 12 additions & 4 deletions src/src_py_lib/utils/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@
TRACE_ID_BYTES: Final[int] = 16
SPAN_ID_BYTES: Final[int] = 8
MEBIBYTE: Final[int] = 1024 * 1024
REDACTED_LOG_VALUE: Final[str] = "[redacted]"
SECRET_FIELD_FRAGMENTS: Final[tuple[str, ...]] = (
"api-key",
"api_key",
"authorization",
"cookie",
Expand Down Expand Up @@ -768,7 +770,7 @@ def sanitized_config_snapshot(config: object) -> dict[str, Any]:
if callable(value):
continue
key_text = str(key)
if any(fragment in key_text.lower() for fragment in SECRET_FIELD_FRAGMENTS):
if _is_sensitive_log_field(key_text):
snapshot[key_text] = _secret_state(value)
elif isinstance(value, Path):
snapshot[key_text] = str(value)
Expand Down Expand Up @@ -937,13 +939,14 @@ def _http_headers(raw_headers: object) -> dict[str, str | list[str]]:
if name is None or value is None:
continue
key = name.lower()
logged_value = REDACTED_LOG_VALUE if _is_sensitive_log_field(key) else value
existing = headers.get(key)
if existing is None:
headers[key] = value
headers[key] = logged_value
elif isinstance(existing, list):
existing.append(value)
existing.append(logged_value)
else:
headers[key] = [existing, value]
headers[key] = [existing, logged_value]
return {key: headers[key] for key in sorted(headers)}


Expand Down Expand Up @@ -971,6 +974,11 @@ def _is_hex_identifier(value: str, length: int) -> bool:
)


def _is_sensitive_log_field(name: str) -> bool:
lowered = name.lower()
return any(fragment in lowered for fragment in SECRET_FIELD_FRAGMENTS)


def _secret_state(value: object) -> str:
if value is None or value == "":
return "missing"
Expand Down
45 changes: 40 additions & 5 deletions tests/test_logging_http_clients.py
Original file line number Diff line number Diff line change
Expand Up @@ -1209,7 +1209,8 @@ def test_httpcore_response_headers_are_structured(self) -> None:
"receive_response_headers.complete "
"return_value=(b'HTTP/1.1', 200, b'OK', "
"[(b'Zed', b'last'), (b'Content-Type', b'application/json'), "
"(b'Alpha', b'first')])"
"(b'Set-Cookie', b'session=secret'), "
"(b'X-Api-Key', b'secret'), (b'Alpha', b'first')])"
)
finally:
logger = logging.getLogger(logger_name)
Expand All @@ -1226,12 +1227,17 @@ def test_httpcore_response_headers_are_structured(self) -> None:
self.assertEqual(response_headers["http_version"], "HTTP/1.1")
self.assertEqual(response_headers["status_code"], 200)
self.assertEqual(response_headers["reason_phrase"], "OK")
self.assertEqual(list(response_headers["headers"]), ["alpha", "content-type", "zed"])
self.assertEqual(
list(response_headers["headers"]),
["alpha", "content-type", "set-cookie", "x-api-key", "zed"],
)
self.assertEqual(
response_headers["headers"],
{
"alpha": "first",
"content-type": "application/json",
"set-cookie": "[redacted]",
"x-api-key": "[redacted]",
"zed": "last",
},
)
Expand Down Expand Up @@ -1311,8 +1317,9 @@ def handler(_request: httpx.Request) -> httpx.Response:
client = HTTPClient(max_attempts=1, transport=httpx.MockTransport(handler))
payload = client.json(
"POST",
"https://example.com/api",
"https://user:pass@example.com/api?code=oauth",
headers={"Authorization": "Bearer token"},
query={"limit": 10, "access_token": "secret", "signature": "signed"},
json_body={"hello": "world"},
)
finally:
Expand All @@ -1332,6 +1339,11 @@ def handler(_request: httpx.Request) -> httpx.Response:
self.assertFalse(any(row.get("logger") in {"httpx", "httpcore"} for row in rows))
self.assertEqual(http_request["status_code"], 200)
self.assertEqual(http_request["reason_phrase"], "OK")
self.assertEqual(
http_request["url"],
"https://example.com/api?code=[redacted]&limit=10"
"&access_token=[redacted]&signature=[redacted]",
)
self.assertEqual(http_request["request_bytes"], len(b'{"hello": "world"}'))
self.assertEqual(http_request["request_headers"]["authorization"], "[redacted]")
self.assertEqual(
Expand Down Expand Up @@ -1364,10 +1376,13 @@ def handler(_request: httpx.Request) -> httpx.Response:
)

with self.assertRaisesRegex(HTTPClientError, "rate limited") as raised:
client.json("GET", "https://example.com/api")
client.json("GET", "https://user:pass@example.com/api?access_token=secret")

self.assertEqual(raised.exception.status_code, 429)
self.assertEqual(raised.exception.body, "rate limited")
self.assertIn("https://example.com/api?access_token=[redacted]", str(raised.exception))
self.assertNotIn("user:pass", str(raised.exception))
self.assertNotIn("secret", str(raised.exception))


class ClientTest(unittest.TestCase):
Expand Down Expand Up @@ -1409,6 +1424,13 @@ def test_sourcegraph_client_builds_graphql_request(self) -> None:
self.assertEqual(http.calls[0]["url"], "https://sourcegraph.example.com/.api/graphql")
self.assertEqual(http.calls[0]["headers"], {"Authorization": "token token"})

def test_sourcegraph_client_rejects_http_endpoint_by_default(self) -> None:
with self.assertRaisesRegex(ValueError, "https:// URL"):
SourcegraphClient("http://sourcegraph.example.com", "token")

client = SourcegraphClient("http://localhost:3080", "token", allow_insecure_http=True)
self.assertEqual(client.endpoint, "http://localhost:3080")

def test_sourcegraph_client_streams_connection_nodes(self) -> None:
http = RecordingHTTP(
[
Expand Down Expand Up @@ -1780,7 +1802,12 @@ def test_graphql_client_emits_query_debug_events(self) -> None:
},
]
)
client = GraphQLClient("https://example.com/graphql", {}, "Example", http=http)
client = GraphQLClient(
"https://user:pass@example.com/graphql?access_token=secret&query=ok",
{},
"Example",
http=http,
)
query = """
query Items($first: Int!, $after: String, $userId: ID!) {
viewer { items { nodes { id } pageInfo { hasNextPage endCursor } } }
Expand Down Expand Up @@ -1822,6 +1849,10 @@ def test_graphql_client_emits_query_debug_events(self) -> None:
self.assertEqual([row["page_size"] for row in starts], [2, 2])
self.assertEqual([row["cursor_present"] for row in starts], [False, True])
self.assertEqual(starts[0]["graphql_client"], "Example")
self.assertEqual(
starts[0]["url"],
"https://example.com/graphql?access_token=[redacted]&query=ok",
)
self.assertEqual(starts[0]["variable_names"], ["after", "first", "userId"])
self.assertEqual(ends[0]["response_fields"], ["viewer"])

Expand Down Expand Up @@ -1946,6 +1977,10 @@ def test_github_client_can_target_github_enterprise(self) -> None:
graphql_api_url("github.example.com"), "https://github.example.com/api/graphql"
)

def test_github_client_rejects_http_enterprise_url(self) -> None:
with self.assertRaisesRegex(ValueError, "https:// URL"):
graphql_api_url("http://github.example.com")

def test_github_client_validate_queries_viewer(self) -> None:
http = RecordingHTTP([{"data": {"viewer": {"login": "alice"}}}])
client = GitHubClient("token", http=http)
Expand Down