diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 51bd774..a18bac3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,7 +12,7 @@ on: type: string permissions: - contents: write + contents: read pull-requests: read concurrency: @@ -65,8 +65,10 @@ jobs: - name: Validate release inputs id: release + env: + RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }} run: | - release_tag="${{ github.event.inputs.tag || github.ref_name }}" + release_tag="${RELEASE_TAG}" if [[ ! "${release_tag}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then echo "::error title=Invalid release tag::Use a vMAJOR.MINOR.PATCH tag, got '${release_tag}'." exit 1 @@ -214,6 +216,8 @@ jobs: name: Publish GitHub release assets needs: [validate, wheel] runs-on: ubuntu-24.04 + permissions: + contents: write steps: - name: Download release assets @@ -226,8 +230,9 @@ jobs: env: GH_TOKEN: ${{ github.token }} GH_REPO: ${{ github.repository }} + RELEASE_TAG: ${{ github.event.inputs.tag || github.ref_name }} run: | - release_tag="${{ github.event.inputs.tag || github.ref_name }}" + release_tag="${RELEASE_TAG}" notes_path="$(find release-assets -name release-notes.md -print -quit)" mapfile -t release_assets < <(find release-assets -type f ! -name release-notes.md | sort) diff --git a/src/src_py_lib/clients/github.py b/src/src_py_lib/clients/github.py index ebff03e..c010628 100644 --- a/src/src_py_lib/clients/github.py +++ b/src/src_py_lib/clients/github.py @@ -113,6 +113,11 @@ def _normalize_github_url(github_url: str) -> str: stripped = github_url.strip().rstrip("/") if "://" not in stripped: stripped = f"https://{stripped}" + split = urlsplit(stripped) + if split.scheme != "https": + raise ValueError(f"GitHub URL must be an https:// URL (got {split.scheme!r})") + if not split.hostname: + raise ValueError(f"could not parse hostname from GitHub URL {stripped!r}") return stripped diff --git a/src/src_py_lib/clients/graphql.py b/src/src_py_lib/clients/graphql.py index 4dbf143..2478bb6 100644 --- a/src/src_py_lib/clients/graphql.py +++ b/src/src_py_lib/clients/graphql.py @@ -9,7 +9,7 @@ from pathlib import Path from typing import cast -from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse +from src_py_lib.utils.http import HTTPClient, HTTPClientError, HTTPResponse, log_safe_url from src_py_lib.utils.json_types import JSONDict, JSONValue, json_dict, json_list, json_str from src_py_lib.utils.logging import event @@ -249,7 +249,7 @@ def _execute_once( page_number=page_number, page_size=_int_variable(variables, first_variable), cursor_present=variables.get(after_variable) is not None, - url=self.url, + url=log_safe_url(self.url), variable_names=sorted(variables), query_bytes=len(query.encode("utf-8")), ) as fields: diff --git a/src/src_py_lib/clients/sourcegraph.py b/src/src_py_lib/clients/sourcegraph.py index 69200b6..283153b 100644 --- a/src/src_py_lib/clients/sourcegraph.py +++ b/src/src_py_lib/clients/sourcegraph.py @@ -182,6 +182,9 @@ class SourcegraphClient: `endpoint` should be the instance base URL, for example `https://sourcegraph.example.com`. + Plain HTTP endpoints are rejected unless `allow_insecure_http=True` is set + for local development. + Set `trace=True` to ask Sourcegraph to retain traces for each GraphQL request. Traced requests are available through `drain_traces()` and can be fetched from the instance's Jaeger/debug endpoint with @@ -192,12 +195,16 @@ class SourcegraphClient: token: str http: HTTPClient = field(default_factory=HTTPClient) trace: bool = False + allow_insecure_http: bool = False _traces: queue.Queue[SourcegraphTrace] = field( default_factory=lambda: queue.Queue[SourcegraphTrace](), init=False, repr=False ) def __post_init__(self) -> None: - self.endpoint = normalize_sourcegraph_endpoint(self.endpoint) + self.endpoint = normalize_sourcegraph_endpoint( + self.endpoint, + require_https=not self.allow_insecure_http, + ) def graphql(self, query: str, variables: Mapping[str, JSONValue] | None = None) -> JSONDict: return self._client().execute(query, variables) diff --git a/src/src_py_lib/utils/http.py b/src/src_py_lib/utils/http.py index ba65348..bf0505f 100644 --- a/src/src_py_lib/utils/http.py +++ b/src/src_py_lib/utils/http.py @@ -33,6 +33,19 @@ "secret", "token", ) +SENSITIVE_URL_QUERY_FRAGMENTS: Final[tuple[str, ...]] = ( + "access_token", + "api-key", + "api_key", + "authorization", + "code", + "credential", + "key", + "password", + "secret", + "signature", + "token", +) logger = logging.getLogger(__name__) @@ -150,7 +163,7 @@ def response( "http_request", level="debug", method=method, - url=_safe_url(request_url), + url=log_safe_url(request_url), attempt=attempt, request_headers=_headers_for_log(request_headers), request_bytes=len(body or b""), @@ -179,7 +192,7 @@ def response( if not self._should_retry(response.status_code, attempt): raise HTTPClientError( f"HTTP {response.status_code} for {method} " - f"{_safe_url(request_url)}: {body_text}", + f"{log_safe_url(request_url)}: {body_text}", status_code=response.status_code, body=body_text, headers=dict(response.headers), @@ -203,7 +216,7 @@ def response( "timed out" if isinstance(exception, httpx.TimeoutException) else "failed" ) raise HTTPClientError( - f"HTTP request {failure} for {method} {_safe_url(request_url)}: " + f"HTTP request {failure} for {method} {log_safe_url(request_url)}: " f"{_exception_message(exception)}" ) from exception record_http_retry() @@ -246,7 +259,7 @@ def json_response( ), response except json.JSONDecodeError as exception: raise HTTPClientError( - f"Invalid JSON response from {method} {_safe_url(url)}" + f"Invalid JSON response from {method} {log_safe_url(url)}" ) from exception def _should_retry(self, status_code: int | None, attempt: int) -> bool: @@ -276,9 +289,31 @@ def _with_query( return f"{url}{separator}{urllib.parse.urlencode(filtered)}" -def _safe_url(url: str) -> str: +def log_safe_url(url: str) -> str: + """Return a URL safe to include in logs and exception messages.""" split = urllib.parse.urlsplit(url) - return urllib.parse.urlunsplit((split.scheme, split.netloc, split.path, split.query, "")) + return urllib.parse.urlunsplit( + (split.scheme, _safe_netloc(split.netloc), split.path, _safe_query(split.query), "") + ) + + +def _safe_netloc(netloc: str) -> str: + return netloc.rsplit("@", 1)[-1] + + +def _safe_query(query: str) -> str: + if not query: + return "" + parts: list[str] = [] + for part in query.split("&"): + key, separator, _value = part.partition("=") + if _is_sensitive_query_parameter(urllib.parse.unquote_plus(key)): + parts.append(f"{key}={REDACTED_HEADER_VALUE}") + elif separator: + parts.append(part) + else: + parts.append(key) + return "&".join(parts) def _headers_for_log(headers: Mapping[str, str] | httpx.Headers) -> dict[str, str | list[str]]: @@ -311,6 +346,11 @@ def _is_sensitive_header(name: str) -> bool: return any(fragment in lowered for fragment in SENSITIVE_HEADER_FRAGMENTS) +def _is_sensitive_query_parameter(name: str) -> bool: + lowered = name.lower() + return any(fragment in lowered for fragment in SENSITIVE_URL_QUERY_FRAGMENTS) + + def _response_http_version(response: httpx.Response) -> str | None: version = response.extensions.get("http_version") if isinstance(version, bytes): diff --git a/src/src_py_lib/utils/logging.py b/src/src_py_lib/utils/logging.py index 7522fe0..3508062 100644 --- a/src/src_py_lib/utils/logging.py +++ b/src/src_py_lib/utils/logging.py @@ -43,7 +43,9 @@ TRACE_ID_BYTES: Final[int] = 16 SPAN_ID_BYTES: Final[int] = 8 MEBIBYTE: Final[int] = 1024 * 1024 +REDACTED_LOG_VALUE: Final[str] = "[redacted]" SECRET_FIELD_FRAGMENTS: Final[tuple[str, ...]] = ( + "api-key", "api_key", "authorization", "cookie", @@ -768,7 +770,7 @@ def sanitized_config_snapshot(config: object) -> dict[str, Any]: if callable(value): continue key_text = str(key) - if any(fragment in key_text.lower() for fragment in SECRET_FIELD_FRAGMENTS): + if _is_sensitive_log_field(key_text): snapshot[key_text] = _secret_state(value) elif isinstance(value, Path): snapshot[key_text] = str(value) @@ -937,13 +939,14 @@ def _http_headers(raw_headers: object) -> dict[str, str | list[str]]: if name is None or value is None: continue key = name.lower() + logged_value = REDACTED_LOG_VALUE if _is_sensitive_log_field(key) else value existing = headers.get(key) if existing is None: - headers[key] = value + headers[key] = logged_value elif isinstance(existing, list): - existing.append(value) + existing.append(logged_value) else: - headers[key] = [existing, value] + headers[key] = [existing, logged_value] return {key: headers[key] for key in sorted(headers)} @@ -971,6 +974,11 @@ def _is_hex_identifier(value: str, length: int) -> bool: ) +def _is_sensitive_log_field(name: str) -> bool: + lowered = name.lower() + return any(fragment in lowered for fragment in SECRET_FIELD_FRAGMENTS) + + def _secret_state(value: object) -> str: if value is None or value == "": return "missing" diff --git a/tests/test_logging_http_clients.py b/tests/test_logging_http_clients.py index cdfb6cc..14b0da0 100644 --- a/tests/test_logging_http_clients.py +++ b/tests/test_logging_http_clients.py @@ -1209,7 +1209,8 @@ def test_httpcore_response_headers_are_structured(self) -> None: "receive_response_headers.complete " "return_value=(b'HTTP/1.1', 200, b'OK', " "[(b'Zed', b'last'), (b'Content-Type', b'application/json'), " - "(b'Alpha', b'first')])" + "(b'Set-Cookie', b'session=secret'), " + "(b'X-Api-Key', b'secret'), (b'Alpha', b'first')])" ) finally: logger = logging.getLogger(logger_name) @@ -1226,12 +1227,17 @@ def test_httpcore_response_headers_are_structured(self) -> None: self.assertEqual(response_headers["http_version"], "HTTP/1.1") self.assertEqual(response_headers["status_code"], 200) self.assertEqual(response_headers["reason_phrase"], "OK") - self.assertEqual(list(response_headers["headers"]), ["alpha", "content-type", "zed"]) + self.assertEqual( + list(response_headers["headers"]), + ["alpha", "content-type", "set-cookie", "x-api-key", "zed"], + ) self.assertEqual( response_headers["headers"], { "alpha": "first", "content-type": "application/json", + "set-cookie": "[redacted]", + "x-api-key": "[redacted]", "zed": "last", }, ) @@ -1311,8 +1317,9 @@ def handler(_request: httpx.Request) -> httpx.Response: client = HTTPClient(max_attempts=1, transport=httpx.MockTransport(handler)) payload = client.json( "POST", - "https://example.com/api", + "https://user:pass@example.com/api?code=oauth", headers={"Authorization": "Bearer token"}, + query={"limit": 10, "access_token": "secret", "signature": "signed"}, json_body={"hello": "world"}, ) finally: @@ -1332,6 +1339,11 @@ def handler(_request: httpx.Request) -> httpx.Response: self.assertFalse(any(row.get("logger") in {"httpx", "httpcore"} for row in rows)) self.assertEqual(http_request["status_code"], 200) self.assertEqual(http_request["reason_phrase"], "OK") + self.assertEqual( + http_request["url"], + "https://example.com/api?code=[redacted]&limit=10" + "&access_token=[redacted]&signature=[redacted]", + ) self.assertEqual(http_request["request_bytes"], len(b'{"hello": "world"}')) self.assertEqual(http_request["request_headers"]["authorization"], "[redacted]") self.assertEqual( @@ -1364,10 +1376,13 @@ def handler(_request: httpx.Request) -> httpx.Response: ) with self.assertRaisesRegex(HTTPClientError, "rate limited") as raised: - client.json("GET", "https://example.com/api") + client.json("GET", "https://user:pass@example.com/api?access_token=secret") self.assertEqual(raised.exception.status_code, 429) self.assertEqual(raised.exception.body, "rate limited") + self.assertIn("https://example.com/api?access_token=[redacted]", str(raised.exception)) + self.assertNotIn("user:pass", str(raised.exception)) + self.assertNotIn("secret", str(raised.exception)) class ClientTest(unittest.TestCase): @@ -1409,6 +1424,13 @@ def test_sourcegraph_client_builds_graphql_request(self) -> None: self.assertEqual(http.calls[0]["url"], "https://sourcegraph.example.com/.api/graphql") self.assertEqual(http.calls[0]["headers"], {"Authorization": "token token"}) + def test_sourcegraph_client_rejects_http_endpoint_by_default(self) -> None: + with self.assertRaisesRegex(ValueError, "https:// URL"): + SourcegraphClient("http://sourcegraph.example.com", "token") + + client = SourcegraphClient("http://localhost:3080", "token", allow_insecure_http=True) + self.assertEqual(client.endpoint, "http://localhost:3080") + def test_sourcegraph_client_streams_connection_nodes(self) -> None: http = RecordingHTTP( [ @@ -1780,7 +1802,12 @@ def test_graphql_client_emits_query_debug_events(self) -> None: }, ] ) - client = GraphQLClient("https://example.com/graphql", {}, "Example", http=http) + client = GraphQLClient( + "https://user:pass@example.com/graphql?access_token=secret&query=ok", + {}, + "Example", + http=http, + ) query = """ query Items($first: Int!, $after: String, $userId: ID!) { viewer { items { nodes { id } pageInfo { hasNextPage endCursor } } } @@ -1822,6 +1849,10 @@ def test_graphql_client_emits_query_debug_events(self) -> None: self.assertEqual([row["page_size"] for row in starts], [2, 2]) self.assertEqual([row["cursor_present"] for row in starts], [False, True]) self.assertEqual(starts[0]["graphql_client"], "Example") + self.assertEqual( + starts[0]["url"], + "https://example.com/graphql?access_token=[redacted]&query=ok", + ) self.assertEqual(starts[0]["variable_names"], ["after", "first", "userId"]) self.assertEqual(ends[0]["response_fields"], ["viewer"]) @@ -1946,6 +1977,10 @@ def test_github_client_can_target_github_enterprise(self) -> None: graphql_api_url("github.example.com"), "https://github.example.com/api/graphql" ) + def test_github_client_rejects_http_enterprise_url(self) -> None: + with self.assertRaisesRegex(ValueError, "https:// URL"): + graphql_api_url("http://github.example.com") + def test_github_client_validate_queries_viewer(self) -> None: http = RecordingHTTP([{"data": {"viewer": {"login": "alice"}}}]) client = GitHubClient("token", http=http)