Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- `CursorClientAdapter` for MCP server management in `.cursor/mcp.json`
- `OpenCodeClientAdapter` for MCP server management in `opencode.json`

### Fixed

- GitHub API rate-limit 403 responses are no longer misdiagnosed as authentication failures — unauthenticated users now see actionable "rate limit exceeded" guidance instead of misleading "private repository" errors
- Virtual file downloads from public github.com repos no longer require authentication — unauthenticated requests now try the `raw.githubusercontent.com` CDN (which is not subject to the REST API rate limit) before falling back to the Contents API
Comment on lines +20 to +23

## [0.7.9] - 2026-03-13

### Added
Expand Down
99 changes: 94 additions & 5 deletions src/apm_cli/deps/github_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
build_ado_https_clone_url,
build_ado_ssh_url,
build_ado_api_url,
build_raw_content_url,
sanitize_token_url_in_message,
default_host,
is_azure_devops_hostname,
Expand Down Expand Up @@ -227,19 +228,37 @@ def _resilient_get(self, url: str, headers: Dict[str, str], timeout: int = 30, m
requests.exceptions.RequestException: After all retries exhausted
"""
last_exc = None
last_response = None
for attempt in range(max_retries):
try:
response = requests.get(url, headers=headers, timeout=timeout)

# Handle rate limiting
if response.status_code in (429, 503):
# Handle rate limiting — GitHub returns 429 for secondary limits
# and 403 with X-RateLimit-Remaining: 0 for primary limits.
is_rate_limited = response.status_code in (429, 503)
if not is_rate_limited and response.status_code == 403:
try:
remaining = response.headers.get("X-RateLimit-Remaining")
if remaining is not None and int(remaining) == 0:
is_rate_limited = True
except (TypeError, ValueError):
pass

if is_rate_limited:
last_response = response
retry_after = response.headers.get("Retry-After")
reset_at = response.headers.get("X-RateLimit-Reset")
if retry_after:
try:
wait = min(float(retry_after), 60)
except (TypeError, ValueError):
# Retry-After may be an HTTP-date; fall back to exponential backoff
wait = min(2 ** attempt, 30) * (0.5 + random.random())
elif reset_at:
try:
wait = max(0, min(int(reset_at) - time.time(), 60))
except (TypeError, ValueError):
wait = min(2 ** attempt, 30) * (0.5 + random.random())
else:
wait = min(2 ** attempt, 30) * (0.5 + random.random())
_debug(f"Rate limited ({response.status_code}), retry in {wait:.1f}s (attempt {attempt + 1}/{max_retries})")
Expand All @@ -266,6 +285,12 @@ def _resilient_get(self, url: str, headers: Dict[str, str], timeout: int = 30, m
if attempt < max_retries - 1:
_debug(f"Timeout, retrying (attempt {attempt + 1}/{max_retries})")

# If rate limiting exhausted all retries, return the last response so
# callers can inspect headers (e.g. X-RateLimit-Remaining) and raise
# an appropriate user-facing error.
if last_response is not None:
return last_response

if last_exc:
raise last_exc
raise requests.exceptions.RequestException(f"All {max_retries} attempts failed for {url}")
Expand Down Expand Up @@ -672,9 +697,30 @@ def _download_ado_file(self, dep_ref: DependencyReference, file_path: str, ref:
except requests.exceptions.RequestException as e:
raise RuntimeError(f"Network error downloading {file_path}: {e}")

def _try_raw_download(self, owner: str, repo: str, ref: str, file_path: str) -> Optional[bytes]:
    """Best-effort fetch of a single file from raw.githubusercontent.com.

    The request is sent without any authentication headers and with a
    30-second timeout.

    Args:
        owner: Repository owner.
        repo: Repository name.
        ref: Git reference used in the raw URL.
        file_path: Path to the file within the repository.

    Returns:
        The response body as bytes when the server answers HTTP 200.
        ``None`` for any other status code, or when ``requests`` raises a
        ``RequestException`` -- the caller is expected to fall back to the
        Contents API in that case.
    """
    target = build_raw_content_url(owner, repo, ref, file_path)
    try:
        reply = requests.get(target, timeout=30)
        # Only a plain 200 counts as success; everything else is a miss.
        return reply.content if reply.status_code == 200 else None
    except requests.exceptions.RequestException:
        # Deliberately swallowed: this path is an optimisation, not a requirement.
        return None

def _download_github_file(self, dep_ref: DependencyReference, file_path: str, ref: str = "main") -> bytes:
"""Download a file from GitHub repository.

For github.com without a token, tries raw.githubusercontent.com first
(CDN, no rate limit) before falling back to the Contents API. Authenticated
requests and non-github.com hosts always use the Contents API directly.

Args:
dep_ref: Parsed dependency reference
file_path: Path to file within the repository
Expand All @@ -688,15 +734,31 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re
# Parse owner/repo from repo_url
owner, repo = dep_ref.repo_url.split('/', 1)

# --- CDN fast-path for github.com without a token ---
# raw.githubusercontent.com is served from GitHub's CDN and is not
# subject to the REST API rate limit (60 req/h unauthenticated).
# Only available for github.com — GHES/GHE-DR have no equivalent.
if host.lower() == "github.com" and not self.github_token:
content = self._try_raw_download(owner, repo, ref, file_path)
if content is not None:
return content
# raw download returned 404 — could be wrong default branch.
# Try the other default branch before falling through to the API.
if ref in ("main", "master"):
fallback_ref = "master" if ref == "main" else "main"
content = self._try_raw_download(owner, repo, fallback_ref, file_path)
if content is not None:
return content
# All raw attempts failed — fall through to API path which
# handles private repos, rate-limit messaging, and SAML errors.

# --- Contents API path (authenticated, enterprise, or raw fallback) ---
# Build GitHub API URL - format differs by host type
if host == "github.com":
# GitHub.com: https://api.github.com/repos/owner/repo/contents/path
api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
elif host.lower().endswith(".ghe.com"):
# GitHub Enterprise Cloud Data Residency: https://api.{subdomain}.ghe.com/repos/owner/repo/contents/path
api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"
else:
# GitHub Enterprise Server: https://{host}/api/v3/repos/owner/repo/contents/path
api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}"

# Set up authentication headers
Expand Down Expand Up @@ -739,6 +801,33 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re
f"(tried refs: {ref}, {fallback_ref})"
)
elif e.response.status_code == 401 or e.response.status_code == 403:
# Distinguish rate limiting from auth failure.
# GitHub returns 403 with X-RateLimit-Remaining: 0 when the
# primary rate limit is exhausted — even for public repos.
# _resilient_get already retries these, so if we still land
# here the retries were exhausted; surface the real cause.
is_rate_limit = False
try:
rl_remaining = e.response.headers.get("X-RateLimit-Remaining")
if rl_remaining is not None and int(rl_remaining) == 0:
is_rate_limit = True
except (TypeError, ValueError):
pass

if is_rate_limit:
error_msg = f"GitHub API rate limit exceeded for {dep_ref.repo_url}. "
if not self.github_token:
error_msg += (
"Unauthenticated requests are limited to 60/hour (shared per IP). "
"Set GITHUB_APM_PAT or GITHUB_TOKEN to increase the limit to 5,000/hour."
)
else:
error_msg += (
"Authenticated rate limit exhausted. "
"Wait a few minutes or check your token's rate-limit quota."
)
raise RuntimeError(error_msg)

# Token may lack SSO/SAML authorization for this org.
# Retry without auth -- the repo might be public.
# Applies to github.com and GHES (custom domains can have public repos).
Expand Down
21 changes: 21 additions & 0 deletions src/apm_cli/utils/github_host.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,27 @@ def unsupported_host_error(hostname: str, context: Optional[str] = None) -> str:
return msg


def build_raw_content_url(owner: str, repo: str, ref: str, file_path: str) -> str:
    """Build a raw.githubusercontent.com URL for fetching file content.

    This CDN endpoint is not subject to the GitHub REST API rate limit and
    does not require authentication for public repositories.

    Only valid for github.com — GitHub Enterprise Server and GHE Cloud Data
    Residency hosts do not have a ``raw.githubusercontent.com`` equivalent.

    ``ref`` and ``file_path`` are percent-encoded so that characters with a
    special meaning in URLs (spaces, ``#``, ``?``, ...) cannot truncate or
    corrupt the request path. ``/`` is preserved because it is the path
    separator in both branch names (``feature/x``) and file paths.

    Args:
        owner: Repository owner (user or organisation)
        repo: Repository name
        ref: Git reference (branch, tag, or commit SHA)
        file_path: Path to file within the repository

    Returns:
        str: ``https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{file_path}``
        with ``ref`` and ``file_path`` percent-encoded.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import quote

    encoded_ref = quote(ref, safe="/")
    encoded_path = quote(file_path, safe="/")
    return f"https://raw.githubusercontent.com/{owner}/{repo}/{encoded_ref}/{encoded_path}"


def build_ssh_url(host: str, repo_ref: str) -> str:
    """Return the SSH clone URL ``git@<host>:<repo_ref>.git``.

    Args:
        host: Hostname placed after ``git@``.
        repo_ref: Repository reference in ``owner/repo`` form.

    Returns:
        str: The SSH URL for cloning the repository.
    """
    return "git@{}:{}.git".format(host, repo_ref)
Expand Down
Loading
Loading