From 0e18233049527eb07782f9822cfa0beab37d9060 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:00:41 +0000 Subject: [PATCH 1/4] Initial plan From 56fd84c1a9772dedb239eac4b061f442c02995bf Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:25:06 +0000 Subject: [PATCH 2/4] fix: detect GitHub API rate-limit 403 instead of misdiagnosing as auth failure GitHub returns 403 with X-RateLimit-Remaining: 0 when the primary rate limit is exhausted. Previously this was treated as an authentication failure, showing a misleading "private repository" error. Now: - _resilient_get retries 403 responses that carry rate-limit exhaustion headers (X-RateLimit-Remaining: 0), using X-RateLimit-Reset for wait - _download_github_file detects rate limiting before falling through to the auth-failure error path, surfacing actionable guidance - New unit tests cover all rate-limit detection paths Co-authored-by: danielmeppiel <51440732+danielmeppiel@users.noreply.github.com> --- CHANGELOG.md | 4 + src/apm_cli/deps/github_downloader.py | 55 +++++++++++- tests/test_github_downloader.py | 123 ++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 607dae1c6..e39ec9d7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `CursorClientAdapter` for MCP server management in `.cursor/mcp.json` - `OpenCodeClientAdapter` for MCP server management in `opencode.json` +### Fixed + +- GitHub API rate-limit 403 responses no longer misdiagnosed as authentication failures — unauthenticated users now see actionable "rate limit exceeded" guidance instead of misleading "private repository" errors + ## [0.7.9] - 2026-03-13 ### Added diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 3ae6cc343..e73e6f35b 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -227,19 +227,37 @@ def _resilient_get(self, url: str, headers: Dict[str, str], timeout: int = 30, m requests.exceptions.RequestException: After all retries exhausted """ last_exc = None + last_response = None for attempt in range(max_retries): try: response = requests.get(url, headers=headers, timeout=timeout) - # Handle rate limiting - if response.status_code in (429, 503): + # Handle rate limiting — GitHub returns 429 for secondary limits + # and 403 with X-RateLimit-Remaining: 0 for primary limits. + is_rate_limited = response.status_code in (429, 503) + if not is_rate_limited and response.status_code == 403: + try: + remaining = response.headers.get("X-RateLimit-Remaining") + if remaining is not None and int(remaining) == 0: + is_rate_limited = True + except (TypeError, ValueError): + pass + + if is_rate_limited: + last_response = response retry_after = response.headers.get("Retry-After") + reset_at = response.headers.get("X-RateLimit-Reset") if retry_after: try: wait = min(float(retry_after), 60) except (TypeError, ValueError): # Retry-After may be an HTTP-date; fall back to exponential backoff wait = min(2 ** attempt, 30) * (0.5 + random.random()) + elif reset_at: + try: + wait = max(0, min(int(reset_at) - time.time(), 60)) + except (TypeError, ValueError): + wait = min(2 ** attempt, 30) * (0.5 + random.random()) else: wait = min(2 ** attempt, 30) * (0.5 + random.random()) _debug(f"Rate limited ({response.status_code}), retry in {wait:.1f}s (attempt {attempt + 1}/{max_retries})") @@ -266,6 +284,12 @@ def _resilient_get(self, url: str, headers: Dict[str, str], timeout: int = 30, m if attempt < max_retries - 1: _debug(f"Timeout, retrying (attempt {attempt + 1}/{max_retries})") + # If rate limiting exhausted all retries, return the last response so + # callers can inspect headers (e.g. X-RateLimit-Remaining) and raise + # an appropriate user-facing error. + if last_response is not None: + return last_response + if last_exc: raise last_exc raise requests.exceptions.RequestException(f"All {max_retries} attempts failed for {url}") @@ -739,6 +763,33 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re f"(tried refs: {ref}, {fallback_ref})" ) elif e.response.status_code == 401 or e.response.status_code == 403: + # Distinguish rate limiting from auth failure. + # GitHub returns 403 with X-RateLimit-Remaining: 0 when the + # primary rate limit is exhausted — even for public repos. + # _resilient_get already retries these, so if we still land + # here the retries were exhausted; surface the real cause. + is_rate_limit = False + try: + rl_remaining = e.response.headers.get("X-RateLimit-Remaining") + if rl_remaining is not None and int(rl_remaining) == 0: + is_rate_limit = True + except (TypeError, ValueError): + pass + + if is_rate_limit: + error_msg = f"GitHub API rate limit exceeded for {dep_ref.repo_url}. " + if not self.github_token: + error_msg += ( + "Unauthenticated requests are limited to 60/hour (shared per IP). " + "Set GITHUB_APM_PAT or GITHUB_TOKEN to increase the limit to 5,000/hour." + ) + else: + error_msg += ( + "Authenticated rate limit exhausted. " + "Wait a few minutes or check your token's rate-limit quota." + ) + raise RuntimeError(error_msg) + # Token may lack SSO/SAML authorization for this org. # Retry without auth -- the repo might be public. # Applies to github.com and GHES (custom domains can have public repos). diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index 2a54fa958..4e384fa84 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -509,6 +509,129 @@ def test_repository_not_found_handling(self): # Would require mocking 404 errors pass + def test_download_github_file_403_rate_limit_no_token(self): + """Test that 403 with X-RateLimit-Remaining: 0 and no token gives a rate-limit error.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('github/awesome-copilot/agents/api-architect.agent.md') + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + mock_response_403.headers = {'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '0'} + mock_response_403.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=mock_response_403) + ) + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get, \ + patch('apm_cli.deps.github_downloader.time.sleep'): + # _resilient_get retries 3 times on rate-limit 403, all return same + mock_get.return_value = mock_response_403 + + with pytest.raises(RuntimeError, match="rate limit exceeded") as exc_info: + downloader.download_raw_file(dep_ref, 'agents/api-architect.agent.md', 'main') + + # Must NOT mention "private repository" — that's the old misleading message + assert "private repository" not in str(exc_info.value).lower() + assert "60/hour" in str(exc_info.value) + + def test_download_github_file_403_rate_limit_with_token(self): + """Test that 403 with X-RateLimit-Remaining: 0 and a token gives a rate-limit error.""" + with patch.dict(os.environ, {'GITHUB_APM_PAT': 'my-token'}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('github/awesome-copilot/agents/api-architect.agent.md') + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + mock_response_403.headers = {'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '0'} + mock_response_403.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=mock_response_403) + ) + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get, \ + patch('apm_cli.deps.github_downloader.time.sleep'): + mock_get.return_value = mock_response_403 + + with pytest.raises(RuntimeError, match="rate limit exceeded") as exc_info: + downloader.download_raw_file(dep_ref, 'agents/api-architect.agent.md', 'main') + + assert "Authenticated rate limit exhausted" in str(exc_info.value) + assert "SSO/SAML" not in str(exc_info.value) + + def test_download_github_file_403_non_rate_limit_still_auth_error(self): + """Test that 403 WITHOUT rate-limit headers still produces the auth error.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/private-repo/sub/file.agent.md') + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + # No rate-limit headers — this is a genuine auth failure + mock_response_403.headers = {} + mock_response_403.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=mock_response_403) + ) + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_response_403 + + with pytest.raises(RuntimeError, match="Authentication failed"): + downloader.download_raw_file(dep_ref, 'sub/file.agent.md', 'main') + + def test_resilient_get_retries_on_403_rate_limit(self): + """Test that _resilient_get retries when 403 has X-RateLimit-Remaining: 0.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + mock_response_403.headers = {'X-RateLimit-Remaining': '0', 'X-RateLimit-Reset': '0'} + + mock_response_200 = Mock() + mock_response_200.status_code = 200 + mock_response_200.headers = {'X-RateLimit-Remaining': '50'} + mock_response_200.content = b'success' + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get, \ + patch('apm_cli.deps.github_downloader.time.sleep'): + mock_get.side_effect = [mock_response_403, mock_response_200] + + response = downloader._resilient_get('https://api.github.com/repos/test', {}) + assert response.status_code == 200 + assert mock_get.call_count == 2 + + def test_resilient_get_does_not_retry_403_without_rate_limit_header(self): + """Test that _resilient_get does NOT retry 403 without rate-limit exhaustion.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + mock_response_403.headers = {} # No rate-limit headers + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_response_403 + + response = downloader._resilient_get('https://api.github.com/repos/test', {}) + # Should return immediately — no retry for non-rate-limit 403 + assert response.status_code == 403 + assert mock_get.call_count == 1 + + def test_resilient_get_403_with_nonzero_remaining_not_retried(self): + """Test that 403 with X-RateLimit-Remaining > 0 is NOT retried as rate limiting.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + mock_response_403 = Mock() + mock_response_403.status_code = 403 + mock_response_403.headers = {'X-RateLimit-Remaining': '42'} + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_response_403 + + response = downloader._resilient_get('https://api.github.com/repos/test', {}) + assert response.status_code == 403 + assert mock_get.call_count == 1 + class TestAzureDevOpsSupport: """Test Azure DevOps package support.""" From 951c5013fc00ab3bf4cefefad29905ff7817b204 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:38:32 +0000 Subject: [PATCH 3/4] feat: CDN-first download for github.com public repos via raw.githubusercontent.com Use raw.githubusercontent.com (CDN, no rate limit) for unauthenticated github.com file downloads before falling back to the Contents API. - Add build_raw_content_url() helper to github_host.py - Add _try_raw_download() method for best-effort CDN fetch - Modify _download_github_file() to try CDN first when no token is set - CDN path handles main/master branch fallback - Authenticated requests and enterprise hosts still use Contents API directly - Add 10 new tests for CDN download path + 2 tests for URL builder Co-authored-by: danielmeppiel <51440732+danielmeppiel@users.noreply.github.com> --- src/apm_cli/deps/github_downloader.py | 44 +++++- src/apm_cli/utils/github_host.py | 21 +++ tests/test_github_downloader.py | 198 ++++++++++++++++++++++++++ tests/unit/test_github_host.py | 14 +- 4 files changed, 273 insertions(+), 4 deletions(-) diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index e73e6f35b..090b0bd2b 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -33,6 +33,7 @@ build_ado_https_clone_url, build_ado_ssh_url, build_ado_api_url, + build_raw_content_url, sanitize_token_url_in_message, default_host, is_azure_devops_hostname, @@ -696,9 +697,30 @@ def _download_ado_file(self, dep_ref: DependencyReference, file_path: str, ref: except requests.exceptions.RequestException as e: raise RuntimeError(f"Network error downloading {file_path}: {e}") + def _try_raw_download(self, owner: str, repo: str, ref: str, file_path: str) -> bytes | None: + """Attempt to fetch a file via raw.githubusercontent.com (CDN). + + Returns the raw bytes on success, or ``None`` if the file was not found + (HTTP 404) or the request failed for any reason. This is intentionally + best-effort: callers fall back to the Contents API when ``None`` is + returned. + """ + raw_url = build_raw_content_url(owner, repo, ref, file_path) + try: + response = requests.get(raw_url, timeout=30) + if response.status_code == 200: + return response.content + except requests.exceptions.RequestException: + pass + return None + def _download_github_file(self, dep_ref: DependencyReference, file_path: str, ref: str = "main") -> bytes: """Download a file from GitHub repository. + For github.com without a token, tries raw.githubusercontent.com first + (CDN, no rate limit) before falling back to the Contents API. Authenticated + requests and non-github.com hosts always use the Contents API directly. + Args: dep_ref: Parsed dependency reference file_path: Path to file within the repository @@ -712,15 +734,31 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re # Parse owner/repo from repo_url owner, repo = dep_ref.repo_url.split('/', 1) + # --- CDN fast-path for github.com without a token --- + # raw.githubusercontent.com is served from GitHub's CDN and is not + # subject to the REST API rate limit (60 req/h unauthenticated). + # Only available for github.com — GHES/GHE-DR have no equivalent. + if host == "github.com" and not self.github_token: + content = self._try_raw_download(owner, repo, ref, file_path) + if content is not None: + return content + # raw download returned 404 — could be wrong default branch. + # Try the other default branch before falling through to the API. + if ref in ("main", "master"): + fallback_ref = "master" if ref == "main" else "main" + content = self._try_raw_download(owner, repo, fallback_ref, file_path) + if content is not None: + return content + # All raw attempts failed — fall through to API path which + # handles private repos, rate-limit messaging, and SAML errors. + + # --- Contents API path (authenticated, enterprise, or raw fallback) --- # Build GitHub API URL - format differs by host type if host == "github.com": - # GitHub.com: https://api.github.com/repos/owner/repo/contents/path api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" elif host.lower().endswith(".ghe.com"): - # GitHub Enterprise Cloud Data Residency: https://api.{subdomain}.ghe.com/repos/owner/repo/contents/path api_url = f"https://api.{host}/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" else: - # GitHub Enterprise Server: https://{host}/api/v3/repos/owner/repo/contents/path api_url = f"https://{host}/api/v3/repos/{owner}/{repo}/contents/{file_path}?ref={ref}" # Set up authentication headers diff --git a/src/apm_cli/utils/github_host.py b/src/apm_cli/utils/github_host.py index 0ae1990c6..68117cd4d 100644 --- a/src/apm_cli/utils/github_host.py +++ b/src/apm_cli/utils/github_host.py @@ -125,6 +125,27 @@ def unsupported_host_error(hostname: str, context: Optional[str] = None) -> str: return msg +def build_raw_content_url(owner: str, repo: str, ref: str, file_path: str) -> str: + """Build a raw.githubusercontent.com URL for fetching file content. + + This CDN endpoint is not subject to the GitHub REST API rate limit and + does not require authentication for public repositories. + + Only valid for github.com — GitHub Enterprise Server and GHE Cloud Data + Residency hosts do not have a ``raw.githubusercontent.com`` equivalent. + + Args: + owner: Repository owner (user or organisation) + repo: Repository name + ref: Git reference (branch, tag, or commit SHA) + file_path: Path to file within the repository + + Returns: + str: ``https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{file_path}`` + """ + return f"https://raw.githubusercontent.com/{owner}/{repo}/{ref}/{file_path}" + + def build_ssh_url(host: str, repo_ref: str) -> str: """Build an SSH clone URL for the given host and repo_ref (owner/repo).""" return f"git@{host}:{repo_ref}.git" diff --git a/tests/test_github_downloader.py b/tests/test_github_downloader.py index 4e384fa84..493712558 100644 --- a/tests/test_github_downloader.py +++ b/tests/test_github_downloader.py @@ -1270,5 +1270,203 @@ def test_git_config_global_uses_dev_null_on_unix(self): assert dl.git_env['GIT_CONFIG_GLOBAL'] == '/dev/null' +class TestRawContentCDNDownload: + """Tests for CDN-first (raw.githubusercontent.com) download strategy.""" + + def test_raw_cdn_used_for_github_com_without_token(self): + """Unauthenticated github.com requests should try raw.githubusercontent.com first.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + + mock_raw_response = Mock() + mock_raw_response.status_code = 200 + mock_raw_response.content = b'# Agent content' + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_raw_response + + result = downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'main') + + assert result == b'# Agent content' + # Should have hit raw.githubusercontent.com, not API + call_url = mock_get.call_args[0][0] + assert 'raw.githubusercontent.com' in call_url + assert 'api.github.com' not in call_url + + def test_raw_cdn_not_used_when_token_present(self): + """Authenticated requests should go straight to Contents API.""" + with patch.dict(os.environ, {'GITHUB_APM_PAT': 'my-token'}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + + mock_api_response = Mock() + mock_api_response.status_code = 200 + mock_api_response.headers = {'X-RateLimit-Remaining': '4999'} + mock_api_response.content = b'# Agent content' + mock_api_response.raise_for_status = Mock() + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_api_response + + result = downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'main') + + assert result == b'# Agent content' + # Should use API with auth, not raw CDN + call_url = mock_get.call_args[0][0] + assert 'api.github.com' in call_url + + def test_raw_cdn_not_used_for_enterprise_host(self): + """Enterprise hosts should use API directly (no raw.githubusercontent.com).""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + dep_ref.host = 'github.mycompany.com' + + mock_api_response = Mock() + mock_api_response.status_code = 200 + mock_api_response.headers = {} + mock_api_response.content = b'# Agent content' + mock_api_response.raise_for_status = Mock() + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.return_value = mock_api_response + + result = downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'main') + + assert result == b'# Agent content' + call_url = mock_get.call_args[0][0] + assert 'raw.githubusercontent.com' not in call_url + assert 'github.mycompany.com' in call_url + + def test_raw_cdn_fallback_to_api_on_404(self): + """If raw CDN returns 404, should fall through to the API path.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/private-repo/agents/bot.agent.md') + + # Raw CDN returns 404 (private repo or file doesn't exist) + mock_raw_404 = Mock() + mock_raw_404.status_code = 404 + mock_raw_404.content = b'404: Not Found' + + # API also returns 404 with proper error handling + mock_api_404 = Mock() + mock_api_404.status_code = 404 + mock_api_404.headers = {} + mock_api_404.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=mock_api_404) + ) + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + # First 2 calls: raw CDN (main + master fallback) → 404 + # Third call: API → 404 + mock_get.side_effect = [mock_raw_404, mock_raw_404, mock_api_404, mock_api_404] + + with pytest.raises(RuntimeError, match="File not found"): + downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'main') + + def test_raw_cdn_fallback_main_to_master(self): + """If raw CDN 404s on 'main', should try 'master' before API fallback.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + + # raw CDN: main → 404, master → 200 + mock_raw_404 = Mock() + mock_raw_404.status_code = 404 + + mock_raw_200 = Mock() + mock_raw_200.status_code = 200 + mock_raw_200.content = b'# Found on master' + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + mock_get.side_effect = [mock_raw_404, mock_raw_200] + + result = downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'main') + + assert result == b'# Found on master' + # Second call should be to master + assert mock_get.call_count == 2 + second_url = mock_get.call_args_list[1][0][0] + assert '/master/' in second_url + + def test_raw_cdn_network_error_falls_through(self): + """If raw CDN raises a network error, should fall through to API.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + + mock_api_response = Mock() + mock_api_response.status_code = 200 + mock_api_response.headers = {} + mock_api_response.content = b'# From API' + mock_api_response.raise_for_status = Mock() + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + # Raw CDN: network error; API: success + mock_get.side_effect = [ + requests_lib.exceptions.ConnectionError("CDN unreachable"), + mock_api_response, + ] + + result = downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'v1.0.0') + + assert result == b'# From API' + + def test_raw_cdn_no_branch_fallback_for_specific_ref(self): + """For a specific ref (not main/master), raw CDN should not try branch fallback.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + dep_ref = DependencyReference.parse('owner/repo/agents/bot.agent.md') + + mock_raw_404 = Mock() + mock_raw_404.status_code = 404 + + mock_api_404 = Mock() + mock_api_404.status_code = 404 + mock_api_404.headers = {} + mock_api_404.raise_for_status = Mock( + side_effect=requests_lib.exceptions.HTTPError(response=mock_api_404) + ) + + with patch('apm_cli.deps.github_downloader.requests.get') as mock_get: + # Raw CDN: 404, then API: 404 with specific ref error + mock_get.side_effect = [mock_raw_404, mock_api_404] + + with pytest.raises(RuntimeError, match="File not found.*at ref 'v2.0.0'"): + downloader._download_github_file(dep_ref, 'agents/bot.agent.md', 'v2.0.0') + + # Should be exactly 2 calls: 1 raw CDN (no master fallback) + 1 API + assert mock_get.call_count == 2 + + def test_try_raw_download_returns_none_on_404(self): + """_try_raw_download should return None on 404.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + mock_response = Mock() + mock_response.status_code = 404 + + with patch('apm_cli.deps.github_downloader.requests.get', return_value=mock_response): + result = downloader._try_raw_download('owner', 'repo', 'main', 'file.md') + + assert result is None + + def test_try_raw_download_returns_content_on_200(self): + """_try_raw_download should return bytes on success.""" + with patch.dict(os.environ, {}, clear=True): + downloader = GitHubPackageDownloader() + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.content = b'hello world' + + with patch('apm_cli.deps.github_downloader.requests.get', return_value=mock_response): + result = downloader._try_raw_download('owner', 'repo', 'main', 'file.md') + + assert result == b'hello world' + + if __name__ == '__main__': pytest.main([__file__]) \ No newline at end of file diff --git a/tests/unit/test_github_host.py b/tests/unit/test_github_host.py index 77de92523..7dade5f05 100644 --- a/tests/unit/test_github_host.py +++ b/tests/unit/test_github_host.py @@ -1,6 +1,18 @@ import pytest -from apm_cli.utils.github_host import is_valid_fqdn +from apm_cli.utils.github_host import is_valid_fqdn, build_raw_content_url + + +def test_build_raw_content_url(): + """build_raw_content_url returns the correct raw.githubusercontent.com URL.""" + url = build_raw_content_url("microsoft", "apm", "main", "README.md") + assert url == "https://raw.githubusercontent.com/microsoft/apm/main/README.md" + + +def test_build_raw_content_url_nested_path(): + """build_raw_content_url handles nested file paths.""" + url = build_raw_content_url("owner", "repo", "v1.0.0", "agents/api-architect.agent.md") + assert url == "https://raw.githubusercontent.com/owner/repo/v1.0.0/agents/api-architect.agent.md" def test_valid_fqdns(): From b3a6fd9f4888b5757d321009b61535814bbf9e21 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Mar 2026 09:40:17 +0000 Subject: [PATCH 4/4] fix: use Optional[bytes] and case-insensitive host comparison per code review Co-authored-by: danielmeppiel <51440732+danielmeppiel@users.noreply.github.com> --- CHANGELOG.md | 1 + src/apm_cli/deps/github_downloader.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e39ec9d7b..679651f10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - GitHub API rate-limit 403 responses no longer misdiagnosed as authentication failures — unauthenticated users now see actionable "rate limit exceeded" guidance instead of misleading "private repository" errors +- Virtual file downloads from public github.com repos no longer require authentication — uses `raw.githubusercontent.com` CDN (no rate limit) before falling back to the Contents API ## [0.7.9] - 2026-03-13 diff --git a/src/apm_cli/deps/github_downloader.py b/src/apm_cli/deps/github_downloader.py index 090b0bd2b..c57fda3b7 100644 --- a/src/apm_cli/deps/github_downloader.py +++ b/src/apm_cli/deps/github_downloader.py @@ -697,7 +697,7 @@ def _download_ado_file(self, dep_ref: DependencyReference, file_path: str, ref: except requests.exceptions.RequestException as e: raise RuntimeError(f"Network error downloading {file_path}: {e}") - def _try_raw_download(self, owner: str, repo: str, ref: str, file_path: str) -> bytes | None: + def _try_raw_download(self, owner: str, repo: str, ref: str, file_path: str) -> Optional[bytes]: """Attempt to fetch a file via raw.githubusercontent.com (CDN). Returns the raw bytes on success, or ``None`` if the file was not found @@ -738,7 +738,7 @@ def _download_github_file(self, dep_ref: DependencyReference, file_path: str, re # raw.githubusercontent.com is served from GitHub's CDN and is not # subject to the REST API rate limit (60 req/h unauthenticated). # Only available for github.com — GHES/GHE-DR have no equivalent. - if host == "github.com" and not self.github_token: + if host.lower() == "github.com" and not self.github_token: content = self._try_raw_download(owner, repo, ref, file_path) if content is not None: return content