Skip to content

Commit 7af6233

Browse files
committed
security(owners): enforce OWNERS files read from PR base branch using worktrees
Critical security fix to prevent PRs from modifying their own approval requirements. Previously, OWNERS files were read from the main repository clone, which could be influenced by PR content. This created a security vulnerability where a malicious PR could: - Modify OWNERS files in changed directories - Change who can approve the PR - Potentially bypass approval requirements Solution: - Use git worktree to create isolated checkout of PR's base branch (target branch) - Always read OWNERS files from base branch, not PR head branch - Ensures approval requirements cannot be modified by PR content - Maintains performance by using existing worktree helper Changes: - Modified get_all_repository_approvers_and_reviewers() to accept branch parameter - Integrated git_worktree_checkout for isolated base branch access - Updated _get_file_content_from_local() to support custom base paths - Enhanced file discovery to skip hidden directories in worktree - Updated all tests to mock worktree creation Impact: - Security: PRs cannot modify their own approval requirements - Performance: Minimal overhead (worktree creation is fast) - Reliability: Fail-fast on worktree creation failures
1 parent be8dcd3 commit 7af6233

File tree

4 files changed

+169
-59
lines changed

4 files changed

+169
-59
lines changed

webhook_server/libs/handlers/owners_files_handler.py

Lines changed: 71 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from github.PullRequest import PullRequest
1313
from github.Repository import Repository
1414

15+
from webhook_server.utils import helpers as helpers_module
1516
from webhook_server.utils.constants import COMMAND_ADD_ALLOWED_USER_STR, ROOT_APPROVERS_KEY
1617
from webhook_server.utils.helpers import format_task_fields
1718

@@ -32,10 +33,13 @@ async def initialize(self, pull_request: PullRequest) -> "OwnersFileHandler":
3233
Phase 1: Fetch independent data in parallel (changed files + OWNERS data)
3334
Phase 2: Process derived data in parallel (approvers + reviewers)
3435
"""
36+
# Extract base branch for OWNERS files (PR target branch)
37+
base_branch = await asyncio.to_thread(lambda: pull_request.base.ref)
38+
3539
# Phase 1: Parallel data fetching - independent GitHub API operations
3640
self.changed_files, self.all_repository_approvers_and_reviewers = await asyncio.gather(
3741
self.list_changed_files(pull_request=pull_request),
38-
self.get_all_repository_approvers_and_reviewers(pull_request=pull_request),
42+
self.get_all_repository_approvers_and_reviewers(branch=base_branch),
3943
)
4044

4145
# Phase 2: Parallel data processing - all depend on phase 1 but independent of each other
@@ -106,16 +110,20 @@ def _validate_owners_content(self, content: Any, path: str) -> bool:
106110
self.logger.error(f"{self.log_prefix} Invalid OWNERS file {path}: {e}")
107111
return False
108112

109-
async def _get_file_content_from_local(self, content_path: Path) -> tuple[str, str] | None:
113+
async def _get_file_content_from_local(
114+
self, content_path: Path, base_path: Path | None = None
115+
) -> tuple[str, str] | None:
110116
"""Read OWNERS file from local cloned repository.
111117
112118
Args:
113119
content_path: Path object pointing to OWNERS file in clone_repo_dir
120+
base_path: Base path to compute relative path from (defaults to clone_repo_dir)
114121
115122
Returns:
116123
Tuple of (file_content, relative_path_str) or None if file is unreadable
117124
"""
118-
relative_path = content_path.relative_to(Path(self.github_webhook.clone_repo_dir))
125+
_base_path = base_path if base_path else Path(self.github_webhook.clone_repo_dir)
126+
relative_path = content_path.relative_to(_base_path)
119127
self.logger.debug(f"{self.log_prefix} Reading OWNERS file from local clone: {relative_path}")
120128

121129
try:
@@ -137,62 +145,85 @@ async def _get_file_content_from_local(self, content_path: Path) -> tuple[str, s
137145
)
138146
return None
139147

140-
async def get_all_repository_approvers_and_reviewers(self, pull_request: PullRequest) -> dict[str, dict[str, Any]]:
148+
async def get_all_repository_approvers_and_reviewers(self, branch: str) -> dict[str, dict[str, Any]]:
141149
"""Get all repository approvers and reviewers from OWNERS files.
142150
143-
Reads OWNERS files from local cloned repository instead of GitHub API.
144-
Saves ~1,200-2,000 API calls per webhook.
145-
"""
146-
# Ensure repository is cloned first
147-
await self.github_webhook._clone_repository_for_pr(pull_request)
151+
Reads OWNERS files from local cloned repository at specified branch using worktree.
152+
153+
Args:
154+
branch: Branch name to read OWNERS files from (e.g., 'main', 'develop')
148155
156+
Returns:
157+
Dictionary mapping OWNERS file paths to their approvers and reviewers
158+
"""
149159
# Dictionary mapping OWNERS file paths to their approvers and reviewers
150160
_owners: dict[str, dict[str, Any]] = {}
151161
tasks: list[Coroutine[Any, Any, Any]] = []
152162

153163
max_owners_files = 1000 # Intentionally hardcoded limit to prevent runaway processing
154164
owners_count = 0
155165

156-
# Find all OWNERS files via filesystem walk
157-
self.logger.debug(f"{self.log_prefix} Finding OWNERS files in local clone")
158-
clone_path = Path(self.github_webhook.clone_repo_dir)
166+
# Use existing worktree helper to create isolated checkout of specified branch
167+
async with helpers_module.git_worktree_checkout(
168+
repo_dir=self.github_webhook.clone_repo_dir,
169+
checkout=branch,
170+
log_prefix=self.log_prefix,
171+
mask_sensitive=self.github_webhook.mask_sensitive,
172+
) as (success, worktree_path, _stdout, stderr):
173+
if not success:
174+
self.logger.error(f"{self.log_prefix} Failed to create worktree for branch {branch}: {stderr}")
175+
raise RuntimeError(f"Failed to create worktree for branch {branch}")
176+
177+
self.logger.debug(
178+
f"{self.log_prefix} Reading OWNERS files from branch '{branch}' using worktree at {worktree_path}"
179+
)
159180

160-
# Use rglob to recursively find all OWNERS files
161-
def find_owners_files() -> list[Path]:
162-
return list(clone_path.rglob("OWNERS"))
181+
# Read OWNERS files from worktree (not main clone)
182+
clone_path = Path(worktree_path)
163183

164-
owners_files = await asyncio.to_thread(find_owners_files)
184+
# Find all OWNERS files via filesystem walk
185+
self.logger.debug(f"{self.log_prefix} Finding OWNERS files in worktree")
165186

166-
for owners_file_path in owners_files:
167-
owners_count += 1
168-
if owners_count > max_owners_files:
169-
self.logger.error(f"{self.log_prefix} Too many OWNERS files (>{max_owners_files})")
170-
break
187+
# Use rglob to recursively find all OWNERS files
188+
def find_owners_files() -> list[Path]:
189+
return [
190+
p
191+
for p in clone_path.rglob("OWNERS")
192+
if not any(part.startswith(".") for part in p.relative_to(clone_path).parts)
193+
]
171194

172-
relative_path = owners_file_path.relative_to(clone_path)
173-
self.logger.debug(f"{self.log_prefix} Found OWNERS file: {relative_path}")
174-
tasks.append(self._get_file_content_from_local(owners_file_path))
195+
owners_files = await asyncio.to_thread(find_owners_files)
175196

176-
results = await asyncio.gather(*tasks)
197+
for owners_file_path in owners_files:
198+
owners_count += 1
199+
if owners_count > max_owners_files:
200+
self.logger.error(f"{self.log_prefix} Too many OWNERS files (>{max_owners_files})")
201+
break
177202

178-
for result in results:
179-
# Skip files that couldn't be read (deleted or unreadable)
180-
if result is None:
181-
continue
203+
relative_path = owners_file_path.relative_to(clone_path)
204+
self.logger.debug(f"{self.log_prefix} Found OWNERS file: {relative_path}")
205+
tasks.append(self._get_file_content_from_local(owners_file_path, base_path=clone_path))
182206

183-
file_content, relative_path_str = result
207+
results = await asyncio.gather(*tasks)
184208

185-
try:
186-
content = yaml.safe_load(file_content)
187-
if self._validate_owners_content(content, relative_path_str):
188-
parent_path = str(Path(relative_path_str).parent)
189-
if not parent_path or parent_path == ".":
190-
parent_path = "."
191-
_owners[parent_path] = content
209+
for result in results:
210+
# Skip files that couldn't be read (deleted or unreadable)
211+
if result is None:
212+
continue
192213

193-
except yaml.YAMLError:
194-
self.logger.exception(f"{self.log_prefix} Invalid OWNERS file {relative_path_str}")
195-
continue
214+
file_content, relative_path_str = result
215+
216+
try:
217+
content = yaml.safe_load(file_content)
218+
if self._validate_owners_content(content, relative_path_str):
219+
parent_path = str(Path(relative_path_str).parent)
220+
if not parent_path or parent_path == ".":
221+
parent_path = "."
222+
_owners[parent_path] = content
223+
224+
except yaml.YAMLError:
225+
self.logger.exception(f"{self.log_prefix} Invalid OWNERS file {relative_path_str}")
226+
continue
196227

197228
return _owners
198229

webhook_server/tests/test_github_api.py

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import asyncio
22
import os
33
import tempfile
4+
from collections.abc import AsyncGenerator
5+
from contextlib import asynccontextmanager
46
from typing import Any
57
from unittest.mock import AsyncMock, Mock, patch
68

@@ -74,6 +76,21 @@ def minimal_headers(self) -> dict[str, str]:
7476
def logger(self):
7577
return get_logger(name="test")
7678

79+
@pytest.fixture
80+
def mock_git_worktree(self, tmp_path):
81+
"""Mock git_worktree_checkout context manager."""
82+
83+
@asynccontextmanager
84+
async def mock_worktree(
85+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
86+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
87+
# Create temp directory to simulate worktree
88+
worktree_path = str(tmp_path / "worktree")
89+
os.makedirs(worktree_path, exist_ok=True)
90+
yield (True, worktree_path, "", "") # success, path, stdout, stderr
91+
92+
return mock_worktree
93+
7794
@patch("webhook_server.libs.github_api.Config")
7895
@patch("webhook_server.libs.github_api.get_api_with_highest_rate_limit")
7996
@patch("webhook_server.libs.github_api.get_github_repo_api")
@@ -218,6 +235,7 @@ async def test_process_pull_request_event(
218235
mock_repo_api: Mock,
219236
pull_request_payload: dict[str, Any],
220237
webhook_headers: Headers,
238+
mock_git_worktree,
221239
) -> None:
222240
"""Test processing pull_request event."""
223241
# Mock GitHub API to prevent network calls
@@ -262,6 +280,10 @@ async def test_process_pull_request_event(
262280
return_value=Mock(decoded_content=b"approvers:\n - user1\nreviewers:\n - user2"),
263281
),
264282
patch.object(webhook, "_clone_repository_for_pr", new=AsyncMock(return_value=None)),
283+
patch(
284+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout",
285+
new=mock_git_worktree,
286+
),
265287
):
266288
await webhook.process()
267289
mock_process_pr.assert_called_once()
@@ -319,6 +341,7 @@ async def test_process_issue_comment_event(
319341
mock_api_rate_limit: Mock,
320342
mock_repo_api: Mock,
321343
issue_comment_payload: dict[str, Any],
344+
mock_git_worktree,
322345
) -> None:
323346
"""Test processing issue_comment event."""
324347
# Mock GitHub API to prevent network calls
@@ -364,6 +387,10 @@ async def test_process_issue_comment_event(
364387
return_value=Mock(decoded_content=b"approvers:\n - user1\nreviewers:\n - user2"),
365388
),
366389
patch.object(webhook, "_clone_repository_for_pr", new=AsyncMock(return_value=None)),
390+
patch(
391+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout",
392+
new=mock_git_worktree,
393+
),
367394
):
368395
await webhook.process()
369396
mock_process_comment.assert_called_once()
@@ -688,7 +715,9 @@ def test_prepare_log_prefix_with_color_file(
688715
assert result2 is not None
689716

690717
@pytest.mark.asyncio
691-
async def test_process_check_run_event(self, minimal_hook_data: dict, minimal_headers: dict, logger: Mock) -> None:
718+
async def test_process_check_run_event(
719+
self, minimal_hook_data: dict, minimal_headers: dict, logger: Mock, mock_git_worktree
720+
) -> None:
692721
"""Test processing check run event."""
693722
check_run_data = {
694723
"repository": {"name": "test-repo", "full_name": "org/test-repo"},
@@ -748,8 +777,14 @@ async def test_process_check_run_event(self, minimal_hook_data: dict, minimal_he
748777
mock_pr_handler.return_value.check_if_can_be_merged = AsyncMock(return_value=None)
749778

750779
webhook = GithubWebhook(check_run_data, headers, logger)
751-
with patch.object(
752-
webhook, "_clone_repository_for_pr", new=AsyncMock(return_value=None)
780+
with (
781+
patch.object(
782+
webhook, "_clone_repository_for_pr", new=AsyncMock(return_value=None)
783+
),
784+
patch(
785+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout",
786+
new=mock_git_worktree,
787+
),
753788
):
754789
await webhook.process()
755790

webhook_server/tests/test_owners_files_handler.py

Lines changed: 46 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from collections.abc import AsyncGenerator
2+
from contextlib import asynccontextmanager
13
from pathlib import Path
24
from unittest.mock import AsyncMock, Mock, call, patch
35

@@ -184,9 +186,17 @@ async def test_get_all_repository_approvers_and_reviewers(
184186
# Set clone_repo_dir to tmp_path
185187
owners_file_handler.github_webhook.clone_repo_dir = str(tmp_path)
186188

187-
# Mock _clone_repository_for_pr to do nothing (already "cloned" to tmp_path)
188-
with patch.object(owners_file_handler.github_webhook, "_clone_repository_for_pr", new=AsyncMock()):
189-
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(mock_pull_request)
189+
# Mock git_worktree_checkout to use tmp_path directly (simulate successful worktree)
190+
@asynccontextmanager
191+
async def mock_worktree(
192+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
193+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
194+
yield (True, str(tmp_path), "", "")
195+
196+
with patch(
197+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout", mock_worktree
198+
):
199+
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(branch="main")
190200

191201
expected = {
192202
".": {"approvers": ["root_approver1", "root_approver2"], "reviewers": ["root_reviewer1", "root_reviewer2"]},
@@ -222,9 +232,17 @@ async def test_get_all_repository_approvers_and_reviewers_too_many_files(
222232
owners_file_handler.github_webhook.clone_repo_dir = str(tmp_path)
223233
owners_file_handler.logger.error = Mock()
224234

225-
# Mock _clone_repository_for_pr
226-
with patch.object(owners_file_handler.github_webhook, "_clone_repository_for_pr", new=AsyncMock()):
227-
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(mock_pull_request)
235+
# Mock git_worktree_checkout to use tmp_path directly
236+
@asynccontextmanager
237+
async def mock_worktree(
238+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
239+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
240+
yield (True, str(tmp_path), "", "")
241+
242+
with patch(
243+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout", mock_worktree
244+
):
245+
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(branch="main")
228246

229247
assert len(result) == 1000
230248
owners_file_handler.logger.error.assert_called_once()
@@ -242,9 +260,17 @@ async def test_get_all_repository_approvers_and_reviewers_invalid_yaml(
242260
owners_file_handler.github_webhook.clone_repo_dir = str(tmp_path)
243261
owners_file_handler.logger.exception = Mock()
244262

245-
# Mock _clone_repository_for_pr
246-
with patch.object(owners_file_handler.github_webhook, "_clone_repository_for_pr", new=AsyncMock()):
247-
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(mock_pull_request)
263+
# Mock git_worktree_checkout to use tmp_path directly
264+
@asynccontextmanager
265+
async def mock_worktree(
266+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
267+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
268+
yield (True, str(tmp_path), "", "")
269+
270+
with patch(
271+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout", mock_worktree
272+
):
273+
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(branch="main")
248274

249275
assert result == {}
250276
owners_file_handler.logger.exception.assert_called_once()
@@ -262,9 +288,17 @@ async def test_get_all_repository_approvers_and_reviewers_invalid_content(
262288
owners_file_handler.github_webhook.clone_repo_dir = str(tmp_path)
263289
owners_file_handler.logger.error = Mock()
264290

265-
# Mock _clone_repository_for_pr
266-
with patch.object(owners_file_handler.github_webhook, "_clone_repository_for_pr", new=AsyncMock()):
267-
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(mock_pull_request)
291+
# Mock git_worktree_checkout to use tmp_path directly
292+
@asynccontextmanager
293+
async def mock_worktree(
294+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
295+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
296+
yield (True, str(tmp_path), "", "")
297+
298+
with patch(
299+
"webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout", mock_worktree
300+
):
301+
result = await owners_file_handler.get_all_repository_approvers_and_reviewers(branch="main")
268302

269303
assert result == {}
270304
owners_file_handler.logger.error.assert_called_once()

webhook_server/tests/test_pull_request_owners.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from collections.abc import AsyncGenerator
2+
from contextlib import asynccontextmanager
13
from unittest.mock import AsyncMock, patch
24

35
import pytest
@@ -144,11 +146,19 @@ async def test_get_all_repository_approvers_and_reviewers(
144146
# Set clone_repo_dir to tmp_path
145147
process_github_webhook.clone_repo_dir = str(tmp_path)
146148

149+
# Mock git_worktree_checkout to use tmp_path directly (simulate successful worktree)
150+
@asynccontextmanager
151+
async def mock_worktree(
152+
repo_dir: str, checkout: str, log_prefix: str, mask_sensitive: bool = True
153+
) -> AsyncGenerator[tuple[bool, str, str, str], None]:
154+
yield (True, str(tmp_path), "", "")
155+
147156
# Mock _clone_repository_for_pr to do nothing (already "cloned" to tmp_path)
148-
with patch.object(process_github_webhook, "_clone_repository_for_pr", new=AsyncMock()):
149-
read_owners_result = await owners_file_handler.get_all_repository_approvers_and_reviewers(
150-
pull_request=pull_request
151-
)
157+
with (
158+
patch.object(process_github_webhook, "_clone_repository_for_pr", new=AsyncMock()),
159+
patch("webhook_server.libs.handlers.owners_files_handler.helpers_module.git_worktree_checkout", mock_worktree),
160+
):
161+
read_owners_result = await owners_file_handler.get_all_repository_approvers_and_reviewers(branch="main")
152162

153163
assert read_owners_result == owners_file_handler.all_repository_approvers_and_reviewers
154164

0 commit comments

Comments
 (0)