diff --git a/docs/docs/recent_updates/index.md b/docs/docs/recent_updates/index.md index bbc2b21d58..ee236ad5bf 100644 --- a/docs/docs/recent_updates/index.md +++ b/docs/docs/recent_updates/index.md @@ -8,6 +8,7 @@ It also outlines our development roadmap for the upcoming three months. Please n === "Recent Updates" | Date | Feature | Description | |---|---|---| + | 2025-08-27 | **GitLab submodule diff expansion** | Optionally expand GitLab submodule updates into full diffs. ([Learn more](https://qodo-merge-docs.qodo.ai/usage-guide/additional_configurations/#expand-gitlab-submodule-diffs)) | | 2025-08-11 | **RAG support for GitLab** | All Qodo Merge RAG features are now available in GitLab. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/rag_context_enrichment/)) | | 2025-07-29 | **High-level Suggestions** | Qodo Merge now also provides high-level code suggestion for PRs. ([Learn more](https://qodo-merge-docs.qodo.ai/core-abilities/high_level_suggestions/)) | | 2025-07-20 | **PR to Ticket** | Generate tickets in your tracking systems based on PR content. ([Learn more](https://qodo-merge-docs.qodo.ai/tools/pr_to_ticket/)) | diff --git a/docs/docs/usage-guide/additional_configurations.md b/docs/docs/usage-guide/additional_configurations.md index 5ac1429657..de4a531c51 100644 --- a/docs/docs/usage-guide/additional_configurations.md +++ b/docs/docs/usage-guide/additional_configurations.md @@ -64,9 +64,9 @@ All Qodo Merge tools have a parameter called `extra_instructions`, that enables ## Language Settings -The default response language for Qodo Merge is **U.S. English**. However, some development teams may prefer to display information in a different language. For example, your team's workflow might improve if PR descriptions and code suggestions are set to your country's native language. +The default response language for Qodo Merge is **U.S. English**. However, some development teams may prefer to display information in a different language. For example, your team's workflow might improve if PR descriptions and code suggestions are set to your country's native language. -To configure this, set the `response_language` parameter in the configuration file. This will prompt the model to respond in the specified language. Use a **standard locale code** based on [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166) (country codes) and [ISO 639](https://en.wikipedia.org/wiki/ISO_639) (language codes) to define a language-country pair. See this [comprehensive list of locale codes](https://simplelocalize.io/data/locales/). +To configure this, set the `response_language` parameter in the configuration file. This will prompt the model to respond in the specified language. Use a **standard locale code** based on [ISO 3166](https://en.wikipedia.org/wiki/ISO_3166) (country codes) and [ISO 639](https://en.wikipedia.org/wiki/ISO_639) (language codes) to define a language-country pair. See this [comprehensive list of locale codes](https://simplelocalize.io/data/locales/). Example: @@ -125,6 +125,17 @@ Increasing this number provides more context to the model, but will also increas If the PR is too large (see [PR Compression strategy](https://github.com/Codium-ai/pr-agent/blob/main/PR_COMPRESSION.md)), Qodo Merge may automatically set this number to 0, and will use the original git patch. +## Expand GitLab submodule diffs + +By default, GitLab merge requests show submodule updates as `Subproject commit` lines. To include the actual file-level changes from those submodules in Qodo Merge analysis, enable: + +```toml +[gitlab] +expand_submodule_diffs = true +``` + +When enabled, Qodo Merge will fetch and attach diffs from the submodule repositories. The default is `false` to avoid extra GitLab API calls. + ## Log Level Qodo Merge allows you to control the verbosity of logging by using the `log_level` configuration parameter. This is particularly useful for troubleshooting and debugging issues with your PR workflows. @@ -260,7 +271,7 @@ To automatically exclude files generated by specific languages or frameworks, yo ignore_language_framework = ['protobuf', ...] ``` -You can view the list of auto-generated file patterns in [`generated_code_ignore.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/generated_code_ignore.toml). +You can view the list of auto-generated file patterns in [`generated_code_ignore.toml`](https://github.com/qodo-ai/pr-agent/blob/main/pr_agent/settings/generated_code_ignore.toml). Files matching these glob patterns will be automatically excluded from PR Agent analysis. ### Ignoring Tickets with Specific Labels diff --git a/pr_agent/git_providers/gitlab_provider.py b/pr_agent/git_providers/gitlab_provider.py index 831052e40e..e9db1a3740 100644 --- a/pr_agent/git_providers/gitlab_provider.py +++ b/pr_agent/git_providers/gitlab_provider.py @@ -1,12 +1,14 @@ import difflib import hashlib import re -from typing import Optional, Tuple, Any, Union -from urllib.parse import urlparse, parse_qs +import urllib.parse +from typing import Any, Optional, Tuple, Union +from urllib.parse import parse_qs, urlparse import gitlab import requests -from gitlab import GitlabGetError, GitlabAuthenticationError, GitlabCreateError, GitlabUpdateError +from gitlab import (GitlabAuthenticationError, GitlabCreateError, + GitlabGetError, GitlabUpdateError) from pr_agent.algo.types import EDIT_TYPE, FilePatchInfo @@ -38,12 +40,12 @@ def __init__(self, merge_request_url: Optional[str] = None, incremental: Optiona raise ValueError("GitLab personal access token is not set in the config file") # Authentication method selection via configuration auth_method = get_settings().get("GITLAB.AUTH_TYPE", "oauth_token") - + # Basic validation of authentication type if auth_method not in ["oauth_token", "private_token"]: raise ValueError(f"Unsupported GITLAB.AUTH_TYPE: '{auth_method}'. " f"Must be 'oauth_token' or 'private_token'.") - + # Create GitLab instance based on authentication method try: if auth_method == "oauth_token": @@ -67,12 +69,221 @@ def __init__(self, merge_request_url: Optional[str] = None, incremental: Optiona self.diff_files = None self.git_files = None self.temp_comments = [] + self._submodule_cache: dict[tuple[str, str, str], list[dict]] = {} self.pr_url = merge_request_url self._set_merge_request(merge_request_url) self.RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@[ ]?(.*)") self.incremental = incremental + # --- submodule expansion helpers (opt-in) --- + def _get_gitmodules_map(self) -> dict[str, str]: + """ + Return {submodule_path -> repo_url} from '.gitmodules' (best effort). + Tries target branch first, then source branch. Always returns text. + """ + try: + proj = self.gl.projects.get(self.id_project) + except Exception: + return {} + + import base64 + + def _read_text(ref: str | None) -> str | None: + if not ref: + return None + try: + f = proj.files.get(file_path=".gitmodules", ref=ref) + except Exception: + return None + + # 1) python-gitlab File.decode() – usually returns BYTES + try: + raw = f.decode() + if isinstance(raw, (bytes, bytearray)): + return raw.decode("utf-8", "ignore") + if isinstance(raw, str): + return raw + except Exception: + pass + + # 2) fallback: base64 decode f.content + try: + c = getattr(f, "content", None) + if c: + return base64.b64decode(c).decode("utf-8", "ignore") + except Exception: + pass + + return None + + content = ( + _read_text(getattr(self.mr, "target_branch", None)) + or _read_text(getattr(self.mr, "source_branch", None)) + ) + if not content: + return {} + + import configparser + + parser = configparser.ConfigParser( + delimiters=("=",), + interpolation=None, + inline_comment_prefixes=("#", ";"), + strict=False, + ) + try: + parser.read_string(content) + except Exception: + return {} + + out: dict[str, str] = {} + for section in parser.sections(): + if not section.lower().startswith("submodule"): + continue + path = parser.get(section, "path", fallback=None) + url = parser.get(section, "url", fallback=None) + if path and url: + path = path.strip().strip('"').strip("'") + url = url.strip().strip('"').strip("'") + out[path] = url + return out + + def _url_to_project_path(self, url: str) -> str | None: + """ + Convert ssh/https GitLab URL to 'group/subgroup/repo' project path. + """ + try: + if url.startswith("git@") and ":" in url: + path = url.split(":", 1)[1] + else: + path = urllib.parse.urlparse(url).path.lstrip("/") + if path.endswith(".git"): + path = path[:-4] + return path or None + except Exception: + return None + + def _project_by_path(self, proj_path: str): + """ + Resolve a project by path with multiple strategies: + 1) URL-encoded path_with_namespace + 2) Raw path_with_namespace + 3) Search fallback + exact match on path_with_namespace (case-insensitive) + Returns a project object or None. + """ + if not proj_path: + return None + + # 1) Encoded + try: + enc = urllib.parse.quote_plus(proj_path) + return self.gl.projects.get(enc) + except Exception: + pass + + # 2) Raw + try: + return self.gl.projects.get(proj_path) + except Exception: + pass + + # 3) Search fallback + try: + name = proj_path.split("/")[-1] + # membership=True so we don't leak other people's repos + matches = self.gl.projects.list(search=name, simple=True, membership=True, per_page=100) + # prefer exact path_with_namespace match (case-insensitive) + for p in matches: + pwn = getattr(p, "path_with_namespace", "") + if pwn.lower() == proj_path.lower(): + return self.gl.projects.get(p.id) + if matches: + get_logger().warning(f"[submodule] no exact match for {proj_path} (skip)") + except Exception: + pass + + return None + + def _compare_submodule(self, proj_path: str, old_sha: str, new_sha: str) -> list[dict]: + """ + Call repository_compare on submodule project; return list of diffs. + """ + key = (proj_path, old_sha, new_sha) + if key in self._submodule_cache: + return self._submodule_cache[key] + try: + proj = self._project_by_path(proj_path) + if proj is None: + get_logger().warning(f"[submodule] resolve failed for {proj_path}") + self._submodule_cache[key] = [] + return [] + cmp = proj.repository_compare(old_sha, new_sha) + if isinstance(cmp, dict): + diffs = cmp.get("diffs", []) or [] + else: + diffs = [] + self._submodule_cache[key] = diffs + return diffs + except Exception as e: + get_logger().warning(f"[submodule] compare failed for {proj_path} {old_sha}..{new_sha}: {e}") + self._submodule_cache[key] = [] + return [] + + def _expand_submodule_changes(self, changes: list[dict]) -> list[dict]: + """ + If enabled, expand 'Subproject commit' bumps into real file diffs from the submodule. + Soft-fail on any issue. + """ + try: + if not bool(get_settings().get("GITLAB.EXPAND_SUBMODULE_DIFFS", False)): + return changes + except Exception: + return changes + + gitmodules = self._get_gitmodules_map() + if not gitmodules: + return changes + + out = list(changes) + for ch in changes: + patch = ch.get("diff") or "" + if "Subproject commit" not in patch: + continue + + # Extract old/new SHAs from the hunk + old_m = re.search(r"^-Subproject commit ([0-9a-f]{7,40})", patch, re.M) + new_m = re.search(r"^\+Subproject commit ([0-9a-f]{7,40})", patch, re.M) + if not (old_m and new_m): + continue + old_sha, new_sha = old_m.group(1), new_m.group(1) + + sub_path = ch.get("new_path") or ch.get("old_path") or "" + repo_url = gitmodules.get(sub_path) + if not repo_url: + get_logger().warning(f"[submodule] no url for '{sub_path}' in .gitmodules (skip)") + continue + + proj_path = self._url_to_project_path(repo_url) + if not proj_path: + get_logger().warning(f"[submodule] cannot parse project path from url '{repo_url}' (skip)") + continue + + get_logger().info(f"[submodule] {sub_path} url={repo_url} -> proj_path={proj_path}") + sub_diffs = self._compare_submodule(proj_path, old_sha, new_sha) + for sd in sub_diffs: + sd_diff = sd.get("diff") or "" + sd_old = sd.get("old_path") or sd.get("a_path") or "" + sd_new = sd.get("new_path") or sd.get("b_path") or sd_old + out.append({ + "old_path": f"{sub_path}/{sd_old}" if sd_old else sub_path, + "new_path": f"{sub_path}/{sd_new}" if sd_new else sub_path, + "diff": sd_diff, + "new_file": sd.get("new_file", False), + "deleted_file": sd.get("deleted_file", False), + "renamed_file": sd.get("renamed_file", False), + }) + return out def is_supported(self, capability: str) -> bool: if capability in ['get_issue_comments', 'create_inline_comment', 'publish_inline_comments', @@ -152,11 +363,11 @@ def create_or_update_pr_file(self, file_path: str, branch: str, contents="", mes """Create or update a file in the GitLab repository.""" try: project = self.gl.projects.get(self.id_project) - + if not message: action = "Update" if contents else "Create" message = f"{action} {file_path}" - + try: existing_file = project.files.get(file_path, branch) existing_file.content = contents @@ -194,7 +405,9 @@ def get_diff_files(self) -> list[FilePatchInfo]: return self.diff_files # filter files using [ignore] patterns - diffs_original = self.mr.changes()['changes'] + raw_changes = self.mr.changes().get('changes', []) + raw_changes = self._expand_submodule_changes(raw_changes) + diffs_original = raw_changes diffs = filter_ignored(diffs_original, 'gitlab') if diffs != diffs_original: try: @@ -264,7 +477,9 @@ def get_diff_files(self) -> list[FilePatchInfo]: def get_files(self) -> list: if not self.git_files: - self.git_files = [change['new_path'] for change in self.mr.changes()['changes']] + raw_changes = self.mr.changes().get('changes', []) + raw_changes = self._expand_submodule_changes(raw_changes) + self.git_files = [c.get('new_path') for c in raw_changes if c.get('new_path')] return self.git_files def publish_description(self, pr_title: str, pr_body: str): @@ -420,7 +635,9 @@ def send_inline_comment(self, body: str, edit_type: str, found: bool, relevant_f get_logger().exception(f"Failed to create comment in MR {self.id_mr}") def get_relevant_diff(self, relevant_file: str, relevant_line_in_file: str) -> Optional[dict]: - changes = self.mr.changes() # Retrieve the changes for the merge request once + _changes = self.mr.changes() # dict + _changes['changes'] = self._expand_submodule_changes(_changes.get('changes', [])) + changes = _changes if not changes: get_logger().error('No changes found for the merge request.') return None @@ -589,14 +806,14 @@ def add_eyes_reaction(self, issue_comment_id: int, disable_eyes: bool = False) - if not self.id_mr: get_logger().warning("Cannot add eyes reaction: merge request ID is not set.") return None - + mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) comment = mr.notes.get(issue_comment_id) - + if not comment: get_logger().warning(f"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.") return None - + award_emoji = comment.awardemojis.create({ 'name': 'eyes' }) @@ -610,20 +827,20 @@ def remove_reaction(self, issue_comment_id: int, reaction_id: str) -> bool: if not self.id_mr: get_logger().warning("Cannot remove reaction: merge request ID is not set.") return False - + mr = self.gl.projects.get(self.id_project).mergerequests.get(self.id_mr) comment = mr.notes.get(issue_comment_id) if not comment: get_logger().warning(f"Comment with ID {issue_comment_id} not found in merge request {self.id_mr}.") return False - + reactions = comment.awardemojis.list() for reaction in reactions: if reaction.name == reaction_id: reaction.delete() return True - + get_logger().warning(f"Reaction '{reaction_id}' not found in comment {issue_comment_id}.") return False except Exception as e: diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml index 1fee37a509..6240217b92 100644 --- a/pr_agent/settings/configuration.toml +++ b/pr_agent/settings/configuration.toml @@ -275,6 +275,7 @@ push_commands = [ [gitlab] url = "https://gitlab.com" +expand_submodule_diffs = false pr_commands = [ "/describe --pr_description.final_update_message=false", "/review", diff --git a/tests/unittest/test_gitlab_provider.py b/tests/unittest/test_gitlab_provider.py index d1889a35d0..dadc0903cb 100644 --- a/tests/unittest/test_gitlab_provider.py +++ b/tests/unittest/test_gitlab_provider.py @@ -1,35 +1,36 @@ -import pytest from unittest.mock import MagicMock, patch -from pr_agent.git_providers.gitlab_provider import GitLabProvider +import pytest from gitlab import Gitlab -from gitlab.v4.objects import Project, ProjectFile from gitlab.exceptions import GitlabGetError +from gitlab.v4.objects import Project, ProjectFile + +from pr_agent.git_providers.gitlab_provider import GitLabProvider class TestGitLabProvider: """Test suite for GitLab provider functionality.""" - + @pytest.fixture def mock_gitlab_client(self): client = MagicMock() return client - + @pytest.fixture def mock_project(self): project = MagicMock() return project - + @pytest.fixture def gitlab_provider(self, mock_gitlab_client, mock_project): with patch('pr_agent.git_providers.gitlab_provider.gitlab.Gitlab', return_value=mock_gitlab_client), \ patch('pr_agent.git_providers.gitlab_provider.get_settings') as mock_settings: - + mock_settings.return_value.get.side_effect = lambda key, default=None: { "GITLAB.URL": "https://gitlab.com", "GITLAB.PERSONAL_ACCESS_TOKEN": "fake_token" }.get(key, default) - + mock_gitlab_client.projects.get.return_value = mock_project provider = GitLabProvider("https://gitlab.com/test/repo/-/merge_requests/1") provider.gl = mock_gitlab_client @@ -40,9 +41,9 @@ def test_get_pr_file_content_success(self, gitlab_provider, mock_project): mock_file = MagicMock(ProjectFile) mock_file.decode.return_value = "# Changelog\n\n## v1.0.0\n- Initial release" mock_project.files.get.return_value = mock_file - + content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main") - + assert content == "# Changelog\n\n## v1.0.0\n- Initial release" mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main") mock_file.decode.assert_called_once() @@ -51,39 +52,39 @@ def test_get_pr_file_content_with_bytes(self, gitlab_provider, mock_project): mock_file = MagicMock(ProjectFile) mock_file.decode.return_value = b"# Changelog\n\n## v1.0.0\n- Initial release" mock_project.files.get.return_value = mock_file - + content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main") - + assert content == "# Changelog\n\n## v1.0.0\n- Initial release" mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main") def test_get_pr_file_content_file_not_found(self, gitlab_provider, mock_project): mock_project.files.get.side_effect = GitlabGetError("404 Not Found") - + content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main") - + assert content == "" mock_project.files.get.assert_called_once_with("CHANGELOG.md", "main") def test_get_pr_file_content_other_exception(self, gitlab_provider, mock_project): mock_project.files.get.side_effect = Exception("Network error") - + content = gitlab_provider.get_pr_file_content("CHANGELOG.md", "main") - + assert content == "" def test_create_or_update_pr_file_create_new(self, gitlab_provider, mock_project): mock_project.files.get.side_effect = GitlabGetError("404 Not Found") mock_file = MagicMock() mock_project.files.create.return_value = mock_file - + new_content = "# Changelog\n\n## v1.1.0\n- New feature" commit_message = "Add CHANGELOG.md" - + gitlab_provider.create_or_update_pr_file( "CHANGELOG.md", "feature-branch", new_content, commit_message ) - + mock_project.files.get.assert_called_once_with("CHANGELOG.md", "feature-branch") mock_project.files.create.assert_called_once_with({ 'file_path': 'CHANGELOG.md', @@ -96,21 +97,21 @@ def test_create_or_update_pr_file_update_existing(self, gitlab_provider, mock_pr mock_file = MagicMock(ProjectFile) mock_file.decode.return_value = "# Old changelog content" mock_project.files.get.return_value = mock_file - + new_content = "# New changelog content" commit_message = "Update CHANGELOG.md" - + gitlab_provider.create_or_update_pr_file( "CHANGELOG.md", "feature-branch", new_content, commit_message ) - + mock_project.files.get.assert_called_once_with("CHANGELOG.md", "feature-branch") mock_file.content = new_content mock_file.save.assert_called_once_with(branch="feature-branch", commit_message=commit_message) def test_create_or_update_pr_file_update_exception(self, gitlab_provider, mock_project): mock_project.files.get.side_effect = Exception("Network error") - + with pytest.raises(Exception): gitlab_provider.create_or_update_pr_file( "CHANGELOG.md", "feature-branch", "content", "message" @@ -122,10 +123,10 @@ def test_has_create_or_update_pr_file_method(self, gitlab_provider): def test_method_signature_compatibility(self, gitlab_provider): import inspect - + sig = inspect.signature(gitlab_provider.create_or_update_pr_file) params = list(sig.parameters.keys()) - + expected_params = ['file_path', 'branch', 'contents', 'message'] assert params == expected_params @@ -141,7 +142,53 @@ def test_content_encoding_handling(self, gitlab_provider, mock_project, content, mock_file = MagicMock(ProjectFile) mock_file.decode.return_value = content mock_project.files.get.return_value = mock_file - + result = gitlab_provider.get_pr_file_content("test.md", "main") - - assert result == expected \ No newline at end of file + + assert result == expected + + def test_get_gitmodules_map_parsing(self, gitlab_provider, mock_project): + gitlab_provider.id_project = "1" + gitlab_provider.mr = MagicMock() + gitlab_provider.mr.target_branch = "main" + + file_obj = MagicMock(ProjectFile) + file_obj.decode.return_value = ( + "[submodule \"libs/a\"]\n" + " path = \"libs/a\"\n" + " url = \"https://gitlab.com/a.git\"\n" + "[submodule \"libs/b\"]\n" + " path = libs/b\n" + " url = git@gitlab.com:b.git\n" + ) + mock_project.files.get.return_value = file_obj + gitlab_provider.gl.projects.get.return_value = mock_project + + result = gitlab_provider._get_gitmodules_map() + assert result == { + "libs/a": "https://gitlab.com/a.git", + "libs/b": "git@gitlab.com:b.git", + } + + def test_project_by_path_requires_exact_match(self, gitlab_provider): + gitlab_provider.gl.projects.get.reset_mock() + gitlab_provider.gl.projects.get.side_effect = Exception("not found") + fake = MagicMock() + fake.path_with_namespace = "other/group/repo" + gitlab_provider.gl.projects.list.return_value = [fake] + + result = gitlab_provider._project_by_path("group/repo") + + assert result is None + assert gitlab_provider.gl.projects.get.call_count == 2 + + def test_compare_submodule_cached(self, gitlab_provider): + proj = MagicMock() + proj.repository_compare.return_value = {"diffs": [{"diff": "d"}]} + with patch.object(gitlab_provider, "_project_by_path", return_value=proj) as m_pbp: + first = gitlab_provider._compare_submodule("grp/repo", "old", "new") + second = gitlab_provider._compare_submodule("grp/repo", "old", "new") + + assert first == second == [{"diff": "d"}] + m_pbp.assert_called_once_with("grp/repo") + proj.repository_compare.assert_called_once_with("old", "new")