From 4db9ecca1c9bfb9b1c74f8f13bbb331c0fdfbbdf Mon Sep 17 00:00:00 2001
From: Alessio <148966056+alessio-locatelli@users.noreply.github.com>
Date: Fri, 8 Aug 2025 11:47:25 +0300
Subject: [PATCH 1/4] feat: add `ContributionTimeCostEstimate`
---
pr_agent/settings/pr_reviewer_prompts.toml | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 16ac68c0ac..3109f9e82a 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -89,6 +89,13 @@ class TicketCompliance(BaseModel):
requires_further_human_verification: str = Field(description="Bullet-point list of items from the 'ticket_requirements' section above that cannot be assessed through code review alone, are unclear, or need further human review (e.g., browser testing, UI checks). Leave empty if all 'ticket_requirements' were marked as fully compliant or not compliant")
{%- endif %}
+{%- if require_estimate_contribution_time_cost %}
+class ContributionTimeCostEstimate(BaseModel):
+ best_case: str = Field(description="Assume that an author is an expert in the involved technology stack and that there were no edge cases or unexpected bugs during development.", examples=["45m", "5h", "10h"])
+  average_case: str = Field(description="The typical time required to create and submit the changes, assuming the author is reasonably familiar with the technology and encountered no significant unforeseen circumstances.", examples=["45m", "5h", "10h"])
+ worst_case: str = Field(description="Assume that contributing the changes could require a great deal of additional effort, such as investigating the topic, debugging the source code, or resolving unexpected errors.", examples=["45m", "5h", "10h"])
+{%- endif %}
+
class Review(BaseModel):
{%- if related_tickets %}
ticket_compliance_check: List[TicketCompliance] = Field(description="A list of compliance checks for the related tickets")
@@ -115,6 +122,9 @@ class Review(BaseModel):
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}
+{%- if require_estimate_contribution_time_cost %}
+  contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description="Based on the contribution quantity, quality, and complexity, estimate the time that may be required for a senior developer to create and submit such changes.")
+{%- endif %}
class PRReview(BaseModel):
review: Review
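
For context: the schema block above is rendered into the reviewer prompt as Python-style text; a minimal runnable sketch of the same model, assuming Pydantic v2 (class and field names come from the patch, descriptions abbreviated), would be:

    from pydantic import BaseModel, Field

    class ContributionTimeCostEstimate(BaseModel):
        best_case: str = Field(description="Expert author, no edge cases or unexpected bugs.", examples=["45m", "5h", "10h"])
        average_case: str = Field(description="Author broadly familiar with the stack, no major surprises.", examples=["45m", "5h", "10h"])
        worst_case: str = Field(description="Significant extra investigation, debugging, or error handling needed.", examples=["45m", "5h", "10h"])

    # Example: validate a parsed model answer.
    estimate = ContributionTimeCostEstimate(best_case="45m", average_case="5h", worst_case="10h")
    print(estimate.model_dump())  # use .dict() on Pydantic v1
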
From 4ea14d27a23780ea71c4e1486cb5111dad2fdd25 Mon Sep 17 00:00:00 2001
From: Alessio <148966056+alessio-locatelli@users.noreply.github.com>
Date: Fri, 8 Aug 2025 13:50:05 +0300
Subject: [PATCH 2/4] docs: mention `require_estimate_contribution_time_cost`
for `reviewer`
---
docs/docs/tools/review.md | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/docs/docs/tools/review.md b/docs/docs/tools/review.md
index 7665202aab..7ef46d83d7 100644
--- a/docs/docs/tools/review.md
+++ b/docs/docs/tools/review.md
@@ -91,6 +91,10 @@ extra_instructions = "..."
    <td><b>require_estimate_effort_to_review</b></td>
    <td>If set to true, the tool will add a section that estimates the effort needed to review the PR. Default is true.</td>
  </tr>
+  <tr>
+    <td><b>require_estimate_contribution_time_cost</b></td>
+    <td>If set to true, the tool will add a section that estimates the time required for a senior developer to create and submit such changes. Default is false.</td>
+  </tr>
  <tr>
    <td><b>require_can_be_split_review</b></td>
    <td>If set to true, the tool will add a section that checks if the PR contains several themes, and can be split into smaller PRs. Default is false.</td>
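
For context: once enabled, the flag is read through the same Dynaconf settings object as the sibling require_* options. A hedged sketch (import path per pr_agent's config loader; treat as illustrative):

    # Enable in configuration.toml first:
    #   [pr_reviewer]
    #   require_estimate_contribution_time_cost = true
    from pr_agent.config_loader import get_settings

    if get_settings().pr_reviewer.require_estimate_contribution_time_cost:
        print("contribution time cost estimate section will be requested from the model")
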
From 55c4f968763ae25af0ca72152ae675e1902a0634 Mon Sep 17 00:00:00 2001
From: Alessio <148966056+alessio-locatelli@users.noreply.github.com>
Date: Fri, 8 Aug 2025 13:22:08 +0300
Subject: [PATCH 3/4] feat: implement time cost estimate for `reviewer`
---
pr_agent/algo/utils.py | 11 +++++++-
pr_agent/settings/configuration.toml | 1 +
pr_agent/settings/pr_reviewer_prompts.toml | 31 +++++++++++++++++-----
pr_agent/tools/pr_reviewer.py | 1 +
tests/unittest/test_convert_to_markdown.py | 21 +++++++++++++++
5 files changed, 58 insertions(+), 7 deletions(-)
diff --git a/pr_agent/algo/utils.py b/pr_agent/algo/utils.py
index bf1c758871..35e3111bee 100644
--- a/pr_agent/algo/utils.py
+++ b/pr_agent/algo/utils.py
@@ -148,6 +148,7 @@ def convert_to_markdown_v2(output_data: dict,
"Insights from user's answers": "📝",
"Code feedback": "🤖",
"Estimated effort to review [1-5]": "⏱️",
+ "Contribution time cost estimate": "⏳",
"Ticket compliance check": "🎫",
}
markdown_text = ""
@@ -207,6 +208,14 @@ def convert_to_markdown_v2(output_data: dict,
markdown_text += f"### {emoji} PR contains tests\n\n"
elif 'ticket compliance check' in key_nice.lower():
markdown_text = ticket_markdown_logic(emoji, markdown_text, value, gfm_supported)
+ elif 'contribution time cost estimate' in key_nice.lower():
+ if gfm_supported:
+                markdown_text += f"<tr><td>{emoji} Contribution time estimate (best, average, worst case): "
+                markdown_text += f"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}"
+                markdown_text += f"</td></tr>\n"
+ else:
+ markdown_text += f"### {emoji} Contribution time estimate (best, average, worst case): "
+ markdown_text += f"{value['best_case'].replace('m', ' minutes')} | {value['average_case'].replace('m', ' minutes')} | {value['worst_case'].replace('m', ' minutes')}\n\n"
elif 'security concerns' in key_nice.lower():
if gfm_supported:
markdown_text += f""
@@ -1465,4 +1474,4 @@ def format_todo_items(value: list[TodoItem] | TodoItem, git_provider, gfm_suppor
markdown_text += f"- {format_todo_item(todo_item, git_provider, gfm_supported)}\n"
else:
markdown_text += f"- {format_todo_item(value, git_provider, gfm_supported)}\n"
- return markdown_text
\ No newline at end of file
+ return markdown_text
diff --git a/pr_agent/settings/configuration.toml b/pr_agent/settings/configuration.toml
index 86ea58dd8c..1fee37a509 100644
--- a/pr_agent/settings/configuration.toml
+++ b/pr_agent/settings/configuration.toml
@@ -79,6 +79,7 @@ require_tests_review=true
require_estimate_effort_to_review=true
require_can_be_split_review=false
require_security_review=true
+require_estimate_contribution_time_cost=false
require_todo_scan=false
require_ticket_analysis_review=true
# general options
diff --git a/pr_agent/settings/pr_reviewer_prompts.toml b/pr_agent/settings/pr_reviewer_prompts.toml
index 3109f9e82a..a2f2d8a8be 100644
--- a/pr_agent/settings/pr_reviewer_prompts.toml
+++ b/pr_agent/settings/pr_reviewer_prompts.toml
@@ -90,10 +90,11 @@ class TicketCompliance(BaseModel):
{%- endif %}
{%- if require_estimate_contribution_time_cost %}
+
class ContributionTimeCostEstimate(BaseModel):
- best_case: str = Field(description="Assume that an author is an expert in the involved technology stack and that there were no edge cases or unexpected bugs during development.", examples=["45m", "5h", "10h"])
-  average_case: str = Field(description="The typical time required to create and submit the changes, assuming the author is reasonably familiar with the technology and encountered no significant unforeseen circumstances.", examples=["45m", "5h", "10h"])
- worst_case: str = Field(description="Assume that contributing the changes could require a great deal of additional effort, such as investigating the topic, debugging the source code, or resolving unexpected errors.", examples=["45m", "5h", "10h"])
+ best_case: str = Field(description="An expert in the relevant technology stack, with no unforeseen issues or bugs during the work.", examples=["45m", "5h", "30h"])
+ average_case: str = Field(description="A senior developer with only brief familiarity with this specific technology stack, and no major unforeseen issues.", examples=["45m", "5h", "30h"])
+ worst_case: str = Field(description="A senior developer with no prior experience in this specific technology stack, requiring significant time for research, debugging, or resolving unexpected errors.", examples=["45m", "5h", "30h"])
{%- endif %}
class Review(BaseModel):
@@ -103,6 +104,9 @@ class Review(BaseModel):
{%- if require_estimate_effort_to_review %}
estimated_effort_to_review_[1-5]: int = Field(description="Estimate, on a scale of 1-5 (inclusive), the time and effort required to review this PR by an experienced and knowledgeable developer. 1 means short and easy review , 5 means long and hard review. Take into account the size, complexity, quality, and the needed changes of the PR code diff.")
{%- endif %}
+{%- if require_estimate_contribution_time_cost %}
+ contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description="An estimate of the time required to implement the changes, based on the quantity, quality, and complexity of the contribution, as well as the context from the PR description and commit messages.")
+{%- endif %}
{%- if require_score %}
score: str = Field(description="Rate this PR on a scale of 0-100 (inclusive), where 0 means the worst possible PR code, and 100 means PR code of the highest quality, without any bugs or performance issues, that is ready to be merged immediately and run in production at scale.")
{%- endif %}
@@ -122,9 +126,6 @@ class Review(BaseModel):
{%- if require_can_be_split_review %}
can_be_split: List[SubPR] = Field(min_items=0, max_items=3, description="Can this PR, which contains {{ num_pr_files }} changed files in total, be divided into smaller sub-PRs with distinct tasks that can be reviewed and merged independently, regardless of the order ? Make sure that the sub-PRs are indeed independent, with no code dependencies between them, and that each sub-PR represent a meaningful independent task. Output an empty list if the PR code does not need to be split.")
{%- endif %}
-{%- if require_estimate_contribution_time_cost %}
-  contribution_time_cost_estimate: ContributionTimeCostEstimate = Field(description="Based on the contribution quantity, quality, and complexity, estimate the time that may be required for a senior developer to create and submit such changes.")
-{%- endif %}
class PRReview(BaseModel):
review: Review
@@ -180,6 +181,15 @@ review:
title: ...
- ...
{%- endif %}
+{%- if require_estimate_contribution_time_cost %}
+ contribution_time_cost_estimate:
+ best_case: |
+ ...
+ average_case: |
+ ...
+ worst_case: |
+ ...
+{%- endif %}
```
Answer should be a valid YAML, and nothing else. Each YAML output MUST be after a newline, with proper indent, and block scalar indicator ('|')
@@ -309,6 +319,15 @@ review:
title: ...
- ...
{%- endif %}
+{%- if require_estimate_contribution_time_cost %}
+ contribution_time_cost_estimate:
+ best_case: |
+ ...
+ average_case: |
+ ...
+ worst_case: |
+ ...
+{%- endif %}
```
(replace '...' with the actual values)
{%- endif %}
diff --git a/pr_agent/tools/pr_reviewer.py b/pr_agent/tools/pr_reviewer.py
index d3004bf48b..c4917f3597 100644
--- a/pr_agent/tools/pr_reviewer.py
+++ b/pr_agent/tools/pr_reviewer.py
@@ -85,6 +85,7 @@ def __init__(self, pr_url: str, is_answer: bool = False, is_auto: bool = False,
"require_score": get_settings().pr_reviewer.require_score_review,
"require_tests": get_settings().pr_reviewer.require_tests_review,
"require_estimate_effort_to_review": get_settings().pr_reviewer.require_estimate_effort_to_review,
+ "require_estimate_contribution_time_cost": get_settings().pr_reviewer.require_estimate_contribution_time_cost,
'require_can_be_split_review': get_settings().pr_reviewer.require_can_be_split_review,
'require_security_review': get_settings().pr_reviewer.require_security_review,
'require_todo_scan': get_settings().pr_reviewer.get("require_todo_scan", False),
diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py
index 187ea4a812..ec3a0298bd 100644
--- a/tests/unittest/test_convert_to_markdown.py
+++ b/tests/unittest/test_convert_to_markdown.py
@@ -222,6 +222,27 @@ def test_can_be_split(self):
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
+ def test_contribution_time_cost_estimate(self):
+ input_data = {
+ 'review': {
+ 'contribution_time_cost_estimate': {
+ 'best_case': '1h',
+ 'average_case': '2h',
+ 'worst_case': '30m',
+ }
+ }
+ }
+
+        expected_output = textwrap.dedent(f"""
+            {PRReviewHeader.REGULAR.value} 🔍
+
+            <table>
+            <tr><td>⏳ Contribution time estimate (best, average, worst case): 1h | 2h | 30 minutes</td></tr>
+            </table>
+            """)
+
+ assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
+
# Tests that the function works correctly with an empty dictionary input
def test_empty_dictionary_input(self):
input_data = {}
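
For context: the str.replace('m', ' minutes') formatting introduced above assumes compact values such as '45m' or '5h'. A tiny self-contained demo of the behavior and its limits:

    # Mirrors the label formatting in convert_to_markdown_v2; note that any
    # other 'm' in the value is also replaced ('1 month' -> '1  minutesonth').
    def humanize(value: str) -> str:
        return value.replace('m', ' minutes')

    print(' | '.join(humanize(v) for v in ('1h', '2h', '30m')))  # 1h | 2h | 30 minutes
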
From dc6f3af302089ce3c96ea2a5fc3f4708fcee6716 Mon Sep 17 00:00:00 2001
From: Alessio <148966056+alessio-locatelli@users.noreply.github.com>
Date: Thu, 21 Aug 2025 13:04:19 +0300
Subject: [PATCH 4/4] test: non-GFM output
To ensure parity and prevent regressions in plain Markdown rendering.
---
tests/unittest/test_convert_to_markdown.py | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/tests/unittest/test_convert_to_markdown.py b/tests/unittest/test_convert_to_markdown.py
index ec3a0298bd..0d18e03cec 100644
--- a/tests/unittest/test_convert_to_markdown.py
+++ b/tests/unittest/test_convert_to_markdown.py
@@ -236,13 +236,26 @@ def test_contribution_time_cost_estimate(self):
        expected_output = textwrap.dedent(f"""
            {PRReviewHeader.REGULAR.value} 🔍

+            Here are some key observations to aid the review process:
+
            <table>
            <tr><td>⏳ Contribution time estimate (best, average, worst case): 1h | 2h | 30 minutes</td></tr>
            </table>
            """)
-
assert convert_to_markdown_v2(input_data).strip() == expected_output.strip()
+ # Non-GFM branch
+ expected_output_no_gfm = textwrap.dedent(f"""
+ {PRReviewHeader.REGULAR.value} 🔍
+
+ Here are some key observations to aid the review process:
+
+ ### ⏳ Contribution time estimate (best, average, worst case): 1h | 2h | 30 minutes
+
+ """)
+ assert convert_to_markdown_v2(input_data, gfm_supported=False).strip() == expected_output_no_gfm.strip()
+
+
# Tests that the function works correctly with an empty dictionary input
def test_empty_dictionary_input(self):
input_data = {}
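
For context: both rendering branches covered by these tests can be exercised directly with the same public helper the tests import. A quick manual check:

    from pr_agent.algo.utils import convert_to_markdown_v2

    data = {'review': {'contribution_time_cost_estimate': {
        'best_case': '1h', 'average_case': '2h', 'worst_case': '30m'}}}

    print(convert_to_markdown_v2(data))                       # GFM: HTML table row
    print(convert_to_markdown_v2(data, gfm_supported=False))  # plain '###' heading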