In [21]:
# Notebook parameters: the model name handed to GenerativeModel for every LLM
# call below, and the pull request (repository + number) that gets reviewed.
ai_model = "gemini-2.0-flash-001"
git_repo_name = "piotrhyzy/cloudfest2025"
git_pr_number = 7


In [22]:
import configuration as config

In [23]:
import vertexai
# Initialise the Vertex AI SDK with the project ID and location taken from the
# local configuration module; the GenerativeModel calls later in the notebook
# rely on this having run first.
vertexai.init(project=config.VERTEX_PROJECT_ID, location=config.VERTEX_LOCATION)

In [24]:
from shared.gcp import Secret
# Load the GitHub credentials through the project's Secret helper, addressed by
# the secret project ID and secret name from the configuration module.
secret = Secret.access_secret_version(
        config.GITHUB_SECRET_PROJECT_ID,
        config.GITHUB_SECRET_NAME,
    )
# The payload is indexed by key below — presumably a dict-like bundle of
# GitHub App credentials; verify against shared.gcp.
# Do not print these values: notebook outputs are saved with the file.
app_id = secret["app_id"]
installation_id = secret["installation_id"]
private_key = secret["private_key"]

In [25]:
from shared.git import get_client
# Build the client object that every later shared.git call receives, from the
# credentials loaded above and the configured GitHub base URL.
git_client = get_client(app_id, installation_id, private_key, config.GITHUB_BASE_URL)

## Fetch PR Title and Description

In [None]:
from shared.git import get_pr_details
# Look up the pull request selected by git_repo_name / git_pr_number.
# The result's title and description are used later for the summary prompt.
pr_details = get_pr_details(git_client, git_repo_name, git_pr_number)

# Dump the details as indented JSON for a quick visual check.
print(pr_details.model_dump_json(indent=2))

## Fetch PR Diff

In [None]:
from shared.git import get_pr_diff
# Fetch the diff of the selected pull request; it is both chunked per file
# below and embedded as-is in the summary prompt.
pr_diff = get_pr_diff(git_client, git_repo_name, git_pr_number)

# NOTE(review): prints the complete diff, which can be very long for big PRs.
print(pr_diff)


## Chunk GitHub Diff

In [None]:
from shared.diff import process_diff
# Turn the fetched diff into the project's structured representation
# (an object exposing model_dump_json, as used on the next line).
unified_diff = process_diff(pr_diff)
# Dump the structured diff as indented JSON for inspection.
print(unified_diff.model_dump_json(indent=4))

## Convert the structured diff to a review File object

In [29]:
from shared.review import convert_diff_to_review_files
# Combine the structured diff with the PR details into an iterable of review
# file objects; later cells read their .path, .diff and .snapshot attributes.
review_files = convert_diff_to_review_files(unified_diff, pr_details)

In [None]:
# Inspect every review file as indented JSON before snapshots are attached.
for _review_file in review_files:
    print(_review_file.model_dump_json(indent=2))

## Fetch File Snapshot

In [None]:
from shared.git import download_file_snapschot_from_pr

# Attach a file snapshot to each review file. This mutates the objects in
# review_files in place, so the JSON printed by the previous cell is stale
# once this cell has run.
# NOTE(review): which revision of the file the helper downloads is not visible
# here; the review prompt later presents it as the original content — confirm.
for _review_file in review_files:
    _review_file.snapshot = download_file_snapschot_from_pr(git_client, git_repo_name, git_pr_number, _review_file.path)
    print(_review_file.model_dump_json(indent=2))

## Request LLM For File Review

### Prompt 

In [32]:
from vertexai.generative_models import GenerativeModel

import json

def generate_pr_file_review_prompt(file_path: str, file_diff: str, original_file_content: str) -> str:
    """
    Build the complete review prompt for one changed file of a pull request.

    The prompt starts with a fixed instruction block (reviewer role, review
    rules and the JSON schema the answer must follow) and ends with the
    file-specific material: the file path, its diff and a snapshot of the file.

    Args:
        file_path: Path of the file under review, embedded verbatim.
        file_diff: Diff of the file, embedded verbatim.
        original_file_content: File snapshot, embedded verbatim as reference material.

    Returns:
        The assembled prompt text.
    """
    # Both side fields accept the same two values.
    diff_sides = ['LEFT', 'RIGHT']

    # Field order matters: it defines both the serialized property order and
    # the 'required' list derived from it.
    comment_fields = {
        'path': {'title': 'Path', 'type': 'string'},
        'start_line': {'title': 'Start Line', 'type': 'integer'},
        'line': {'title': 'Line', 'type': 'integer'},
        'body': {'title': 'Body', 'type': 'string'},
        'start_side': {'enum': diff_sides, 'title': 'Start Side', 'type': 'string'},
        'side': {'enum': diff_sides, 'title': 'Side', 'type': 'string'},
    }
    review_comment_schema = {
        'properties': comment_fields,
        'required': list(comment_fields),
        'title': 'ReviewComment',
        'type': 'object',
    }
    response_schema = {
        '$defs': {'ReviewComment': review_comment_schema},
        'properties': {
            'reviews': {
                'items': {'$ref': '#/$defs/ReviewComment'},
                'title': 'Reviews',
                'type': 'array',
            },
        },
        'required': ['reviews'],
        'title': 'Review',
        'type': 'object',
    }
    schema_json = json.dumps(response_schema)

    # Fixed part of the prompt: role, rules and the expected output schema.
    instructions = f"""
        You are a senior software engineer performing a thorough code review.
        Your task is to analyze the changes in a pull request and provide feedback in a specific JSON format.
        Focus exclusively on the code added or removed as indicated in the diff; do not comment on unchanged code.
        Be concise, precise, and only comment when a change can be meaningfully improved.
        Adhere to PEP-8 guidelines for style and Python 3.10+ semantics for typing.
        If a change poses no issues, do not generate a comment for it.

        **Instructions:**
        - **Scope:** Only review the `ADDED` and `REMOVED` lines in the diff. Ignore unchanged lines.
        - **Conciseness:** Be brief and focus on the most important issues.
        - **Style:** Apply PEP-8 style checks.
        - **Typing:** Prefer `type | None` over `Optional[type]` for type hinting in Python 3.10+.
        - **Consistency:** Keep the review consistent throughout the file.
        - **Do Not:**
            - Praise or summarize the changes.
            - Comment on unchanged code.
            - Suggest renames or changes outside the provided code.
            - Modify or suggest changes to existing names of classes, functions, messages, or objects.
            - Confirm correctness without offering improvements.
        - **Docstrings:** Do not comment on the absence of docstrings if similar existing code lacks them.
        - **Comments in Code:** Require inline comments for non-obvious logic, conditions, loops and algorithms.
        - **No Improvement:** If no improvements are needed, return an empty `reviews` array.
        - **Output format:** You MUST provide a JSON formatted output using the `ReviewComment` schema with all required fields.
        - **Reviews:** scope a review comment with all affected lines (start_line, line) to make sure the feedback is clear.
        - **Line Number and Side:**
            - Use the `line` and `side` as provided in the diff.
            - For multi-line comments, specify `start_line`/`start_side` and `line`/`side` accordingly.
            - For single-line comments, `start_line = line` and `start_side = side`.
        - **path:** Provide the file path exactly as given.
        - **Code Suggestions:** When applicable, include code suggestions using GitHub's code suggestion format:
            ```suggestion
            # Your improved code here
            ```

        **JSON Output Schema:**
        ```json
            {schema_json}
        ```
        """

    # File-specific part of the prompt, appended right after the instructions.
    file_section = f"""
        Here is the path to the file: {file_path}
        Here is the diff from the pull request: {file_diff}
        For reference only: here's a snapshot of the file with original content: {original_file_content}

        Now provide a comprehensive review of the code changes, only if there are meaningful improvements to apply.
        Remember to use proper JSON format in the specified ReviewComment schema.
        Include code suggestions in the review body when it can be helpful for the developer,
        using the GitHub suggestion format: ```suggestion ...```.
        """

    return instructions + file_section


def review_single_file_changes_gemini(
    file_path: str,
    original_file_content: str,
    diff: str,
    language: str = 'python'
) -> str:
    """Ask the configured Gemini model (via the Vertex AI SDK) to review one file's changes.

    Args:
        file_path: Path of the file being reviewed.
        original_file_content: Snapshot of the file, given to the prompt as reference.
        diff: Diff of the changes to review.
        language: Programming language of the code. It is only validated here;
            the prompt builder does not receive it.

    Returns:
        The model's response text with surrounding whitespace stripped.

    Raises:
        ValueError: If any argument is empty. Errors raised by the SDK call
            itself are not caught here.
    """
    # Reject empty arguments up front, checked in declaration order.
    required_arguments = (
        ("file_path", file_path),
        ("original_file_content", original_file_content),
        ("diff", diff),
        ("language", language),
    )
    for argument_name, argument_value in required_arguments:
        if not argument_value:
            raise ValueError(f"{argument_name} is required")

    review_prompt = generate_pr_file_review_prompt(
        file_path=file_path,
        file_diff=diff,
        original_file_content=original_file_content,
    )

    # Sampling settings for the review request: one candidate, capped output
    # length and a low temperature.
    sampling_options = dict(
        candidate_count=1,
        max_output_tokens=2048,
        temperature=0.2,
        top_p=1,
        top_k=40,
    )

    # The model name comes from the notebook-level ai_model setting.
    gemini = GenerativeModel(ai_model)
    answer = gemini.generate_content(review_prompt, generation_config=sampling_options)

    return answer.text.strip()



### Loop over files and get review

In [33]:
from shared.review import parse_review_comments
# Review every changed file with the LLM and gather the parsed comments in a
# single list. The list is rebuilt from scratch on each run of this cell.
pr_review_comments = []
for _review_file in review_files:
    pr_review_comments.extend(
        parse_review_comments(
            review_single_file_changes_gemini(
                file_path=_review_file.path,
                original_file_content=_review_file.snapshot,
                diff=_review_file.diff,
            )
        )
    )

In [None]:
# Inspect the parsed review comments as indented JSON before they are
# summarised and posted.
for _comment in pr_review_comments:
    print(_comment.model_dump_json(indent=2))

## Request LLM For Review Summary



In [35]:
from vertexai.generative_models import GenerativeModel, GenerationConfig

from shared.review import ReviewComment


def summarize_pull_request(pr_title: str, pr_description: str, pr_diff: str, review_comments: list[ReviewComment] | None = None) -> str:
    """
    Ask the configured Gemini model on Vertex AI for a summary of a pull request.

    The prompt contains the PR title, description and code diff, optionally
    followed by the review comments, and ends with the task instructions and
    the section layout expected in the answer.

    Args:
        pr_title: Title of the pull request.
        pr_description: Description of the pull request.
        pr_diff: Diff of the pull request, embedded verbatim.
        review_comments: Review comments to include in the prompt. None or an
            empty list leaves the "Review Comments" section out.

    Returns:
        The model's response text with surrounding whitespace stripped.
    """
    # Sampling settings for the summary request.
    generation_config = {
        "max_output_tokens": 8192,  # upper bound on the length of the response
        "temperature": 0.5,  # moderate randomness; the per-file review uses 0.2
        "top_p": 1,  # NOTE(review): 1 should leave nucleus sampling unrestricted — confirm against the SDK docs
    }
    # The model name comes from the notebook-level ai_model setting.
    model = GenerativeModel(ai_model)

    # The prompt is collected piece by piece and joined once at the end.
    prompt_parts = [f"""
        Below is a pull request:

        Title:
        {pr_title}

        Description:
        {pr_description}

        Code Diff:
        {pr_diff}
        """]

    if review_comments:
        prompt_parts.append("""

        Review Comments:
        Here are some review comments and the proposed changes to the PR:
        """)
        # Comments are numbered from 1 in the prompt.
        for index, comment in enumerate(review_comments, start=1):
            prompt_parts.append(f"""
            Comment {index}:
            File: {comment.path}
            Lines: {comment.start_line}-{comment.line}
            Comment: {comment.body}
            """)
        prompt_parts.append("""

        """)

    # Task list (steps numbered without gaps or repeats) and answer skeleton.
    prompt_parts.append("""

        Task:
        1. Read the pull request details, code diff, and review comments.
        2. Provide a concise summary of what this PR changes and why it matters with Highlights.
        3. Provide a Changelog using an expandable "Changelog" section for more details.
        4. Summarize the main points from the review comments.
        5. Provide Merge Assessment and Overall Quality Assessment.
        6. Keep it short, focusing on the main modifications, purpose and review comments.
        7. Use GitHub emoji to make it more appealing.

        Provide only the output, no other comments or introductions.

        # Pull Request Summary:

        ...

        # Review Summary:

        ...

        # Overall Quality Assessment

        ...

        # Merge Assessment

        ...
        """)

    prompt_text = "".join(prompt_parts)

    # Call the model with the prompt
    response = model.generate_content(
        prompt_text,
        generation_config=generation_config,
    )

    return response.text.strip()

In [36]:
# Generate the overall PR summary from the PR title, description, full diff
# and the per-file review comments collected earlier.
pr_summary = summarize_pull_request(pr_details.title, pr_details.description, pr_diff, pr_review_comments)

In [None]:
# Show the generated summary text for a manual check before it is published.
print(pr_summary)

## Publish Review to GitHub

In [None]:
from shared.git import post_pr_review
# Hand the summary and the per-file comments to the project's post_pr_review
# helper for the selected pull request.
# NOTE(review): presumably this publishes a review on GitHub (per the section
# heading), so re-running the cell would post again — confirm before re-running.
post_pr_review(git_client, git_repo_name, git_pr_number, pr_summary, pr_review_comments)