In [21]:
from typing import Optional
from urllib.parse import quote, urlparse

import requests
from pydantic import BaseModel

## Data Fetch

Fetch a GitHub pull request through the REST API and parse the responses into pydantic models that expose the fields this notebook needs.

In [30]:

class GitUser(BaseModel):
    """GitHub account object as embedded in REST API responses.

    Used in this notebook for the ``user`` field of a pull request. Every
    field is required except ``gravatar_id``.
    """
    login: str
    id: int
    node_id: str
    avatar_url: str
    # GitHub's schema declares this field nullable, so a strict ``str`` would
    # reject an otherwise valid payload.
    gravatar_id: Optional[str] = None
    url: str
    html_url: str
    followers_url: str
    following_url: str
    gists_url: str
    starred_url: str
    subscriptions_url: str
    organizations_url: str
    repos_url: str
    events_url: str
    received_events_url: str
    type: str

class Repository(BaseModel):
    """Repository fields read from the ``repo`` entry of a pull request side."""
    url: str
    # Browser-style repository URL (e.g. https://github.com/<owner>/<repo>);
    # the notebook uses it as the prefix when building raw-file links.
    svn_url: str

class GitCommits(BaseModel):
    """One side (``head`` or ``base``) of a pull request.

    Attributes:
        sha: Commit SHA that side currently points at.
        label: Label of that side as reported by GitHub.
        ref: Git ref (branch name) of that side.
        repo: Repository the ref lives in, or ``None`` when GitHub reports
            no repository for it.
    """
    sha: str
    label: str
    ref: str
    # GitHub returns ``"repo": null`` for ``head`` once the source fork has
    # been deleted; a required field would make the whole PR fail validation.
    repo: Optional[Repository] = None

class PRDetails(BaseModel):
    """Pull request metadata as returned by ``GET /repos/{owner}/{repo}/pulls/{n}``.

    Only the fields this notebook reads are declared. ``body`` and
    ``mergeable`` may be ``None``; all other fields are required.
    """
    title: str
    # The API sends ``null`` when the pull request has no description.
    body: Optional[str] = None
    user: GitUser
    created_at: str
    updated_at: str
    merged: bool
    mergeable: Optional[bool] = None
    commits: int
    additions: int
    deletions: int
    changed_files: int
    head: GitCommits
    base: GitCommits

class PRFile(BaseModel):
    """One changed file from ``GET /repos/{owner}/{repo}/pulls/{n}/files``.

    Attributes:
        filename: Path of the file inside the repository.
        status: Change type reported by GitHub (the notebook filters on
            ``'modified'``).
        raw_url: Link to the file content at the pull request's head commit.
        patch: Unified diff hunk for the file, or ``None`` when GitHub does
            not include one.
    """
    sha: str
    filename: str
    status: str
    additions: int
    deletions: int
    changes: int
    blob_url: str
    raw_url: str
    contents_url: str
    # GitHub omits ``patch`` for binary files and for diffs that are too
    # large, so it must not be a required field.
    patch: Optional[str] = None


class GithubPRFetcher:
    """Fetch a pull request's metadata and changed files from the GitHub REST API.

    Args:
        pr_url: Browser URL of the pull request, e.g.
            ``https://github.com/<owner>/<repo>/pull/<number>``. Sub-page
            URLs such as ``.../pull/<number>/files`` are accepted too.
        timeout: Seconds to wait on each HTTP request before giving up.
        token: Optional GitHub access token, sent as a bearer token with
            every request (needed for private repositories and for a higher
            rate limit).

    Raises:
        ValueError: If an owner, a repository and a numeric pull request
            number cannot be extracted from ``pr_url``.
    """

    def __init__(self, pr_url, timeout=30, token=None):
        self.pr_url = pr_url
        self.timeout = timeout
        self.parts = [part for part in urlparse(pr_url).path.split('/') if part]
        if len(self.parts) < 3:
            raise ValueError(f'Not a GitHub pull request URL: {pr_url!r}')

        owner, repo = self.parts[0], self.parts[1]
        # Prefer the segment right after "pull" so that sub-pages like
        # ".../pull/12/files" resolve to PR 12; otherwise fall back to the
        # last path segment.
        if len(self.parts) > 3 and self.parts[2] in ('pull', 'pulls'):
            number = self.parts[3]
        else:
            number = self.parts[-1]
        if not number.isdigit():
            raise ValueError(f'No pull request number found in URL: {pr_url!r}')

        self.api_pr_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{number}'
        self.file_url = f'{self.api_pr_url}/files'

        self.headers = {'Accept': 'application/vnd.github+json'}
        if token:
            self.headers['Authorization'] = f'Bearer {token}'

    def _get(self, url, params=None):
        """Issue a GET request with the shared headers/timeout; raise on HTTP errors."""
        response = requests.get(url, params=params, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()
        return response

    def fetch_pr_details(self) -> 'PRDetails':
        """Return the pull request metadata parsed into a ``PRDetails`` model.

        Raises:
            requests.HTTPError: If GitHub answers with an error status.
        """
        return PRDetails.model_validate(self._get(self.api_pr_url).json())

    def fetch_pr_files(self) -> 'list[PRFile]':
        """Return every file changed by the pull request.

        The endpoint is paginated, so all pages are followed via the ``Link``
        response header instead of stopping after the first page.

        Raises:
            requests.HTTPError: If GitHub answers with an error status.
        """
        files = []
        url, params = self.file_url, {'per_page': 100}
        while url:
            response = self._get(url, params=params)
            files.extend(PRFile.model_validate(item) for item in response.json())
            # The "next" link already carries its own query string.
            url = response.links.get('next', {}).get('url')
            params = None
        return files

In [31]:
# Pull request under review; point this at another PR URL to analyse a different one.
pull_request_url = 'https://github.com/topoteretes/cognee/pull/1851'
pr_fetcher = GithubPRFetcher(pr_url=pull_request_url)

In [32]:
# Each call performs an HTTP request against the GitHub API and raises if the
# response status signals an error.
pr_details = pr_fetcher.fetch_pr_details()
pr_files = pr_fetcher.fetch_pr_files()

In [35]:
# For every modified file, print the raw URL of its version at the base commit
# next to the raw URL GitHub reports for its version in the pull request.
base_repo_url = pr_details.base.repo.svn_url
base_sha = pr_details.base.sha
for file in pr_files:
    # Added/removed/renamed files have no same-path counterpart to compare.
    if file.status != 'modified':
        continue
    # Percent-encode the path (quote() keeps "/") so file names containing
    # spaces, "#" or "?" still produce a valid URL.
    base_file = f'{base_repo_url}/raw/{base_sha}/{quote(file.filename)}'
    updated_file = file.raw_url
    print(f'base file url: {base_file}')
    print(f'updated file url: {updated_file}')
    print('---')

base file url: https://github.com/topoteretes/cognee/raw/aeda1d8eba5ee982d9f5b62c3ab8686cf2280134/cognee/cli/_cognee.py
updated file url: https://github.com/topoteretes/cognee/raw/bec5a99a562daa07a1618b32ab0366a2b629d6ab/cognee%2Fcli%2F_cognee.py
---
base file url: https://github.com/topoteretes/cognee/raw/aeda1d8eba5ee982d9f5b62c3ab8686cf2280134/cognee/infrastructure/databases/relational/sqlalchemy/SqlAlchemyAdapter.py
updated file url: https://github.com/topoteretes/cognee/raw/bec5a99a562daa07a1618b32ab0366a2b629d6ab/cognee%2Finfrastructure%2Fdatabases%2Frelational%2Fsqlalchemy%2FSqlAlchemyAdapter.py
---
base file url: https://github.com/topoteretes/cognee/raw/aeda1d8eba5ee982d9f5b62c3ab8686cf2280134/cognee/modules/data/methods/__init__.py
updated file url: https://github.com/topoteretes/cognee/raw/bec5a99a562daa07a1618b32ab0366a2b629d6ab/cognee%2Fmodules%2Fdata%2Fmethods%2F__init__.py
---
base file url: https://github.com/topoteretes/cognee/raw/aeda1d8eba5ee982d9f5b62c3ab8686cf22801

## PR Review Workflow with Multi-Agentic LLM

This notebook aims to develop an LLM-based system that orchestrates a multi-agentic workflow for analyzing pull request (PR) differences and providing code reviews aligned with best practices.

### Key Components:
- **Data Fetching**: Utilize existing PR details and file changes (e.g., `pr_details`, `pr_files`) to extract base and updated file URLs.
- **Difference Analysis**: Agents will compare file versions, focusing on additions, deletions, and modifications.
- **Review Agents**:
    - **Code Quality Agent**: Evaluates adherence to coding standards, readability, and efficiency.
    - **Security Agent**: Scans for potential vulnerabilities or insecure patterns.
    - **Performance Agent**: Assesses impact on performance and resource usage.
    - **Documentation Agent**: Checks for adequate comments, docstrings, and changelog updates.
- **Orchestrator LLM**: Coordinates agents, synthesizes feedback, and generates a comprehensive review summary.

### Next Steps:
1. Implement file diff parsing using libraries like `difflib` or external APIs.
2. Define agent prompts and integrate with an LLM (e.g., via OpenAI or local models).
3. Test on sample PR files to refine the workflow.
4. Ensure outputs include actionable suggestions and severity ratings.