In [1]:
from llm.factory import LLMInterface

# Shared LLM client, later handed to BestPracticesKnowledgeBase. It is built
# from the "bedrock" identifier and a Bedrock inference-profile ARN.
# NOTE(review): the ARN embeds a specific AWS account/region - confirm it is
# meant to be checked in as-is.
llm_client = LLMInterface(
    "bedrock",
    "arn:aws:bedrock:us-east-1:841162690310:inference-profile/us.anthropic.claude-3-7-sonnet-20250219-v1:0",
)

In [2]:
import os

from knowledgebase.best_practices import BestPracticesKnowledgeBase
from llm.embedding import get_text_embedding
import numpy as np
from knowledgebase.pr_reader import GitHubPRReader

# The GitHub token comes from the environment so it never lives in the notebook.
github_token = os.environ.get("GITHUB_TOKEN")
if github_token is None or github_token == "":
    # Stop the cell right away: the reader built below is constructed from this token.
    raise ValueError("Please set GITHUB_TOKEN environment variable")

def embedding_func(text: str) -> np.ndarray:
    """Embed ``text`` with the fixed "text-embedding-3-small" model.

    Single-argument wrapper around ``get_text_embedding``; this notebook
    passes it to ``BestPracticesKnowledgeBase`` as its embedding callable.

    Args:
        text: The string to embed.

    Returns:
        Whatever ``get_text_embedding`` returns for this model (annotated
        here as ``np.ndarray``).
    """
    model_name = "text-embedding-3-small"
    return get_text_embedding(text, model_name)

# Knowledge base built from the shared LLM client and the embedding callable.
bp = BestPracticesKnowledgeBase(
    llm_client,
    embedding_func,
)

# Reader constructed with the GitHub token read from the environment.
reader = GitHubPRReader(github_token)

In [None]:
# Pull request to load; the following cell reuses both names defined here.
github_pr_url = "https://github.com/pingcap/tidb/pull/57307"

# Fetch the PR through the reader and print its default formatting for a quick
# visual check. print() is deliberate: it shows the formatted text itself,
# not the repr of the returned value.
pr_details = reader.get_pr_details(github_pr_url)
print(pr_details.format())

In [None]:
# Feed the PR (rendered with the "markdown" format) into the knowledge base.
# NOTE(review): commit=True presumably persists the result - confirm against
# BestPracticesKnowledgeBase.
bp.add_pr_review_best_practices(
    github_pr_url,
    pr_details.format("markdown"),
    commit=True,
)

In [3]:
# External reference document: the protobuf style guide (style.md) on GitHub.
github_file_url = "https://github.com/protocolbuffers/protocolbuffers.github.io/blob/main/content/programming-guides/style.md"
# Fetch the file through the reader; the next cell passes `content` on to the
# knowledge base. (Return type of read_github_file is not visible here.)
content = reader.read_github_file(github_file_url)

In [4]:
# Register the fetched file as an external best practice. Kept as the cell's
# last bare expression so the returned value is displayed; the recorded output
# shows a dict with 'tag' and 'summary' keys.
# NOTE(review): commit=True presumably persists the entry - confirm.
bp.add_external_best_practices(
    github_file_url,
    content,
    commit=True,
)

insert best practice: {'tag': 'protobuf/style_guide', 'summary': 'Part 1: A comprehensive style guide for .proto files that establishes conventions for file structure, naming patterns, and best practices to ensure consistency and readability across protocol buffer definitions.\n\nPart 2: Key recommendations include: using lower_snake_case for file names and fields; TitleCase for messages, services and enums; UPPER_SNAKE_CASE for enum values; avoiding underscores at start/end of identifiers; structuring files in a specific order (license, overview, syntax, package, imports, options); prefixing enum values with enum name; avoiding required fields and groups; and ensuring the first enum value is zero with _UNSPECIFIED suffix.'}


2025-04-04 18:19:29,695 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'tag': 'protobuf/style_guide',
 'summary': 'Part 1: A comprehensive style guide for .proto files that establishes conventions for file structure, naming patterns, and best practices to ensure consistency and readability across protocol buffer definitions.\n\nPart 2: Key recommendations include: using lower_snake_case for file names and fields; TitleCase for messages, services and enums; UPPER_SNAKE_CASE for enum values; avoiding underscores at start/end of identifiers; structuring files in a specific order (license, overview, syntax, package, imports, options); prefixing enum values with enum name; avoiding required fields and groups; and ensuring the first enum value is zero with _UNSPECIFIED suffix.'}

In [5]:
# Query the knowledge base with a free-text question. In the recorded result,
# source URLs map to lists of entries carrying 'tag', 'guideline' and 'distance'.
bp.find_best_practices("How to update protobuf")

2025-04-04 18:19:35,121 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


{'https://github.com/pingcap/tipb/pull/352': [{'tag': 'proto/field/deprecation',
   'guideline': {'confidence': 'high',
    'evidence': "breezewish: 'As a deprecated column, please remove `[(gogoproto.nullable) = false]`.' and 'Please review protobuf documentation about how to make changes: https://protobuf.dev/programming-guides/proto3/#updating'",
    'guidelines': "When deprecating fields in protocol buffers, follow proper deprecation practices: 1) Mark the field as deprecated with the reserved keyword, 2) Keep the field number reserved, 3) Remove any non-nullable constraints from deprecated fields, 4) Add clear documentation about why it's deprecated and what replaces it.",
    'tag': 'proto/field/deprecation'},
   'distance': 0.5409894087289281},
  {'tag': 'proto/documentation',
   'guideline': {'confidence': 'high',
    'evidence': "breezewish: 'Please update the comment, because `column` will be always set, with carried type information, no matter or not enable_distance_proj is 