Skip to content
This repository has been archived by the owner on Jul 6, 2024. It is now read-only.

🛠️ GitHub Issue 自動レビュー機能とコードの分離 #113

Merged
merged 33 commits into from
Jun 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
64ae48e
🚀 [feat] 新しい `.SourceSageignore` ファイルの追加
Sunwood-ai-labs Jun 23, 2024
a3d48c0
🛠️ [feat] GitHub Issue 自動レビュー スクリプトの追加
Sunwood-ai-labs Jun 23, 2024
eca5d3a
Revert "🛠️ [feat] GitHub Issue 自動レビュー スクリプトの追加"
Sunwood-ai-labs Jun 23, 2024
bdb11af
🔧 [chore] Issue Review ワークフローのリファクタリング
Sunwood-ai-labs Jun 23, 2024
b2dc00a
🛠️ [feat] GitHub Issue 自動レビュー スクリプトの追加
Sunwood-ai-labs Jun 23, 2024
ea92569
🧹 [chore] 不要なファイルとディレクトリを `.gitignore` に追加
Sunwood-ai-labs Jun 23, 2024
7a10718
Merge branch 'main' into develop
Sunwood-ai-labs Jun 23, 2024
413e65f
📝 [style] .SourceSageignoreの末尾に改行を追加
Sunwood-ai-labs Jun 23, 2024
87bec29
🐛 [fix] review_issue.pyのファイル末尾に改行を追加
Sunwood-ai-labs Jun 23, 2024
86c5477
🔧 [chore] issue-review.ymlのファイル末尾に改行を追加
Sunwood-ai-labs Jun 23, 2024
96fc5da
Merge branch 'develop'
Sunwood-ai-labs Jun 23, 2024
8bb2be5
🌍 [chore] グローバルな.ignore設定を反映してローカル設定を整理
Sunwood-ai-labs Jun 23, 2024
f0407af
🌐 [chore] .gitignoreの内容をグローバル設定に合わせて最適化
Sunwood-ai-labs Jun 23, 2024
862acec
Merge branch 'develop'
Sunwood-ai-labs Jun 23, 2024
ba3c3fd
📦 [chore] 新規 requirements.txt ファイルの作成
Sunwood-ai-labs Jun 23, 2024
7ae54e4
🛠️ [chore] 依存関係のインストール手順を単一ファイル参照に変更
Sunwood-ai-labs Jun 23, 2024
106c6f2
Merge branch 'develop'
Sunwood-ai-labs Jun 23, 2024
ed2baf3
📌 (#113)[chore] ライブラリの依存関係を固定バージョンに更新
Sunwood-ai-labs Jun 24, 2024
8b5ead5
🛠️ (#113)[refactor] issue処理スクリプトのリファクタリングと機能強化
Sunwood-ai-labs Jun 24, 2024
e9bed7f
Merge branch 'develop'
Sunwood-ai-labs Jun 24, 2024
a62640a
🛠️ [refactor] スクリプトのパスを更新
Sunwood-ai-labs Jun 24, 2024
f594de2
🔧 [chore] GitHub Issue Reviewワークフローの環境変数追加
Sunwood-ai-labs Jun 24, 2024
13c8149
🚀 [feat] 開発者用Qdrant環境初期化スクリプトの追加
Sunwood-ai-labs Jun 24, 2024
511c8ca
🛠️ [refactor] .env ファイル読み込み条件の改善
Sunwood-ai-labs Jun 24, 2024
31f4f94
🔧 [chore] .gitignore に .env ファイルを追加
Sunwood-ai-labs Jun 24, 2024
28075d6
Merge branch 'main' into develop
Sunwood-ai-labs Jun 24, 2024
54f5575
🔧 [fix] エラーメッセージ出力の改善
Sunwood-ai-labs Jun 24, 2024
512fdfd
🛠 [feat] GitHub設定スクリプトのロギング機能を強化
Sunwood-ai-labs Jun 24, 2024
75dbe21
📝 [chore] issue-reviewワークフローのPythonスクリプト実行コマンドに改行を追加
Sunwood-ai-labs Jun 24, 2024
4b98678
Merge branch 'develop'
Sunwood-ai-labs Jun 24, 2024
2ea32ce
🔧 [refactor] GitHubトークン取得ロジックの簡素化
Sunwood-ai-labs Jun 24, 2024
71de80b
🔧 [chore] .env.example ファイルの更新
Sunwood-ai-labs Jun 24, 2024
3371c69
🔧 [chore] .env.example の GITHUB_TOKENの行末を修正
Sunwood-ai-labs Jun 24, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
QD_API_KEY=XXX
OPENAI_API_KEY=sk-XXX
QD_URL=https://XXX.gcp.cloud.qdrant.io

GITHUB_EVENT_ISSUE_NUMBER=5
GITHUB_REPOSITORY=XXXX/election2024
GITHUB_TOKEN=ghp_XXX
40 changes: 40 additions & 0 deletions .github/developer/initialize_qdrant_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from qdrant_client import QdrantClient
from qdrant_client.http import models
from dotenv import load_dotenv
import os

# Developer helper: connect to Qdrant, list the existing collections, try to
# create "issue_collection", and print its details. Every step is best-effort
# and reports its own error so the remaining steps still run.
load_dotenv()

QDRANT_URL = os.getenv('QD_URL')
QDRANT_API_KEY = os.getenv('QD_API_KEY')

# Show only the first and last 5 characters of the API key. A missing key must
# not crash the script, and a short key must not be printed in full.
if not QDRANT_API_KEY:
    print("API Key: (not set)")
elif len(QDRANT_API_KEY) > 10:
    print(f"API Key: {QDRANT_API_KEY[:5]}...{QDRANT_API_KEY[-5:]}")
else:
    print("API Key: *****")

print(f"Attempting to connect to Qdrant at: {QDRANT_URL}")
client = QdrantClient(url=QDRANT_URL, api_key=QDRANT_API_KEY)

try:
    # List the collections first (the original author notes this usually
    # works with lower privileges than creating one).
    collections = client.get_collections()
    print("Existing collections:")
    print(collections)
except Exception as e:
    print(f"Error getting collections: {str(e)}")

try:
    # Try to create the collection used by the issue-review script.
    client.create_collection(
        collection_name="issue_collection",
        vectors_config=models.VectorParams(size=1536, distance=models.Distance.COSINE),
    )
    print("Collection 'issue_collection' created successfully")
except Exception as e:
    print(f"Error creating collection: {str(e)}")

# Try to fetch the details of the (possibly pre-existing) collection.
try:
    collection_info = client.get_collection("issue_collection")
    print("Collection info:")
    print(collection_info)
except Exception as e:
    print(f"Error getting collection info: {str(e)}")
4 changes: 4 additions & 0 deletions .github/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
openai==1.35.3
PyGithub==2.3.0
qdrant-client==1.9.2
regex==2024.5.15
221 changes: 221 additions & 0 deletions .github/scripts/review_issue.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import os
from typing import List, Dict, Any
import regex as re
from github import Github
from github.Issue import Issue
from github.Repository import Repository
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct
import openai

# Load a local .env file only when not running inside GitHub Actions,
# i.e. when the GITHUB_ACTIONS environment variable is unset or empty.
if not os.getenv('GITHUB_ACTIONS'):
    from dotenv import load_dotenv
    load_dotenv()

# Constants
# Model name passed to the OpenAI embeddings endpoint.
EMBEDDING_MODEL = "text-embedding-3-small"
# Qdrant collection that stores one point per issue.
COLLECTION_NAME = "issue_collection"
# Model name passed to the OpenAI chat-completions endpoint.
GPT_MODEL = "gpt-4o"
# Maximum number of similar issues kept from a search.
MAX_RESULTS = 3

class Config:
    """Runtime settings for the issue-review script, read from environment variables.

    Attributes:
        github_token: Value of ``GITHUB_TOKEN`` (``None`` when unset).
        qd_api_key: Value of ``QD_API_KEY`` (``None`` when unset).
        qd_url: Value of ``QD_URL`` (``None`` when unset).
        github_repo: Value of ``GITHUB_REPOSITORY`` (``None`` when unset).
        issue_number: ``GITHUB_EVENT_ISSUE_NUMBER`` as an ``int``, or ``None``
            when the variable is unset or blank.
    """

    def __init__(self):
        """Read every setting from the environment and print a status line for each.

        Only the presence of each secret is printed, never its value.

        Raises:
            ValueError: If ``GITHUB_EVENT_ISSUE_NUMBER`` is set to a non-integer.
        """
        print("設定の初期化を開始します...")
        self.github_token = os.getenv("GITHUB_TOKEN")
        if self.github_token is None:
            print("GITHUB_TOKENが見つかりません ...")
        else:
            print("GITHUB_TOKENからトークンを正常に取得しました。")

        self.qd_api_key = os.getenv("QD_API_KEY")
        print("QD_API_KEYの状態:", "取得済み" if self.qd_api_key else "見つかりません")

        self.qd_url = os.getenv("QD_URL")
        print("QD_URLの状態:", "取得済み" if self.qd_url else "見つかりません")

        self.github_repo = os.getenv("GITHUB_REPOSITORY")
        print("GITHUB_REPOSITORYの状態:", "取得済み" if self.github_repo else "見つかりません")

        # Normalise unset / blank values to None so the attribute is always
        # either an int or None (never an empty string).
        raw_issue_number = (os.getenv("GITHUB_EVENT_ISSUE_NUMBER") or "").strip()
        if raw_issue_number:
            self.issue_number = int(raw_issue_number)
            print(f"GITHUB_EVENT_ISSUE_NUMBER: {self.issue_number}")
        else:
            self.issue_number = None
            print("GITHUB_EVENT_ISSUE_NUMBERが見つかりません")
        print("設定の初期化が完了しました。")

class GithubHandler:
    """Wrapper around the GitHub client for one repository and one issue.

    Attributes:
        github: The ``Github`` client built from the configured token.
        repo: The repository named by ``config.github_repo``.
        issue: The issue numbered ``config.issue_number`` in that repository.
    """

    def __init__(self, config: "Config"):
        """Connect to GitHub and load the repository and issue named in *config*."""
        self.github = Github(config.github_token)
        self.repo = self.github.get_repo(config.github_repo)
        self.issue = self.repo.get_issue(config.issue_number)

    def create_labels(self):
        """Create the ``toxic`` and ``duplicated`` labels on a best-effort basis.

        Each label is created in its own ``try`` so that a failure on one
        (for example because it already exists) does not prevent the other
        from being created. Failures are reported and otherwise ignored.
        """
        for name, color in (("toxic", "ff0000"), ("duplicated", "708090")):
            try:
                self.repo.create_label(name=name, color=color)
            except Exception as exc:
                # Deliberately non-fatal: label creation must never stop the review.
                print(f"Skipped creating label '{name}': {exc}")

    def add_label(self, label: str):
        """Add *label* to the issue."""
        self.issue.add_to_labels(label)

    def close_issue(self):
        """Close the issue."""
        self.issue.edit(state="closed")

    def add_comment(self, comment: str):
        """Post *comment* on the issue."""
        self.issue.create_comment(comment)

class ContentModerator:
    """Decide whether issue text or an embedded image is inappropriate, using OpenAI."""

    # Markdown image: ![alt](https://url) with an optional title after the URL.
    # The alt text may be empty or contain spaces; the URL stops at whitespace
    # or ")" so that two adjacent images are not merged into one URL.
    _MARKDOWN_IMAGE_RE = re.compile(r"!\[[^\]]*\]\(\s*(https://[^\s)]+)(?:\s[^)]*)?\)")
    # HTML image tag: <img ... src="https://url" ...>
    _HTML_IMAGE_RE = re.compile(
        r"""<img\b[^>]*?\bsrc\s*=\s*["'](https://[^"'\s>]+)["']""",
        re.IGNORECASE,
    )

    def __init__(self, openai_client: "openai.Client"):
        """Store the OpenAI client used for moderation and image checks."""
        self.openai_client = openai_client

    def validate_image(self, text: str) -> bool:
        """Return True when the first image embedded in *text* is judged inappropriate.

        Returns False when *text* contains no image. If the image check itself
        fails, the image is treated as inappropriate (fail closed) and the
        error is printed.
        """
        image_url = self._extract_image_url(text)
        if not image_url:
            return False

        prompt = "この画像が暴力的、もしくは性的な画像の場合trueと返してください。"
        try:
            response = self.openai_client.chat.completions.create(
                model=GPT_MODEL,
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {"type": "image_url", "image_url": {"url": image_url}},
                        ],
                    }
                ],
                max_tokens=1200,
            )
            return "true" in response.choices[0].message.content.lower()
        except Exception as exc:
            # Fail closed: an image that cannot be checked is not let through.
            print(f"Image moderation failed; treating the image as inappropriate: {exc}")
            return True

    def judge_violation(self, text: str) -> bool:
        """Return True when the text is flagged by moderation or its image is inappropriate."""
        response = self.openai_client.moderations.create(input=text)
        return response.results[0].flagged or self.validate_image(text)

    @staticmethod
    def _extract_image_url(text: str) -> str:
        """Return the first https image URL found in *text*, or "" when there is none.

        Markdown images are looked for first, then HTML ``<img>`` tags.
        Only one URL is returned, so later images are not inspected.
        """
        for pattern in (ContentModerator._MARKDOWN_IMAGE_RE, ContentModerator._HTML_IMAGE_RE):
            match = pattern.search(text or "")
            if match:
                return match[1]
        return ""

class QdrantHandler:
    """Store issues in Qdrant and look up similar ones by embedding."""

    def __init__(self, client: QdrantClient, openai_client: openai.Client):
        """Keep the Qdrant client and the OpenAI client used to embed text."""
        self.client = client
        self.openai_client = openai_client

    def add_issue(self, text: str, issue_number: int):
        """Upsert *text* into the collection, using *issue_number* as the point id."""
        embedding = self._create_embedding(text)
        point = PointStruct(id=issue_number, vector=embedding, payload={"text": text})
        self.client.upsert(collection_name=COLLECTION_NAME, points=[point])

    def search_similar_issues(self, text: str) -> List[Any]:
        """Return at most ``MAX_RESULTS`` stored issues most similar to *text*.

        Each result is a search hit exposing ``id`` and ``payload`` (the
        payload holds the stored issue under the ``"text"`` key).
        """
        embedding = self._create_embedding(text)
        # Ask the server for only as many hits as are used instead of
        # fetching the default page and slicing it client-side.
        return self.client.search(
            collection_name=COLLECTION_NAME,
            query_vector=embedding,
            limit=MAX_RESULTS,
        )

    def _create_embedding(self, text: str) -> List[float]:
        """Return the embedding vector of *text* from the OpenAI embeddings API."""
        result = self.openai_client.embeddings.create(input=[text], model=EMBEDDING_MODEL)
        return result.data[0].embedding

class IssueProcessor:
    """Moderate a new issue, detect duplicates, and record it in Qdrant."""

    # Matches an "id:<number>" answer; tolerates spaces and a full-width colon.
    _ID_RE = re.compile(r"id\s*[::]\s*(\d+)", re.IGNORECASE)

    def __init__(
        self,
        github_handler: "GithubHandler",
        content_moderator: "ContentModerator",
        qdrant_handler: "QdrantHandler",
        openai_client: "openai.Client",
    ):
        """Keep the collaborators used while processing the issue."""
        self.github_handler = github_handler
        self.content_moderator = content_moderator
        self.qdrant_handler = qdrant_handler
        self.openai_client = openai_client

    def process_issue(self, issue_content: str):
        """Process one issue.

        A violating issue is labelled, commented on and closed. Otherwise the
        issue is compared with similar stored issues: a duplicate is labelled
        and commented on, and a non-duplicate is stored in Qdrant.
        """
        if self.content_moderator.judge_violation(issue_content):
            self._handle_violation()
            return

        issue_number = self.github_handler.issue.number
        # An issue that is already stored would otherwise match itself and be
        # reported as its own duplicate.
        similar_issues = [
            candidate
            for candidate in self.qdrant_handler.search_similar_issues(issue_content)
            if candidate.id != issue_number
        ]

        duplicate_id = self._check_duplication(issue_content, similar_issues) if similar_issues else 0
        if duplicate_id:
            self._handle_duplication(duplicate_id)
        else:
            self.qdrant_handler.add_issue(issue_content, issue_number)

    def _handle_violation(self):
        """Label the issue as toxic, post a warning comment, and close it."""
        self.github_handler.add_label("toxic")
        self.github_handler.add_comment("不適切な投稿です。アカウントBANの危険性があります。")
        self.github_handler.close_issue()

    def _check_duplication(self, issue_content: str, similar_issues: List[Any]) -> int:
        """Ask the chat model which of *similar_issues* duplicates the issue.

        Returns:
            The id of the duplicated issue, or 0 when there is none or the
            model's answer cannot be interpreted.
        """
        prompt = self._create_duplication_check_prompt(issue_content, similar_issues)
        completion = self.openai_client.chat.completions.create(
            model=GPT_MODEL,
            max_tokens=1024,
            messages=[{"role": "system", "content": prompt}]
        )
        review = completion.choices[0].message.content
        candidate_ids = {issue.id for issue in similar_issues}
        return self._parse_duplicate_id(review, candidate_ids)

    @staticmethod
    def _parse_duplicate_id(review, candidate_ids) -> int:
        """Extract the duplicate id from the model's reply *review*.

        Accepts ``id:12``, ``id: 12`` (with surrounding whitespace or text) or
        a bare number. When several ``id:`` answers appear, the last one wins.
        Returns 0 for an empty reply, an unparseable reply, ``0``, or an id
        that is not one of *candidate_ids* (the model may invent ids).
        """
        if not review:
            return 0
        found = IssueProcessor._ID_RE.findall(review)
        if found:
            token = found[-1]
        else:
            token = review.strip()
            if not token.isdecimal():
                return 0
        duplicate_id = int(token)
        if duplicate_id == 0 or duplicate_id not in candidate_ids:
            return 0
        return duplicate_id

    def _handle_duplication(self, duplicate_id: int):
        """Label the issue as duplicated and point to issue *duplicate_id*."""
        self.github_handler.add_label("duplicated")
        self.github_handler.add_comment(f"#{duplicate_id} と重複しているかもしれません")

    @staticmethod
    def _create_duplication_check_prompt(issue_content: str, similar_issues: List[Any]) -> str:
        """Build the prompt listing the new issue and every candidate's id and text."""
        similar_issues_text = "\n".join([f'id:{issue.id}\n内容:{issue.payload["text"]}' for issue in similar_issues])
        return f"""
以下は市民から寄せられた政策提案です。
{issue_content}
この投稿を読み、以下の過去提案の中に重複する提案があるかを判断してください。
{similar_issues_text}
重複する提案があればそのidを出力してください。
もし存在しない場合は0と出力してください。

[出力形式]
id:0
"""

def setup():
    """Build every collaborator needed to review the issue.

    Reads the configuration, connects to GitHub (creating the labels on the
    way), and wires the OpenAI and Qdrant clients into their handlers.

    Returns:
        A 4-tuple ``(github_handler, content_moderator, qdrant_handler,
        openai_client)``.
    """
    settings = Config()

    gh = GithubHandler(settings)
    gh.create_labels()

    llm = openai.Client()
    moderator = ContentModerator(llm)

    vector_store = QdrantHandler(
        QdrantClient(url=settings.qd_url, api_key=settings.qd_api_key),
        llm,
    )

    return gh, moderator, vector_store, llm

def main():
    """Review the issue named by the environment: moderate, de-duplicate, store."""
    github_handler, content_moderator, qdrant_handler, openai_client = setup()
    issue_processor = IssueProcessor(github_handler, content_moderator, qdrant_handler, openai_client)
    issue = github_handler.issue
    # An issue without a description has body None; use an empty string so the
    # literal text "None" is not moderated, embedded and stored.
    issue_content = f"{issue.title}\n{issue.body or ''}"
    issue_processor.process_issue(issue_content)


if __name__ == "__main__":
    main()
Loading