diff --git a/.github/prompts/issue-deduplicator.txt b/.github/prompts/issue-deduplicator.txt new file mode 100644 index 0000000000..854e3c4487 --- /dev/null +++ b/.github/prompts/issue-deduplicator.txt @@ -0,0 +1,19 @@ +You are an assistant that triages new GitHub issues by identifying potential duplicates. + +You will receive the following JSON files located in the current working directory: +- `codex-current-issue.json`: JSON object describing the newly created issue (fields: number, title, body). +- `codex-existing-issues.json`: JSON array of recent issues (each element includes number, title, body, createdAt). + +Instructions: +- Load both files as JSON and review their contents carefully. +- Compare the current issue against the existing issues to find up to five that appear to describe the same underlying problem or request. The existing-issues list may include the current issue itself; never report the current issue's own number as a duplicate. +- Only consider an issue a potential duplicate if there is a clear overlap in symptoms, feature requests, reproduction steps, or error messages. +- Prioritize newer issues when similarity is comparable. +- Ignore pull requests, and ignore issues whose similarity is tenuous. +- When unsure, prefer returning fewer matches. + +Output requirements: +- Respond with a JSON array of issue numbers (integers), ordered from most likely duplicate to least. +- Include at most five numbers. +- If you find no plausible duplicates, respond with `[]`. +- Do not emit any additional commentary, text, or keys beyond the JSON array. 
diff --git a/.github/workflows/issue-deduplicator.yml b/.github/workflows/issue-deduplicator.yml
new file mode 100644
index 0000000000..05453450e4
--- /dev/null
+++ b/.github/workflows/issue-deduplicator.yml
@@ -0,0 +1,124 @@
+name: Issue Deduplicator
+
+on:
+  issues:
+    types:
+#      - opened - disabled while testing
+      - labeled
+
+jobs:
+  gather-duplicates:
+    name: Identify potential duplicates
+    if: ${{ github.event.action == 'opened' || (github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate') }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      # `gh issue list` needs this on private repos: scopes not listed here are set to none.
+      issues: read
+    outputs:
+      codex_output: ${{ steps.codex.outputs.final_message }}
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Prepare Codex inputs
+        env:
+          GH_TOKEN: ${{ github.token }}
+          # Untrusted issue content is passed through the environment, never
+          # interpolated into the script text, so it cannot inject shell code.
+          ISSUE_JSON: ${{ toJson(github.event.issue) }}
+        run: |
+          set -eo pipefail
+
+          CURRENT_ISSUE_FILE=codex-current-issue.json
+          EXISTING_ISSUES_FILE=codex-existing-issues.json
+
+          printf '%s' "$ISSUE_JSON" \
+            | jq '{number, title, body}' \
+            > "$CURRENT_ISSUE_FILE"
+
+          # Drop the current issue from the candidate list so it can never be
+          # reported as a duplicate of itself.
+          gh issue list --repo "$GITHUB_REPOSITORY" \
+            --json number,title,body,createdAt \
+            --limit 1000 \
+            --state all \
+            --search "sort:created-desc" \
+            | jq --slurpfile current "$CURRENT_ISSUE_FILE" \
+                'map(select(.number != $current[0].number))' \
+            > "$EXISTING_ISSUES_FILE"
+
+      - id: codex
+        uses: openai/codex-action@main
+        with:
+          openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
+          prompt_file: .github/prompts/issue-deduplicator.txt
+          require_repo_write: false
+
+  comment-on-issue:
+    name: Comment with potential duplicates
+    needs: gather-duplicates
+    # `!cancelled()` lets this job run after a failed analysis so the trigger
+    # label is still removed; the comment step itself requires success.
+    if: ${{ !cancelled() && needs.gather-duplicates.result != 'skipped' }}
+    runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      issues: write
+    steps:
+      - name: Comment on issue
+        if: ${{ needs.gather-duplicates.result == 'success' }}
+        uses: actions/github-script@v7
+        env:
+          CODEX_OUTPUT: ${{ needs.gather-duplicates.outputs.codex_output }}
+        with:
+          github-token: ${{ github.token }}
+          script: |
+            const raw = (process.env.CODEX_OUTPUT ?? '').trim();
+            if (raw === '') {
+              core.info('Codex produced no output; nothing to comment.');
+              return;
+            }
+
+            let parsed;
+            try {
+              parsed = JSON.parse(raw);
+            } catch (error) {
+              core.info(`Codex output was not valid JSON. Raw output: ${raw}`);
+              return;
+            }
+
+            if (!Array.isArray(parsed)) {
+              core.info(`Codex output was not a JSON array. Raw output: ${raw}`);
+              return;
+            }
+
+            // Keep only distinct, valid issue numbers other than this issue,
+            // capped at the five the prompt allows.
+            const currentNumber = context.payload.issue.number;
+            const numbers = [...new Set(parsed)]
+              .filter((value) => Number.isInteger(value) && value > 0 && value !== currentNumber)
+              .slice(0, 5);
+
+            if (numbers.length === 0) {
+              core.info('No potential duplicates found; skipping comment.');
+              return;
+            }
+
+            const lines = ['Potential duplicates detected:', ...numbers.map((value) => `- #${value}`)];
+
+            await github.rest.issues.createComment({
+              owner: context.repo.owner,
+              repo: context.repo.repo,
+              issue_number: currentNumber,
+              body: lines.join("\n"),
+            });
+
+      - name: Remove codex-deduplicate label
+        if: ${{ always() && github.event.action == 'labeled' && github.event.label.name == 'codex-deduplicate' }}
+        env:
+          GH_TOKEN: ${{ github.token }}
+          ISSUE_NUMBER: ${{ github.event.issue.number }}
+        run: |
+          # No checkout happens in this job, so the repository must be named explicitly.
+          gh issue edit "$ISSUE_NUMBER" --repo "$GITHUB_REPOSITORY" --remove-label codex-deduplicate || true
+          echo "Attempted to remove label: codex-deduplicate"
diff --git a/.github/workflows/issue-labeler.yml b/.github/workflows/issue-labeler.yml
index dfbd17c27f..24b9276db0 100644
--- a/.github/workflows/issue-labeler.yml
+++ b/.github/workflows/issue-labeler.yml
@@ -28,6 +28,7 @@ jobs:
         with:
           openai_api_key: ${{ secrets.CODEX_OPENAI_API_KEY }}
           prompt_file: .github/prompts/issue-labeler.txt
+          require_repo_write: false
 
   apply-labels:
     name: Apply labels from Codex output