In [1]:
from bug_crawler.github_fetcher import fetch_github_issues

In [2]:
from bug_crawler.openai_client import call_openai

In [3]:
# ------------------------
# CONFIGURATION
# ------------------------
# https://github.com/elastic/elasticsearch
OWNER = "elastic"         # GitHub username or org
REPO = "elasticsearch"         # Repository name
STATE = "closed"           # "open", "closed", or "all"
PER_PAGE = 50            # Max 100 per page
MAX_PAGES = 5           # How many pages to fetch (50*3 = 150 issues)
TOKEN = None             # Put a GitHub Personal Access Token here (optional)
start_date = "2024-01-01"  # ISO 8601 format, e.g. "2023-10-01"
end_date = None # "2025-09-30"    # ISO 8601 format,
keywords = None # "slowdown"

In [4]:
print("Fetching issues using the Search API...")
try:
    # Find issues created between Oct 1 and Oct 15, 2025
    issues_in_range = fetch_github_issues(
        owner=OWNER,
        repo=REPO,
        state=STATE,      # Search for closed issues
        per_page=PER_PAGE,
        max_pages=MAX_PAGES,
        start_date=start_date,
        end_date=end_date,
        keywords=keywords,
        token=TOKEN,
        include_comments=True
    )

    print(f"\nFound {len(issues_in_range)} issues in the date range.")
    for issue in issues_in_range:
        print(f"  #{issue['number']}: {issue['title']} (Created: {issue['created_at']})")

except Exception as e:
    print(f"An error occurred: {e}")

Fetching issues using the Search API...
An error occurred: GitHub API error: 429 {
  "documentation_url": "https://developer.github.com/v3/#abuse-rate-limits",
  "message": "You have triggered an abuse detection mechanism. Please wait a few minutes before you try again."
}



In [45]:
issues = issues_in_range
len(issues)

3

In [46]:
issue = issues[0]
issue_text = f"Title: {issue['title']}\n\nDescription: {issue['body']}\n\nComments: {issue.get('comments_thread_text', '')}"
print(issue_text)

Title: Possible performance regression in 9.1.x (TSDS)

Description: Hi, guys!

On a multi-node cluster, indexing of TSDS (otel-format metrics) is stable on 9.0.x.
After upgrading to 9.1.4, the indexing rate drops and the write queue grows, making the cluster unstable.

Profiling shows significant time spent in dv.writeField (see pic. 1)
Disabling the new 9.1.x optimization with:

```ini
-Dorg.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesConsumer.enableOptimizedMerge=false
```

restores normal performance.

<img width="1879" height="916" alt="Image" src="https://github.com/user-attachments/assets/0d519768-cc86-4113-83c9-9e7b6a253a2e" />

<img width="890" height="517" alt="Image" src="https://github.com/user-attachments/assets/5aecaa33-fc2b-4d70-bc54-20f9b2ea08c5" />
<img width="922" height="294" alt="Image" src="https://github.com/user-attachments/assets/13c028f7-84ee-4ba9-b519-b8e945775432" />


Can you help figure out the root cause?

(using java 21)

Comments: Comment by e

In [50]:
with open("bug_crawler/prompt_template/filter_application_resource.txt", "r") as f:
    FILTER_PROMPT = f.read()


response = call_openai(FILTER_PROMPT.format(app_name='Elasticsearch', issue_text=issue_text))

In [51]:
print(response)

{
  "application_resource": "yes"
}
