In [1]:
from bug_crawler.github_fetcher import fetch_github_issues

In [4]:
from bug_crawler.openai_client import call_openai

In [5]:
# ------------------------
# CONFIGURATION
# ------------------------
# Target repository: https://github.com/elastic/elasticsearch
import os  # imported in this cell because it is only needed for the token lookup below

OWNER = "elastic"          # GitHub username or org
REPO = "elasticsearch"     # Repository name
STATE = "closed"           # "open", "closed", or "all"
PER_PAGE = 50              # Results per page (max 100 per page)
MAX_PAGES = 5              # Pages to fetch: at most PER_PAGE * MAX_PAGES = 250 issues
# Optional GitHub Personal Access Token. It is read from the GITHUB_TOKEN
# environment variable so the secret is never saved inside the notebook;
# TOKEN stays None when the variable is unset or empty.
TOKEN = os.environ.get("GITHUB_TOKEN") or None
start_date = "2023-01-01"  # ISO 8601 date, e.g. "2023-10-01"
# ISO 8601 date such as "2025-09-30", or None.
# NOTE(review): None presumably leaves the range open-ended — confirm in fetch_github_issues.
end_date = None
keywords = "slowdown"      # Search term(s) forwarded to fetch_github_issues

In [6]:
print("Fetching issues using the Search API...")
try:
    # Query OWNER/REPO with the filters from the configuration cell
    # (state, start/end date, keywords) and also pull each issue's comments.
    issues_in_range = fetch_github_issues(
        owner=OWNER,
        repo=REPO,
        state=STATE,
        per_page=PER_PAGE,
        max_pages=MAX_PAGES,
        start_date=start_date,
        end_date=end_date,
        keywords=keywords,
        token=TOKEN,
        include_comments=True
    )
except Exception as e:
    # Report the failure, then re-raise. Later cells read `issues_in_range`;
    # swallowing the error here would leave it undefined (or stale from a
    # previous run) and the real cause would be hidden behind a later failure.
    print(f"An error occurred: {e}")
    raise

# One summary line per issue: number, title and creation timestamp.
print(f"\nFound {len(issues_in_range)} issues in the date range.")
for issue in issues_in_range:
    print(f"  #{issue['number']}: {issue['title']} (Created: {issue['created_at']})")

Fetching issues using the Search API...

Found 7 issues in the date range.
  #135340: Possible performance regression in 9.1.x (TSDS) (Created: 2025-09-24T10:46:49Z)
  #118623: Performance degradation after upgrading from 8.6.1 to 8.16.1 (Created: 2024-12-12T21:38:03Z)
  #112781: Lazy data stream rollover is not triggered when using reroute (Created: 2024-09-12T02:47:46Z)
  #102063: During data ingestion, significants amount of CPU and memory is used to parse date strings (>10% realistic for some workloads) (Created: 2023-11-13T10:26:27Z)
  #101763: ESQL: unexpected count(*) query planning slowdown at scale (Created: 2023-11-03T08:30:36Z)
  #99409: [Ml] CircuitBreakingException when deploying the ELSER model (Created: 2023-09-11T09:14:13Z)
  #96349: Fetching many fields takes much more time than retrieving _source (Created: 2023-05-25T13:57:23Z)


In [7]:
# Alias the fetched results under a shorter name for the cells below.
issues = issues_in_range
# Bare last expression so the notebook displays how many issues were fetched.
len(issues)

7

In [9]:
# Use the first fetched issue as the sample to classify.
issue = issues[0]
# GitHub can return a null body for an issue with no description, and the
# comments field may likewise be absent or None. Fall back to an empty string
# so the literal text "None" never ends up in the prompt.
issue_body = issue.get('body') or ''
issue_comments = issue.get('comments_thread_text') or ''
# Flatten title, description and comment thread into one text blob for the prompt.
issue_text = f"Title: {issue['title']}\n\nDescription: {issue_body}\n\nComments: {issue_comments}"
# Show the raw issue record for inspection.
print(issue)

{'number': 135340, 'title': 'Possible performance regression in 9.1.x (TSDS)', 'body': 'Hi, guys!\n\nOn a multi-node cluster, indexing of TSDS (otel-format metrics) is stable on 9.0.x.\nAfter upgrading to 9.1.4, the indexing rate drops and the write queue grows, making the cluster unstable.\n\nProfiling shows significant time spent in dv.writeField (see pic. 1)\nDisabling the new 9.1.x optimization with:\n\n```ini\n-Dorg.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesConsumer.enableOptimizedMerge=false\n```\n\nrestores normal performance.\n\n<img width="1879" height="916" alt="Image" src="https://github.com/user-attachments/assets/0d519768-cc86-4113-83c9-9e7b6a253a2e" />\n\n<img width="890" height="517" alt="Image" src="https://github.com/user-attachments/assets/5aecaa33-fc2b-4d70-bc54-20f9b2ea08c5" />\n<img width="922" height="294" alt="Image" src="https://github.com/user-attachments/assets/13c028f7-84ee-4ba9-b519-b8e945775432" />\n\n\nCan you help figure out the root cause?\n

In [50]:
# Load the filter prompt template. The encoding is pinned to UTF-8 so the
# template decodes the same way regardless of the machine's locale default.
with open("bug_crawler/prompt_template/filter_application_resource.txt", "r", encoding="utf-8") as template_file:
    FILTER_PROMPT = template_file.read()


# Fill the template's `app_name` and `issue_text` placeholders and send the
# resulting prompt to the model; `response` holds whatever call_openai returns.
response = call_openai(FILTER_PROMPT.format(app_name='Elasticsearch', issue_text=issue_text))

In [51]:
# Show the value returned by call_openai for the sample issue.
print(response)

{
  "application_resource": "yes"
}
