In [2]:
import requests
import csv
import time
from tenacity import retry, stop_after_attempt, wait_fixed

In [3]:


@retry(
    stop=stop_after_attempt(5),
    wait=wait_fixed(3),
    reraise=True  # Once attempts are exhausted, re-raise the last underlying exception instead of tenacity.RetryError
)
def fetch_closed_issues(repo_link):
    """Fetch all closed issues of a GitHub repository through the REST API.

    The whole function is wrapped in ``@retry``: any exception raised inside
    it (for example a connection error or timeout from ``requests.get``)
    restarts the function from the first page, up to 5 attempts with a
    3 second pause between them. A non-200 HTTP status does NOT raise, so it
    is not retried; it is reported and ``None`` is returned.

    Authentication: the token is read from the ``GITHUB_TOKEN`` environment
    variable. If it is unset the requests are sent unauthenticated.
    SECURITY: a personal access token used to be hardcoded in this cell; it
    has been exposed and should be revoked.

    NOTE(review): GitHub documents that the issues endpoint also returns pull
    requests (items carrying a ``pull_request`` key) -- confirm whether those
    should be filtered out by the caller.

    Args:
        repo_link: Repository URL or path whose last two segments are
            ``<owner>/<repo>``, e.g. ``https://github.com/owner/repo``.
            A trailing ``/`` or ``.git`` suffix is tolerated.

    Returns:
        A list with the decoded JSON objects of every page, or ``None`` if
        any page answered with a status other than 200.

    Raises:
        ValueError: if ``repo_link`` has fewer than two ``/``-separated
            segments.
        requests.RequestException: if a request keeps failing after the
            final retry attempt.
    """
    import os  # local import so this cell does not depend on edits to the import cell

    # Extracting the owner and repo name from the provided GitHub repository link.
    # Strip a trailing slash first, otherwise the last segment would be empty.
    owner, repo = repo_link.rstrip('/').split('/')[-2:]
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    api_url = f"https://api.github.com/repos/{owner}/{repo}/issues"

    # Parameters to fetch closed issues
    params = {
        "state": "closed",
        "per_page": 100  # Fetching 100 items per page, you can adjust this number
    }

    # Never hardcode credentials: take the token from the environment.
    token = os.environ.get("GITHUB_TOKEN")
    headers = {"Authorization": f"token {token}"} if token else {}
    print(api_url)

    issues = []

    # Fetching closed issues from the GitHub API using pagination
    while True:
        # A timeout keeps a stalled connection from hanging forever; the
        # resulting exception is handled by the retry decorator.
        response = requests.get(api_url, params=params, headers=headers, timeout=30)
        if response.status_code != 200:
            print("Failed to fetch closed issues:", response.status_code)
            return None

        issues += response.json()

        # Follow the "next" link from the Link header, if any.
        next_link = response.links.get('next')
        if next_link is None:
            break
        api_url = next_link['url']
        # The "next" URL already contains the query string; sending params
        # again would duplicate every query parameter.
        params = None

    return issues

In [4]:
def save_to_csv(issues, csv_filename):
    """Write the issues that have a body to a CSV file.

    Each kept issue becomes one row with the columns ``Issue ID``, ``Title``,
    ``Issue Body`` and ``Closed At``. Issues whose ``body`` is ``None`` (or
    missing) are skipped; an empty-string body is still written. The number
    of kept issues is printed. Missing parent directories of
    ``csv_filename`` are created, and an existing file is overwritten.

    Args:
        issues: Iterable of dicts providing the keys ``number``, ``title``,
            ``body`` and ``closed_at``.
        csv_filename: Path of the CSV file to write.
    """
    import os  # local import so this cell does not depend on edits to the import cell

    fieldnames = ["Issue ID", "Title", "Issue Body", "Closed At"]

    # Extracting relevant information from each issue.
    # Only issues whose body is not None are included.
    data = [
        {
            "Issue ID": issue["number"],
            "Title": issue["title"],
            "Issue Body": issue["body"],
            "Closed At": issue["closed_at"],
        }
        for issue in issues
        if issue.get("body") is not None
    ]
    print("Fetched Issue Count: "+str(len(data)))

    # Make sure the target directory exists; open() would otherwise fail with
    # FileNotFoundError. A bare file name has no directory part to create.
    target_dir = os.path.dirname(csv_filename)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Writing data to CSV file
    with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

In [5]:

# Repository to crawl and the file the closed issues are written to.
repo_link = "https://github.com/cognitect-labs/aws-api"
output_csv = "crawled_issue/data.csv"
issues = fetch_closed_issues(repo_link)

# fetch_closed_issues returns None on an HTTP failure and may return an empty
# list; both cases are falsy and end up in the else branch.
if issues:
    save_to_csv(issues, output_csv)
    # Report the path that was actually written.
    print(f"Closed issues data saved to {output_csv}")
else:
    print("No closed issues found or failed to fetch closed issues.")

https://api.github.com/repos/cognitect-labs/aws-api/issues


Fetched Issue Count: 191
Closed issues data saved to data.csv


: 