In [22]:
import requests
import pandas as pd
import time
import json

from requests.auth import HTTPBasicAuth

In [23]:
USERNAME = "feenionloyed"
PASSWORD = "2001adp@549"

In [24]:
# Store all data
all_boards = []
all_sprints = []
all_issues = []
all_backlog_issues = []

In [25]:
# Get all boards
def fetch_all_boards():
    boards = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching boards: {response.status_code}")
            break

        data = response.json()
        boards.extend(data['values'])
        is_last = data['isLast']
        start_at += max_results
        time.sleep(1)  # Respect rate limits

    return boards


In [26]:
# Get sprints for a board
def fetch_sprints(board_id):
    sprints = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/sprint?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            # Some boards may not have sprints or might be inaccessible
            print(f"Error fetching sprints for board {board_id}: {response.status_code}")
            break

        try:
            data = response.json()
            sprints.extend(data['values'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return sprints


In [27]:
# Get issues for a sprint
def fetch_sprint_issues(board_id, sprint_id):
    issues = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/sprint/{sprint_id}/issue?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching issues for board {board_id}, sprint {sprint_id}: {response.status_code}")
            break

        try:
            data = response.json()
            issues.extend(data['issues'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return issues



In [28]:
# Get backlog issues
def fetch_backlog_issues(board_id):
    issues = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/backlog?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching backlog for board {board_id}: {response.status_code}")
            break

        try:
            data = response.json()
            issues.extend(data['issues'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return issues



In [29]:
print("Fetching all boards...")
boards = fetch_all_boards();
print(f"Found {len(boards)} boards")
# print(boards)

Fetching all boards...
Found 251 boards


In [30]:
for board in boards:
    board_id = board['id']

    if(board["type"] == "scrum"):
        print(f"Processing board: {board['name']} (ID: {board_id})")
        # Get sprints
        sprints = fetch_sprints(board_id)
        print(f"Found {len(sprints)} sprints")

        for sprint in sprints:
            sprint['boardId'] = board_id
            all_sprints.append(sprint)

            # Get issues for this sprint
            sprint_id = sprint['id']
            issues = fetch_sprint_issues(board_id, sprint_id)
            print(f"Found {len(issues)} issues")

            for issue in issues:
                issue['boardId'] = board_id
                issue['sprintId'] = sprint_id
                all_issues.append(issue)

        # Get backlog issues
        backlog = fetch_backlog_issues(board_id)
        print(f"Found {len(backlog)} backlog issues")
        for issue in backlog:
            issue['boardId'] = board_id
            issue['isBacklog'] = True
            all_backlog_issues.append(issue)

Processing board: 0.6.1 (ID: 365)
Found 0 sprints
Found 50 backlog issues
Processing board: Apache Airavata (ID: 75)
Found 0 sprints
Found 50 backlog issues
Processing board: Apache Aurora Twitter Scrum (ID: 37)
Found 40 sprints
Found 18 issues
Found 22 issues
Found 1 issues
Found 31 issues
Found 9 issues
Found 35 issues
Found 22 issues
Found 46 issues
Found 27 issues
Found 30 issues
Found 14 issues
Found 26 issues
Found 10 issues
Found 17 issues
Found 22 issues
Found 20 issues
Found 16 issues
Found 14 issues
Found 6 issues
Found 6 issues
Found 23 issues
Found 13 issues
Found 5 issues
Found 7 issues
Found 10 issues
Found 12 issues
Found 8 issues
Found 16 issues
Found 15 issues
Found 12 issues
Found 4 issues
Found 2 issues
Found 5 issues
Found 4 issues
Found 2 issues
Found 3 issues
Found 7 issues
Found 1 issues
Found 6 issues
Found 7 issues
Found 35 backlog issues
Processing board: Apache DataLab (ID: 307)
Found 19 sprints
Found 50 issues
Found 50 issues
Found 50 issues
Found 50 issues


In [31]:
boards_df = pd.DataFrame(all_boards);
sprints_df = pd.DataFrame(all_sprints);
issues_df = pd.DataFrame(all_issues);
backlog_df = pd.DataFrame(all_backlog_issues);

In [32]:
# Save as CSV
boards_df.to_csv('./data/apache_boards.csv', index=False)
sprints_df.to_csv('./data/apache_sprints.csv', index=False)
issues_df.to_csv('./data/apache_sprint_issues.csv', index=False)
backlog_df.to_csv('./data/apache_backlog_issues.csv', index=False)