In [1]:
import requests
import pandas as pd
import time
import json

from requests.auth import HTTPBasicAuth

In [2]:
USERNAME = "feenionloyed"
PASSWORD = "2001adp@549"

In [3]:
# Store all data
all_boards = []
all_sprints = []
all_issues = []
all_backlog_issues = []

In [4]:
# Get all boards
def fetch_all_boards():
    boards = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching boards: {response.status_code}")
            break

        data = response.json()
        boards.extend(data['values'])
        is_last = data['isLast']
        start_at += max_results
        time.sleep(1)  # Respect rate limits

    return boards


In [5]:
# Get sprints for a board
def fetch_sprints(board_id):
    sprints = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/sprint?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            # Some boards may not have sprints or might be inaccessible
            print(f"Error fetching sprints for board {board_id}: {response.status_code}")
            break

        try:
            data = response.json()
            sprints.extend(data['values'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return sprints


In [6]:
# Get issues for a sprint
def fetch_sprint_issues(board_id, sprint_id):
    issues = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/sprint/{sprint_id}/issue?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching issues for board {board_id}, sprint {sprint_id}: {response.status_code}")
            break

        try:
            data = response.json()
            issues.extend(data['issues'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return issues



In [7]:
# Get backlog issues
def fetch_backlog_issues(board_id):
    issues = []
    start_at = 0
    max_results = 50
    is_last = False

    while not is_last:
        url = f"https://issues.apache.org/jira/rest/agile/1.0/board/{board_id}/backlog?startAt={start_at}&maxResults={max_results}"
        response = requests.get(url, auth=HTTPBasicAuth(USERNAME, PASSWORD))

        if response.status_code != 200:
            print(f"Error fetching backlog for board {board_id}: {response.status_code}")
            break

        try:
            data = response.json()
            issues.extend(data['issues'])
            is_last = data.get('isLast', True)
            start_at += max_results
        except:
            break

        time.sleep(1)  # Respect rate limits

    return issues



In [22]:
print("Fetching all boards...")
boards = fetch_all_boards()
print(f"Found {len(boards)} boards")
# print(boards)

Fetching all boards...
Found 251 boards


In [9]:
# boards = boards[:20]
# print(boards)

In [25]:
for board in boards:
    board_id = board['id']

    if(board["type"] == "scrum"):
        print(f"Processing board: {board['name']} (ID: {board_id})")
        # Get sprints
        sprints = fetch_sprints(board_id)
        print(f"Found {len(sprints)} sprints")

        for sprint in sprints:
            sprint['boardId'] = board_id
            all_sprints.append(sprint)

            # Get issues for this sprint
            sprint_id = sprint['id']
            issues = fetch_sprint_issues(board_id, sprint_id)
            print(f"Found {len(issues)} issues")

            for issue in issues:
                dummy_issue = {}
                dummy_issue['boardId'] = board_id
                dummy_issue['sprintId'] = sprint_id
                dummy_issue['key'] = issue['key']

                # if "fixVersions" in issue['fields']:
                #     dummy_issue['fixVersion'] = issue['fixVersions'][0].get('name')
                # else:
                #     dummy_issue['fixVersion'] = None
                #
                # issue["fields"].get("fixVersions") if issue["fields"].get("fixVersions") else None
                try:
                    dummy_issue['fixVersion'] = issue["fields"].get("fixVersions", [{}])[0].get("name", None)
                except:
                    dummy_issue['fixVersion'] = None

                dummy_issue['priority'] = issue["fields"]['priority']['name']
                dummy_issue['status'] = issue["fields"]['status']['name']
                dummy_issue['status_category'] = issue["fields"]['status']['statusCategory']['name']
                dummy_issue['creator'] = issue["fields"]['creator']['displayName']
                dummy_issue['reporter'] = issue["fields"]['reporter']['displayName']
                # dummy_issue['closedSprint'] = issue["fields"]['closedSprints']['id']

                dummy_issue['progress'] = issue["fields"]['progress']['progress']
                dummy_issue['progress_total'] = issue["fields"]['progress']['total']
                dummy_issue['worklog'] = issue["fields"]['progress']['total']
                dummy_issue['issuetype'] = issue["fields"]['issuetype']['name']
                dummy_issue['project'] = issue["fields"]['project']['name']
                dummy_issue['created'] = issue["fields"]['created']
                dummy_issue['updated'] = issue["fields"]['updated']
                dummy_issue['description'] = issue["fields"]['description']
                dummy_issue['summary'] = issue["fields"]['summary']
                dummy_issue['duedate'] = issue["fields"]['duedate']
                # print(dummy_issue)
                all_issues.append(dummy_issue)





        # Get backlog issues
        backlog = fetch_backlog_issues(board_id)
        # backlog = []
        print(f"Found {len(backlog)} backlog issues")
        for issue in backlog:
            # try:
            dummy_backlog_issue = {}
            dummy_backlog_issue['boardId'] = board_id
            dummy_backlog_issue['isBacklog'] = True




            try:
                fix_versions = issue["fields"].get("fixVersions", [])
                dummy_backlog_issue['fixVersion'] = fix_versions[0]["name"] if fix_versions else None
            except:
                dummy_backlog_issue['fixVersion'] = None

            try:
                dummy_backlog_issue['priority'] = issue["fields"].get("priority", {}).get("name")
            except:
                dummy_backlog_issue['priority'] = None

            try:
                dummy_backlog_issue['assignee'] = issue["fields"].get("assignee") if issue["fields"].get("displayName") else None
            except:
                dummy_backlog_issue['assignee'] = None

            try:
                dummy_backlog_issue['status'] = issue["fields"].get("status", {}).get("name")
            except:
                dummy_backlog_issue['status'] = None

            try:
                dummy_backlog_issue['status_category'] = issue["fields"].get("status", {}).get("status_category", {}).get("name")
            except:
                dummy_backlog_issue['status_category'] = None

            try:
                dummy_backlog_issue['creator'] = issue["fields"].get("creator", {}).get("displayName")
            except:
                dummy_backlog_issue['creator'] = None

            try:
                dummy_backlog_issue['reporter'] = issue["fields"].get("reporter", {}).get("displayName")
            except:
                dummy_backlog_issue['reporter'] = None

            try:
                dummy_backlog_issue['issuetype'] = issue["fields"].get("issuetype", {}).get("name")
            except:
                dummy_backlog_issue['issuetype'] = None

            try:
                dummy_backlog_issue['project'] = issue["fields"].get("project", {}).get("name")
            except:
                dummy_backlog_issue['project'] = None

            try:
                dummy_backlog_issue['created'] = issue["fields"].get("created", None)
            except:
                dummy_backlog_issue['created'] = None

            try:
                dummy_backlog_issue['updated'] = issue["fields"].get("updated", None)
            except:
                dummy_backlog_issue['updated'] = None

            try:
                dummy_backlog_issue['description'] = issue["fields"].get("description", None)
            except:
                dummy_backlog_issue['description'] = None

            try:
                dummy_backlog_issue['summary'] = issue["fields"].get("summary", None)
            except:
                dummy_backlog_issue['summary'] = None

            # except Exception as e:
            #         print('error on - ' + str(e))
            #         continue

            all_backlog_issues.append(dummy_backlog_issue)
            # print(dummy_backlog_issue)
            # print("-------------------")

Processing board: 0.6.1 (ID: 365)
Found 0 sprints
Found 50 backlog issues
Processing board: Apache Airavata (ID: 75)
Found 0 sprints
Found 50 backlog issues
Processing board: Apache Aurora Twitter Scrum (ID: 37)
Found 40 sprints
Found 18 issues
Found 22 issues
Found 1 issues
Found 31 issues
Found 9 issues
Found 35 issues
Found 22 issues
Found 46 issues
Found 27 issues
Found 30 issues
Found 14 issues
Found 26 issues
Found 10 issues
Found 17 issues
Found 22 issues
Found 20 issues
Found 16 issues
Found 14 issues
Found 6 issues
Found 6 issues
Found 23 issues
Found 13 issues
Found 5 issues
Found 7 issues
Found 10 issues
Found 12 issues
Found 8 issues
Found 16 issues
Found 15 issues
Found 12 issues
Found 4 issues
Found 2 issues
Found 5 issues
Found 4 issues
Found 2 issues
Found 3 issues
Found 7 issues
Found 1 issues
Found 6 issues
Found 7 issues
Found 35 backlog issues
Processing board: Apache DataLab (ID: 307)
Found 19 sprints
Found 50 issues
Found 50 issues
Found 50 issues
Found 50 issues


In [28]:
print(f"Found Total sprints {len(all_sprints)} ")
print(f"Found Total Boards {len(boards)} ")
print(f"Found Total Issues {len(all_issues)} ")
print(f"Found Total Backlog issues {len(all_backlog_issues)} ")

Found Total sprints 4760 
Found Total Boards 251 
Found Total Issues 68880 
Found Total Backlog issues 5717 


In [27]:
# Save to files
with open('apache_boards.json', 'w') as f:
    json.dump(boards, f)

with open('apache_sprints.json', 'w') as f:
    json.dump(all_sprints, f)

with open('apache_sprint_issues.json', 'w') as f:
    json.dump(all_issues, f)

with open('apache_backlog_issues.json', 'w') as f:
    json.dump(all_backlog_issues, f)

In [29]:
# Create DataFrames for analysis
boards_df = pd.json_normalize(boards)
sprints_df = pd.json_normalize(all_sprints)
issues_df = pd.json_normalize(all_issues)
backlog_df = pd.json_normalize(all_backlog_issues)

In [31]:
# boards_df = pd.DataFrame(boards);
# sprints_df = pd.DataFrame(all_sprints);
# issues_df = pd.DataFrame(all_issues);
# backlog_df = pd.DataFrame(all_backlog_issues);

In [30]:
# Save as CSV
boards_df.to_csv('./data/apache_boards.csv', index=False)
sprints_df.to_csv('./data/apache_sprints.csv', index=False)
issues_df.to_csv('./data/apache_sprint_issues.csv', index=False)
backlog_df.to_csv('./data/apache_backlog_issues.csv', index=False)