In [11]:
import arrow
import netrc
import pandas as pd
import pickle

from jira import JIRA
from os.path import exists

In [12]:
JIRA_DOMAIN = "gremlininc.atlassian.net"
CUSTOM_FIELDS = ""

# Authentication

See README.md in this folder for how to setup authentication

In [None]:
auth_file = netrc.netrc()
authTokens = auth_file.authenticators(JIRA_DOMAIN)

user_email = authTokens[0]
api_token = authTokens[2]

In [None]:
jira_url = f"https://{JIRA_DOMAIN}/"

jira = JIRA(jira_url, basic_auth=(user_email, api_token))

# projects = jira.projects()  # Test Authentication

# Jira API Limits

Jira cloud limits issue searches to 100 results. We paginate over week long periods to collate all the issues from our desired
timeline.

In [None]:
def retrieve_issues():
    # start = arrow.get("2021-03-01")
    start = arrow.get("2022-03-01")
    end = arrow.utcnow()

    jql_template = """
    PROJECT = EN
    AND created >= {}
    AND created <= {}
    AND statusCategory = Done
    AND resolution = Done
    """

    result_list = []

    # If a result set has 99+ results, there's a good chance the API is limiting results and the date windows should be smaller.
    class LimitedResultsException(Exception):
        pass

    for r in arrow.Arrow.span_range('week', start, end):
        jql = jql_template.format(r[0].date(), r[1].date())
        results = jira.search_issues(jql, expand="changelog,renderedFields")

        if len(results) >= 99:
            raise LimitedResultsException()
        result_list.extend(results)

    return result_list

In [2]:
cache_key = arrow.utcnow().format("YYYY-MM-DD")
cache_file = f"results_{cache_key}.pickle"

if exists(cache_file):
    print(f"Reading cached results from {cache_file}")
    with open(cache_file, 'rb') as f:
        result_list = pickle.load(f)
else:
    print("Retrieving results from JIRA.")
    result_list = retrieve_issues()
    print(f"Writing results to {cache_file}")
    with open(cache_file, 'wb') as f:
        pickle.dump(result_list, f, pickle.HIGHEST_PROTOCOL)

Reading cached results from results_2022-05-07.pickle


In [21]:
class WorkflowTransitionDAO:
    def __init__(self, timestamp, history_item):
        self._history_item = history_item
        
        self.field = history_item.field
        self.fieldtype = history_item.fieldtype
        
        self.timestamp = arrow.get(timestamp)
        
        self.from_state_id = getattr(history_item, "from")
        self.to_state_id = history_item.to
        
        self.from_state = history_item.fromString
        self.to_state = history_item.toString
        
    def __str__(self):
        return f"[{self.timestamp.date()}] '{self.from_state}'({self.from_state_id}) -> '{self.to_state}'({self.to_state_id})"


class WorkflowTransitionHistory(list):
    def boo(self):
        return "boo"

        
class Issue:
    def __init__(self, issue):
        self._issue = issue
        self.key = issue.key
        self.project = issue.fields.project

        self.created_ts = issue.fields.created
        self.created_date = arrow.get(issue.fields.created).date()
        
        self.resolution_ts = issue.fields.resolutiondate
        self.resolution_date = arrow.get(issue.fields.resolutiondate).date()
        
        
    @property
    def state_transitions(self):
        history_items = WorkflowTransitionHistory()

        for h in self._issue.changelog.histories:
            for i in [x for x in h.items if x.field == 'status']:
                history_items.append(WorkflowTransitionDAO(h.created, i))
            # print(getattr(h, "created", None))
        
        return history_items
        
    def __str__(self):
        return f"[{self.key}]"
    
    def __repr__(self):
        return f"{self.key}"

In [22]:
issues = []

for r in result_list:
    issues.append(Issue(r))        

In [23]:
for i in issues:
    for st in i.state_transitions:
        print(i, st)

[EN-2378] [2022-03-08] 'In Progress'(3) -> 'Done'(10001)
[EN-2378] [2022-03-03] 'Backlog'(10090) -> 'In Progress'(3)
[EN-2374] [2022-03-03] 'Backlog'(10090) -> 'Done'(10001)
[EN-2367] [2022-04-22] 'In Progress'(3) -> 'Done'(10001)
[EN-2367] [2022-04-14] 'Dev Ready'(10212) -> 'In Progress'(3)
[EN-2367] [2022-04-11] 'Prioritized'(10211) -> 'Dev Ready'(10212)
[EN-2367] [2022-04-11] 'Backlog'(10090) -> 'Prioritized'(10211)
[EN-2360] [2022-04-07] 'Dev Review'(10143) -> 'Done'(10001)
[EN-2360] [2022-04-04] 'In Progress'(3) -> 'Dev Review'(10143)
[EN-2360] [2022-03-28] 'Dev Ready'(10212) -> 'In Progress'(3)
[EN-2360] [2022-03-28] 'Backlog'(10090) -> 'Dev Ready'(10212)
[EN-2358] [2022-03-04] 'In Progress'(3) -> 'Done'(10001)
[EN-2358] [2022-03-02] 'Backlog'(10090) -> 'In Progress'(3)
[EN-2357] [2022-03-04] 'Backlog'(10090) -> 'Done'(10001)
[EN-2356] [2022-03-04] 'Backlog'(10090) -> 'Done'(10001)
[EN-2355] [2022-03-04] 'Backlog'(10090) -> 'Done'(10001)
[EN-2350] [2022-03-15] 'Backlog'(10090) ->

In [24]:
def calculate_lifecycle(issues):
    data = {
        "key": [x.key for x in issues],
        "project": [x.project for x in issues],
        "created_date": [x.created_date for x in issues],
        "created_ts": [x.created_ts for x in issues],
        "resolution_date": [x.resolution_date for x in issues],
        "resolution_ts": [x.resolution_ts for x in issues],

        
    }
    return pd.DataFrame(data)
    
calculate_lifecycle(issues)

Unnamed: 0,key,project,created_date,created_ts,resolution_date,resolution_ts
0,EN-2378,EN,2022-03-03,2022-03-03T14:31:57.610-0800,2022-03-08,2022-03-08T10:46:03.997-0800
1,EN-2374,EN,2022-03-03,2022-03-03T09:30:39.029-0800,2022-03-03,2022-03-03T09:30:52.246-0800
2,EN-2367,EN,2022-03-03,2022-03-03T08:40:04.215-0800,2022-04-22,2022-04-22T03:11:48.640-0700
3,EN-2360,EN,2022-03-02,2022-03-02T10:47:22.758-0800,2022-04-07,2022-04-07T13:55:00.594-0700
4,EN-2358,EN,2022-03-02,2022-03-02T08:45:50.766-0800,2022-03-04,2022-03-04T10:07:15.913-0800
...,...,...,...,...,...,...
116,EN-2751,EN,2022-05-03,2022-05-03T15:19:21.645-0700,2022-05-04,2022-05-04T13:33:50.308-0700
117,EN-2747,EN,2022-05-03,2022-05-03T12:12:01.494-0700,2022-05-05,2022-05-05T09:23:45.055-0700
118,EN-2737,EN,2022-05-02,2022-05-02T20:43:02.167-0700,2022-05-06,2022-05-06T12:48:31.144-0700
119,EN-2736,EN,2022-05-02,2022-05-02T18:53:22.624-0700,2022-05-04,2022-05-04T13:34:06.337-0700


In [7]:
df

NameError: name 'df' is not defined