In [None]:
import pandas as pd
from datetime import datetime
from IPython.display import Markdown as md


def timedelta_to_hours_rounded(td, precision=2):
    """Express a time span as a rounded number of hours.

    Args:
        td: Object exposing ``total_seconds()`` (e.g. a timedelta).
        precision: Number of decimal places passed to ``round``.

    Returns:
        The span in hours, rounded to ``precision`` decimals.
    """
    minutes = td.total_seconds() / 60
    hours = minutes / 60
    return round(hours, precision)


# Load the raw card export
data = pd.read_csv('scratch.data.csv')

# Keep only rows that have both a start and a completion date
data_filtered = data.dropna(subset=['Started', 'Completed'])

# Parse the two date columns once and derive the elapsed time per card
start_times = pd.to_datetime(data_filtered['Started'])
finish_times = pd.to_datetime(data_filtered['Completed'])

# Assemble the working frame (column order: Task, Start, Finish, Resource, Duration)
df = pd.DataFrame({
    'Task': data_filtered['ID'],
    'Start': start_times,
    'Finish': finish_times,
    'Resource': data_filtered['Assignee'],
    'Duration': finish_times - start_times,
})

# Headline statistics: row count plus duration aggregates in hours
durations = df['Duration']
throughput = len(df)
mean_duration = timedelta_to_hours_rounded(durations.mean())
median_duration = timedelta_to_hours_rounded(durations.median())
min_duration = timedelta_to_hours_rounded(durations.min())
max_duration = timedelta_to_hours_rounded(durations.max())

# LaTeX align block; doubled braces are literal braces for str.format
stats_template = """
\\begin{{align*}}
  \\textbf{{Throughput}} &= {} \\text{{ cards}}\\\\
  \\textbf{{Mean}} &= {} \\text{{ hours}}\\\\
  \\textbf{{Median}} &= {} \\text{{ hours}}\\\\
  \\textbf{{Min}} &= {} \\text{{ hours}}\\\\
  \\textbf{{Max}} &= {} \\text{{ hours}}\\\\
\\end{{align*}}
"""

# Last expression of the cell, so the rendered Markdown is the cell output
md(stats_template.format(
    throughput,
    mean_duration,
    median_duration,
    min_duration,
    max_duration,
))


In [52]:
import json
from datetime import datetime

# Read the workflow-state export
with open('states.json') as states_file:
    states_result = json.load(states_file)

# State names, in the order the export lists them
states = [node['name'] for node in states_result['workflowStates']['nodes']]

# 'ID' followed by a Start / End / Duration column triple for every state
columns = ['ID'] + [
    f"{state} {suffix}"
    for state in states
    for suffix in ('Start', 'End', 'Duration')
]

# Empty frame with the full column layout
issue_state_dates = pd.DataFrame(columns=columns)

# Load the exported issues together with their state-transition history
with open('data.json') as f:
    result = json.load(f)

# Take "now" directly as a timezone-aware UTC timestamp. Formatting it as
# "%d/%m/%Y ..." text and re-parsing it with pd.to_datetime lets pandas read
# the string month-first whenever the day is 12 or lower (5 March comes back
# as 3 May), which corrupts every comparison seeded from it. It also avoids
# datetime.utcnow(), which is deprecated in recent Python versions.
now_utc = pd.Timestamp.now(tz='UTC')

# Earliest issue creation time seen so far; seeded with "now" so that any
# issue created in the past replaces it.
oldest_created_date = now_utc

# Rows are collected in a list and turned into a frame once at the end,
# instead of growing the frame one .loc assignment at a time.
issue_rows = []
for issue in result['issues']['nodes']:
    issue_created_at = pd.to_datetime(issue['createdAt'])
    if issue_created_at < oldest_created_date:
        oldest_created_date = issue_created_at

    # Start with every state column empty for this issue
    row = {'ID': issue['identifier']}
    for state in states:
        row[f"{state} Start"] = pd.NA
        row[f"{state} End"] = pd.NA
        row[f"{state} Duration"] = pd.NA

    # Track the state the issue left in its earliest recorded transition;
    # 'For Grooming' is the fallback when no transition predates "now".
    oldest_state = 'For Grooming'
    oldest_state_date = now_utc
    for history in issue['history']['nodes']:
        # Entries without a source state are skipped
        if history['fromState'] is None:
            continue
        # Parse the transition time once and reuse it below
        changed_at = pd.to_datetime(history['createdAt'])
        row[f"{history['fromState']['name']} End"] = changed_at
        row[f"{history['toState']['name']} Start"] = changed_at
        if changed_at < oldest_state_date:
            oldest_state = history['fromState']['name']
            oldest_state_date = changed_at

    # The earliest state is taken to have started when the issue was created
    row[f"{oldest_state} Start"] = issue_created_at

    # A duration exists only for states that have both a start and an end
    for state in states:
        state_start = row[f"{state} Start"]
        state_end = row[f"{state} End"]
        if state_start is not pd.NA and state_end is not pd.NA:
            row[f"{state} Duration"] = state_end - state_start

    issue_rows.append(row)

# columns= fixes the layout and drops any key that is not a known column;
# dtype=object keeps the pd.NA placeholders exactly as stored in each row.
issue_state_dates = pd.DataFrame(issue_rows, columns=columns, dtype=object)

# Do not truncate columns when frames are displayed
pd.set_option('display.max_columns', None)

# End of the reporting range: "now" as a timezone-aware UTC timestamp.
# Round-tripping it through a "%d/%m/%Y ..." string makes pd.to_datetime
# read the text month-first whenever the day is 12 or lower, so the range
# could end on the wrong date.
range_end = pd.Timestamp.now(tz='UTC')

# Sentinel that precedes the earliest issue creation time; it does not
# change between days, so it is computed once.
oldest_possible = oldest_created_date - pd.Timedelta(days=1)

# One record per calendar day; the frame is built once after the loop
daily_rows = []
for day in pd.date_range(start=oldest_created_date, end=range_end,
                         freq='D', normalize=True):
    next_day = day + pd.Timedelta(days=1)
    row = {'Date': day, 'Total': 0}
    for state in states:
        row[state] = 0

    for index, issue in issue_state_dates.iterrows():
        # Find the state whose start is the latest one before the end of
        # this day; that is the state the issue is counted under.
        latest_state = ''
        latest_state_date = oldest_possible
        for state in states:
            state_start = issue[f"{state} Start"]
            # Skip states the issue never entered (pd.NA / NaT)
            if pd.isna(state_start):
                continue
            state_start = pd.to_datetime(state_start)
            if latest_state_date < state_start < next_day:
                latest_state = state
                latest_state_date = state_start
        # Issues with no state start before the end of this day are not counted
        if latest_state != '':
            row[latest_state] += 1
            row['Total'] += 1

    daily_rows.append(row)

state_counts = pd.DataFrame(daily_rows, columns=['Date'] + states + ['Total'])

In [None]:
import matplotlib.pyplot as plt

# Columns to plot: 'Date' supplies the x axis, the rest are stacked in this order
columns = ['Date', 'Triage', 'For Grooming', 'Todo backlog', 'Todo', 'In Progress', 'In Review', 'Done', 'Canceled']
plot_states = state_counts[columns]

# Single-axes figure holding the stacked area chart
figure, ax = plt.subplots()

# stackplot takes one row per series, hence the transpose of the count columns
date_axis = plot_states['Date']
counts_by_state = plot_states.drop(columns='Date').T
ax.stackplot(date_axis, counts_by_state, labels=columns[1:])

ax.legend(loc='upper left')
# Slant the date labels
plt.xticks(rotation=45, ha='right')
plt.show()