In [10]:
import datetime
import json, urllib, requests
import os
import re

import numpy as np
import pandas as pd
import seaborn as sns
import pylab as plt
import matplotlib.dates as mdates
from pandas.io.json import json_normalize
# Reference instant for Unix timestamps: 1970-01-01 00:00:00 as a naive datetime
# (same value the deprecated ``utcfromtimestamp(0)`` produced).
epoch = datetime.datetime(1970, 1, 1)


def unix_time_millis(dt):
    """Return the whole milliseconds between the Unix epoch and ``dt``.

    Parameters
    ----------
    dt : datetime.datetime
        Naive datetimes are compared with the naive ``epoch`` as-is (so they
        are treated as UTC wall-clock time). Timezone-aware datetimes are
        converted to UTC first instead of raising ``TypeError``.

    Returns
    -------
    int
        Milliseconds since the epoch, truncated toward zero.
    """
    if dt.utcoffset() is not None:
        # Make aware inputs comparable with the naive epoch.
        dt = dt.astimezone(datetime.timezone.utc).replace(tzinfo=None)

    delta = dt - epoch
    # Integer timedelta arithmetic avoids float rounding
    # (e.g. 1.001 s * 1000.0 evaluating to 1000.9999... and truncating to 1000).
    whole_ms = abs(delta) // datetime.timedelta(milliseconds=1)
    return whole_ms if delta >= datetime.timedelta(0) else -whole_ms

## Getting all events for an issue

In [11]:
def get_events(issue_id, token, days=90, limit=None):
    """Download the ``prod`` events of one Sentry issue into a DataFrame.

    Pages through ``https://sentry.io/api/0/issues/{issue_id}/events/`` by
    following the ``next`` entry of the ``Link`` response header until that
    entry reports ``results="false"`` (or ``limit`` pages have been fetched).

    Parameters
    ----------
    issue_id : str
        Sentry issue identifier, interpolated into the API URL.
    token : str
        Sentry API token, sent as a ``Bearer`` authorization header.
    days : int, optional
        Only events whose ``dateCreated`` lies within the last ``days`` days
        (measured in UTC) are returned.
    limit : int or None, optional
        Maximum number of pages to request; ``None`` means no cap.

    Returns
    -------
    pandas.DataFrame
        One row per kept event (flattened with ``pd.json_normalize``; tags
        become ``tags.<key>`` columns). ``dateCreated`` is parsed to UTC
        datetimes and ``date_minus_time`` holds the event day (midnight,
        naive) shifted back by 7 days; it is both a column and the index.
        When no event matches, an empty frame with the ``dateCreated`` and
        ``date_minus_time`` columns is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    ValueError
        If a response body is not valid JSON.
    """
    cutoff = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(days=days)
    base_url = f"https://sentry.io/api/0/issues/{issue_id}/events/"
    headers = {'Authorization': 'Bearer %s' % token}

    all_events = []
    url = f"{base_url}?query="
    pages = 0
    while limit is None or pages < limit:
        r = requests.get(url, headers=headers, timeout=60)
        # Fail loudly on 4xx/5xx instead of iterating over an error payload.
        r.raise_for_status()

        try:
            events_json = r.json()
        except ValueError:
            print(r)
            raise

        for event in events_json:
            is_prod = any(
                tag['key'] == 'environment' and tag['value'] in ('prod', )
                for tag in event['tags']
            )
            if is_prod:
                # Keep each event once, with its tag list flattened to a dict
                # so json_normalize expands it into ``tags.<key>`` columns.
                tags = {tag['key']: tag['value'] for tag in event['tags']}
                all_events.append({**event, 'tags': tags})
                print(".", end="")

        # ``Response.links`` parses the Link header (rel/results/cursor) for us.
        next_link = r.links.get('next') or {}
        if next_link.get('results', '').strip() != 'true' or 'cursor' not in next_link:
            break
        url = f"{base_url}?cursor={next_link['cursor']}&query="
        pages += 1

    if not all_events:
        return pd.DataFrame(columns=['dateCreated', 'date_minus_time'])

    all_events_df = pd.json_normalize(all_events)

    created = pd.to_datetime(all_events_df['dateCreated'], utc=True)
    all_events_df['dateCreated'] = created
    # Honour ``days``: drop everything older than the cutoff.
    all_events_df = all_events_df.loc[created >= cutoff].copy()

    event_day = all_events_df['dateCreated'].dt.tz_localize(None).dt.normalize()
    # NOTE(review): days are shifted back by one week, exactly as before;
    # the reason for the shift is not visible in this notebook - confirm.
    all_events_df['date_minus_time'] = event_day - pd.to_timedelta(7, unit='d')

    return all_events_df.set_index(all_events_df['date_minus_time'])

In [None]:
# Fetch the events of Sentry issue 758615130 (kept in the "successful" frame);
# the API token is read from the SENTRY_TOKEN environment variable.
all_successful_events_df = get_events(
    issue_id="758615130",
    token=os.environ['SENTRY_TOKEN'],
)

........................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................................

In [None]:
# Keep a tab-separated copy of the downloaded "successful" events on disk.
all_successful_events_df.to_csv(
    path_or_buf='20221206_success.tsv',
    sep='\t',
    index=None,
)

In [None]:
# Fetch the events of Sentry issue 540334560 (kept in the "started" frame);
# the API token is read from the SENTRY_TOKEN environment variable.
all_started_events_df = get_events(
    issue_id="540334560",
    token=os.environ['SENTRY_TOKEN'],
)

In [None]:
# Keep a tab-separated copy of the downloaded "started" events on disk.
all_started_events_df.to_csv(
    path_or_buf='20221206_started.tsv',
    sep='\t',
    index=None,
)

In [None]:
# Fetch the events of Sentry issue 848853674 (kept in the "failed" frame).
all_failed_events_df = get_events(
    issue_id='848853674',
    token=os.environ['SENTRY_TOKEN'],
)

In [None]:
# Fetch the events of Sentry issue 767302904 (kept in the "no disk" frame).
no_disk_events_df = get_events(
    issue_id='767302904',
    token=os.environ['SENTRY_TOKEN'],
)

In [None]:
# Fetch the events of Sentry issue 854282951 (kept in the "sigkill" frame).
sigkill_events_df = get_events(
    issue_id='854282951',
    token=os.environ['SENTRY_TOKEN'],
)

## Filtering dataframes

In [None]:
def _version_sort_key(version):
    """Natural-sort key: digit runs compare as integers, other text as strings.

    Makes "1.5.10" sort after "1.5.2", which plain string sorting gets wrong.
    Pre-release suffixes (e.g. "rc1") are not given PEP 440 semantics.
    """
    return [
        (0, int(chunk), '') if chunk.isdigit() else (1, 0, chunk)
        for chunk in re.split(r'(\d+)', str(version))
        if chunk
    ]


# Keep one event per run (first occurrence of each run UUID).
unique_success = all_successful_events_df.drop_duplicates(subset=['tags.run_uuid'])
unique_started = all_started_events_df.drop_duplicates(subset=['tags.run_uuid'])

# Successful runs whose UUID also appears among the started runs.
started_uuids = unique_started['tags.run_uuid']
unique_started_success = unique_success.loc[unique_success['tags.run_uuid'].isin(started_uuids)]

# The 12 highest release tags among started runs; events without a release
# tag (NaN) are skipped because they cannot be ordered against strings.
versions = sorted(
    unique_started['tags.release'].dropna().unique(),
    key=_version_sort_key,
)[-12:]

In [None]:
def _count_per_iso_week(events):
    """Count events per (ISO year, ISO week) of their ``date_minus_time``.

    The ISO year is paired with the ISO week so that days around New Year
    land in the right bucket (e.g. 2021-01-01 belongs to week 53 of 2020);
    combining the calendar year with the ISO week would misfile them.
    """
    iso = events['date_minus_time'].dt.isocalendar()
    return events.groupby([iso['year'], iso['week']])['id'].count()


# Weekly number of runs, indexed by (year, week).
group_success = _count_per_iso_week(unique_success)
grouped_started = _count_per_iso_week(unique_started)
grouped_started_success = _count_per_iso_week(unique_started_success)

In [None]:
# Weekly run counts for each selected version: one column per version,
# one row per (ISO year, ISO week).
versions_success = {}
versions_started = {}

for version in versions:
    ver_suc = unique_started_success[unique_started_success['tags.release'] == version]
    ver_sta = unique_started[unique_started['tags.release'] == version]

    # Pair the ISO year with the ISO week so days around New Year are not
    # misfiled (calendar year + ISO week disagree for those days).
    suc_iso = ver_suc['date_minus_time'].dt.isocalendar()
    sta_iso = ver_sta['date_minus_time'].dt.isocalendar()

    versions_success[version] = ver_suc.groupby([suc_iso['year'], suc_iso['week']])['id'].count()
    versions_started[version] = ver_sta.groupby([sta_iso['year'], sta_iso['week']])['id'].count()

versions_success = pd.DataFrame(versions_success)
versions_started = pd.DataFrame(versions_started)


## Successful runs per week

In [None]:
# Display the index of the weekly success counts to check which weeks are covered.
group_success.index

In [None]:
# Weekly number of successful runs as a bar chart. The last entry of the
# series is left out of the plot (presumably an incomplete week - confirm).
sns.set_style("whitegrid")
sns.set_context("poster")

fig, ax = plt.subplots(figsize=(10, 6))
indexes = [str(week) for _year, week in group_success.index[:-1]]
weekly_success = group_success.values[:-1]
ax.bar(indexes, weekly_success, width=0.8, label='Success')

ax.set_ylabel("Number of successful runs")
ax.set_xlabel("Week of year (2020-2021)")
ax.set_title("Successful fMRIPrep runs per week (all versions)")
sns.despine(left=True)
ax.legend(bbox_to_anchor=(1, 1), title='Events');
fig.savefig('20221206_successful_weekly_abs.png', dpi=300, bbox_inches='tight')

In [None]:
# Weekly number of initiated runs as a bar chart. The last entry of the
# series is left out of the plot (presumably an incomplete week - confirm).
fig, ax = plt.subplots(figsize=(10, 6))
indexes = [f"{w}" for _, w in grouped_started.index[:-1]]
# These bars are the *started* runs, so the legend entry must say so
# (it previously read 'Success', copied from the chart of successful runs).
ax.bar(indexes, grouped_started.values[:-1], width=0.8, label='Started')

plt.ylabel("Number of executions")
plt.xlabel("Week of year (2020)")
plt.title("Initiated fMRIPrep runs per week (all versions)")
sns.despine(left=True)
plt.legend(bbox_to_anchor=(1, 1), title='Events');
plt.savefig('20221206_all_weekly_abs.png', dpi=300, bbox_inches='tight')

In [None]:
# Weekly success rate: successful runs as a percentage of started runs.
sns.set_style("whitegrid")
fig, ax = plt.subplots(figsize=(15, 6))

# Drop the last entry of the started series, as the other weekly charts do
# (presumably an incomplete week - confirm).
started_weekly = grouped_started.iloc[:-1]
# Align the success counts on the started weeks by index label rather than by
# position, so a week without any success counts as 0 instead of shifting
# every later bar (or failing on a length mismatch).
success_weekly = grouped_started_success.reindex(started_weekly.index, fill_value=0)
success_rate = success_weekly / started_weekly * 100

# Build the tick labels here instead of relying on a variable left over
# from an earlier cell.
rate_labels = [f"{w}" for _, w in success_rate.index]
ax.bar(rate_labels, success_rate.to_numpy(), width=0.9, label='Success')

plt.ylabel("Percentage of successful runs")
plt.xlabel("Week of year (2020-2021)")
plt.title("Successful fMRIPrep runs per week (all versions)")
ax.set_ylim((0,100))
sns.despine(left=True)
plt.legend(bbox_to_anchor=(1, 1), title='Events');
plt.savefig('20221206_successful_weekly_rel.png', dpi=300, bbox_inches='tight')

In [None]:
# Stacked bars: weekly successful runs, one colour per version.
fig, ax = plt.subplots(figsize=(16, 6))
versions_success.plot.bar(stacked=True, ax=ax)

sns.despine(left=True)
ax.set_ylabel("Number of successful runs, broken by version")
ax.set_xlabel("Week of year")
ax.set_title("fMRIPrep successful runs per week")
ax.legend(bbox_to_anchor=(1, 1));
fig.savefig('20221206_version_successful_weekly_abs.png', dpi=300, bbox_inches='tight')

In [None]:
# Stacked bars: each version's weekly successes as a percentage of all runs
# started that week (summed over the selected versions).
fig, ax = plt.subplots(figsize=(16, 6))

started_per_week = versions_started.sum(axis=1)
success_share = (versions_success.T / started_per_week).T * 100
success_share.plot.bar(stacked=True, ax=ax)

sns.despine(left=True)
ax.set_ylim((0, 100))
ax.set_ylabel("Percentage of total successful runs, broken by version")
ax.set_xlabel("Week of year")
ax.set_title("fMRIPrep successful runs per week")
ax.legend(bbox_to_anchor=(1, 1));
fig.savefig('20221206_version_successful_weekly_rel_total.png', dpi=300, bbox_inches='tight')

In [None]:
# Overall success rate of each version: successes summed over all weeks,
# divided by starts summed over all weeks.
fig, ax = plt.subplots(figsize=(16, 6))
success_rate_by_version = (versions_success.sum(axis=0) / versions_started.sum(axis=0)) * 100
success_rate_by_version.plot.bar(ax=ax, width=1.0)

sns.despine(left=True)
ax.set_ylim((0, 100))

# The bars are indexed by version (sum over axis=0 collapses the weeks), so
# the axis label and title name versions rather than weeks. No legend is
# drawn: this single unnamed series has no meaningful legend entry.
ax.set_ylabel("Percentage of successful runs")
ax.set_xlabel("Version")
ax.set_title("fMRIPrep success rate per version")
fig.savefig('20221206_version_successful_weekly_rel_broken.png', dpi=300, bbox_inches='tight')

In [None]:
# Total successful runs per version (column sums over all weeks).
versions_success.sum(axis=0)

In [None]:
# Total started runs per version (column sums over all weeks).
versions_started.sum(axis=0)