In [114]:
import getpass
import json

import numpy as np
import pandas as pd
import xlsxwriter
from jira import JIRA

# Load the connection settings, query fragments and column/field lists from
# the local `jira.json` file. Any value left empty there is asked for
# interactively below.
with open('jira.json') as json_data_file:
    data = json.load(json_data_file)
    username = data['auth']['username']
    password = data['auth']['password']
    bugqueryadd = data['bugqueryadd']
    epicqueryadd = data['epicqueryadd']
    storyqueryadd = data['storyqueryadd']
    domain = data['domain']
    columns = data['columns']
    fields = data['fields']

# `raw_input` only exists on Python 2; on Python 3 the equivalent is `input`.
# Resolve the right one once so the prompts below work on either interpreter.
try:
    _read_line = raw_input
except NameError:
    _read_line = input

if not domain:
    domain = _read_line("Jira Domain (e.g https://XXX:PPP/jira): ")

if not username:
    username = _read_line("Username: ")

if not password:
    # getpass keeps the password from being echoed into the notebook output
    password = getpass.getpass("Password: ")

# NOTE(review): the two prompts below return a single string, whereas the
# config file presumably provides lists -- confirm downstream code copes.
if not columns:
    columns = _read_line("Columns (List of colums): ")

if not fields:
    fields = _read_line("Fields (List of JQL fields): ")

# Each missing query fragment is built as a `fixversion in (...)` clause from
# the versions typed by the user. The three prompts are asked in the order
# bugs, epics, stories.
if not bugqueryadd:
    bugqueryadd = _read_line("List of fixversions (no quotes, commas allowed):")
    bugqueryadd = 'fixversion in (' + bugqueryadd + ')'

if not epicqueryadd:
    epicqueryadd = _read_line("List of fixversions (no quotes, commas allowed):")
    epicqueryadd = 'fixversion in (' + epicqueryadd + ')'

if not storyqueryadd:
    storyqueryadd = _read_line("List of fixversions (no quotes, commas allowed):")
    storyqueryadd = 'fixversion in (' + storyqueryadd + ')'
    
def get_jira_client(domain, username, password):
    """Build a JIRA client for `domain` authenticated with basic auth.

    Args:
        domain: server URL, passed to JIRA as the 'server' option.
        username: account name used for basic authentication.
        password: password used for basic authentication.

    Returns:
        Whatever the `JIRA` constructor returns for these settings.
    """
    credentials = (username, password)
    return JIRA({'server': domain}, basic_auth=credentials)

def print_jira_issue(issue):
    """Print the key and summary of a raw (JSON dict) issue on one line.

    Args:
        issue: dict with a 'key' entry and a 'fields' dict holding 'summary'.
    """
    issue_key = issue['key']
    issue_summary = issue['fields']['summary']
    print (issue_key, ":", issue_summary)

In [115]:
# Open the Jira session that every query below goes through.
jira = get_jira_client(domain=domain, username=username, password=password)

In [116]:
# Epics matching the configured filter, returned as raw JSON
# (at most 1000 results are requested).
epics = jira.search_issues(
    'type=epic and ' + epicqueryadd,
    fields=fields,
    maxResults=1000,
    json_result=True,
)

In [117]:
# Stories matching the configured filter, returned as raw JSON with their
# changelog expanded (at most 1000 results are requested).
stories = jira.search_issues(
    'type=story and ' + storyqueryadd,
    fields=fields,
    expand='changelog',
    maxResults=1000,
    json_result=True,
)

In [118]:
# Bugs matching the configured filter, returned as raw JSON
# (at most 1000 results are requested).
bugs = jira.search_issues(
    'type=bug and ' + bugqueryadd,
    fields=fields,
    maxResults=1000,
    json_result=True,
)

In [119]:
#prep the stories and epics dataframes
#fix the column names
#extract comment data
#extract all the history from stories and build all the workflow fields

def _collect_issue_text(issue_fields, include_description=True):
    """Merge an issue's free-text fields into one space-separated string.

    Args:
        issue_fields: the raw 'fields' dict of an issue; must contain
            'comment' -> 'comments' (each with a 'body') and 'summary', plus
            'description' when `include_description` is true.
        include_description: whether the description is part of the text.

    Returns:
        The comment bodies, the summary and (optionally) the description
        joined with single spaces. Pieces that are None are skipped. If a
        remaining piece is not a string the pieces are printed and None is
        returned, so one odd issue does not abort the whole run.
    """
    pieces = [comment['body'] for comment in issue_fields['comment']['comments']]
    pieces.append(issue_fields['summary'])
    if include_description:
        pieces.append(issue_fields['description'])
    pieces = [piece for piece in pieces if piece is not None]
    try:
        return ' '.join(pieces)
    except TypeError:
        print(pieces)
        return None

for issue in stories['issues']:
    #merge the textual fields of comments, summary and description
    issue['fields']['textinfo'] = _collect_issue_text(issue['fields'])

    #for stories only, record the important parts of change log as separate columns
    for history in issue['changelog']['histories']:
        for item in history['items']:
            if item['field'] != 'status':
                continue
            status_name = item['toString']
            # a later transition into the same status overwrites the earlier one
            issue['fields'][status_name + ' Set To Date'] = history['created']
            # the author entry can be absent, so do not assume it exists
            issue['fields'][status_name + ' Set By'] = (history.get('author') or {}).get('name')

for issue in epics['issues']:
    # epics: comments and summary only, the description is left out
    issue['fields']['textinfo'] = _collect_issue_text(issue['fields'], include_description=False)

def _fields_with_key(raw_issue):
    """Copy the issue key into the issue's 'fields' dict and return that dict."""
    raw_issue['fields']['key'] = raw_issue['key']
    return raw_issue['fields']

# One row per epic / story: the flattened 'fields' dict tagged with its key.
epic_list = [_fields_with_key(epic) for epic in epics['issues']]
epics_df = pd.DataFrame(epic_list)

story_list = [_fields_with_key(story) for story in stories['issues']]
stories_df = pd.DataFrame(story_list)

# Custom fields arrive keyed by their field id; inside the dataframes (and only
# there) they are renamed to the field's display name.
allfields = jira.fields()
# name -> id and id -> name lookups over every field Jira reports
nameMap = dict((field['name'], field['id']) for field in allfields)
idMap = dict((field['id'], field['name']) for field in allfields)

# Only columns whose label contains 'custom' are renamed, all in one call per frame.
epics_df.rename(
    columns={column: idMap[column] for column in epics_df.columns if 'custom' in column},
    inplace=True,
)
stories_df.rename(
    columns={column: idMap[column] for column in stories_df.columns if 'custom' in column},
    inplace=True,
)

def _first_entry_attr(value, attr):
    """Return `value[0].get(attr)` when `value` is a non-empty list, else None.

    Used for multi-valued fields that arrive as a list of dicts and of which
    only the first entry is kept.
    """
    if isinstance(value, list) and value:
        return value[0].get(attr)
    return None

# Reduce the nested values to plain scalars. Rows where the field is missing
# are dropped before `apply` and come back as NaN on assignment.
stories_df['Team'] = stories_df['Team'].dropna().apply(lambda x: _first_entry_attr(x, 'value'))
stories_df['status'] = stories_df['status'].dropna().apply(lambda x: x.get('name'))
stories_df['reporter'] = stories_df['reporter'].dropna().apply(lambda x: x.get('name'))
# The previous check tested for a dict and then indexed [0], so it could never
# produce a value; the field is a list of version dicts, handled like 'Team'.
stories_df['fixVersions'] = stories_df['fixVersions'].dropna().apply(lambda x: _first_entry_attr(x, 'name'))
# An empty or non-list value now yields None instead of raising.
stories_df['Platform'] = stories_df['Platform'].dropna().apply(lambda x: _first_entry_attr(x, 'value'))

#Change the string time fields into the python datetime structures

from datetime import datetime
from datetime import timedelta

# Workflow statuses whose "<status> Set To Date" column is converted. These
# columns are created from the story changelogs, so a status that no story
# ever entered has no column at all.
WORKFLOW_STATUSES = [
    'Approval',
    'Closed',
    'Code Review',
    'In Analysis',
    'In Progress',
    'In UI/UX',
    'Open',
    'Ready for Estimation',
    'Testing',
]

for status_name in WORKFLOW_STATUSES:
    date_column = status_name + ' Set To Date'
    if date_column in stories_df.columns:
        # values that cannot be parsed become NaT rather than raising
        stories_df[date_column] = pd.to_datetime(
            stories_df[date_column], format='%Y-%m-%dT%H:%M:%S.%f', errors='coerce')
    else:
        # keep the column available for the later cells instead of failing
        # with a KeyError here
        stories_df[date_column] = pd.NaT


In [120]:
#extract the sprint information from the sprints field and create a separate sprints-issue dataframe
#this is only possible once we have the stories dataframe

from functools import reduce

#Takes a list of sprints of the form:
#['com.atlassian.greenhopper.service.sprint.Sprint@1b7eb58a[id=519,rapidViewId=219,state=CLOSED,name=Knight Riders Sprint 2018 - 22,startDate=2018-05-23T21:16:06.149+05:30,endDate=2018-06-05T19:44:00.000+05:30,completeDate=2018-06-06T20:45:27.547+05:30,sequence=519]',
# 'com.atlassian.greenhopper.service.sprint.Sprint@2a28663d[id=542,rapidViewId=219,state=ACTIVE,name=Knight Riders Sprint 2018-23,startDate=2018-06-06T22:14:10.412+05:30,endDate=2018-06-19T20:42:00.000+05:30,completeDate=<null>,sequence=542]']
# and returns one list with a dictionary object for each sprint located. Each dictionary also contains the issue key.
# getSprintInfo parses a single sprint string of that form;
# it returns one dictionary for that sprint.
def getSprintInfo(issueKey, sprint):
    """Parse one sprint string of the form 'Sprint@id[key=value,key=value,...]'.

    Args:
        issueKey: key of the issue the sprint belongs to.
        sprint: the sprint description; the comma-separated key=value pairs
            between the first '[' and the last ']' are parsed.

    Returns:
        A dict with one string entry per key found, plus 'issue_key' set to
        `issueKey`.

    Values may themselves contain '=' or ',' (for instance a sprint name such
    as 'Alpha, Beta'): a fragment only starts a new entry when the text before
    its first '=' is a plain identifier; otherwise it is appended to the value
    of the previous entry.
    """
    #locate the part in square braces
    start = sprint.find('[') + 1
    end = sprint.rfind(']')
    if end < start:
        # no closing brace: treat everything after '[' as the field list
        end = len(sprint)

    dict_sprint = {}
    current_key = None
    for fragment in sprint[start:end].split(','):
        key, separator, value = fragment.partition('=')
        if separator and key.replace('_', '').isalnum():
            current_key = key
            dict_sprint[current_key] = value
        elif current_key is not None:
            # continuation of a value that contained a comma
            dict_sprint[current_key] += ',' + fragment
    dict_sprint['issue_key'] = issueKey
    return dict_sprint

#we return a list of dictionaries, where each dictionary is a sprint paired with the issue.
def getSprints (issueKey, sprints):
    """Pair every sprint string of an issue with the issue key.

    Args:
        issueKey: key of the issue the sprints belong to.
        sprints: list of sprint strings; any value that is not exactly a
            list (for example a missing value) yields no sprints.

    Returns:
        A list with one `getSprintInfo` dictionary per entry of `sprints`,
        or an empty list when `sprints` is not a list.
    """
    if type(sprints) != list:
        return []
    return [getSprintInfo(issueKey, entry) for entry in sprints]

# One record per (story, sprint) pair. The records are collected in a single
# pass instead of re-creating the list for every row of the dataframe.
x1 = [
    sprint_record
    for issue_key, sprint_field in zip(stories_df['key'], stories_df['Sprint'])
    for sprint_record in getSprints(issue_key, sprint_field)
]

sprints_df = pd.DataFrame(x1)

# Turn the sprint date strings into datetimes; values that cannot be parsed
# (such as '<null>') become NaT.
for date_column in ('endDate', 'startDate', 'completeDate'):
    if date_column in sprints_df.columns:
        sprints_df[date_column] = pd.to_datetime(
            sprints_df[date_column], format='%Y-%m-%dT%H:%M:%S.%f', errors='coerce')
    else:
        # no sprint carried this field: keep the column so later cells can use it
        sprints_df[date_column] = pd.NaT

In [121]:
#prep up the bugs dataframe

# Link descriptions that tie a bug to the story it belongs to.
STORY_LINK_TYPES = ('associated with', 'relates to')

def _linked_story(issuelink):
    """Return (story key, link description) if `issuelink` points at a Story.

    An issue link carries either an 'outwardIssue' or an 'inwardIssue'; the
    matching description is read from issuelink['type'] under 'outward' or
    'inward'. Returns None when the linked issue is not a Story, when the
    description is not one of STORY_LINK_TYPES, or when the link carries
    neither side.
    """
    for direction in ('outward', 'inward'):
        linked_issue = issuelink.get(direction + 'Issue')
        if linked_issue is None:
            continue
        link_description = issuelink['type'][direction]
        if (linked_issue['fields']['issuetype']['name'] == 'Story'
                and link_description in STORY_LINK_TYPES):
            return linked_issue['key'], link_description
        return None
    return None

bugs_list = []
for bug in bugs['issues']:
    bug['fields']['key'] = bug['key']
    for issuelink in bug['fields']['issuelinks']:
        story_link = _linked_story(issuelink)
        if story_link is not None:
            # when several links qualify, the last one wins
            bug['fields']['linkKey'], bug['fields']['linktype'] = story_link
    #add each bug to bug list after updating the fields
    bugs_list.append(bug['fields'])

bugs_df = pd.DataFrame(bugs_list)

# If no bug is linked to a story the link columns would not exist at all;
# create them empty so the later merge on 'linkKey' still works.
for link_column in ('linkKey', 'linktype'):
    if link_column not in bugs_df.columns:
        bugs_df[link_column] = None

# Rename every column whose label contains 'custom' to the field's display
# name, in one call.
bugs_df.rename(
    columns={column: idMap[column] for column in bugs_df.columns if 'custom' in column},
    inplace=True,
)

In [122]:
# First merge: attach each story to its epic. Every story is kept (right
# join); columns present in both frames get the '_epic' / '_story' suffix.
scope_df = epics_df.merge(
    stories_df,
    how='right',
    left_on='key',
    right_on='Epic Link',
    sort=True,
    suffixes=('_epic', '_story'),
)

# Spreadsheet hyperlink formula pointing at the story in Jira.
scope_df['story_link'] = (
    '=HYPERLINK("' + domain + '/browse/' + scope_df['key_story']
    + '","' + scope_df['key_story'] + '")'
)

In [123]:
# Attach the sprint records to the epic/story frame (one row per story and
# sprint); the join key duplicated from the sprint side is dropped afterwards.
sprintsWithStoriesAndEpics_df = scope_df.merge(
    sprints_df,
    how='left',
    left_on='key_story',
    right_on='issue_key',
    suffixes=('_story', '_sprint'),
).drop(columns=['issue_key'])

In [132]:
# Keep only the rows whose sprint ended inside the window of interest:
# strictly after 2018-04-03 and strictly before 2018-07-05.
sprintsWithStoriesAndEpics_df = sprintsWithStoriesAndEpics_df[
    (sprintsWithStoriesAndEpics_df['endDate'] > datetime(2018, 4, 3))
    & (sprintsWithStoriesAndEpics_df['endDate'] < datetime(2018, 7, 5))
]

In [133]:
# Basic statistics: number of distinct stories. A story appears once per
# sprint it was part of, hence the de-duplication.
# These are the stories that were worked on, not necessarily finished; they
# were simply inside the sprints.
len(sprintsWithStoriesAndEpics_df['key_story'].unique())

357

In [134]:
# Number of distinct epic keys among those stories.
len(sprintsWithStoriesAndEpics_df['key_epic'].unique())

41

In [149]:
# Drop the stories that are not closed yet, then count the distinct stories.
# A difference from the previous count means not all stories were closed.
sprintsWithStoriesAndEpics_df = sprintsWithStoriesAndEpics_df.loc[
    sprintsWithStoriesAndEpics_df['status_story'].eq('Closed')
]
sprintsWithStoriesAndEpics_df['key_story'].unique().shape

(357,)

In [150]:
# Per team: number of story rows, total story points and number of distinct
# sprints, followed by the average points per sprint.
sprintsWithStoriesAndEpics_dfCopy = (
    sprintsWithStoriesAndEpics_df[['Team_story', 'key_story', 'Story Points', 'name']]
    .copy()
    .groupby(['Team_story'])
    .agg({'key_story':['count'], 'Story Points':['sum'], 'name':['nunique']})
)

# average velocity = summed story points / number of distinct sprint names
sprintsWithStoriesAndEpics_dfCopy['average velocity'] = (
    sprintsWithStoriesAndEpics_dfCopy[('Story Points', 'sum')]
    / sprintsWithStoriesAndEpics_dfCopy[('name', 'nunique')]
)

sprintsWithStoriesAndEpics_dfCopy

Unnamed: 0_level_0,key_story,Story Points,name,average velocity
Unnamed: 0_level_1,count,sum,nunique,Unnamed: 4_level_1
Team_story,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Admin Console Builder,5,42.0,1,42.0
Admin Console Management,1,2.0,1,2.0
Cache Back,95,653.0,7,93.285714
Core,8,27.0,1,27.0
End User Dashboard,20,118.0,1,118.0
End User Responses,11,93.0,1,93.0
Healers,147,860.0,8,107.5
Karma,60,517.0,8,64.625
Knight Riders,92,607.0,9,67.444444
Mission Control,1,8.0,1,8.0


In [303]:
#Calculate the spillover stories per team

#first add up the number of sprints a story is in
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_df[['Team_story', 'key_story', 'name', 'startDate', 'Open Set To Date']].copy()

# sprintLeadTime: whole days from the story's 'Open Set To Date' to the sprint start
# (negative when the story was set to Open after the sprint had started)
sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] = (sprintsWithStoriesAndEpics_dfCopy['startDate'] - sprintsWithStoriesAndEpics_dfCopy['Open Set To Date']).dt.days 
# a row counts as a sprint commitment when the lead time is greater than -2 days
sprintsWithStoriesAndEpics_dfCopy['sprintCommitment'] = sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] > -2
# keep only the committed rows
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['sprintCommitment']].sort_values(by='key_story')

# the helper columns are no longer needed once the filter has been applied
sprintsWithStoriesAndEpics_dfCopy= sprintsWithStoriesAndEpics_dfCopy.drop(columns = ['startDate', 'Open Set To Date', 'sprintCommitment', 'sprintLeadTime'])

# count the remaining rows (sprint names) per team and story
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.groupby(['Team_story', 'key_story']).agg(['count'])

#reset index since we need to do another groupby
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.reset_index()

# expose the per-story sprint count under a flat column name, then count how many
# stories of each team share the same sprint count
sprintsWithStoriesAndEpics_dfCopy['spillover sprint count'] = sprintsWithStoriesAndEpics_dfCopy['name']['count']
sprintsWithStoriesAndEpics_dfCopy= sprintsWithStoriesAndEpics_dfCopy.drop(columns = ['name'])
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.groupby(['Team_story', 'spillover sprint count']).agg(['count'])
# NOTE(review): the result of this expression is neither assigned nor displayed,
# so it does not affect the frame shown below -- confirm whether it can be removed
sprintsWithStoriesAndEpics_dfCopy.groupby(level=0).apply(max)
sprintsWithStoriesAndEpics_dfCopy

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0_level_0,Unnamed: 1_level_0,key_story
Unnamed: 0_level_1,Unnamed: 1_level_1,count
Team_story,spillover sprint count,Unnamed: 2_level_2
Admin Console Builder,1,5
Admin Console Management,1,1
Cache Back,1,15
Cache Back,2,9
Cache Back,3,6
Cache Back,4,3
Cache Back,6,1
Core,1,7
End User Dashboard,1,16
End User Responses,1,11


In [304]:
# Works on the frame left in sprintsWithStoriesAndEpics_dfCopy by the spillover
# cell; it rebinds that same name, so it is meant to run once after that cell.
#reset index since we need to do another groupby
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.reset_index()

# expose the number of stories per (team, sprint count) under a flat column name
sprintsWithStoriesAndEpics_dfCopy['story count'] = sprintsWithStoriesAndEpics_dfCopy['key_story']['count']
sprintsWithStoriesAndEpics_dfCopy= sprintsWithStoriesAndEpics_dfCopy.drop(columns = ['key_story'])

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


In [305]:
#lets calculate the weighted average
# per team: average of 'spillover sprint count', weighted by 'story count'
sprintsWithStoriesAndEpics_dfCopy.groupby(['Team_story']).apply(lambda g: np.average(g['spillover sprint count'], weights=g['story count']))

Team_story
Admin Console Builder       1.000000
Admin Console Management    1.000000
Cache Back                  2.029412
Core                        1.000000
End User Dashboard          1.000000
End User Responses          1.000000
Healers                     2.636364
Karma                       2.500000
Knight Riders               1.628571
Mission Control             1.000000
Optimus                     1.312500
Seal Team                   1.709677
Slide Tackle                1.400000
The Finer Tings Club        1.371429
dtype: float64

In [None]:
#join bugs with sprints to determine how many bugs were attached to sprints and hence part of the relevant period

In [None]:
#find total number of bugs created within sprints and compare with bugs created in total within the period. Note we need 
#to focus on created bugs and not the ones which were fixed.

#also need to compare bugs found during regression with the sprint bugs

#bugs resolved but not closed

#qa and sprints

In [356]:
#combine the bugs with the stories dataframe
#find the number of bugs for each story point

# every story is kept (right join); a bug is matched to a story through its 'linkKey'
storiesWithBugs_df = pd.merge(bugs_df, stories_df, how='right', on=None, left_on='linkKey', right_on='key',
         left_index=False, right_index=False, sort=True,
         suffixes=('_bug', '_story'), copy=True, indicator=False,
         validate=None)

# rows with any of these five values missing are dropped, so only rows that have a
# bug key, a team, a code reviewer, a reporter and story points remain
storiesWithBugs_df = storiesWithBugs_df[['Team_story', 'key_bug', 'Code Review Set By', 'reporter_story',
                                         'Story Points_story']].copy().dropna()
# number of bugs per (team, code reviewer, reporter, story points) combination
storiesWithBugs_df = storiesWithBugs_df.groupby(['Team_story', 'Code Review Set By', 'reporter_story', 'Story Points_story']).agg(['count'])
storiesWithBugs_df = storiesWithBugs_df.reset_index()
# NOTE(review): a group with 0 story points divides by zero here -- confirm the intended handling
storiesWithBugs_df['bugs per story point'] = storiesWithBugs_df['key_bug']['count']/storiesWithBugs_df['Story Points_story']
storiesWithBugs_df = storiesWithBugs_df.drop(columns = ['Story Points_story', 'key_bug'])
# average the ratio per (team, code reviewer, reporter)
storiesWithBugs_df = storiesWithBugs_df.groupby(['Team_story', 'Code Review Set By', 'reporter_story']).agg(['mean'])
storiesWithBugs_df = storiesWithBugs_df.reset_index()


# expose the mean under a flat column name and list the highest ratios first
storiesWithBugs_df['avg bugs per story point'] = storiesWithBugs_df['bugs per story point']['mean']
storiesWithBugs_df= storiesWithBugs_df.drop(columns = ['bugs per story point'])

storiesWithBugs_df = storiesWithBugs_df.sort_values(by='avg bugs per story point', ascending=False)
storiesWithBugs_df

  obj = obj._drop_axis(labels, axis, level=level, errors=errors)


Unnamed: 0,Team_story,Code Review Set By,reporter_story,avg bugs per story point
,,,,
,,,,
16.0,Knight Riders,jason.pierce,archana.josaitis,1.230769
12.0,Knight Riders,durul.dalkanat,archana.josaitis,0.675
9.0,Karma,pradeep.sant,madhav.kumbhar,0.625
14.0,Knight Riders,jahangir.iqbal,archana.josaitis,0.6
8.0,Karma,pankaja.chaudhary,shekhar.sukhadeve,0.461538
33.0,The Finer Tings Club,jamie.nola,addisu.alemu,0.444231
25.0,Seal Team,eka.renardi,addisu.alemu,0.428571
28.0,Seal Team,eric.herring,eka.renardi,0.4


In [262]:
# Stories that were inserted into a sprint after the sprint had started.
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_df[
    ['Team_story', 'startDate', 'Open Set To Date', 'reporter_story', 'Story Points', 'key_story', 'name']
].copy()

# whole days from 'Open Set To Date' to the sprint start
sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] = (
    sprintsWithStoriesAndEpics_dfCopy['startDate'].sub(sprintsWithStoriesAndEpics_dfCopy['Open Set To Date'])
).dt.days
# committed means a lead time greater than -2 days
sprintsWithStoriesAndEpics_dfCopy['sprintCommitment'] = sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'].gt(-2)
# keep the rows that were NOT committed, ordered by story key
sprintsWithStoriesAndEpics_dfCopy = (
    sprintsWithStoriesAndEpics_dfCopy[~sprintsWithStoriesAndEpics_dfCopy['sprintCommitment']]
    .sort_values(by='key_story')
)
sprintsWithStoriesAndEpics_dfCopy

Unnamed: 0,Team_story,startDate,Open Set To Date,reporter_story,Story Points,key_story,name,sprintLeadTime,sprintCommitment
217,Healers,2018-03-28 06:50:52.976,2018-04-11 08:04:35,archana.josaitis,13.0,AC-20679,End User Responses - 18,-15.0,False
208,Karma,2018-03-28 06:50:52.976,2018-03-30 12:31:01,madhav.kumbhar,13.0,AC-20821,End User Responses - 18,-3.0,False
196,Karma,2018-04-11 09:19:32.832,2018-04-18 06:10:08,madhav.kumbhar,8.0,AC-20823,Team Karma Sprint 2018 - 19,-7.0,False
431,End User Dashboard,2018-03-28 15:01:01.602,2018-04-04 12:55:26,alize.chene,3.0,AC-23323,EU Dashboard 2018 - 18,-7.0,False
965,Knight Riders,2018-05-23 15:46:06.149,2018-06-01 14:31:05,alize.chene,5.0,AC-24312,Knight Riders Sprint 2018 - 22,-9.0,False
969,Healers,2018-05-09 05:21:08.428,2018-05-17 10:39:55,alize.chene,3.0,AC-24342,Team Healers Sprint 2018 - 21,-9.0,False
967,Karma,2018-05-22 05:12:20.755,2018-06-01 12:38:03,alize.chene,3.0,AC-24343,Team Karma Sprint 2018 - 22,-11.0,False
1345,Seal Team,2018-03-28 14:01:54.766,2018-03-30 15:30:12,eric.herring,0.0,AC-24680,Seal Team Sprint 2018 - 18,-3.0,False
943,The Finer Tings Club,2018-03-28 14:17:11.877,2018-04-03 13:24:42,ashmita.kumar,3.0,AC-24923,AC Mgmt Sprint 2018 - 18a,-6.0,False
1023,Karma,2018-03-28 06:50:52.976,2018-03-29 17:34:32,archana.josaitis,1.0,AC-25075,End User Responses - 18,-2.0,False


In [263]:
# Number of distinct stories that appear more than once among the late insertions.
len(
    sprintsWithStoriesAndEpics_dfCopy.loc[
        sprintsWithStoriesAndEpics_dfCopy.duplicated(subset='key_story'), 'key_story'
    ].unique()
)

17

In [264]:
# Per reporter and team: mean lead time and number of late-inserted rows.
# The columns that are not needed for the statistics are dropped first.
sprintsWithStoriesAndEpics_dfCopy = (
    sprintsWithStoriesAndEpics_dfCopy
    .drop(columns=['startDate', 'Open Set To Date', 'Story Points', 'name', 'sprintCommitment'])
    .groupby(['reporter_story', 'Team_story'])
    .agg({'sprintLeadTime':['mean'], 'key_story':['count']})
)

# Filter the noise: keep only the groups with more than 5 rows.
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[
    sprintsWithStoriesAndEpics_dfCopy[('key_story', 'count')] > 5
]
sprintsWithStoriesAndEpics_dfCopy

Unnamed: 0_level_0,Unnamed: 1_level_0,sprintLeadTime,key_story
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,count
reporter_story,Team_story,Unnamed: 2_level_2,Unnamed: 3_level_2
addisu.alemu,Cache Back,-23.428571,7
addisu.alemu,Seal Team,-6.903226,31
addisu.alemu,The Finer Tings Club,-4.875,8
archana.josaitis,Cache Back,-11.428571,7
archana.josaitis,Healers,-13.454545,11
archana.josaitis,Knight Riders,-23.290323,31
eka.renardi,Seal Team,-11.0,6
jason.cao,Seal Team,-9.555556,9
jayanth.prathipati,Healers,-7.833333,6
seth.sobhani,Optimus,-9.6,6


In [14]:
#changes to description of story after 

In [15]:
#number of bugs found post sprints are over that need to be fixed in release
#bug creation date > end 

In [300]:
#number of issues left in Testing and Testing lead time inside sprint
#find the issues that are still in Testing before the end of their sprint. Only include issues that were committed 
#to in the beginning of the sprint.

sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_df[['Team_story', 'startDate', 'endDate', 'Testing Set To Date', 'Approval Set To Date', 'Approval Set By', 'Open Set To Date', 'key_story']].copy()
# rows missing any of the selected values are excluded from the analysis
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.dropna()
# whole days from entering Testing to the end of the sprint
sprintsWithStoriesAndEpics_dfCopy['Testing Lead Time'] = (sprintsWithStoriesAndEpics_dfCopy['endDate'] - sprintsWithStoriesAndEpics_dfCopy['Testing Set To Date']).dt.days

# keep the rows that entered Testing at least 2 days before the sprint ended
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['Testing Lead Time'] >= 2] 
# whole days from the end of the sprint to entering Approval
sprintsWithStoriesAndEpics_dfCopy['Ready for Approval Delay'] = (sprintsWithStoriesAndEpics_dfCopy['Approval Set To Date'] - sprintsWithStoriesAndEpics_dfCopy['endDate']).dt.days

# keep the rows that reached Approval at least 2 days after the sprint ended
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['Ready for Approval Delay'] >= 2]

# NOTE(review): the commitment flag is computed here, but the filter on it is
# commented out and both columns are dropped below, so uncommitted stories are
# currently included despite the header comment -- confirm the intent
sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] = (sprintsWithStoriesAndEpics_dfCopy['startDate'] - sprintsWithStoriesAndEpics_dfCopy['Open Set To Date']).dt.days 
sprintsWithStoriesAndEpics_dfCopy['sprintCommitment'] = sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] > -2
#sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['sprintCommitment']].sort_values(by='key_story')
# Testing Time = days in Testing before the sprint end + days after it until Approval
sprintsWithStoriesAndEpics_dfCopy['Testing Time'] = sprintsWithStoriesAndEpics_dfCopy['Testing Lead Time'] + sprintsWithStoriesAndEpics_dfCopy['Ready for Approval Delay']
# the remaining columns are the team, the approver, the story key and the two testing durations
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.sort_values(by='key_story').drop(columns = ['Ready for Approval Delay', 'startDate', 'endDate', 'Testing Set To Date', 'Approval Set To Date', 'Open Set To Date', 'sprintLeadTime', 'sprintCommitment'])
# number of distinct stories left after the filters
sprintsWithStoriesAndEpics_dfCopy['key_story'].unique().size


42

In [299]:
# Per approver and team: number of rows and the mean testing durations.
(
    sprintsWithStoriesAndEpics_dfCopy
    .groupby(['Approval Set By', 'Team_story'])
    .agg({
        'key_story': ['count'],
        'Testing Lead Time': ['mean'],
        'Testing Time': ['mean'],
    })
)

Unnamed: 0_level_0,Unnamed: 1_level_0,key_story,Testing Lead Time,Testing Time
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,mean
Approval Set By,Team_story,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
amit.jagtap,Healers,7,11.571429,18.428571
bajarang.joshilkar,Healers,1,15.0,30.0
bajarang.joshilkar,Karma,3,9.0,20.666667
cgacek,Optimus,3,5.0,11.0
cgacek,The Finer Tings Club,2,11.5,25.0
fred.briden,Optimus,1,5.0,18.0
harshada.pacharne,Healers,1,4.0,6.0
harshada.pacharne,Karma,1,5.0,15.0
harshitha.balabadruni,Knight Riders,6,18.166667,27.833333
lakshman.patil,Admin Console Builder,1,11.0,13.0


In [314]:
#number of issues left in Approval and Approval lead time inside sprint

sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_df[['Team_story', 'startDate', 'endDate', 'Approval Set To Date', 'Approval Set By', 'Closed Set By', 'Open Set To Date', 'Closed Set To Date', 'key_story']].copy()
# rows missing any of the selected values are excluded from the analysis
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy.dropna()

# whole days from entering Approval to the end of the sprint
sprintsWithStoriesAndEpics_dfCopy['Approval Lead Time'] = (sprintsWithStoriesAndEpics_dfCopy['endDate'] - sprintsWithStoriesAndEpics_dfCopy['Approval Set To Date']).dt.days

# keep the rows that entered Approval at least 2 days before the sprint ended
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['Approval Lead Time'] >= 2] 
# whole days from the end of the sprint to the story being closed
sprintsWithStoriesAndEpics_dfCopy['Close Delay'] = (sprintsWithStoriesAndEpics_dfCopy['Closed Set To Date'] - sprintsWithStoriesAndEpics_dfCopy['endDate']).dt.days

# keep the rows that were closed at least 2 days after the sprint ended
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['Close Delay'] >= 2]

# only stories committed to the sprint (lead time greater than -2 days) are kept
sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] = (sprintsWithStoriesAndEpics_dfCopy['startDate'] - sprintsWithStoriesAndEpics_dfCopy['Open Set To Date']).dt.days 
sprintsWithStoriesAndEpics_dfCopy['sprintCommitment'] = sprintsWithStoriesAndEpics_dfCopy['sprintLeadTime'] > -2
sprintsWithStoriesAndEpics_dfCopy = sprintsWithStoriesAndEpics_dfCopy[sprintsWithStoriesAndEpics_dfCopy['sprintCommitment']].sort_values(by='key_story')
# Approval Time = days in Approval before the sprint end + days after it until closed
sprintsWithStoriesAndEpics_dfCopy['Approval Time'] = sprintsWithStoriesAndEpics_dfCopy['Approval Lead Time'] + sprintsWithStoriesAndEpics_dfCopy['Close Delay']

sprintsWithStoriesAndEpics_dfCopy


Unnamed: 0,Team_story,startDate,endDate,Approval Set To Date,Approval Set By,Closed Set By,Open Set To Date,Closed Set To Date,key_story,Approval Lead Time,Close Delay,sprintLeadTime,sprintCommitment,Approval Time
200,Karma,2018-06-05 08:37:11.228,2018-06-19 06:05:00,2018-06-15 10:17:59,harshada.pacharne,archana.josaitis,2018-04-18 06:10:08,2018-06-25 20:52:07,AC-20823,3,6,48,True,9
184,Karma,2018-06-05 08:37:11.228,2018-06-19 06:05:00,2018-06-15 10:12:33,harshada.pacharne,archana.josaitis,2018-02-09 06:35:40,2018-06-26 03:12:36,AC-20826,3,6,116,True,9
729,Admin Console Builder,2018-03-28 06:36:55.255,2018-04-11 05:39:00,2018-03-16 11:45:34,harshada.pacharne,archana.josaitis,2018-02-20 10:23:30,2018-04-13 21:14:37,AC-23282,25,2,35,True,27
159,Healers,2018-05-23 07:04:25.574,2018-06-05 17:07:00,2018-05-25 10:53:32,amit.jagtap,archana.josaitis,2018-04-25 08:47:27,2018-06-11 02:39:20,AC-23495,11,5,27,True,16
155,Healers,2018-05-23 07:04:25.574,2018-06-05 17:07:00,2018-05-25 10:54:31,amit.jagtap,archana.josaitis,2018-04-25 08:48:06,2018-06-18 17:26:28,AC-23496,11,13,27,True,24
151,Healers,2018-05-23 07:04:25.574,2018-06-05 17:07:00,2018-05-25 10:55:23,amit.jagtap,archana.josaitis,2018-04-25 08:48:08,2018-06-11 15:53:04,AC-23497,11,5,27,True,16
978,The Finer Tings Club,2018-04-11 16:04:50.701,2018-04-24 19:30:00,2018-04-10 20:15:05,vasily.smirnov,mathios.dejene,2018-03-09 16:33:45,2018-04-27 20:56:14,AC-24328,13,3,32,True,16
970,Healers,2018-05-23 07:04:25.574,2018-06-05 17:07:00,2018-05-31 10:20:56,amit.jagtap,archana.josaitis,2018-05-17 10:39:55,2018-06-08 04:09:11,AC-24342,5,2,5,True,7
258,Healers,2018-05-23 07:04:25.574,2018-06-05 17:07:00,2018-05-29 13:42:16,bajarang.joshilkar,archana.josaitis,2018-04-11 06:33:53,2018-06-13 04:44:47,AC-24693,7,7,42,True,14
944,The Finer Tings Club,2018-04-04 14:03:24.285,2018-04-10 12:12:00,2018-04-03 15:37:25,vasily.smirnov,addisu.alemu,2018-04-03 13:24:42,2018-04-19 13:53:47,AC-24923,6,9,1,True,15


In [315]:
# Per closer and team: number of rows and the mean approval durations.
(
    sprintsWithStoriesAndEpics_dfCopy
    .groupby(['Closed Set By', 'Team_story'])
    .agg({
        'key_story': ['count'],
        'Approval Lead Time': ['mean'],
        'Approval Time': ['mean'],
    })
)

Unnamed: 0_level_0,Unnamed: 1_level_0,key_story,Approval Lead Time,Approval Time
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,mean
Closed Set By,Team_story,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
addisu.alemu,Seal Team,16,10.5,20.1875
addisu.alemu,The Finer Tings Club,1,6.0,15.0
archana.josaitis,Admin Console Builder,1,25.0,27.0
archana.josaitis,Cache Back,3,8.666667,20.333333
archana.josaitis,Healers,19,16.105263,27.473684
archana.josaitis,Karma,3,4.0,10.0
archana.josaitis,Knight Riders,1,3.0,5.0
daniel.uribe,Healers,2,9.0,17.0
eric.herring,Cache Back,2,13.0,35.0
jason.cao,Seal Team,2,12.0,28.0


In [38]:
# 'textinfo_story' holds, per story, the merged text built in the preparation
# cell (comments, summary and description joined into one string).
# Only the story text is used; the variant that also appends the epic text is kept below for reference.
#scope_df['textinfo'] = scope_df['textinfo_story'] + scope_df['textinfo_epic']
scope_df['textinfo'] = scope_df['textinfo_story']

In [39]:
# A story is flagged when its text does not contain the literal word
# 'Acceptance' (case-insensitive). Rows without text are not flagged.
scope_df['Invalid AC'] = (
    scope_df['textinfo']
    .str.contains('Acceptance', case=False, regex=False)
    .eq(False)
)

In [40]:
# Independent copy holding just the reporter and the flag.
invalid_ac_df = scope_df.loc[:, ['reporter_story', 'Invalid AC']].copy()

In [41]:
# Number of flagged stories per reporter, highest first (top five rows).
(
    invalid_ac_df
    .groupby('reporter_story')
    .sum()
    .sort_values(by='Invalid AC', ascending=False)
    .head()
)

Unnamed: 0_level_0,Invalid AC
reporter_story,Unnamed: 1_level_1
addisu.alemu,12.0
jason.cao,8.0
eka.renardi,5.0
anna.pikhalenko,5.0
jayanth.prathipati,3.0


In [26]:
# Training data for the classifier: epic name (target) and story text
# (input); rows missing either value are removed.
dataset1 = scope_df.reindex(columns=['Epic Name', 'textinfo']).dropna()

In [27]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB

# Fit a TF-IDF vectorizer on the story texts and turn each text into a
# document vector.
vectorizer = TfidfVectorizer()
vectors_ds1 = vectorizer.fit_transform(dataset1['textinfo'])

In [28]:
#split this into training and test data
from sklearn.model_selection import train_test_split

predictors = vectors_ds1
targets = dataset1['Epic Name']

# Fixed seed so the 80/20 split, and therefore the accuracy reported further
# down, is the same on every run of the notebook.
SPLIT_SEED = 42

pred_train, pred_test, tar_train, tar_test = train_test_split(
    predictors, targets, test_size=.20, random_state=SPLIT_SEED)

In [29]:
# Train a multinomial naive Bayes classifier on the training split.
clf = MultinomialNB()
clf.fit(X=pred_train, y=tar_train)

MultinomialNB(alpha=1.0, class_prior=None, fit_prior=True)

In [30]:
# Predicted epic name for every document of the held-out test split.
predictions = clf.predict(X=pred_test)

In [31]:
import sklearn.metrics

# Only the last expression of a cell is displayed, so the confusion matrix is
# printed explicitly; previously it was computed and then thrown away.
confusion = sklearn.metrics.confusion_matrix(tar_test, predictions)
accuracy = sklearn.metrics.accuracy_score(tar_test, predictions)
print(confusion)
accuracy

0.6153846153846154

In [32]:
# NOTE(review): pred_train is presumably a sparse TF-IDF matrix, in which case
# .size is the number of stored entries rather than rows x columns -- confirm
pred_train.size

13670

In [None]:
# Display the document-vector matrix produced by the TF-IDF vectorizer.
vectors_ds1