In [132]:
import pandas as pd
from pandas.io.json import json_normalize
import urllib, json
import numpy as np
import datetime as dt
import re
import requests

from IPython.html import widgets 
from IPython.display import display, clear_output

import plotly.plotly as py
from plotly.graph_objs import *
import plotly
from plotly.widgets import GraphWidget
import cufflinks as cf

import requests.packages.urllib3
# Suppress warnings emitted by the urllib3 copy bundled with requests.
requests.packages.urllib3.disable_warnings()

################
#  USER INPUT  #
################
# Lower bound for the issues to fetch. It is passed through fromIndonesiaTime,
# which subtracts 7 hours, before being sent to the API and before being
# compared with each issue's created_at.
GET_DATA_SINCE = '2016-01-01T00:00:00Z' #Format: YYYY-MM-DDTHH:MM:SSZ
# NOTE(review): STATE is not referenced by any of the visible cells; the data
# cell calls getAllIssues(state='all') with a literal instead.
STATE = 'all' #State of the issues to return: open, closed, or all

# Repository coordinates and credentials used to build every API URL.
OWNER_NAME = 'GDP-ADMIN'
REPOSITORY_NAME = 'deepsea-tm'
# The token is appended to request URLs as the access_token query parameter.
# Do not commit a real token with the notebook.
ACCESS_TOKEN = '' #Put token here
# Base URL of the repository's issues collection.
URL_GITHUB_API = 'https://api.github.com/repos/'+OWNER_NAME+'/'+REPOSITORY_NAME+'/issues'

# cufflinks configuration: offline plotting with the 'ggplot' theme.
cf.set_config_file(offline=True, world_readable=True, theme='ggplot')

def readJson(url):
    """Fetch `url` and return its body decoded from JSON.

    Parameters:
        url: address to request with urllib.urlopen.

    Returns:
        The Python object produced by json.loads for the response body.
    """
    response = urllib.urlopen(url)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection even when reading or decoding fails.
        response.close()

def reduceSameRow(issues):
    """Collapse the one-row-per-label frame into one row per issue.

    Parameters:
        issues: DataFrame with at least a 'number' column and a 'name' column
            holding one label name per row.

    Returns:
        DataFrame holding the first row of every run of consecutive rows that
        share the same 'number', indexed 1..n, with an extra
        'Current Label(s)' column containing that issue's label names joined
        by ', '.
    """
    # All label names of an issue, keyed by issue number.
    labels_by_number = issues.groupby("number")['name'].apply(', '.join)

    # Consecutive rows with the same number share a run id (starting at 1).
    run_ids = (issues["number"] != issues["number"].shift()).cumsum().values
    reduced_issues = issues.groupby(run_ids).first()

    # Attach labels by issue number rather than by position, so the result is
    # correct whatever order the issues arrive in.
    reduced_issues['Current Label(s)'] = reduced_issues['number'].map(labels_by_number)
    return reduced_issues

def getDurationName(feature):
    """Return the (start, stop) boundary names used to time a feature.

    Parameters:
        feature: 'cycle time', 'recent task' or 'new issues'.

    Returns:
        Tuple (start, stop): start is 'ready' or 'init', stop is
        'Need Review' or 'now'.

    Raises:
        ValueError: if `feature` is not one of the three known names.
    """
    boundaries = {
        'cycle time': ('ready', 'Need Review'),
        'recent task': ('ready', 'now'),
        'new issues': ('init', 'now'),
    }
    if feature not in boundaries:
        # Fail with a clear message instead of an unbound local variable.
        raise ValueError("Unknown feature: %r" % (feature,))
    return boundaries[feature]

def getTimeInterval(start_time,stop_time):
    """Return the number of seconds from `start_time` to `stop_time`.

    Both arguments are strings shaped like 'YYYY-MM-DDTHH:MM:SSZ'. The result
    is a float and is negative when `stop_time` precedes `start_time`.
    """
    def _to_datetime(stamp):
        # Splitting on '-', 'T', ':' and 'Z' yields the six numeric fields
        # followed by one trailing empty string.
        year, month, day, hour, minute, second, _tail = re.split(r'[-T:Z]+', stamp)
        return dt.datetime(int(year), int(month), int(day),
                           int(hour), int(minute), int(second))

    begin = _to_datetime(start_time)
    end = _to_datetime(stop_time)
    elapsed = end - begin
    return elapsed.total_seconds()

def fromIndonesiaTime(current_time=False):
    """Return a UTC timestamp string shaped like 'YYYY-MM-DDTHH:MM:SSZ'.

    Parameters:
        current_time: a 'YYYY-MM-DDTHH:MM:SSZ' string interpreted as UTC+7
            wall-clock time, or a falsy value to use the current moment.

    Returns:
        `current_time` shifted back 7 hours, or the current UTC time when no
        timestamp is given.
    """
    if current_time:
        Y,M,D,h,m,s,emp = re.split(r'[-T:Z]+',current_time)
        utc_time = dt.datetime(int(Y),int(M),int(D),int(h),int(m),int(s)) - dt.timedelta(hours=7)
    else:
        # Read UTC directly: subtracting 7 hours from the local clock is only
        # correct when the machine itself runs on UTC+7.
        utc_time = dt.datetime.utcnow()

    return utc_time.strftime("%Y-%m-%dT%H:%M:%SZ")

def getIssueEvents(issue_number):
    """Fetch the 'labeled' and 'closed' events of one issue.

    Parameters:
        issue_number: number of the issue whose events are requested.

    Returns:
        DataFrame with columns 'number', 'created_at', 'labels' (label name,
        missing for events without a label) and 'event'. It is empty when the
        issue has no such events.

    NOTE(review): a single request is made; if the API paginates events,
    later pages are not read.
    """
    events_url = URL_GITHUB_API+'/'+str(issue_number)+'/events?access_token='+ACCESS_TOKEN
    issue_event = json_normalize(readJson(events_url),None,['created_at','event'])

    # 'label.name' is absent when no event carries a label, and every column is
    # absent when there are no events at all; reindex guarantees all three.
    issue_event = issue_event.reindex(columns=['created_at','event','label.name'])
    labeled_issue = issue_event[(issue_event['event']=='labeled')|(issue_event['event']=='closed')]

    issue_events = pd.DataFrame({
            'number': issue_number,
            'created_at': labeled_issue['created_at'],
            'labels': labeled_issue['label.name'],
            'event': labeled_issue['event']})
    return issue_events

def generateIssueEvents(issues):
    """Fetch and stack the events of every issue in `issues`.

    Parameters:
        issues: DataFrame with a 'number' column, one row per issue.

    Returns:
        One DataFrame holding the rows returned by getIssueEvents for each
        issue, in input order, with a fresh 0..n-1 index. When `issues` has no
        rows, an empty frame with the event columns is returned.
    """
    # Collect first and concatenate once. This does not depend on the index
    # labels of `issues` and avoids re-copying the frame on every append.
    frames = [getIssueEvents(number) for number in issues['number']]
    if not frames:
        return pd.DataFrame(columns=['created_at', 'event', 'labels', 'number'])
    return pd.concat(frames, ignore_index=True)

def getIssueDuration(issue,issue_events,feature):
    """Return how long an issue took, in seconds, for the given feature.

    The start and stop boundaries come from getDurationName(feature):

    start:
      'ready' -> time of the first 'ready' label event; failing that the first
                 'in progress' one, then the first 'Need Review' one, and
                 finally the stop time itself (duration 0).
      'init'  -> the issue's own 'created_at'.
    stop:
      'Need Review' -> time of the issue's first 'closed' event.
      'now'         -> the current time from fromIndonesiaTime.

    Parameters:
        issue: one issue row with 'number' and 'created_at'.
        issue_events: DataFrame of events with 'number', 'event', 'labels'
            and 'created_at' columns.
        feature: feature name understood by getDurationName.

    Returns:
        Seconds between start and stop, as computed by getTimeInterval.

    Raises:
        IndexError: when the stop boundary is 'Need Review' and the issue has
            no 'closed' event.
    """
    labeled_issue = issue_events[issue_events['number']==issue['number']]

    start,stop = getDurationName(feature)

    if stop=='Need Review':
        stop_times = labeled_issue.loc[labeled_issue['event']=='closed','created_at']
    elif stop=='now':
        stop_times = pd.Series([fromIndonesiaTime(current_time=False)])

    if start=='init':
        # Wrap the scalar so it can be read with .values[0] like the others.
        start_times = pd.Series([issue['created_at']])
    elif start=='ready':
        # Fall back to the stop time when none of the labels was ever applied.
        start_times = stop_times
        for label in ('ready','in progress','Need Review'):
            candidates = labeled_issue.loc[labeled_issue['labels']==label,'created_at']
            if not candidates.empty:
                start_times = candidates
                break

    return getTimeInterval(start_times.values[0],stop_times.values[0])

def getIssuesFeature(issues,feature):
    """Select the event rows relevant to a feature, one row per issue run.

    feature:
      'cycle time'  -> rows labelled 'Need Review'
      'recent task' -> rows labelled 'ready' or 'in progress'
      'new issues'  -> every row

    Returns the first row of each run of consecutive rows sharing the same
    'number', indexed 1..n.
    """
    row_labels = issues['labels']

    if feature=='cycle time':
        issues_feature = issues[row_labels=='Need Review']
    elif feature=='recent task':
        issues_feature = issues[row_labels.isin(['ready','in progress'])]
    elif feature=='new issues':
        issues_feature = issues

    numbers = issues_feature["number"]
    # A new run starts wherever the number differs from the previous row.
    run_ids = (numbers != numbers.shift()).cumsum().values
    return issues_feature.groupby(run_ids).first()

def getIssuesType(issues,issues_feature,issue_type):
    """Restrict feature-filtered issues to one issue type.

    issue_type:
      'all'                                -> keep `issues_feature` as it is
      'feature' / 'enhancement' / 'bug'    -> keep the rows of `issues` whose
          number appears in `issues_feature` and which also carry the label
          'type: <issue_type>' somewhere in `issues`

    Returns the first row of each run of consecutive rows sharing the same
    'number', indexed from 1.
    """
    if issue_type=='all':
        issues_result = issues_feature
    else:
        feature_numbers = issues_feature['number']
        typed_numbers = issues.loc[issues['labels']=='type: '+issue_type, 'number']
        wanted_numbers = feature_numbers[feature_numbers.isin(typed_numbers)]
        issues_result = issues[issues['number'].isin(wanted_numbers)]

    numbers = issues_result["number"]
    run_ids = (numbers != numbers.shift()).cumsum().values
    return issues_result.groupby(run_ids).first()

def filterIssues(issues,feature,issue_type):
    """Filter issue events by feature, then by issue type.

    feature:    'cycle time', 'recent task' or 'new issues'
    issue_type: 'all', 'feature', 'enhancement' or 'bug'

    Returns the frame produced by getIssuesType applied to the output of
    getIssuesFeature.
    """
    return getIssuesType(issues, getIssuesFeature(issues,feature), issue_type)

def getSpecificTime(seconds):
    """Format a duration in seconds as days, hours, minutes and seconds.

    `seconds` is truncated to an integer before it is split up.
    """
    remaining = int(seconds)
    days, remaining = divmod(remaining, 24*60*60)
    hours, remaining = divmod(remaining, 60*60)
    minutes, secs = divmod(remaining, 60)

    parts = (days, hours, minutes, secs)
    return "%d day(s) - %d hour(s) - %d minute(s) - %d second(s)" % parts

def getCycleTime(issues,issue_events,issue_type):
    """Compute the cycle time, in hours, of closed issues.

    Parameters:
        issues: one row per issue, with 'number', 'state', 'title' and
            'updated_at'.
        issue_events: event rows as produced by generateIssueEvents.
        issue_type: 'all', 'feature', 'enhancement' or 'bug'.

    Returns:
        DataFrame indexed by 'Issue Title' with one column,
        'Duration in Hour(s)', ordered by 'updated_at' descending; or the
        string "Not data found" when no issue matches.
    """
    feature = 'cycle time'

    # Only events belonging to closed issues are considered.
    closed_numbers = issues.loc[issues['state']=='closed','number']
    issue_events = issue_events[issue_events['number'].isin(closed_numbers)]

    matching_numbers = filterIssues(issue_events,feature,issue_type)['number']
    issues_filter = issues[issues['number'].isin(matching_numbers)]

    if issues_filter.empty:
        return "Not data found"

    hours = issues_filter.apply(
        lambda issue: float(getIssueDuration(issue,issue_events,feature))/3600.0,
        axis=1)
    # assign() builds a new frame instead of writing into a filtered slice.
    issues_filter = issues_filter.assign(Duration=hours).sort_values('updated_at', ascending=False)

    issues_cycle = pd.DataFrame({
            'Issue Title': issues_filter['title'],
            'Duration in Hour(s)': issues_filter['Duration']})

    return issues_cycle.set_index('Issue Title')

def getRecentTask(issues,issue_events,issue_type):
    """List the open issues currently labelled 'ready' or 'in progress'.

    Parameters:
        issues: one row per issue, with 'number', 'state', 'title',
            'assignee', 'created_at', 'updated_at' and 'Current Label(s)'.
        issue_events: event rows as produced by generateIssueEvents.
        issue_type: 'all', 'feature', 'enhancement' or 'bug'.

    Returns:
        DataFrame with columns 'Assignee', 'Issue Title', 'Label(s)',
        'Duration', 'State', 'Created At' and 'Updated At'; or the string
        "Not data found" when no issue matches. 'Duration' is the time since
        the start boundary of the 'recent task' feature, formatted by
        getSpecificTime.
    """
    feature = 'recent task'

    current_labels = issues['Current Label(s)']
    is_active = current_labels.str.contains("ready") | current_labels.str.contains("in progress")
    issues = issues[(issues['state']=='open') & is_active]

    issue_events = issue_events[issue_events['number'].isin(issues['number'])]

    # Filter once; the original repeated this identical step a second time.
    matching_numbers = filterIssues(issue_events,feature,issue_type)['number']
    issues_filter = issues[issues['number'].isin(matching_numbers)]

    if issues_filter.empty:
        return "Not data found"

    seconds = issues_filter.apply(lambda issue: getIssueDuration(issue,issue_events,feature),axis=1)
    durations = seconds.map(getSpecificTime)

    recent_issues = pd.DataFrame({
            # The assignee is a dict with a 'login' key when set; anything
            # else is shown as '-'.
            'Assignee': issues_filter['assignee'].map(lambda x:x['login'] if isinstance(x, dict) else '-'),
            'Issue Title': issues_filter['title'],
            'Label(s)': issues_filter['Current Label(s)'],
            'Duration': durations,
            'State': issues_filter['state'],
            'Created At': issues_filter['created_at'],
            'Updated At': issues_filter['updated_at']})

    return recent_issues

def getNewIssues(issues,issue_events,issue_type):
    """Count the issues created per day since GET_DATA_SINCE.

    Parameters:
        issues: one row per issue, with 'number' and 'created_at'.
        issue_events: event rows as produced by generateIssueEvents.
        issue_type: 'all', 'feature', 'enhancement' or 'bug'.

    Returns:
        Series indexed by creation date ('YYYY-MM-DD', newest first) whose
        values are the number of issues created that day; or the string
        "Not data found" when no issue matches.
    """
    feature = 'new issues'
    issue_events = issue_events[issue_events['number'].isin(issues['number'])]

    matching_numbers = filterIssues(issue_events,feature,issue_type)['number']
    issues_filter = issues[issues['number'].isin(matching_numbers)]

    if issues_filter.empty:
        return "Not data found"

    cutoff = fromIndonesiaTime(GET_DATA_SINCE)
    issues_filter = issues_filter[issues_filter['created_at']>cutoff]

    # The date filter can remove every remaining row; report that the same way.
    if issues_filter.empty:
        return "Not data found"

    # Keep only the 'YYYY-MM-DD' part of each timestamp, without mutating the
    # filtered frame row by row.
    created_dates = issues_filter['created_at'].str[0:10]

    return created_dates.value_counts().sort_index(ascending=False)

def getLastPage(url):
    """Return the number of the last result page advertised for `url`.

    Parameters:
        url: the paginated API URL to request.

    Returns:
        The page number found in the rel="last" entry of the response's Link
        header, as an int; 0 when the header is absent or has no such entry.
    """
    response = requests.get(url)
    link = response.headers.get('link', None)
    if link is None:
        return 0

    # Read the page number from the rel="last" entry rather than from a fixed
    # character offset, which only holds for one exact URL length.
    # '[?&]page=' deliberately does not match the 'per_page=' parameter.
    match = re.search(r'<[^>]*[?&]page=(\d+)[^>]*>;\s*rel="last"', link)
    if match is None:
        return 0

    return int(match.group(1))

def getAllIssues(state='all'):
    """Download every page of issues and return one row per (issue, label).

    Parameters:
        state: state of the issues to request: 'open', 'closed' or 'all'.

    Returns:
        DataFrame built from each issue's 'labels' list, carrying the issue's
        'assignee', 'title', 'number', 'created_at', 'updated_at' and 'state'
        alongside every label. Only issues whose 'created_at' is later than
        GET_DATA_SINCE (after fromIndonesiaTime) are kept.
    """
    since = fromIndonesiaTime(GET_DATA_SINCE)
    issue_github_url = URL_GITHUB_API+'?access_token='+ACCESS_TOKEN+'&since='+since+'&state='+state+'&per_page=100'
    last_page = int(getLastPage(issue_github_url))
    meta_columns = ['assignee','title','number','created_at','updated_at','state']

    pages = [json_normalize(readJson(issue_github_url),'labels',meta_columns)]

    # range() excludes its upper bound, so go to last_page+1 to include the
    # final page. With last_page == 0 the loop does not run.
    for page in range(2,last_page+1):
        pages.append(json_normalize(readJson(issue_github_url+'&page='+str(page)),'labels',meta_columns))

    issues = pd.concat(pages, ignore_index=True)

    # Apply the creation-date filter to every page, not only the first one.
    return issues[issues['created_at']>since].reset_index(drop=True)

In [118]:
# GET DATA FROM GITHUB API
# Every later cell reads the `issues` and `issue_events` frames built here.

# Download all issues (one row per issue/label pair).
issues = getAllIssues(state='all')

# Collapse to one row per issue, then fetch the events of each issue.
issues = reduceSameRow(issues)
issue_events = generateIssueEvents(issues)

# Parenthesised form prints the same text on Python 2 and Python 3.
print("Finished get API data, please continue to run another section...")

Finished get API data, please continue to run another section...


In [125]:
#Plot cycle time: bar chart of the duration, in hours, of each closed issue

##################
# Issue type, passed as the last argument of getCycleTime:
#   1. all
#   2. feature
#   3. enhancement
#   4. bug
##################

################
#  USER INPUT  #
################
cycle_time_issues = getCycleTime(issues,issue_events,'all')

#Plot as graph
# getCycleTime returns a string when nothing matches; chart a single
# zero-valued placeholder bar in that case.
if isinstance(cycle_time_issues,str):
    cycle_time_issues=pd.DataFrame({'Duration' : pd.Series([0], index=['Not Data Found'])})
    
cycle_time_issues.iplot(dimensions=[950,500],kind='bar', yTitle='Time (in Hours)', xTitle='Issue Title', title='CYCLE TIME', filename='cufflinks/categorical-bar-chart')

In [126]:
#Show recent tasks: table of the open issues labelled 'ready' or 'in progress'

##################
# Issue type, passed as the last argument of getRecentTask:
#   1. all
#   2. feature
#   3. enhancement
#   4. bug
##################

################
#  USER INPUT  #
################
# The returned value is the cell's last expression, so the notebook displays it.
getRecentTask(issues,issue_events,'all')

Unnamed: 0,Assignee,Created At,Duration,Issue Title,Label(s),State,Updated At
4,-,2016-04-05T10:32:50Z,1 day(s) - 18 hour(s) - 0 minute(s) - 39 secon...,Sending email from server sometimes times out,"area: API, module: Publish, ready, type: bug",open,2016-04-05T10:33:59Z
19,darwin-gautalius,2016-03-30T04:05:20Z,1 day(s) - 21 hour(s) - 3 minute(s) - 11 secon...,New UX Implementation,"High Level, in progress, type: enhancement",open,2016-04-06T06:27:44Z
21,ahayamb,2016-03-29T14:46:02Z,8 day(s) - 13 hour(s) - 48 minute(s) - 36 seco...,Enhance Analyze's Module Performance Phase 2: ...,"area: API, module: Analyze, ready, type: feature",open,2016-04-05T07:26:04Z
112,eckyputrady,2016-02-22T00:06:39Z,3 day(s) - 0 hour(s) - 8 minute(s) - 19 second(s),Enhance Analyze's Module Performance Phase 1: ...,"area: Worker, in progress, module: Analyze, ty...",open,2016-04-05T03:35:29Z


In [133]:
#Plot new issues: bar chart of the number of issues created per day

##################
# Issue type, passed as the last argument of getNewIssues:
#   1. all
#   2. feature
#   3. enhancement
#   4. bug
##################

################
#  USER INPUT  #
################
new_issues = getNewIssues(issues,issue_events,'all')

#Plot as graph
# getNewIssues returns a string when nothing matches; chart a single
# zero-valued placeholder bar in that case.
# NOTE(review): the placeholder column is named 'Duration' although this chart
# shows issue counts - confirm whether that label is intended.
if isinstance(new_issues,str):
    new_issues=pd.DataFrame({'Duration' : pd.Series([0], index=['Not Data Found'])})
    
new_issues.iplot(dimensions=[950,500], kind='bar', yTitle='Number of Issues', xTitle='Date', title='NEW ISSUES')

