In [1]:
import calendar
import datetime as dt
import dateutil.parser as dp
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import os

from IPython.display import display, Markdown
from functools import partial
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient.discovery import build

# Parameters

In [2]:
YEAR_FILTER = '2021'
EXPECTED_PARSERS_PER_FTE = 2.5
INTERACTIVE_CHARTS = True

# Notebook Options

In [3]:
if INTERACTIVE_CHARTS:
    %matplotlib widget
else:
    %matplotlib inline

pd.set_option('display.max_rows', None)

# Setting up key, token and Service Account

In [4]:
with open('keys.json') as file:
    keys = json.load(file)
    api_key = keys['trello']['api_key']
    token = keys['trello']['token']
    spreadsheet_key = keys['sheet']['spreadsheet_key']
    sa_file = keys['sheet']['sa_file']

# Extracting Sheet Data

In [5]:
def convert_sheet_date(sheet_date):
    conversion_table = {
        'January': '01',
        'February': '02',
        'March': '03',
        'April': '04',
        'May': '05',
        'June': '06',
        'July': '07',
        'August': '08',
        'September': '09',
        'October': '10',
        'November': '11',
        'December': '12'
    }
    
    month = conversion_table[sheet_date.split('-')[0].strip()]
    year = sheet_date.split('-')[1].strip()
    
    return year + '-' + month

In [6]:
def get_consolidated_sheet():
    scope = ['https://spreadsheets.google.com/feeds']
    credentials = ServiceAccountCredentials.from_json_keyfile_name(sa_file, scope)
    service = build('sheets', 'v4', credentials=credentials)

    SAMPLE_RANGE_NAME = 'Consolidated'
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=spreadsheet_key,
                                range=SAMPLE_RANGE_NAME).execute()
    values = result.get('values', [])
    
    return values

In [7]:
def get_total_by_date():
    table = get_consolidated_sheet()
    
    header_row = table[0]
    columns = [value for index, value in enumerate(header_row) if value] 
    converted_columns = [ convert_sheet_date(x) for x in columns[1:]]
    
    total_row = [value for value in table if value and value[0] == 'Total'][0]
    # getting only the executed column
    total = [value for index, value in enumerate(total_row) if index % 3 == 0]
    
    # getting only the planned column
    planned = [total_row[i] for i in range(2, len(total_row), 3)]

    total.pop(0)
    
    total_by_date = [ [converted_columns[index], value] for index, value in enumerate(total)]
    
    return total_by_date

In [8]:
def get_planned_by_date():
    table = get_consolidated_sheet()
    
    header_row = table[0]
    columns = [value for index, value in enumerate(header_row) if value] 
    converted_columns = [ convert_sheet_date(x) for x in columns[1:]]
    
    total_row = [value for value in table if value and value[0] == 'Total'][0]
    
    # getting only the planned column
    planned = [total_row[i] for i in range(2, len(total_row), 3)]

    planned.pop(0)
    
    planned_by_date = [ [converted_columns[index], value] for index, value in enumerate(planned)]
    
    return planned_by_date

In [9]:
def create_ftes_dataframe():
    # from deprecated sheet, it will never be changed
    old_data = [
        ['2020-03', 4.0],
        ['2020-04', 6.15],
        ['2020-05', 6.25],
        ['2020-06', 6.0],
        ['2020-07', 3.65],
        ['2020-08', 4.57],
        ['2020-09', 4.52],
        ['2020-10', 4.9],
        ['2020-11', 4.7]
    ]
    
    new_data = get_total_by_date()
    
    full_data = old_data + new_data
    
    total_fte = pd.DataFrame(full_data, columns=['month_base', 'fte'])
    
    total_fte['month_base'] = pd.to_datetime(total_fte['month_base'])
    total_fte['month'] = pd.PeriodIndex(total_fte['month_base'], freq='M')
    total_fte['quarter'] = pd.PeriodIndex(total_fte['month_base'], freq='Q')
    total_fte['fte'] = total_fte['fte'].astype(float)
    
    del total_fte['month_base']
    
    return total_fte

In [10]:
def create_planned_ftes_dataframe():
    data = get_planned_by_date()

    planned_fte = pd.DataFrame(data, columns=['month_base', 'planned_fte'])
    
    planned_fte['month_base'] = pd.to_datetime(planned_fte['month_base'])
    planned_fte['month'] = pd.PeriodIndex(planned_fte['month_base'], freq='M')
    planned_fte['quarter'] = pd.PeriodIndex(planned_fte['month_base'], freq='Q')
    planned_fte['planned_fte'] = planned_fte['planned_fte'].astype(float)
    
    del planned_fte['month_base']
    
    return planned_fte

In [11]:
total_fte = create_ftes_dataframe()

In [12]:
total_fte

Unnamed: 0,fte,month,quarter
0,4.0,2020-03,2020Q1
1,6.15,2020-04,2020Q2
2,6.25,2020-05,2020Q2
3,6.0,2020-06,2020Q2
4,3.65,2020-07,2020Q3
5,4.57,2020-08,2020Q3
6,4.52,2020-09,2020Q3
7,4.9,2020-10,2020Q4
8,4.7,2020-11,2020Q4
9,4.9,2020-12,2020Q4


In [13]:
planned_ftes = create_planned_ftes_dataframe()
planned_ftes

Unnamed: 0,planned_fte,month,quarter
0,5.65,2020-12,2020Q4
1,5.15,2021-01,2021Q1
2,6.05,2021-02,2021Q1
3,6.4,2021-03,2021Q1
4,5.29,2021-04,2021Q2
5,5.05,2021-05,2021Q2
6,6.27,2021-06,2021Q2
7,6.27,2021-07,2021Q3
8,4.52,2021-08,2021Q3
9,6.2,2021-09,2021Q3


# Extracting Trello Data

In [14]:
def get_data_from_trello_api(url):
    headers = {
       "Accept": "application/json"
    }
    
    query = {
       'key': api_key,
       'token': token
    }
    
    response = requests.request("GET", url, headers=headers, params=query)
    
    if response.status_code > 299:
        raise Exception('Something went wrong with the request {0} '\
                        'with status: {1}'.format(url, response.status_code))
    
    return json.loads(response.text)

In [15]:
def read_json(json_name):
    with open(json_name) as file:
        json_opened = json.load(file)
           
    return json_opened 

In [16]:
def write_json(json_name, content_to_write):
    with open(json_name, 'w') as json_file:
        json.dump(content_to_write, json_file, indent=4, sort_keys=True)

In [17]:
def generate_timestamp():
    current_timestamp = dt.datetime.now().strftime('%d-%m-%Y')
    
    return current_timestamp

In [18]:
def create_folder_for_dumping(name, current_timestamp):
    if not os.path.exists('dumps'):
        os.mkdir('dumps')
    
    if not os.path.exists('dumps/' + name):
        os.mkdir('dumps/' + name)
    
    if not os.path.exists('dumps/' + name + '/' + current_timestamp):
        os.mkdir('dumps/' + name + '/' + current_timestamp)

In [19]:
def get_data_from_dump(board_name, dump_name, timestamp):
    path = 'dumps/' + board_name + '/' + timestamp + '/dump_' + dump_name + '.json'
    
    file_opened = read_json(path)
    
    return file_opened

# Acessing API

In [20]:
def get_board_by_name(board_name):
    boards_url = f'https://api.trello.com/1/search?query={board_name}'
    board = get_data_from_trello_api(boards_url)
    
    return board

In [21]:
def get_lists_by_board(board_name, board_id, current_timestamp):
    lists_url = 'https://api.trello.com/1/boards/{0}/lists'
    lists = get_data_from_trello_api(lists_url.format(board_id))
          
    return lists

In [22]:
def get_custom_fields_by_board(board_name, board_id, current_timestamp):
    custom_fields_url = 'https://api.trello.com/1/boards/{0}/customFields'
    
    custom_fields = get_data_from_trello_api(custom_fields_url.format(board_id))
    
    return custom_fields

In [23]:
def get_cards_by_board(board_name, board_id, current_timestamp):
    cards_on_board_url = 'https://api.trello.com/1/boards/{0}/cards/?customFieldItems=true'
    board_cards = get_data_from_trello_api(cards_on_board_url.format(board_id))
    
    return board_cards

In [24]:
def get_members_by_board(board_name, board_id, current_timestamp):
    members_on_board_url = 'https://api.trello.com/1/boards/{0}/members'
    board_members = get_data_from_trello_api(members_on_board_url.format(board_id))
    
    return board_members

# Creating dumps

In [25]:
def create_boards_dump(board_name, current_timestamp):
    boards = get_board_by_name(board_name)

    name_of_dump = f'dumps/{board_name}/{current_timestamp}/dump_board.json'
        
    write_json(name_of_dump, boards)
    
    return name_of_dump

In [26]:
def get_id_board_from_dump(board_name, current_timestamp):
    board = get_board_by_name_from_dump(board_name, current_timestamp)
    id_board = board['boards'][0]['id']

    return id_board

In [27]:
def create_lists_dump(board_name, board_id, current_timestamp):
    lists = get_lists_by_board(board_name, board_id, current_timestamp)
    
    name_of_dump = f'dumps/{board_name}/{current_timestamp}/dump_lists.json'
    write_json(name_of_dump, lists)
        
    return name_of_dump

In [28]:
def create_custom_fields_dump(board_name, board_id, current_timestamp):
    custom_fields = get_custom_fields_by_board(board_name, board_id, current_timestamp)
        
    name_of_dump = f'dumps/{board_name}/{current_timestamp}/dump_custom_field.json'
    write_json(name_of_dump, custom_fields)
        
    return name_of_dump

In [29]:
def create_cards_dump(board_name, board_id, current_timestamp):
    board_cards = get_cards_by_board(board_name, board_id, current_timestamp)    
       
    name_of_dump = f'dumps/{board_name}/{current_timestamp}/dump_cards.json'
    
    write_json(name_of_dump, board_cards)
            
    return name_of_dump

In [30]:
def create_members_dump(board_name, board_id, current_timestamp):
    board_members = get_members_by_board(board_name, board_id, current_timestamp)    
       
    name_of_dump = f'dumps/{board_name}/{current_timestamp}/dump_members.json'
    
    write_json(name_of_dump, board_members)
            
    return name_of_dump

In [31]:
def create_dumps_by_name(board_name, current_timestamp):
    create_folder_for_dumping(board_name, current_timestamp)
    
    create_boards_dump(board_name, current_timestamp)
    
    board_id = get_id_board_from_dump(board_name, current_timestamp)
    
    create_lists_dump(board_name, board_id, current_timestamp)
    create_custom_fields_dump(board_name, board_id, current_timestamp)
    create_cards_dump(board_name, board_id, current_timestamp)
    create_members_dump(board_name, board_id, current_timestamp)

# Getting data from dumps

In [32]:
def get_board_by_name_from_dump(board_name, timestamp):
    board = get_data_from_dump(board_name, 'board', timestamp)
    
    return board

In [33]:
def mapping_lists_by_board_from_dump(board_name, timestamp):
    lists_json = get_data_from_dump(board_name, 'lists', timestamp)
    
    list_map = {}
    for list in lists_json:
        list_map[list['id']] = list['name']
    
    return list_map

In [34]:
def mapping_custom_fields_by_board_from_dump(board_name, timestamp):
    custom_fields_json = get_data_from_dump(board_name, 'custom_field', timestamp)
    
    custom_field_map = {}
    for custom_field in custom_fields_json:
        custom_field_map[custom_field['id']] = custom_field['name']
        
        if custom_field['type'] == 'list':
            options = custom_field['options']
            for option in options:
                custom_field_map[option['id']] = option['value']['text']
    
    return custom_field_map

In [35]:
def mapping_members_by_board_from_dump(board_name, timestamp):
    members_json = get_data_from_dump(board_name, 'members', timestamp)
    
    members_map = {}
    for member in members_json:
        members_map[member['id']] = member['fullName']
    
    return members_map

In [36]:
def create_normalized_card(card, lists_map, members_map, custom_fields_map, custom_field_required):
    normalized_card = {}
    normalized_card['name'] = card['name']
    normalized_card['shortUrl'] = card['shortUrl']
    normalized_card['idList'] = lists_map[card['idList']]

    members_in_card = []
    for member in card['idMembers']:
        members_in_card.append(members_map[member])
    normalized_card['idMember'] = members_in_card

    for custom_field in card['customFieldItems']:
        name = custom_fields_map[custom_field['idCustomField']]

        if name in custom_field_required:

            if 'idValue' in custom_field:
                name_value = custom_fields_map[custom_field['idValue']]
                normalized_card[name] = name_value

            elif 'value' in custom_field:
                for key, value in custom_field['value'].items():
                    result = value 
                normalized_card[name] = result                           

    if len(normalized_card) < 3:
        raise Exception(
            'Make sure all dates are filled in the card: Start, EndDev and End for {0}'.format(card['name']))
    
    return normalized_card

In [37]:
def get_useful_cards_by_board(board_name, timestamp):
    cards_raw = get_data_from_dump(board_name, 'cards' , timestamp)
        
    fields = ('id', 'name', 'idList', 'shortUrl', 'customFieldItems', 'idMembers')

    cards = [{key : value for key, value in card.items() if key in fields} for card in cards_raw ]

    custom_fields_map = mapping_custom_fields_by_board_from_dump(board_name, timestamp)

    lists_map = mapping_lists_by_board_from_dump(board_name, timestamp)

    members_map = mapping_members_by_board_from_dump(board_name, timestamp)
    
    custom_field_required = read_json('custom_fields_required.json')

    useful_cards = []
    for card in cards:
        
        idListName = lists_map[card['idList']]
        
        if idListName in ['Done']:
            normalized_card = create_normalized_card(card, lists_map, members_map, custom_fields_map, custom_field_required)
            useful_cards.append(normalized_card)
            
    return useful_cards

In [38]:
def get_useful_cards_in_dev_by_board(board_name, timestamp):
    cards_raw = get_data_from_dump(board_name, 'cards' , timestamp)
        
    fields = ('id', 'name', 'idList', 'shortUrl', 'customFieldItems', 'idMembers')

    cards = [{key : value for key, value in card.items() if key in fields} for card in cards_raw ]

    custom_fields_map = mapping_custom_fields_by_board_from_dump(board_name, timestamp)

    lists_map = mapping_lists_by_board_from_dump(board_name, timestamp)

    members_map = mapping_members_by_board_from_dump(board_name, timestamp)
    
    custom_field_required = read_json('custom_fields_required.json')

    useful_cards = []
    for card in cards:
        
        idListName = lists_map[card['idList']]
        
        if idListName in ['Development', 'Review', 'Refinement', 'Assessment']:
            normalized_card = create_normalized_card(card, lists_map, members_map, custom_fields_map, custom_field_required)
            useful_cards.append(normalized_card)
            
    return useful_cards

In [39]:
def create_dataframe_from_trello(board_name, timestamp):
    cards = get_useful_cards_by_board(board_name, timestamp)
    df = pd.DataFrame.from_dict(cards)

    df['dev_duration'] = (pd.to_datetime(df['EndDev']).dt.date - pd.to_datetime(df['Start']).dt.date).dt.days
    df['duration'] = (pd.to_datetime(df['End']).dt.date - pd.to_datetime(df['Start']).dt.date).dt.days
    
    df['busday_dev_duration'] = np.busday_count(
        pd.to_datetime(df['Start']).dt.date,
        pd.to_datetime(df['EndDev']).dt.date)
    
    df['busday_duration'] = np.busday_count(
        pd.to_datetime(df['Start']).dt.date,
        pd.to_datetime(df['End']).dt.date)
    
    df['quarter'] = pd.PeriodIndex(df['End'], freq='Q')
    
    df['month'] = pd.PeriodIndex(df['End'], freq='M')

    df["count"] = 1
    
    return df

In [40]:
def create_dataframe_in_dev_from_trello(board_name, timestamp):
    cards = get_useful_cards_in_dev_by_board(board_name, timestamp)
    df = pd.DataFrame.from_dict(cards)

    df["count"] = 1
    
    return df

In [41]:
current_timestamp = generate_timestamp()
create_dumps_by_name('CBN', current_timestamp)
df = create_dataframe_from_trello('CBN', current_timestamp)

# Filter only items from defined year - by End date.
if YEAR_FILTER:
    df = df[df['End'].str.contains(YEAR_FILTER)]

In [42]:
df

Unnamed: 0,name,shortUrl,idList,idMember,Commit Date (by Google),Date requested (by Google),Impacted parsers,New parser,Size/Complexity,EndDev,Priority,End,Start,dev_duration,duration,busday_dev_duration,busday_duration,quarter,month,count
0,GCP_CLOUDAUDIT_b197038246,https://trello.com/c/P4E1B5C6,Done,[diegogr_cit],2021-07-31T15:00:00.000Z,2021-06-28T15:00:00.000Z,Default,No,XS,2021-08-19T21:43:46.000Z,P1,2021-08-26T17:49:48.000Z,2021-08-18T21:33:00.000Z,1,8,1,6,2021Q3,2021-08,1
1,WINEVTLOG,https://trello.com/c/lGidlGym,Done,"[atos_cit, diegogr_cit]",2021-08-13T15:00:00.000Z,2021-08-08T15:00:00.000Z,Default and Customer,No,S,2021-08-25T22:25:12.000Z,P2,2021-08-26T15:40:00.000Z,2021-08-20T14:45:56.000Z,5,6,3,4,2021Q3,2021-08,1
2,GCP_BIGQUERY_CONTEXT,https://trello.com/c/6AFvTi4U,Done,[diegogr_cit],2021-08-29T15:00:00.000Z,2021-08-09T15:00:00.000Z,Default,Yes,XS,2021-08-18T21:13:10.000Z,P2,2021-08-25T14:30:34.000Z,2021-08-17T15:01:00.000Z,1,8,1,6,2021Q3,2021-08,1
3,WORKSPACE_ACTIVITY,https://trello.com/c/IIFpxgMF,Done,[felipegc_cit],2021-08-01T20:09:00.000Z,2021-07-16T20:09:00.000Z,Default,Yes,L,2021-08-23T15:14:00.000Z,P2,2021-08-25T13:54:06.000Z,2021-08-04T18:07:16.000Z,19,21,13,15,2021Q3,2021-08,1
4,CBN - GCP Security Center (include nampespace),https://trello.com/c/ILOE5BES,Done,[felipegc_cit],2021-08-20T15:00:00.000Z,2021-06-24T15:45:00.000Z,Default,No,XS,2021-08-18T14:17:39.000Z,P2,2021-08-24T13:40:58.000Z,2021-08-17T22:11:55.000Z,1,7,1,5,2021Q3,2021-08,1
5,BLUECOAT_WEBPROXY,https://trello.com/c/gt4NyMtb,Done,"[felipegc_cit, DIEGO DE OLIVEIRA MARANHAO]",2021-07-22T15:00:00.000Z,2021-06-15T15:00:00.000Z,Default,No,M,2021-08-17T21:53:00.000Z,P2,2021-08-23T21:47:30.000Z,2021-07-28T15:21:53.000Z,20,26,14,18,2021Q3,2021-08,1
6,WORKSPACE_ALERTS,https://trello.com/c/wrHwWqku,Done,"[felipegc_cit, fmendonca_cit, Davisom da Cunha...",2021-08-01T15:00:00.000Z,2021-07-16T15:00:00.000Z,Default,Yes,L,2021-08-19T21:54:48.000Z,P2,2021-08-23T13:51:39.000Z,2021-08-11T13:17:49.000Z,8,12,6,8,2021Q3,2021-08,1
7,CISCO_IOS,https://trello.com/c/RKgW7vrG,Done,"[felipegc_cit, atos_cit]",2021-08-07T15:00:00.000Z,2021-06-29T15:00:00.000Z,Default,Yes,S,2021-08-20T18:14:54.000Z,P2,2021-08-23T13:49:14.000Z,2021-08-17T14:36:21.000Z,3,6,3,4,2021Q3,2021-08,1
8,GCP_CLOUDAUDIT_b192934169_CreateCryptoKey,https://trello.com/c/SGVdVMM7,Done,"[diegogr_cit, atos_cit]",2021-07-18T15:00:00.000Z,2021-07-06T15:00:00.000Z,Default,No,XS,2021-08-16T23:59:56.000Z,P2,2021-08-18T21:16:54.000Z,2021-08-16T19:14:57.000Z,0,2,0,2,2021Q3,2021-08,1
9,GCP_CLOUDAUDIT_b192933604_ SetIamPolicy,https://trello.com/c/qLpcGw4l,Done,"[atos_cit, diegogr_cit]",2021-07-18T15:00:00.000Z,2021-07-06T15:00:00.000Z,Default,No,XS,2021-08-16T23:59:58.000Z,,2021-08-18T21:16:52.000Z,2021-08-09T12:49:32.000Z,7,9,5,7,2021Q3,2021-08,1


In [43]:
cancelled = df[(df['idList'] == 'Cancelled')]
done = df[(df['idList'] == 'Done')]

In [90]:
df.groupby('Size/Complexity').size()

Size/Complexity
L     10
M     16
S     25
XL     6
XS    24
dtype: int64

In [44]:
df_in_dev = create_dataframe_in_dev_from_trello('CBN', current_timestamp)

df_in_dev['Size/Complexity'].fillna('S', inplace=True)
df_in_dev

Unnamed: 0,name,shortUrl,idList,idMember,Date requested (by Google),Impacted parsers,New parser,Priority,Start,Commit Date (by Google),Size/Complexity,EndDev,End,count
0,PAN_FIREWALL_195481905,https://trello.com/c/JZuMkDuG,Assessment,"[felipegc_cit, atos_cit]",2021-08-04T15:00:00.000Z,Default,No,P2,2021-08-31T19:18:25.000Z,,M,,,1
1,WINEVTLOG_b196948147,https://trello.com/c/GdHPy4te,Development,[atos_cit],2021-08-17T15:00:00.000Z,Default and Customer,No,,2021-08-25T21:30:33.000Z,2021-09-01T15:00:00.000Z,S,,,1
2,WATCHGUARD,https://trello.com/c/mZrNDyPE,Development,"[diegogr_cit, Davisom da Cunha Correa]",2021-08-10T15:00:00.000Z,Default,Yes,,2021-08-24T14:51:13.000Z,2021-08-31T15:00:00.000Z,M,,,1
3,SEP (Enhancement),https://trello.com/c/mC971RaN,Development,[diegogr_cit],2021-03-24T15:00:00.000Z,Default and Customer,No,P2,2021-07-21T15:41:35.000Z,2021-06-18T15:00:00.000Z,XL,,,1
4,SYMANTEC_WEB_ISOLATION,https://trello.com/c/7C0nRzab,Review,"[Thiago Maia, diegogr_cit]",2021-06-09T15:00:00.000Z,,Yes,,2021-08-12T17:24:40.000Z,2021-06-10T15:00:00.000Z,M,2021-08-31T22:12:41.000Z,2021-09-02T15:00:00.000Z,1
5,SOPHOS_UTM,https://trello.com/c/LYGpCoYO,Review,"[diegogr_cit, DIEGO DE OLIVEIRA MARANHAO]",2021-08-10T15:00:00.000Z,Default,Yes,P1,2021-08-23T12:04:42.000Z,2021-08-31T15:00:00.000Z,XS,2021-08-31T22:08:31.000Z,,1


In [115]:
q_low = df['duration'].quantile(0.01)
q_hi  = df['duration'].quantile(0.89)

df_estimative = df[(df['duration'] < q_hi) & (df['duration'] > q_low)]

#display(df_estimative)
df_estimative_group = df_estimative.groupby('Size/Complexity').mean()
display(df_estimative_group)
df_estimative_group.sort_values(by='duration', ascending=False, inplace=True)
estimative_day_by_size = df_estimative_group.loc[:, 'busday_duration']
#estimative_day_by_size *= 0.7
estimative_day_by_size

Unnamed: 0_level_0,dev_duration,duration,busday_dev_duration,busday_duration,count
Size/Complexity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
L,21.571429,29.285714,15.285714,20.714286,1.0
M,16.357143,22.357143,11.642857,16.071429,1.0
S,9.791667,14.583333,7.041667,10.25,1.0
XL,29.0,32.0,21.0,23.0,1.0
XS,3.545455,6.545455,2.454545,4.727273,1.0


Size/Complexity
XL    23.000000
L     20.714286
M     16.071429
S     10.250000
XS     4.727273
Name: busday_duration, dtype: float64

In [116]:
total_list_parsers = df_in_dev.groupby(['idList', 'Size/Complexity']).size()
total_list_parsers = total_list_parsers.astype('float')
total_list_parsers_normalized = total_list_parsers.copy()
#30% Assesment e Refinement e 50% Develepment e 20% Review
for i in total_list_parsers_normalized.index:
    if i[0] == 'Refinement':
        total_list_parsers_normalized[i] *= (1 - 0.2)
    elif i[0] == 'Development':
        total_list_parsers_normalized[i] *= (1 - 0.3)
    elif i[0] == 'Review':
        total_list_parsers_normalized[i] *= (1 - 0.8)
        
display(total_list_parsers_normalized)
display(total_list_parsers)
#total_list_parsers.groupby('Size/Complexity').size()

idList       Size/Complexity
Assessment   M                  1.0
Development  M                  0.8
             S                  0.8
             XL                 0.8
Review       M                  0.2
             XS                 0.2
dtype: float64

idList       Size/Complexity
Assessment   M                  1.0
Development  M                  1.0
             S                  1.0
             XL                 1.0
Review       M                  1.0
             XS                 1.0
dtype: float64

In [117]:
to_make_parsers_list = pd.Series({'XL':0, 'L':0, 'M':0, 'S':0, 'XS':0})

#Current month fte and day of month
current_day = int(dt.datetime.now().strftime('%d'))
if dt.datetime.now().strftime('%m') == '02':
    busdays_to_end_of_the_month = np.busday_count(dt.datetime.now().strftime('%Y-%m-%d'), str(dt.datetime.now().strftime('%Y-%m'))+'-28')
else:
    busdays_to_end_of_the_month = np.busday_count(dt.datetime.now().strftime('%Y-%m-%d'), str(dt.datetime.now().strftime('%Y-%m'))+'-30')

current_month = dt.datetime.now().strftime('%Y-%m')

#fte
row_month = planned_ftes[planned_ftes['month'] == current_month]
fte = row_month['planned_fte'].item()

#Total fte days and Total expected parsers
#total_busday = 20
fte_days = fte * busdays_to_end_of_the_month#total_busday

to_make_parsers = fte * EXPECTED_PARSERS_PER_FTE

#Total parsers done this month
total_parsers_done_current_month = sum(df[df['month'] == current_month]['count'])

to_make_parsers -= total_parsers_done_current_month

#Consedering parsers in development
for i in total_list_parsers_normalized.index:
    work_days_left_for_each_parser = (total_list_parsers_normalized[i]*estimative_day_by_size[i[1]] / total_list_parsers[i])
    #print(f'{work_days_left_for_each_parser} dias pra fechar o parser de tamanho{i}')
    if work_days_left_for_each_parser <= busdays_to_end_of_the_month:
        fte_days -= total_list_parsers_normalized[i]*estimative_day_by_size[i[1]]
        to_make_parsers -= total_list_parsers[i]
    else:
        fte_days -= total_list_parsers[i]*busdays_to_end_of_the_month

#Sugestion to make parsers
i = 0
parser_day_value = estimative_day_by_size[i]
parser_size = estimative_day_by_size.index[i]

while True:
    if parser_day_value <= 0.3*fte_days and parser_day_value <= busdays_to_end_of_the_month:
        fte_days -= parser_day_value
        to_make_parsers_list[i] += 1
        to_make_parsers -= 1
        
    elif parser_size == 'XS' and to_make_parsers > 0:
        fte_days = 0
        to_make_parsers_list['XS'] += round(to_make_parsers)
        to_make_parsers = 0
        
    else:
        if i+1 < len(estimative_day_by_size):
            i += 1
        parser_day_value = estimative_day_by_size[i]
        parser_size = estimative_day_by_size.index[i]
        if to_make_parsers <= 0:
            break
print()
if sum(to_make_parsers_list) == 0:
    print(f'O número de parsers feitos nesse mês é de {total_parsers_done_current_month} e já atingiu a meta de {fte * EXPECTED_PARSERS_PER_FTE}.')
else:
    print('Os seguintes parsers já estão sendo feitos:')
    print(total_list_parsers.groupby('Size/Complexity').size())
    print()
    print('É necessário então para completar a meta mensal:')
    print()
    for i in to_make_parsers_list.index:
        print(f'{to_make_parsers_list[i]} parsers de tamanho {i}')
        print('--*--'*5)


Os seguintes parsers já estão sendo feitos:
Size/Complexity
M     3
S     1
XL    1
XS    1
dtype: int64

É necessário então para completar a meta mensal:

0 parsers de tamanho XL
--*----*----*----*----*--
1 parsers de tamanho L
--*----*----*----*----*--
0 parsers de tamanho M
--*----*----*----*----*--
2 parsers de tamanho S
--*----*----*----*----*--
7 parsers de tamanho XS
--*----*----*----*----*--


# Calculating General Estimatives

In [48]:
def get_extremes(data_frame, duration_column):
    upper_q = partial(pd.Series.quantile, q=0.95)
    lower_q = partial(pd.Series.quantile, q=0.05)

    upper_extremes = data_frame[duration_column].agg([upper_q])["quantile"]
    lower_extremes = data_frame[duration_column].agg([lower_q])["quantile"]
    
    return lower_extremes, upper_extremes

In [49]:
def calculate_estimatives_by_duration_column(data_frame, duration_column, print_results=True):
    lower_extremes, upper_extremes = get_extremes(data_frame, duration_column)
    
    done_extremes_removed = data_frame[(data_frame[duration_column] > lower_extremes) & (data_frame[duration_column] < upper_extremes)]
    mean_removed_extremes = done_extremes_removed[duration_column].mean()
    
    small_q = partial(pd.Series.quantile, q=0.25)
    small_limit = done_extremes_removed[duration_column].agg([small_q])["quantile"]
    
    small_extremes_removed = done_extremes_removed[(done_extremes_removed[duration_column] <= small_limit)]
    not_small_extremes_removed = done_extremes_removed[(done_extremes_removed[duration_column] > small_limit)]
    
    mean_small_extremes_removed = small_extremes_removed[duration_column].mean()
    mean_not_small_extremes_removed = not_small_extremes_removed[duration_column].mean()
    
    total_developed = len(data_frame)
    
    if print_results:
        features = ('lower_extremes', 'upper_extremes', 'small limit', 'Done estimate (with "extremes" removed)',
                   'Done estimate for "Small" ones', 'Done estimate for "Big" ones', 'Total_developed')
        values = (lower_extremes, upper_extremes, small_limit, mean_removed_extremes, mean_small_extremes_removed, 
                 mean_not_small_extremes_removed, total_developed)
        general_estimatives = {'Feature':features, 'Value':values}
        general_estimatives_df = pd.DataFrame(data=general_estimatives)
        display(general_estimatives_df)
    
    return done_extremes_removed

In [50]:
def calculate_estimatives(data_frame):
    display(Markdown('### Total Duration:'))
    calculate_estimatives_by_duration_column(data_frame, 'duration')
    print('\n')
    display(Markdown('### Total Dev Duration:'))
    calculate_estimatives_by_duration_column(data_frame, 'dev_duration')
    print('\n')
    display(Markdown('## BUSINESS DAY'))
    print('\n')
    display(Markdown('### Business Day Duration:'))
    calculate_estimatives_by_duration_column(data_frame, 'busday_duration')
    print('\n')
    display(Markdown('### Business Day Dev Duration:'))
    calculate_estimatives_by_duration_column(data_frame, 'busday_dev_duration')

In [51]:
def generate_table_amount_delivered_by_period(df, total_fte, period): # quarter or month
    by_period = df[[period, 'count']].groupby(period).sum('count')

    by_period_fte = pd.merge(by_period, total_fte, on=period, how='left')

    period_result = by_period_fte[[period, 'count', 'fte']].groupby([period,'count']).sum('fte')
    period_result.reset_index(drop=False, inplace=True)

    period_result['parsers_per_fte'] = period_result['count'].div(period_result['fte'])
    
    period_result['target_count'] = period_result['fte'].multiply(EXPECTED_PARSERS_PER_FTE)
    period_result['target_diff'] = period_result['count'].subtract(period_result['target_count'])

    period_result[period] = period_result[period].astype(str)

    return period_result

In [52]:
def generate_chart_amount_delivered_by_period(df, period):
    display(df)
    ax = df[[period,'fte', 'parsers_per_fte', 'target_count']].plot(x=period, linestyle='-', marker='o', color=['orange', 'pink', 'cyan'])
    df[[period,'count']].plot(x=period, kind='bar', ax=ax)
    plt.legend(loc='upper right')
    y = tuple(df.groupby(period).sum('count')['count'])
    for i in range(len(y)):
        plt.text(x=i, y=y[i], s=str(y[i]), ha='center', va='bottom')

In [53]:
def generate_chart_and_table_amount_delivered_by_period(df, total_fte, period):
    if period in ('month', 'quarter'):
        period_result = generate_table_amount_delivered_by_period(df, total_fte, period)
        generate_chart_amount_delivered_by_period(period_result, period)
    else:
        raise Exception(f'"{period}" is not defined. Must be "month" or "quarter".')

In [54]:
def generate_table_amount_delivered_by_period_and_size_complexity(df, period): # quarter or month
    period_result = df.groupby([period, 'Size/Complexity']).size().unstack()
    period_result.fillna(value=0, inplace=True)
    return period_result

In [55]:
def generate_chart_amount_delivered_by_period_and_size_complexity(df, period):
    display(df)
    df.plot(kind='bar', stacked=True)
    plt.title(f'Parsers delivered by {period} and Size/Complexity')
    plt.legend(loc='upper right')
    y = tuple(df.sum(axis=1))
    for i in range(len(y)):
        plt.text(x=i, y=y[i], s=str(y[i]), ha='center', va='bottom')

In [56]:
def generate_chart_and_table_amount_delivered_by_period_and_size_complexity(df, period):
    if period in ('month', 'quarter'):
        period_result = generate_table_amount_delivered_by_period_and_size_complexity(df, period)
        generate_chart_amount_delivered_by_period_and_size_complexity(period_result, period)
    else:
        raise Exception(f'"{period}" is not defined. Must be "month" or "quarter".')

# General Estimatives

In [57]:
calculate_estimatives(done)

### Total Duration:

Unnamed: 0,Feature,Value
0,lower_extremes,1.0
1,upper_extremes,62.0
2,small limit,9.0
3,"Done estimate (with ""extremes"" removed)",18.84507
4,"Done estimate for ""Small"" ones",6.454545
5,"Done estimate for ""Big"" ones",24.408163
6,Total_developed,81.0






### Total Dev Duration:

Unnamed: 0,Feature,Value
0,lower_extremes,0.0
1,upper_extremes,43.0
2,small limit,5.0
3,"Done estimate (with ""extremes"" removed)",12.971831
4,"Done estimate for ""Small"" ones",2.857143
5,"Done estimate for ""Big"" ones",17.22
6,Total_developed,81.0






## BUSINESS DAY





### Business Day Duration:

Unnamed: 0,Feature,Value
0,lower_extremes,1.0
1,upper_extremes,44.0
2,small limit,7.0
3,"Done estimate (with ""extremes"" removed)",13.43662
4,"Done estimate for ""Small"" ones",4.727273
5,"Done estimate for ""Big"" ones",17.346939
6,Total_developed,81.0






### Business Day Dev Duration:

Unnamed: 0,Feature,Value
0,lower_extremes,0.0
1,upper_extremes,31.0
2,small limit,3.0
3,"Done estimate (with ""extremes"" removed)",9.28169
4,"Done estimate for ""Small"" ones",2.095238
5,"Done estimate for ""Big"" ones",12.3
6,Total_developed,81.0


# Amount delivered by month

In [58]:
generate_chart_and_table_amount_delivered_by_period(df, total_fte, 'month')

Unnamed: 0,month,count,fte,parsers_per_fte,target_count,target_diff
0,2021-01,4,6.1,0.655738,15.25,-11.25
1,2021-02,11,5.65,1.946903,14.125,-3.125
2,2021-03,10,6.05,1.652893,15.125,-5.125
3,2021-04,12,6.4,1.875,16.0,-4.0
4,2021-05,8,5.29,1.512287,13.225,-5.225
5,2021-06,10,4.96,2.016129,12.4,-2.4
6,2021-07,9,6.27,1.435407,15.675,-6.675
7,2021-08,17,6.5,2.615385,16.25,0.75


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Amount delivered by quarter

In [59]:
generate_chart_and_table_amount_delivered_by_period(df, total_fte, 'quarter')

Unnamed: 0,quarter,count,fte,parsers_per_fte,target_count,target_diff
0,2021Q1,25,17.8,1.404494,44.5,-19.5
1,2021Q2,30,16.65,1.801802,41.625,-11.625
2,2021Q3,26,12.87,2.020202,32.175,-6.175


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Amount delivered by month and Size/Complexity

In [60]:
generate_chart_and_table_amount_delivered_by_period_and_size_complexity(df, 'month')

Size/Complexity,L,M,S,XL,XS
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-01,2.0,0.0,0.0,0.0,2.0
2021-02,2.0,2.0,3.0,3.0,1.0
2021-03,0.0,3.0,4.0,1.0,2.0
2021-04,0.0,2.0,8.0,1.0,1.0
2021-05,0.0,3.0,1.0,0.0,4.0
2021-06,2.0,2.0,3.0,0.0,3.0
2021-07,0.0,2.0,2.0,0.0,5.0
2021-08,4.0,2.0,4.0,1.0,6.0


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

# Amount delivered by quarter and Size/Complexity

In [61]:
generate_chart_and_table_amount_delivered_by_period_and_size_complexity(df, 'quarter')

Size/Complexity,L,M,S,XL,XS
quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021Q1,4,5,7,4,5
2021Q2,2,7,12,1,8
2021Q3,4,4,6,1,11


Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …