In [None]:
import calendar
import datetime as dt
import dateutil.parser as dp
import gspread
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import os

from IPython.display import display
from functools import partial
from oauth2client.service_account import ServiceAccountCredentials

# Setting up key, token and Service Account

In [None]:
# Load the Trello credentials and the Google Sheets settings from keys.json.
with open('keys.json') as file:
    keys = json.load(file)

# Trello API credentials, sent with every Trello request.
api_key = keys['trello']['api_key']
token = keys['trello']['token']

# Spreadsheet key and service-account file used to open the Google Sheet.
spreadsheet_key = keys['sheet']['spreadsheet_key']
sa_file = keys['sheet']['sa_file']

# Extracting Sheet Data

In [None]:
def convert_sheet_date(sheet_date):
    """Convert a '<Month name>-<year>' sheet label into a 'YYYY-MM' string.

    The label is split on '-'; the first piece (stripped) must be an English
    month name and the second piece (stripped) is used as the year as-is.

    Raises:
        KeyError: if the first piece is not a known month name.
        IndexError: if the label contains no '-'.
    """
    month_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    # Map each month name to its zero-padded number ('01' .. '12').
    month_numbers = {
        month_name: '{:02d}'.format(position)
        for position, month_name in enumerate(month_names, start=1)
    }

    pieces = sheet_date.split('-')
    month_number = month_numbers[pieces[0].strip()]

    return '-'.join((pieces[1].strip(), month_number))

In [None]:
def get_consolidated_sheet():
    """Fetch every cell value of the "Consolidated" worksheet.

    Authorizes gspread with the service-account file named by the module-level
    ``sa_file`` and opens the spreadsheet identified by ``spreadsheet_key``.

    Returns:
        The result of ``worksheet.get_all_values()`` for the "Consolidated" sheet.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        sa_file, ['https://spreadsheets.google.com/feeds'])
    client = gspread.authorize(credentials)

    consolidated = client.open_by_key(spreadsheet_key).worksheet("Consolidated")

    return consolidated.get_all_values()

In [None]:
def get_total_by_date():
    """Pair each month label of the consolidated sheet with a value from row 14.

    Returns:
        A list of ``[month, value]`` pairs, where ``month`` is a 'YYYY-MM'
        string produced by ``convert_sheet_date`` and ``value`` is the raw cell
        content taken from the sheet.
    """
    table = get_consolidated_sheet()

    # Non-empty header cells; the first one is skipped, the rest are month labels.
    labels = [cell for cell in table[0] if cell]
    months = [convert_sheet_date(label) for label in labels[1:]]

    # Keep every third cell of row 14 (the "executed" column) and drop the
    # leading cell of that selection.
    # NOTE(review): row 14 is presumably the totals row of the sheet — confirm.
    executed = list(table[14])[::3]
    del executed[0]

    return [[months[position], executed[position]] for position in range(len(executed))]

In [None]:
def create_ftes_dataframe():
    """Build the FTE table from the hard-coded legacy values plus the sheet data.

    Returns:
        A DataFrame with the columns ``fte`` (float), ``month`` (monthly period)
        and ``quarter`` (quarterly period), one row per month.
    """
    # Values taken from the deprecated sheet; they will never change.
    legacy_rows = [
        ['2020-03', 4.0],
        ['2020-04', 6.15],
        ['2020-05', 6.25],
        ['2020-06', 6.0],
        ['2020-07', 3.65],
        ['2020-08', 4.57],
        ['2020-09', 4.52],
        ['2020-10', 4.9],
        ['2020-11', 4.7]
    ]

    rows = legacy_rows + get_total_by_date()
    total_fte = pd.DataFrame(rows, columns=['month_base', 'fte'])

    # The 'YYYY-MM' text is only needed to derive the two period columns,
    # so it is removed from the frame as soon as it has been parsed.
    month_start = pd.to_datetime(total_fte.pop('month_base'))
    total_fte['month'] = pd.PeriodIndex(month_start, freq='M')
    total_fte['quarter'] = pd.PeriodIndex(month_start, freq='Q')
    total_fte['fte'] = total_fte['fte'].astype(float)

    return total_fte

In [None]:
# Build the FTE table (hard-coded legacy values plus the "Consolidated" sheet);
# it is merged with the Trello counts further down.
total_fte = create_ftes_dataframe()

In [None]:
# Show the FTE table.
total_fte

# Extracting Trello Data

In [None]:
def get_data_from_trello_api(url, timeout=30):
    """Send an authenticated GET request to the Trello API and return the parsed JSON.

    The module-level ``api_key`` and ``token`` are sent as query parameters.

    Args:
        url: Full Trello API URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely on a stalled connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        Exception: if the response status code is above 299.
        requests.exceptions.Timeout: if the server does not answer in time.
    """
    headers = {
        "Accept": "application/json"
    }

    query = {
        'key': api_key,
        'token': token
    }

    response = requests.get(url, headers=headers, params=query, timeout=timeout)

    if response.status_code > 299:
        raise Exception('Something went wrong with the request {0} '
                        'with status: {1}'.format(url, response.status_code))

    return json.loads(response.text)

In [None]:
def read_json(json_name):
    """Return the parsed content of the JSON file at ``json_name``."""
    with open(json_name) as source:
        return json.load(source)

In [None]:
def write_json(json_name, content_to_write):
    """Serialize ``content_to_write`` to ``json_name`` as indented, key-sorted JSON.

    The file is opened in write mode, so existing content is replaced.
    """
    dump_options = {'indent': 4, 'sort_keys': True}

    with open(json_name, 'w') as target:
        json.dump(content_to_write, target, **dump_options)

In [None]:
def generate_timestamp():
    """Return the current local date formatted as 'DD-MM-YYYY'."""
    return dt.datetime.now().strftime('%d-%m-%Y')

In [None]:
def create_folder_for_dumping(name, current_timestamp):
    """Ensure the folder ``dumps/<name>/<current_timestamp>`` exists.

    All missing intermediate folders are created in one call, so a ``name``
    that itself contains a '/' works too, and calling the function again for
    an existing folder is a no-op.

    Args:
        name: Board name used as the sub-folder of ``dumps``.
        current_timestamp: Folder name for this run's dumps.
    """
    # makedirs(exist_ok=True) replaces the exists()/mkdir() pairs, which could
    # fail if the folder appeared between the check and the creation.
    os.makedirs('dumps/' + name + '/' + current_timestamp, exist_ok=True)

In [None]:
def get_data_from_dump(board_name, dump_name, timestamp):
    """Load ``dumps/<board_name>/<timestamp>/dump_<dump_name>.json`` and return its content."""
    dump_path = f'dumps/{board_name}/{timestamp}/dump_{dump_name}.json'

    return read_json(dump_path)

# Accessing API

In [None]:
def get_board_by_name(board_name):
    """Run a Trello search for ``board_name`` and return the decoded response.

    Args:
        board_name: Text to search for; it is percent-encoded before being
            placed in the URL.

    Returns:
        The JSON-decoded search response from ``get_data_from_trello_api``.
    """
    from urllib.parse import quote

    # Encode the name so characters such as '&', '#' or '+' stay part of the
    # search text instead of being read as URL syntax.
    encoded_name = quote(str(board_name), safe='')
    boards_url = f'https://api.trello.com/1/search?query={encoded_name}'

    return get_data_from_trello_api(boards_url)

In [None]:
def get_lists_by_board(board_name, board_id, current_timestamp):
    """Fetch the lists of the Trello board ``board_id``.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(f'https://api.trello.com/1/boards/{board_id}/lists')

In [None]:
def get_custom_fields_by_board(board_name, board_id, current_timestamp):
    """Fetch the custom-field definitions of the Trello board ``board_id``.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(f'https://api.trello.com/1/boards/{board_id}/customFields')

In [None]:
def get_cards_by_board(board_name, board_id, current_timestamp):
    """Fetch the cards of the Trello board ``board_id``, including their custom field items.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(
        f'https://api.trello.com/1/boards/{board_id}/cards/?customFieldItems=true')

# Creating dumps

In [None]:
def create_boards_dump(board_name, current_timestamp):
    """Save the Trello search result for ``board_name`` as ``dump_board.json``.

    Returns:
        The path of the written dump file.
    """
    search_result = get_board_by_name(board_name)

    dump_path = 'dumps/{0}/{1}/dump_board.json'.format(board_name, current_timestamp)
    write_json(dump_path, search_result)

    return dump_path

In [None]:
def get_id_board_from_dump(board_name, current_timestamp):
    """Return the id of the first board listed in the saved board dump."""
    dumped_search = get_board_by_name_from_dump(board_name, current_timestamp)

    # Only the first entry of 'boards' is considered.
    return dumped_search['boards'][0]['id']

In [None]:
def create_lists_dump(board_name, board_id, current_timestamp):
    """Save the lists of board ``board_id`` as ``dump_lists.json``.

    Returns:
        The path of the written dump file.
    """
    board_lists = get_lists_by_board(board_name, board_id, current_timestamp)

    dump_path = 'dumps/{0}/{1}/dump_lists.json'.format(board_name, current_timestamp)
    write_json(dump_path, board_lists)

    return dump_path

In [None]:
def create_custom_fields_dump(board_name, board_id, current_timestamp):
    """Save the custom-field definitions of board ``board_id`` as ``dump_custom_field.json``.

    Returns:
        The path of the written dump file.
    """
    field_definitions = get_custom_fields_by_board(board_name, board_id, current_timestamp)

    dump_path = 'dumps/{0}/{1}/dump_custom_field.json'.format(board_name, current_timestamp)
    write_json(dump_path, field_definitions)

    return dump_path

In [None]:
def create_cards_dump(board_name, board_id, current_timestamp):
    """Save the cards of board ``board_id`` as ``dump_cards.json``.

    Returns:
        The path of the written dump file.
    """
    cards = get_cards_by_board(board_name, board_id, current_timestamp)

    dump_path = 'dumps/{0}/{1}/dump_cards.json'.format(board_name, current_timestamp)
    write_json(dump_path, cards)

    return dump_path

In [None]:
def create_dumps_by_name(board_name, current_timestamp):
    """Create the dump folder and write the board, lists, custom-field and card dumps.

    The board dump is written first because the board id used by the other
    three dumps is read back from it.
    """
    create_folder_for_dumping(board_name, current_timestamp)
    create_boards_dump(board_name, current_timestamp)

    board_id = get_id_board_from_dump(board_name, current_timestamp)

    dump_creators = (create_lists_dump, create_custom_fields_dump, create_cards_dump)
    for create_dump in dump_creators:
        create_dump(board_name, board_id, current_timestamp)

# Getting data from dumps

In [None]:
def get_board_by_name_from_dump(board_name, timestamp):
    """Return the content of the saved ``dump_board.json`` for this board and timestamp."""
    return get_data_from_dump(board_name, 'board', timestamp)

In [None]:
def mapping_lists_by_board_from_dump(board_name, timestamp):
    """Return a ``{list id: list name}`` dict built from the saved lists dump."""
    dumped_lists = get_data_from_dump(board_name, 'lists', timestamp)

    return {trello_list['id']: trello_list['name'] for trello_list in dumped_lists}

In [None]:
def mapping_custom_fields_by_board_from_dump(board_name, timestamp):
    """Return a ``{custom field id: custom field name}`` dict built from the saved dump."""
    dumped_fields = get_data_from_dump(board_name, 'custom_field', timestamp)

    return {field['id']: field['name'] for field in dumped_fields}

In [None]:
def get_useful_cards_by_board(board_name, timestamp):
    """Return one summary dict per card that sits in the 'Done' list.

    Each summary holds the card's 'Start', 'End' and 'EndDev' custom-field
    dates plus its 'name', 'shortUrl' and 'idList' (the list name, not the id).

    Raises:
        Exception: if a 'Done' card does not have all three dates filled in.
    """
    raw_cards = get_data_from_dump(board_name, 'cards', timestamp)

    # Keep only the card attributes that are used below.
    wanted_keys = ('id', 'name', 'idList', 'shortUrl', 'customFieldItems')
    trimmed_cards = [
        {key: raw_card[key] for key in raw_card if key in wanted_keys}
        for raw_card in raw_cards
    ]

    field_names = mapping_custom_fields_by_board_from_dump(board_name, timestamp)
    list_names = mapping_lists_by_board_from_dump(board_name, timestamp)

    date_fields = ('Start', 'End', 'EndDev')
    done_cards = []
    for card in trimmed_cards:
        list_name = list_names[card['idList']]
        if list_name != 'Done':
            continue

        summary = {}
        for item in card['customFieldItems']:
            field_name = field_names[item['idCustomField']]
            if field_name in date_fields:
                summary[field_name] = item['value']['date']

        # At this point the summary holds date fields only, so fewer than
        # three entries means at least one date is missing on the card.
        if len(summary) < 3:
            raise Exception(
                'Make sure all dates are filled in the card: Start, EndDev and End for {0}'.format(card['name']))

        summary['name'] = card['name']
        summary['shortUrl'] = card['shortUrl']
        summary['idList'] = list_name
        done_cards.append(summary)

    return done_cards

In [None]:
def create_dataframe_from_trello(board_name, timestamp):
    """Build the card DataFrame from the dumps and add duration and period columns.

    Added columns:
        dev_duration / duration: calendar days from Start to EndDev / End.
        busday_dev_duration / busday_duration: the same spans counted with
            ``np.busday_count``.
        quarter / month: period of the End date.
        count: constant 1, used for summing cards per period.
    """
    df = pd.DataFrame.from_dict(get_useful_cards_by_board(board_name, timestamp))

    # Date-only versions of the three timestamps, shared by all duration columns.
    dev_end_dates = pd.to_datetime(df['EndDev']).dt.date
    start_dates = pd.to_datetime(df['Start']).dt.date
    end_dates = pd.to_datetime(df['End']).dt.date

    df['dev_duration'] = (dev_end_dates - start_dates).dt.days
    df['duration'] = (end_dates - start_dates).dt.days

    df['busday_dev_duration'] = np.busday_count(start_dates, dev_end_dates)
    df['busday_duration'] = np.busday_count(start_dates, end_dates)

    for column, frequency in (('quarter', 'Q'), ('month', 'M')):
        df[column] = pd.PeriodIndex(df['End'], freq=frequency)

    df["count"] = 1

    return df

In [None]:
# Show every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_rows', None)

# Dump the 'CBN' board under today's date stamp (this calls the Trello API),
# then build the card DataFrame from those dump files.
current_timestamp = generate_timestamp()
create_dumps_by_name('CBN', current_timestamp)
df = create_dataframe_from_trello('CBN', current_timestamp)

In [None]:
# Show the full card table.
df

In [None]:
# Split the cards by the list they sit in.
# NOTE(review): get_useful_cards_by_board only keeps cards from the 'Done' list,
# so `cancelled` is expected to be empty — confirm whether it is still needed.
cancelled = df[(df['idList'] == 'Cancelled')]
done = df[(df['idList'] == 'Done')]

# Joining both fte and parsers tables

# Calculating General Estimates

# Functions

In [None]:
def get_extremes(data_frame, duration_column, lower_q=0.05, upper_q=0.95):
    """Return the lower and upper quantile of ``duration_column``.

    Args:
        data_frame: DataFrame holding the duration column.
        duration_column: Name of the column to measure.
        lower_q: Quantile used as the lower bound (default 0.05).
        upper_q: Quantile used as the upper bound (default 0.95).

    Returns:
        A ``(lower, upper)`` tuple with the two quantile values.
    """
    durations = data_frame[duration_column]

    # Series.quantile is called directly rather than through agg() with a
    # partial, which depended on pandas labelling the result "quantile".
    return durations.quantile(lower_q), durations.quantile(upper_q)

In [None]:
def calculate_estimatives_by_duration_column(data_frame, duration_column, print_results=True):
    """Drop the extreme durations and optionally print mean-duration estimates.

    Rows whose duration is not strictly between the bounds returned by
    ``get_extremes`` are removed. Among the remaining rows, the 0.25 quantile
    separates the "small" cards (at or below it) from the "big" ones.

    Args:
        data_frame: DataFrame holding the duration column.
        duration_column: Name of the duration column to analyse.
        print_results: When true, print the bounds, the small limit, the three
            means and the number of rows in ``data_frame``.

    Returns:
        The rows of ``data_frame`` with the extremes removed.
    """
    lower_extremes, upper_extremes = get_extremes(data_frame, duration_column)

    durations = data_frame[duration_column]
    done_extremes_removed = data_frame[(durations > lower_extremes) & (durations < upper_extremes)]

    if print_results:
        trimmed_durations = done_extremes_removed[duration_column]

        # Series.quantile is called directly rather than through agg() with a
        # partial, which depended on pandas labelling the result "quantile".
        small_limit = trimmed_durations.quantile(0.25)

        mean_small = trimmed_durations[trimmed_durations <= small_limit].mean()
        mean_not_small = trimmed_durations[trimmed_durations > small_limit].mean()

        print('lower_extremes ->', lower_extremes)
        print('upper_extremes ->', upper_extremes)
        print('small limit ->', small_limit)
        print('Done estimate (with "extremes" removed) ->', trimmed_durations.mean())
        print('Done estimate for "Small" ones ->', mean_small)
        print('Done estimate for "Big" ones ->', mean_not_small)
        print('Total_developed ->', len(data_frame))

    return done_extremes_removed
    
    

In [None]:
def calculate_estimatives(data_frame):
    """Print the estimates for the four duration columns of ``data_frame``.

    Calendar-day durations come first, then a banner, then the business-day
    durations. Nothing is returned.
    """
    def report(title, duration_column):
        # Title, blank lines, then the printed estimates for one column.
        print(title)
        print('\n')
        calculate_estimatives_by_duration_column(data_frame, duration_column)

    report('Total Duration:', 'duration')
    print('\n')
    report('Total Dev Duration:', 'dev_duration')
    print('\n')

    for banner_line in ('##############', '#BUSINESS DAY#', '##############'):
        print(banner_line)
    print('\n')

    report('Business Day Duration:', 'busday_duration')
    print('\n')
    report('Business Day Dev Duration:', 'busday_dev_duration')
    

In [None]:
def generate_tables_and_charts(data_frame, duration_column, print_tables, plot_charts):
    """Build three summary tables and optionally display and/or plot them.

    The tables are: mean of ``duration_column`` per quarter, sum of ``count``
    per quarter, and sum of ``count`` per month.

    Args:
        data_frame: DataFrame with ``duration_column``, 'quarter', 'month' and 'count'.
        duration_column: Name of the duration column to average.
        print_tables: When true, display the three tables.
        plot_charts: When true, plot each table.
    """
    mean_by_quarter = data_frame[[duration_column, 'quarter']].groupby('quarter').mean()

    # sum() takes no column name: its first positional argument is
    # numeric_only. The frames are already narrowed to the column being summed.
    count_by_quarter = data_frame[['quarter', 'count']].groupby('quarter').sum()
    count_by_month = data_frame[['month', 'count']].groupby('month').sum()

    tables = (mean_by_quarter, count_by_quarter, count_by_month)

    if print_tables:
        for table in tables:
            display(table)

    if plot_charts:
        for table in tables:
            table.plot()


In [None]:
def generate_tables_and_charts_by_duration_colum(data_frame, duration_column, remove_extremes):
    """Display and plot the summary tables, optionally after dropping extreme durations.

    When ``remove_extremes`` is true, the rows are first filtered by
    ``calculate_estimatives_by_duration_column`` (with printing turned off).
    """
    source = data_frame
    if remove_extremes:
        source = calculate_estimatives_by_duration_column(data_frame, duration_column, False)

    generate_tables_and_charts(source, duration_column, True, True)
    

In [None]:
def generate_full_duration_table_and_chart(data_frame, period): # quarter or month
    """Display and plot the mean of the four duration columns grouped by ``period``."""
    duration_columns = ['dev_duration', 'duration', 'busday_dev_duration', 'busday_duration']

    averages = data_frame[duration_columns + [period]].groupby(period).mean()

    display(averages)
    averages.plot()

# General Estimates

In [None]:
# Print the estimates for all four duration columns of the 'Done' cards.
calculate_estimatives(done)

# Duration time

In [None]:
# Work on a copy so the original card table stays untouched.
df1 = df.copy()

In [None]:
# Mean durations per quarter, all cards.
generate_full_duration_table_and_chart(df1, 'quarter')

In [None]:
# Mean durations per month, all cards.
generate_full_duration_table_and_chart(df1, 'month')

# Duration Time without extremes

In [None]:
# Copy the card table, then keep only the rows whose 'duration' lies strictly
# between the extremes (printing of the estimates is turned off).
df2 = df.copy()
df2 = calculate_estimatives_by_duration_column(df2, 'duration', False)

In [None]:
# Mean durations per quarter, extremes removed.
generate_full_duration_table_and_chart(df2, 'quarter')

In [None]:
# Mean durations per month, extremes removed.
generate_full_duration_table_and_chart(df2, 'month')

# Amount delivered by month

In [None]:
# Bar chart of the number of cards per month.
# sum() takes no column name (its first positional argument is numeric_only);
# the frame is already narrowed to the 'count' column.
df[['month', 'count']].groupby('month').sum().plot(kind='bar')

In [None]:
# Number of cards per month.
# sum() takes no column name (its first positional argument is numeric_only);
# the frame is already narrowed to the 'count' column.
by_month = df[['month', 'count']].groupby('month').sum()

# Attach the FTE of each month; months without an FTE entry are kept (left join).
monthly_result = pd.merge(by_month, total_fte, on='month', how='left')

# Periods are turned into text so they can be used as the x axis labels.
monthly_result['month'] = monthly_result['month'].astype(str)

# FTE as an orange line, card count as bars, both on the same axes.
ax = monthly_result[['month','fte']].plot(x='month', linestyle='-', marker='o', color='orange')
monthly_result[['month','count']].plot(x='month', kind='bar', ax=ax)

In [None]:
# Show the monthly count / FTE table.
monthly_result

# Amount delivered by quarter

In [None]:
# Bar chart of the number of cards per quarter.
# sum() takes no column name (its first positional argument is numeric_only);
# the frame is already narrowed to the 'count' column.
df[['quarter', 'count']].groupby('quarter').sum().plot(kind='bar')

In [None]:
# Number of cards per quarter.
# sum() takes no column name (its first positional argument is numeric_only);
# the frames below are already narrowed to the columns being summed.
by_quarter = df[['quarter', 'count']].groupby('quarter').sum()

# Left join with the FTE table on 'quarter'; total_fte has one row per month.
by_quarter_fte = pd.merge(by_quarter, total_fte, on='quarter', how='left')

# Collapse back to one row per quarter by summing the monthly 'fte' values;
# 'count' is part of the group key, so it is carried over unchanged.
quartly_result = (
    by_quarter_fte[['quarter', 'count', 'fte']]
    .groupby(['quarter', 'count'])
    .sum()
    .reset_index()
)

# Periods are turned into text so they can be used as the x axis labels.
quartly_result['quarter'] = quartly_result['quarter'].astype(str)

# FTE as an orange line, card count as bars, both on the same axes.
ax = quartly_result[['quarter','fte']].plot(x='quarter', linestyle='-', marker='o', color='orange')
quartly_result[['quarter','count']].plot(x='quarter', kind='bar', ax=ax)

In [None]:
# Show the quarterly count / FTE table.
quartly_result

# Data for business days

In [None]:
# Business-day duration tables and charts, extremes removed.
generate_tables_and_charts_by_duration_colum(df, 'busday_duration', True)

In [None]:
# Business-day duration tables and charts, extremes kept.
generate_tables_and_charts_by_duration_colum(df, 'busday_duration', False)

# Data for duration days

In [None]:
# Calendar-day duration tables and charts, extremes removed.
generate_tables_and_charts_by_duration_colum(df, 'duration', True)

In [None]:
# Calendar-day duration tables and charts, extremes kept. This cell previously
# repeated the call with remove_extremes=True; the business-day section runs
# True and then False, so False is used here to match.
generate_tables_and_charts_by_duration_colum(df, 'duration', False)