In [None]:
import calendar
import datetime as dt
import dateutil.parser as dp
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import os

from IPython.display import display, Markdown
from functools import partial
from oauth2client.service_account import ServiceAccountCredentials
from googleapiclient.discovery import build

from sklearn.feature_selection import SelectKBest
from sklearn.preprocessing import MinMaxScaler
from sklearn import linear_model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV, train_test_split

# Setting up key, token and Service Account

In [None]:
# Load all credentials from keys.json (read from the current working directory)
# so that no secret is written in the notebook itself.
with open('keys.json') as file:
    keys = json.load(file)
    # Trello credentials; sent as query parameters on every Trello API request.
    api_key = keys['trello']['api_key']
    token = keys['trello']['token']
    # Google Sheets spreadsheet id and the service-account key file used to read it.
    spreadsheet_key = keys['sheet']['spreadsheet_key']
    sa_file = keys['sheet']['sa_file']

# Extracting Sheet Data

In [None]:
def convert_sheet_date(sheet_date):
    """Turn a sheet header such as ``'January - 2021'`` into ``'2021-01'``.

    The header is split on ``'-'``; the first piece must be a full English
    month name and the second piece is used as the year. Both pieces are
    stripped of surrounding whitespace.

    Raises:
        KeyError: the first piece is not a full English month name.
        IndexError: the header has no ``'-'`` separator.
    """
    month_names = (
        'January', 'February', 'March', 'April', 'May', 'June',
        'July', 'August', 'September', 'October', 'November', 'December',
    )
    # Month name -> zero-padded month number ('01' .. '12').
    month_numbers = {
        name: '%02d' % number for number, name in enumerate(month_names, start=1)
    }

    pieces = sheet_date.split('-')
    month_number = month_numbers[pieces[0].strip()]
    year = pieces[1].strip()

    return '{0}-{1}'.format(year, month_number)

In [None]:
def get_consolidated_sheet():
    """Download every cell of the 'Consolidated' range of the configured spreadsheet.

    Authenticates with the service-account file ``sa_file`` and reads the
    spreadsheet identified by ``spreadsheet_key`` through the Sheets v4 API.

    Returns:
        The ``'values'`` entry of the API response (rows as returned by the
        API), or an empty list when the response has no ``'values'`` key.
    """
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        sa_file, ['https://spreadsheets.google.com/feeds'])
    spreadsheets = build('sheets', 'v4', credentials=credentials).spreadsheets()

    request = spreadsheets.values().get(spreadsheetId=spreadsheet_key,
                                        range='Consolidated')
    response = request.execute()

    return response.get('values', [])

In [None]:
def get_total_by_date():
    """Pair each month header of the consolidated sheet with its 'Total' value.

    Returns:
        A list of ``[ 'YYYY-MM', value ]`` pairs, one per value taken from the
        'Total' row.

    Raises:
        IndexError: no row starts with ``'Total'``, or the 'Total' row yields
            more values than there are month headers.
    """
    rows = get_consolidated_sheet()

    # Non-empty header cells: the first one labels the row-name column,
    # the remaining ones are the month headers.
    named_headers = list(filter(None, rows[0]))
    converted_columns = list(map(convert_sheet_date, named_headers[1:]))

    total_rows = [row for row in rows if row and row[0] == 'Total']
    total_row = total_rows[0]

    # Per the original note, only the "executed" column is wanted: every third
    # cell, skipping index 0 (the 'Total' label itself).
    executed_totals = total_row[3::3]

    return [
        [converted_columns[position], amount]
        for position, amount in enumerate(executed_totals)
    ]

In [None]:
def create_ftes_dataframe():
    """Build the FTE-per-month table from hard-coded history plus the live sheet.

    Returns:
        DataFrame with columns ``fte`` (float), ``month`` (monthly period) and
        ``quarter`` (quarterly period), one row per month.
    """
    # from deprecated sheet, it will never be changed
    historical = [
        ['2020-03', 4.0],
        ['2020-04', 6.15],
        ['2020-05', 6.25],
        ['2020-06', 6.0],
        ['2020-07', 3.65],
        ['2020-08', 4.57],
        ['2020-09', 4.52],
        ['2020-10', 4.9],
        ['2020-11', 4.7]
    ]

    frame = pd.DataFrame(historical + get_total_by_date(),
                         columns=['month_base', 'fte'])

    # 'YYYY-MM' strings -> timestamps, from which both period columns derive.
    month_start = pd.to_datetime(frame['month_base'])
    frame['month'] = pd.PeriodIndex(month_start, freq='M')
    frame['quarter'] = pd.PeriodIndex(month_start, freq='Q')
    frame['fte'] = frame['fte'].astype(float)

    # month_base was only a parsing intermediate.
    return frame.drop(columns='month_base')

In [None]:
# Build the monthly FTE table once; the delivery-per-FTE tables further down merge against it.
total_fte = create_ftes_dataframe()

In [None]:
# Show the FTE table (columns: fte, month, quarter) as a visual sanity check.
total_fte

# Extracting Trello Data

In [None]:
def get_data_from_trello_api(url, timeout=30):
    """GET a Trello REST endpoint and return the decoded JSON body.

    The Trello ``api_key`` and ``token`` are sent as query parameters.

    Args:
        url: Full Trello API URL to request.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        The parsed JSON response.

    Raises:
        Exception: the response status code is above 299.
        requests.exceptions.Timeout: the server did not answer within ``timeout``.
    """
    headers = {
       "Accept": "application/json"
    }

    query = {
       'key': api_key,
       'token': token
    }

    response = requests.get(url, headers=headers, params=query, timeout=timeout)

    if response.status_code > 299:
        # The URL is safe to report: the credentials travel in `params`, not in `url`.
        raise Exception('Something went wrong with the request {0} '\
                        'with status: {1}'.format(url, response.status_code))

    return json.loads(response.text)

In [None]:
def read_json(json_name):
    """Return the parsed content of the JSON file at ``json_name``."""
    with open(json_name) as handle:
        return json.load(handle)

In [None]:
def write_json(json_name, content_to_write):
    """Serialize ``content_to_write`` to ``json_name`` (4-space indent, sorted keys).

    An existing file at that path is overwritten.
    """
    with open(json_name, 'w') as target:
        json.dump(content_to_write, target, sort_keys=True, indent=4)

In [None]:
def generate_timestamp():
    """Return today's local date as ``'DD-MM-YYYY'``; used as the dump folder name."""
    now = dt.datetime.now()
    return now.strftime('%d-%m-%Y')

In [None]:
def create_folder_for_dumping(name, current_timestamp):
    """Ensure the folder ``dumps/<name>/<current_timestamp>`` exists.

    Missing parent folders are created as needed and an already existing
    folder is left untouched, so the call is safe to repeat.

    Args:
        name: Board name, used as the first folder level below ``dumps``.
        current_timestamp: Date string used as the innermost folder name.
    """
    # makedirs(exist_ok=True) replaces three check-then-mkdir steps, which could
    # fail if the folder appeared between the check and the mkdir.
    os.makedirs(os.path.join('dumps', name, current_timestamp), exist_ok=True)

In [None]:
def get_data_from_dump(board_name, dump_name, timestamp):
    """Load ``dumps/<board_name>/<timestamp>/dump_<dump_name>.json`` and return its content."""
    dump_path = f'dumps/{board_name}/{timestamp}/dump_{dump_name}.json'
    return read_json(dump_path)

# Accessing API

In [None]:
def get_board_by_name(board_name):
    """Run a Trello search for ``board_name`` and return the raw search response."""
    search_url = 'https://api.trello.com/1/search?query={0}'.format(board_name)
    return get_data_from_trello_api(search_url)

In [None]:
def get_lists_by_board(board_name, board_id, current_timestamp):
    """Fetch the lists of the Trello board ``board_id``.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(f'https://api.trello.com/1/boards/{board_id}/lists')

In [None]:
def get_custom_fields_by_board(board_name, board_id, current_timestamp):
    """Fetch the custom-field definitions of the Trello board ``board_id``.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(
        f'https://api.trello.com/1/boards/{board_id}/customFields')

In [None]:
def get_cards_by_board(board_name, board_id, current_timestamp):
    """Fetch the cards of the Trello board ``board_id``, custom-field items included.

    ``board_name`` and ``current_timestamp`` are accepted but not used here.
    """
    return get_data_from_trello_api(
        f'https://api.trello.com/1/boards/{board_id}/cards/?customFieldItems=true')

# Creating dumps

In [None]:
def create_boards_dump(board_name, current_timestamp):
    """Search Trello for ``board_name`` and save the response as ``dump_board.json``.

    The target folder must already exist. Returns the path of the written file.
    """
    name_of_dump = 'dumps/{0}/{1}/dump_board.json'.format(board_name, current_timestamp)
    write_json(name_of_dump, get_board_by_name(board_name))
    return name_of_dump

In [None]:
def get_id_board_from_dump(board_name, current_timestamp):
    """Return the id of the first board listed in the saved board-search dump."""
    search_result = get_board_by_name_from_dump(board_name, current_timestamp)
    first_board = search_result['boards'][0]
    return first_board['id']

In [None]:
def create_lists_dump(board_name, board_id, current_timestamp):
    """Save the board's lists as ``dump_lists.json`` and return the file path."""
    name_of_dump = 'dumps/{0}/{1}/dump_lists.json'.format(board_name, current_timestamp)
    write_json(name_of_dump,
               get_lists_by_board(board_name, board_id, current_timestamp))
    return name_of_dump

In [None]:
def create_custom_fields_dump(board_name, board_id, current_timestamp):
    """Save the board's custom-field definitions as ``dump_custom_field.json``.

    Returns the path of the written file.
    """
    name_of_dump = 'dumps/{0}/{1}/dump_custom_field.json'.format(board_name, current_timestamp)
    write_json(name_of_dump,
               get_custom_fields_by_board(board_name, board_id, current_timestamp))
    return name_of_dump

In [None]:
def create_cards_dump(board_name, board_id, current_timestamp):
    """Save the board's cards as ``dump_cards.json`` and return the file path."""
    name_of_dump = 'dumps/{0}/{1}/dump_cards.json'.format(board_name, current_timestamp)
    write_json(name_of_dump,
               get_cards_by_board(board_name, board_id, current_timestamp))
    return name_of_dump

In [None]:
def create_dumps_by_name(board_name, current_timestamp):
    """Download the board, lists, custom fields and cards of ``board_name`` into dump files.

    All files are written under ``dumps/<board_name>/<current_timestamp>/``.
    """
    create_folder_for_dumping(board_name, current_timestamp)

    # The board dump has to be written first: the board id is read back from it.
    create_boards_dump(board_name, current_timestamp)
    board_id = get_id_board_from_dump(board_name, current_timestamp)

    for create_dump in (create_lists_dump, create_custom_fields_dump, create_cards_dump):
        create_dump(board_name, board_id, current_timestamp)

# Getting data from dumps

In [None]:
def get_board_by_name_from_dump(board_name, timestamp):
    """Return the content of the saved board-search dump (``dump_board.json``)."""
    return get_data_from_dump(board_name, 'board', timestamp)

In [None]:
def mapping_lists_by_board_from_dump(board_name, timestamp):
    """Return a ``{list id: list name}`` dict built from the saved lists dump."""
    saved_lists = get_data_from_dump(board_name, 'lists', timestamp)
    return {entry['id']: entry['name'] for entry in saved_lists}

In [None]:
def mapping_custom_fields_by_board_from_dump(board_name, timestamp):
    """Return a ``{custom field id: custom field name}`` dict from the saved custom-field dump."""
    saved_fields = get_data_from_dump(board_name, 'custom_field', timestamp)
    return {field['id']: field['name'] for field in saved_fields}

In [None]:
def get_useful_cards_by_board(board_name, timestamp):
    """Return the cards of the 'Done' list with their Start / EndDev / End dates.

    Each returned dict holds the date custom fields found on the card
    (``Start``, ``End``, ``EndDev``) followed by ``name``, ``shortUrl`` and
    ``idList`` (the list *name*, i.e. ``'Done'``).

    Raises:
        Exception: a 'Done' card does not have all three date fields filled in.
    """
    raw_cards = get_data_from_dump(board_name, 'cards', timestamp)

    kept_keys = ('id', 'name', 'idList', 'shortUrl', 'customFieldItems')
    slim_cards = [
        {key: value for key, value in raw_card.items() if key in kept_keys}
        for raw_card in raw_cards
    ]

    field_names = mapping_custom_fields_by_board_from_dump(board_name, timestamp)
    list_names = mapping_lists_by_board_from_dump(board_name, timestamp)

    date_fields = ('Start', 'End', 'EndDev')
    useful_cards = []
    for card in slim_cards:
        list_name = list_names[card['idList']]
        if list_name != 'Done':
            continue

        normalized_card = {}
        for item in card['customFieldItems']:
            field_name = field_names[item['idCustomField']]
            if field_name in date_fields:
                normalized_card[field_name] = item['value']['date']

        # Every delivered card needs all three dates for the duration columns.
        if len(normalized_card) < 3:
            raise Exception(
                'Make sure all dates are filled in the card: Start, EndDev and End for {0}'.format(card['name']))

        normalized_card.update(name=card['name'],
                               shortUrl=card['shortUrl'],
                               idList=list_name)
        useful_cards.append(normalized_card)

    return useful_cards

In [None]:
def create_dataframe_from_trello(board_name, timestamp):
    """Build the delivered-cards DataFrame and add duration and period columns.

    Added columns: ``dev_duration`` / ``duration`` (calendar days from Start to
    EndDev / End), ``busday_dev_duration`` / ``busday_duration`` (same spans
    counted with ``np.busday_count``), ``quarter`` and ``month`` (periods of
    the End date) and ``count`` (always 1, summed later per period).
    """
    df = pd.DataFrame.from_dict(get_useful_cards_by_board(board_name, timestamp))

    # Only the calendar date matters for the durations; the time of day is dropped.
    start_dates = pd.to_datetime(df['Start']).dt.date
    dev_end_dates = pd.to_datetime(df['EndDev']).dt.date
    end_dates = pd.to_datetime(df['End']).dt.date

    df['dev_duration'] = (dev_end_dates - start_dates).dt.days
    df['duration'] = (end_dates - start_dates).dt.days

    df['busday_dev_duration'] = np.busday_count(start_dates, dev_end_dates)
    df['busday_duration'] = np.busday_count(start_dates, end_dates)

    df['quarter'] = pd.PeriodIndex(df['End'], freq='Q')
    df['month'] = pd.PeriodIndex(df['End'], freq='M')

    df["count"] = 1

    return df

In [None]:
# Show every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_rows', None)

# Download today's dumps for the 'CBN' board (this calls the Trello API),
# then build the delivered-cards DataFrame from those dump files.
current_timestamp = generate_timestamp()
create_dumps_by_name('CBN', current_timestamp)
df = create_dataframe_from_trello('CBN', current_timestamp)

In [None]:
# Display the delivered cards with their computed duration and period columns.
df

In [None]:
# Split the cards by the name of the list they sit in.
# NOTE(review): get_useful_cards_by_board only keeps cards from the 'Done' list,
# so `cancelled` is always empty and `done` contains every row of `df`.
cancelled = df[(df['idList'] == 'Cancelled')]
done = df[(df['idList'] == 'Done')]

# Calculating General Estimates

In [None]:
def get_extremes(data_frame, duration_column):
    """Return the 5th and 95th percentile of ``data_frame[duration_column]``.

    Returns:
        ``(lower_extremes, upper_extremes)`` - the 0.05 and 0.95 quantiles.
    """
    durations = data_frame[duration_column]
    return durations.quantile(q=0.05), durations.quantile(q=0.95)

In [None]:
def calculate_estimatives_by_duration_column(data_frame, duration_column, print_results=True):
    """Summarize ``duration_column`` after trimming the 5% / 95% extremes.

    Rows strictly between the two extremes are kept. Within them, the 25th
    percentile is the "small limit": rows at or below it are "Small", the rest
    "Big". Means are computed for all kept rows, the small ones and the big ones.

    Args:
        data_frame: Cards DataFrame containing ``duration_column``.
        duration_column: Name of the numeric duration column to analyse.
        print_results: When true, display the summary table.

    Returns:
        The rows of ``data_frame`` that lie strictly between the extremes.
    """
    lower_extremes, upper_extremes = get_extremes(data_frame, duration_column)

    durations = data_frame[duration_column]
    inside_extremes = (durations > lower_extremes) & (durations < upper_extremes)
    done_extremes_removed = data_frame[inside_extremes]
    kept_durations = done_extremes_removed[duration_column]

    mean_removed_extremes = kept_durations.mean()
    small_limit = kept_durations.quantile(q=0.25)
    mean_small_extremes_removed = kept_durations[kept_durations <= small_limit].mean()
    mean_not_small_extremes_removed = kept_durations[kept_durations > small_limit].mean()

    # Counted on the untrimmed input, not on the kept rows.
    total_developed = len(data_frame)

    if print_results:
        summary_rows = [
            ('lower_extremes', lower_extremes),
            ('upper_extremes', upper_extremes),
            ('small limit', small_limit),
            ('Done estimate (with "extremes" removed)', mean_removed_extremes),
            ('Done estimate for "Small" ones', mean_small_extremes_removed),
            ('Done estimate for "Big" ones', mean_not_small_extremes_removed),
            ('Total_developed', total_developed),
        ]
        general_estimatives_df = pd.DataFrame(data={
            'Feature': [label for label, _ in summary_rows],
            'Value': [value for _, value in summary_rows],
        })
        display(general_estimatives_df)

    return done_extremes_removed

In [None]:
def calculate_estimatives(data_frame):
    """Display the estimate summary for each of the four duration columns.

    Calendar-day durations come first, then the business-day ones under a
    'BUSINESS DAY' heading.
    """
    def show_section(title, duration_column):
        # One titled summary table for a single duration column.
        display(Markdown(title))
        calculate_estimatives_by_duration_column(data_frame, duration_column)

    show_section('### Total Duration:', 'duration')
    print('\n')
    show_section('### Total Dev Duration:', 'dev_duration')
    print('\n')
    display(Markdown('## BUSINESS DAY'))
    print('\n')
    show_section('### Business Day Duration:', 'busday_duration')
    print('\n')
    show_section('### Business Day Dev Duration:', 'busday_dev_duration')

In [None]:
def generate_table_amount_delivered_by_period(df, total_fte, period): # quarter or month
    """Count delivered cards per period and relate them to the FTE of that period.

    Args:
        df: Cards DataFrame with a ``period`` column and a ``count`` column.
        total_fte: FTE table with a ``period`` column and an ``fte`` column
            (one row per month).
        period: ``'month'`` or ``'quarter'`` - the column to aggregate on.

    Returns:
        DataFrame with columns ``period`` (as string), ``count`` (cards
        delivered), ``fte`` (FTE summed over the rows of that period) and
        ``parsers_per_fte`` (``count / fte``).
    """
    # Select the column explicitly before summing. The previous `.sum('count')`
    # passed the string as the positional `numeric_only` flag rather than as a
    # column name, which only worked by accident.
    by_period = df.groupby(period)[['count']].sum()

    by_period_fte = pd.merge(by_period, total_fte, on=period, how='left')

    # For quarters the merge yields one row per month of the quarter, so the
    # monthly FTE values are summed back into a single row per period.
    period_result = (
        by_period_fte
        .groupby([period, 'count'])[['fte']]
        .sum()
        .reset_index()
    )

    period_result['parsers_per_fte'] = period_result['count'].div(period_result['fte'])

    # Periods are rendered as text so they can be used as chart axis labels.
    period_result[period] = period_result[period].astype(str)

    return period_result

In [None]:
def generate_chart_amount_delivered_by_period(df, period):
    """Plot FTE and cards-per-FTE as lines over a bar chart of card counts, then show the table.

    ``df`` needs the columns ``period``, ``fte``, ``parsers_per_fte`` and ``count``.
    """
    line_columns = [period, 'fte', 'parsers_per_fte']
    bar_columns = [period, 'count']

    # Both plots share one axes object so the bars are drawn under the lines' axes.
    axes = df[line_columns].plot(x=period, linestyle='-', marker='o',
                                 color=['orange', 'pink'])
    df[bar_columns].plot(x=period, kind='bar', ax=axes)
    display(df)

In [None]:
def generate_chart_and_table_amount_delivered_by_period(df, total_fte, period):
    """Build the per-period delivery table and chart it.

    Raises:
        Exception: ``period`` is neither ``'month'`` nor ``'quarter'``.
    """
    if period not in ('month', 'quarter'):
        raise Exception(f'"{period}" is not defined. Must be "month" or "quarter".')

    period_result = generate_table_amount_delivered_by_period(df, total_fte, period)
    generate_chart_amount_delivered_by_period(period_result, period)

# General Estimates

In [None]:
# Summary tables for the four duration columns, computed on the 'Done' cards.
calculate_estimatives(done)

# Amount delivered by month

In [None]:
# Cards delivered per month, with monthly FTE and cards-per-FTE.
generate_chart_and_table_amount_delivered_by_period(df, total_fte, 'month')

# Amount delivered by quarter

In [None]:
# Cards delivered per quarter, with FTE summed over the quarter's months.
generate_chart_and_table_amount_delivered_by_period(df, total_fte, 'quarter')

# Predict

In [None]:
def mapping_custom_fields_value_by_board_from_dump(board_name, timestamp):
    """Return an ``{option id: option text}`` dict for every custom field that has options.

    Custom fields without an ``'options'`` entry are skipped.
    """
    saved_fields = get_data_from_dump(board_name, 'custom_field', timestamp)

    return {
        option['id']: option['value']['text']
        for field in saved_fields
        if 'options' in field
        for option in field['options']
    }

In [None]:
def get_useful_cards_by_board_predict(board_name, timestamp):
    """Return the 'Done' cards with *all* their custom fields, for the prediction model.

    For each custom-field item on a card:
      * if it has a ``'value'`` entry, the first value inside it is stored;
      * otherwise, if it has an ``'idValue'`` entry, the text of that option is stored.
    ``name``, ``shortUrl`` and ``idList`` (the list name) are added afterwards.
    """
    raw_cards = get_data_from_dump(board_name, 'cards', timestamp)

    kept_keys = ('id', 'name', 'idList', 'shortUrl', 'customFieldItems')
    slim_cards = [
        {key: value for key, value in raw_card.items() if key in kept_keys}
        for raw_card in raw_cards
    ]

    field_names = mapping_custom_fields_by_board_from_dump(board_name, timestamp)
    option_texts = mapping_custom_fields_value_by_board_from_dump(board_name, timestamp)
    list_names = mapping_lists_by_board_from_dump(board_name, timestamp)

    useful_cards = []
    for card in slim_cards:
        list_name = list_names[card['idList']]
        if list_name != 'Done':
            continue

        normalized_card = {}
        for item in card['customFieldItems']:
            field_name = field_names[item['idCustomField']]
            if 'value' in item:
                # The value dict is keyed by its type; take whichever entry comes first.
                typed_value = item['value']
                first_key = list(typed_value.keys())[0]
                normalized_card[field_name] = typed_value[first_key]
            elif 'idValue' in item:
                normalized_card[field_name] = option_texts[item['idValue']]

        normalized_card.update(name=card['name'],
                               shortUrl=card['shortUrl'],
                               idList=list_name)
        useful_cards.append(normalized_card)

    return useful_cards

In [None]:
def create_dataframe_from_trello_predict(board_name, timestamp):
    """Build the prediction DataFrame (all custom fields) and add duration and period columns.

    Added columns: ``dev_duration`` / ``duration`` (calendar days from Start to
    EndDev / End), ``busday_dev_duration`` / ``busday_duration`` (same spans
    counted with ``np.busday_count``), ``quarter`` and ``month`` (periods of
    the End date) and ``count`` (always 1).
    """
    df = pd.DataFrame.from_dict(get_useful_cards_by_board_predict(board_name, timestamp))

    # Only the calendar date matters for the durations; the time of day is dropped.
    start_dates = pd.to_datetime(df['Start']).dt.date
    dev_end_dates = pd.to_datetime(df['EndDev']).dt.date
    end_dates = pd.to_datetime(df['End']).dt.date

    df['dev_duration'] = (dev_end_dates - start_dates).dt.days
    df['duration'] = (end_dates - start_dates).dt.days

    df['busday_dev_duration'] = np.busday_count(start_dates, dev_end_dates)
    df['busday_duration'] = np.busday_count(start_dates, end_dates)

    df['quarter'] = pd.PeriodIndex(df['End'], freq='Q')
    df['month'] = pd.PeriodIndex(df['End'], freq='M')

    df["count"] = 1

    return df

In [None]:
# Show every row when a DataFrame is displayed (no truncation).
pd.set_option('display.max_rows', None)

# Re-create the dumps for 'CBN' (this calls the Trello API again) and build
# the prediction DataFrame, which keeps every custom field of the 'Done' cards.
current_timestamp = generate_timestamp()
create_dumps_by_name('CBN', current_timestamp)
df_predict = create_dataframe_from_trello_predict('CBN', current_timestamp)
df_predict

In [None]:
# Keep only cards that have a 'Size/Complexity' value.
df_predict = df_predict[pd.notna(df_predict['Size/Complexity'] )]
# Outlier bounds on the target: 1st and 93rd percentile of `duration`.
q_low = df_predict["duration"].quantile(0.01)
q_hi  = df_predict["duration"].quantile(0.93)

# Keep rows strictly between the two bounds.
# NOTE: this cell overwrites df_predict, so re-running it trims the data again.
df_predict = df_predict[(df_predict["duration"] < q_hi) & (df_predict["duration"] > q_low)]
df_predict

In [None]:
# Split features and labels.
# Every column listed here is removed from the model inputs; what remains is `features`.
# `columns=` is used because the positional axis argument (`drop(labels, 1)`)
# is no longer accepted by pandas 2.x.
features = df_predict.drop(columns=['Buganizer', 'name', 'shortUrl', 'End', 'idList', 'CL',
                                    'Commit Date (by Google)', 'Date requested (by Google)',
                                    'EndDev', 'Start', 'quarter', 'month', 'Customers',
                                    'busday_dev_duration', 'busday_duration', 'dev_duration',
                                    'duration', 'count', 'Size/Complexity'])
# The regression target is the total duration in calendar days.
labels = df_predict['duration']
features

In [None]:
# One-hot encode the non-numeric feature columns, then inspect the resulting dtypes.
features_dummies = pd.get_dummies(features)
features_dummies.dtypes

In [None]:
# Drop rows with missing feature values, and keep `labels` restricted to the
# same rows: otherwise features and labels would differ in length whenever a
# row is dropped, and the feature selection / train-test split below would fail.
features_dummies = features_dummies.dropna()
labels = labels.loc[features_dummies.index]
features_dummies

In [None]:

# Rank the features with SelectKBest

features_list = tuple(features_dummies.columns)

# NOTE(review): no score_func is given, so scikit-learn's default (f_classif, a
# classification score) is used although `labels` is a numeric duration -
# confirm whether f_regression was intended.
k_best_features = SelectKBest(k='all')
k_best_features.fit_transform(features_dummies, labels)
k_best_features_scores = k_best_features.scores_
# scores_ holds one score per input column, in column order, so names and scores
# are paired without any offset (the previous `features_list[1:]` shifted every
# name by one and dropped the first feature).
raw_pairs = zip(features_list, k_best_features_scores)
ordered_pairs = list(reversed(sorted(raw_pairs, key=lambda x: x[1])))

# Feature name -> score, highest score first.
k_best_features_final = dict(ordered_pairs)

print ("Features:")
print (k_best_features_final)

In [None]:
# Keep the 12 best-ranked features (the first 12 keys of the score-ordered dict).
best_features = list(k_best_features_final.keys())[:12]
# NOTE: this overwrites features_dummies with the narrowed column set.
features_dummies = features_dummies.loc[:,best_features]

In [None]:
# Split the data into train (85%) and test (15%) sets; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(features_dummies, labels, test_size=0.15, random_state=10)

# Sanity check: features and labels must have matching lengths in each split.
print( len(X_train), len(y_train))
print( len(X_test), len(y_test))

In [None]:
# Training with linear regression
lr = linear_model.LinearRegression()

lr.fit(X_train, y_train)

pred= lr.predict(X_test)

# Coefficient of determination (R²) on the test set, printed as a percentage.
cd =r2_score(y_test, pred)
print(f'Coeficiente de determinação:{cd * 100:.2f}')

In [None]:
# Default neural network (only max_iter is raised)
# NOTE(review): no random_state is set, so the result changes between runs.
rn = MLPRegressor(max_iter=2000)

rn.fit(X_train, y_train)
pred= rn.predict(X_test)

#cd = rn.score(X_test, y_test)
# Coefficient of determination (R²) on the test set, printed as a percentage.
cd =r2_score(y_test, pred)
print(f'Coeficiente de determinação:{cd * 100:.2f}')

In [None]:
# Neural network with hyper-parameter tuning (exhaustive grid search)
# NOTE(review): no random_state is set on the estimator, so the selected
# parameters and the score can change between runs.

rn_new = MLPRegressor()

# 20 layer sizes x 2 activations x 3 solvers x 2 alphas x 2 learning rates
# = 480 combinations, each evaluated with 5-fold cross-validation.
parameter_space = {
        'hidden_layer_sizes': [(i,) for i in list(range(1, 21))],
        'activation': ['tanh', 'relu'],
        'solver': ['sgd', 'adam', 'lbfgs'], 
        'alpha': [0.0001, 0.05],
        'learning_rate': ['constant', 'adaptive'],
    }

# n_jobs=-1: run the fits in parallel on all available cores.
search = GridSearchCV(rn_new, parameter_space, n_jobs=-1, cv=5)

search.fit(X_train,y_train)
# Best model found by the search; reused in the prediction cell below.
clf = search.best_estimator_

pred = search.predict(X_test)

#cd = search.score(X_test, y_test)
# Coefficient of determination (R²) on the test set, printed as a percentage.
cd =r2_score(y_test, pred)
print(search.best_params_)
print(f'Coeficiente de determinação:{cd * 100:.2f}')

In [None]:
# Neural network with fixed hyper-parameters
# NOTE(review): no random_state is set, so the result changes between runs.
rn_adjust = MLPRegressor(activation='tanh', alpha=0.05, hidden_layer_sizes=(3,), learning_rate='constant', solver='sgd')
rn_adjust.fit(X_train, y_train)

pred = rn_adjust.predict(X_test)

#cd = rn_adjust.score(X_test, y_test)
# Coefficient of determination (R²) on the test set, printed as a percentage.
cd =r2_score(y_test, pred)
print(f'Coeficiente de determinação:{cd * 100:.2f}')

In [None]:
# Running the predictions: one prediction array per trained model, all on the test set.

pred_lr = lr.predict(X_test)

pred_rn = rn.predict(X_test)

pred_rn_adjust_search = clf.predict(X_test)

pred_rn_adjust = rn_adjust.predict(X_test)

# Linear regression applied to every row (train and test together).
pred_lr_features = lr.predict(features_dummies)

In [None]:
def model_accuracy(y_test, y_pred):
    """Return the percentage of predictions that are within 7 of the real value.

    The difference ``y_test - y_pred`` is rounded and taken as an absolute
    value before being compared with 7.
    """
    deviations = np.absolute(np.round(y_test - y_pred))
    within_tolerance = sum(1 for deviation in deviations if deviation <= 7)
    return 100 * (within_tolerance / len(deviations))

In [None]:
# Linear regression on the test set: MSE, MAE, real vs predicted mean,
# share of predictions within 7 days, and the real/predicted values side by side.
print('Predição Linear Regression')
print('Erro médio quadrático: ', mean_squared_error(y_test, pred_lr))
print('Erro médio absoluto: ', mean_absolute_error(y_test, pred_lr))
print('Média real: ', y_test.mean())
print('Média predição: ', pred_lr.mean())
print('Acurácia: ', model_accuracy(y_test, pred_lr), '%')
display(pd.DataFrame({'real':y_test, 'previsao':pred_lr}))

In [None]:
# Same report for the neural network with fixed hyper-parameters (test set).
print('Predição Rede Neural Ajustada')
print('Erro médio quadrático: ', mean_squared_error(y_test, pred_rn_adjust))
print('Erro médio absoluto: ', mean_absolute_error(y_test, pred_rn_adjust))
print('Média real: ', y_test.mean())
print('Média predição: ', pred_rn_adjust.mean())
print('Acurácia: ', model_accuracy(y_test, pred_rn_adjust), '%')
display(pd.DataFrame({'real':y_test, 'previsao':pred_rn_adjust}))

In [None]:
# Same report for the default neural network (test set).
print('Predição Rede Neural Padrão')
print('Erro médio quadrático: ', mean_squared_error(y_test, pred_rn))
print('Erro médio absoluto: ', mean_absolute_error(y_test, pred_rn))
print('Média real: ', y_test.mean())
print('Média predição: ', pred_rn.mean())
print('Acurácia: ', model_accuracy(y_test, pred_rn), '%')
display(pd.DataFrame({'real':y_test, 'previsao':pred_rn}))

In [None]:
# Same report for the best estimator found by the grid search (test set).
print('Predição Rede Neural Ajustada Busca')
print('Erro médio quadrático: ', mean_squared_error(y_test, pred_rn_adjust_search))
print('Erro médio absoluto: ', mean_absolute_error(y_test, pred_rn_adjust_search))
print('Média real: ', y_test.mean())
print('Média predição: ', pred_rn_adjust_search.mean())
print('Acurácia: ', model_accuracy(y_test, pred_rn_adjust_search), '%')
display(pd.DataFrame({'real':y_test, 'previsao':pred_rn_adjust_search}))

In [None]:
# Linear regression report on every row (train and test together). These
# figures include the rows the model was fitted on, so they are not a
# held-out evaluation.
print('Predição Linear Regression all labels')
print('Erro médio quadrático: ', mean_squared_error(labels, pred_lr_features))
print('Erro médio absoluto: ', mean_absolute_error(labels, pred_lr_features))
print('Média real: ', labels.mean())
print('Média predição: ', pred_lr_features.mean())
print('Acurácia: ', model_accuracy(labels, pred_lr_features), '%')
display(pd.DataFrame({'real':labels, 'previsao':pred_lr_features}))

In [None]:
# Split the full card table with the same test_size and random_state as the
# feature split, presumably so that X_test_table holds the same cards as X_test.
# NOTE(review): that only holds if df_predict and features_dummies contain the
# same rows in the same order - confirm.
X_train_table, X_test_table, y_train_table, y_test_table = train_test_split(df_predict, df_predict['duration'], test_size=0.15, random_state=10)

In [None]:
# Keep a reference to the test rows with their real values for the comparison charts.
X_test_table_cp = X_test_table
# Remove the real duration and the columns derived from the real End date; they
# are rebuilt from the predicted duration in the next cell. `drop` returns a new
# frame, so X_test_table_cp is not affected. `columns=` is used because the
# positional axis argument (`drop(labels, 1)`) is no longer accepted by pandas 2.x.
X_test_table = X_test_table.drop(columns=['duration', 'quarter', 'month', 'End'])

In [None]:
# Predicted duration in whole, non-negative days (assigned by position, so the
# rows of X_test_table must be in the same order as X_test).
X_test_table['duration'] = np.absolute(np.round(pred_lr))

# Predicted End date = Start + predicted duration. The builtin `int` replaces
# `np.int`, an alias that was removed from NumPy.
X_test_table['End'] = pd.to_datetime(X_test_table['Start']) + pd.to_timedelta(X_test_table['duration'].astype(int),'D')

# Rebuild the period columns from the predicted End date.
X_test_table['quarter'] = pd.PeriodIndex(X_test_table['End'], freq='Q')

X_test_table['month'] = pd.PeriodIndex(X_test_table['End'], freq='M')



X_test_table

In [None]:
# Recombine the untouched train rows with the test rows whose duration, End,
# quarter and month now come from the linear-regression prediction.
df_full = pd.concat([X_train_table, X_test_table])
df_full

In [None]:
# Monthly delivery chart with the real dates, followed by the same chart
# where the test rows use predicted End dates.
print('Real')
generate_chart_and_table_amount_delivered_by_period(df_predict, total_fte, 'month')
print('Previsão')
generate_chart_and_table_amount_delivered_by_period(df_full, total_fte, 'month')

In [None]:
# Same comparison restricted to the test rows: real dates first, predicted dates second.
print('Real apenas teste')
generate_chart_and_table_amount_delivered_by_period(X_test_table_cp, total_fte, 'month')
print('Previsão apenas teste')
generate_chart_and_table_amount_delivered_by_period(X_test_table, total_fte, 'month')