# Config

> Esta seção reúne as configurações gerais do projeto, como importações de pacotes e carregamento de variáveis de ambiente, entre outros.

In [1]:
# Importação de módulos.
import requests
import pandas as pd
import warnings
import json
import yaml
import gzip
import os
import glob
import importlib
#from sqlalchemy import create_engine, inspect, text
#from sqlalchemy import create_engine
from datetime import datetime, timedelta
from work.bdt_data_integration.src.utils import Utils, WebhookNotifier
from work.bdt_data_integration.src.writers import DataWriter
from work.bdt_data_integration.src.streams import BenditoAPIStream
from work.bdt_data_integration.src.loaders import PostgresLoader
from work.bdt_data_integration.src.transformers import NotionTransformer

In [2]:
# Load the project configuration (config.yaml) through the Utils helper.
config = Utils.load_config()

# Read credentials and endpoints from environment variables.
# A missing variable raises KeyError right here, before the notifier exists.
token = os.environ["BENDITO_BI_TOKEN"]
host = os.environ["NEON_HOST"]
user = os.environ["NEON_ROOT_USER"]
password = os.environ["NEON_ROOT_PASSWORD"]
db_name = os.environ["NEON_DATABASE_NAME"]
notifier_url = os.environ["MAKE_NOTIFICATION_WEBHOOK"]

In [3]:
# Source identifier; handed to the extraction stream as its `identifier`.
source = "bendito"

In [4]:
# Build the webhook notifier for this pipeline and call its start hook.
notifier = WebhookNotifier(
    url=notifier_url,
    pipeline="bendito_pipeline",
)
notifier.pipeline_start()

Accepted


# Extract

> Nesta seção é realizada a extração dos dados brutos da API. São instanciados uma stream e um writer, responsáveis, respectivamente, pela conexão com a fonte de dados e pela escrita dos dados.

In [5]:
# Build the writer that is handed to the extraction stream in the next cell.
# NOTE(review): the stream name is spelled 'btd_...' while every other name in
# this notebook uses the 'bdt' prefix — possibly a typo. Left untouched because
# it may feed into output paths; TODO confirm.
try:
    writer = DataWriter(
        source=source,  # reuse the shared identifier instead of repeating the literal
        stream='btd_integration_job_info',
        config=config,
    )
except Exception as exc:
    # Report the failure through the webhook, then re-raise the same exception
    # (bare `raise` keeps the original traceback intact).
    notifier.pipeline_error(exc)
    raise

> Usa-se o parâmetro compression = True para que a saída da stream aconteça em formato gzip, para economizar espaço.

In [6]:
# Instantiate the stream used for the data extraction, passing the source
# identifier, the API token and the writer.
# NOTE(review): the markdown above mentions a `compression = True` parameter,
# but no such argument is passed here — confirm whether it is set elsewhere.
try:
    stream = BenditoAPIStream(identifier=source, token=token, writer=writer)
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

In [7]:
# Run the data extraction.
# Extraction parameters are plain literals, so they live outside the guarded call.
query = "select * from integration_job_info"
page_size = 200
separator = ";"

try:
    records = stream.run(
        query=query,
        page_size=page_size,
        separator=separator,
    )
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

In [8]:
# Write the extracted records to the raw layer as CSV.
# This step uses the same notify-and-re-raise handler as the other pipeline
# steps, so a failed write is also reported through the webhook.
try:
    output = writer.get_output_file_path('bendito', 'integration_job_info', target_layer='raw') + '.csv'
    # Make sure the destination directory exists before writing.
    os.makedirs(os.path.dirname(output), exist_ok=True)
    records.to_csv(output, index=False)
except Exception as exc:
    notifier.pipeline_error(exc)
    raise

# Transform

> Nesta etapa ocorre a transformação dos dados, como obtenção de chaves e valores, alteração da estrutura, mudanças no texto e assim por diante.

## bdt__integration_job_info

### processing

In [9]:
# Write the extracted records, unchanged, to the processing layer as CSV.
try:
    output = (
        writer.get_output_file_path(
            'bendito',
            'bdt__integration_job_info',
            target_layer='processing',
        )
        + '.csv'
    )
    target_dir = os.path.dirname(output)
    os.makedirs(target_dir, exist_ok=True)  # create the folder on first run
    records.to_csv(output, index=False)
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

### staging

In [10]:
# Load the processing-layer CSV into the DataFrame queried by the staging SQL.
# NOTE(review): this path is hard-coded, whereas the file is written to the
# location returned by writer.get_output_file_path(...) — confirm they match.
try:
    # A bare name inside `try` is never rendered by the notebook, so the stray
    # display expression that used to follow this line was a no-op.
    bdt__integration_job_info = pd.read_csv("data/processing/bendito/bdt__integration_job_info.csv")
except Exception as exc:
    # Report the failure through the webhook, then re-raise the same exception.
    notifier.pipeline_error(exc)
    raise

In [11]:
# Staging projection: keeps the listed columns and renames
# status_id -> id_status and status_name -> status.
# The query reads from the `bdt__integration_job_info` DataFrame and its
# result is assigned back to that same name.
_staging_sql = (
    'select id_job,\n'
    '       id_customer,\n'
    '       integration_type,\n'
    '       customer_name,\n'
    '       status_id as id_status,\n'
    '       status_name as status,\n'
    '       job_type,\n'
    '       created_at,\n'
    '       updated_at,\n'
    '       records_to_process,\n'
    '       records_processed,\n'
    '       duration,\n'
    '       duration_pretty,\n'
    '       avg_process_time\n'
    'from bdt__integration_job_info'
)

# Use the Deepnote-provided SQL runner when it is defined, else the toolkit one.
if '_deepnote_execute_sql' in globals():
    _run_sql = _deepnote_execute_sql
else:
    _run_sql = _dntk.execute_sql

bdt__integration_job_info = _run_sql(
    _staging_sql,
    'SQL_DEEPNOTE_DATAFRAME_SQL',
    audit_sql_comment='',
    sql_cache_mode='cache_disabled',
)
bdt__integration_job_info

Unnamed: 0,id_job,id_customer,integration_type,customer_name,id_status,status,job_type,created_at,updated_at,records_to_process,records_processed,duration,duration_pretty,avg_process_time
0,34596,481,Deltacon,MIX COMERCIO DE SUPRIMENTOS E UTENSILIOS LTDA,3,Finalizado,ImportProduct,2024-09-10 13:14:24,2024-09-10 13:34:37,206,206,1213,00 days 00 hours 20 minutes 13 seconds,5.89
1,34595,481,Deltacon,MIX COMERCIO DE SUPRIMENTOS E UTENSILIOS LTDA,3,Finalizado,ImportProduct,2024-09-06 14:28:21,2024-09-06 14:43:28,213,213,906,00 days 00 hours 15 minutes 06 seconds,4.25
2,34594,481,Deltacon,MIX COMERCIO DE SUPRIMENTOS E UTENSILIOS LTDA,3,Finalizado,ImportProduct,2024-09-06 13:55:48,2024-09-06 14:10:06,213,213,857,00 days 00 hours 14 minutes 17 seconds,4.02
3,34589,14,Bling,Teste Bling,3,Finalizado,ImportStock,2024-09-03 18:58:45,2024-09-03 18:59:04,233,233,18,00 days 00 hours 00 minutes 18 seconds,0.08
4,34588,14,Bling,Teste Bling,3,Finalizado,ImportStock,2024-09-03 18:23:40,2024-09-03 18:24:04,233,233,24,00 days 00 hours 00 minutes 24 seconds,0.10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8995,65,21,Omie,CONFEITAR,3,Finalizado,ImportClient,2020-06-03 03:04:15,2020-06-03 03:20:17,2149,2149,961,00 days 00 hours 16 minutes 01 seconds,0.45
8996,20,25,Omie,IKASALIMP,3,Finalizado,ImportProduct,2020-05-29 18:32:34,2020-05-29 19:36:57,4917,4919,3862,00 days 01 hours 04 minutes 22 seconds,0.79
8997,18,21,Omie,CONFEITAR,3,Finalizado,ImportProduct,2020-05-28 21:15:10,2020-05-28 21:15:22,2,2,12,00 days 00 hours 00 minutes 12 seconds,6.00
8998,11,21,Omie,CONFEITAR,3,Finalizado,ImportProduct,2020-05-28 16:37:27,2020-05-28 18:04:05,5392,5392,5197,00 days 01 hours 26 minutes 37 seconds,0.96


In [12]:
# Write the staged DataFrame to the staging layer as CSV.
try:
    output = (
        writer.get_output_file_path(
            'bendito',
            'bdt__integration_job_info',
            target_layer='staging',
        )
        + '.csv'
    )
    target_dir = os.path.dirname(output)
    os.makedirs(target_dir, exist_ok=True)  # create the folder on first run
    bdt__integration_job_info.to_csv(output, index=False)
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

# Load

In [13]:
# Instantiate the Postgres loader with the connection settings read from the
# environment at the top of the notebook.
try:
    loader = PostgresLoader(
        user=user,
        password=password,
        host=host,
        db_name=db_name,
    )
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

In [14]:
# Re-read the staging CSV and hand it to the loader, targeting
# public.bdt__integration_job_info with mode='replace'.
try:
    staging_csv = "data/staging/bendito/bdt__integration_job_info.csv"
    bdt__integration_job_info = pd.read_csv(staging_csv)
    loader.load_data(
        dataframe=bdt__integration_job_info,
        target_table='bdt__integration_job_info',
        target_schema='public',
        mode='replace',
    )
except Exception as exc:
    notifier.pipeline_error(exc)
    raise exc

# Callbacks

In [15]:
# Call the notifier's end-of-pipeline hook (counterpart of pipeline_start() above).
notifier.pipeline_end()

Accepted


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=022588a7-e3ac-4acd-8e50-3c39b9590c40' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>