# Config

> Esta seção tem configurações gerais do projeto, como importações de pacotes, carregamento de variáveis de ambiente entre outros.

In [1]:
# Importação de módulos.
import requests
import pandas as pd
import warnings
import json
import yaml
import gzip
import os
import glob
import importlib
#from sqlalchemy import create_engine, inspect, text
#from sqlalchemy import create_engine
from datetime import datetime, timedelta
from work.bdt_data_integration.src.utils import Utils, WebhookNotifier
from work.bdt_data_integration.src.writers import DataWriter
from work.bdt_data_integration.src.extractors import BenditoAPIExtractor
from work.bdt_data_integration.src.loaders import PostgresLoader
from work.bdt_data_integration.src.transformers import NotionTransformer

In [2]:
# Carregamento do arquivo config.yaml
config = Utils.load_config()
schema = config.get("PRODUCTION_SCHEMA")

# Carregamento das variáveis de ambiente
token = os.environ['BENDITO_BI_TOKEN']
host = os.environ["NEON_HOST"]
user =os.environ["NEON_ROOT_USER"]
password = os.environ["NEON_ROOT_PASSWORD"]
db_name = os.environ["NEON_DATABASE_NAME"]
notifier_url = os.environ['MAKE_NOTIFICATION_WEBHOOK']

In [3]:
pipeline = 'bendito_pipeline'
source = 'bendito'

In [4]:
notifier = WebhookNotifier(url=notifier_url,pipeline=pipeline)
# notifier.pipeline_start()

# Integration_job_info

In [5]:
stream_name = 'integration_job_info'
stream_table_name = f'bdt__integration_job_info'

## Extract

In [6]:
try:
    writer = DataWriter(
        source=source,
        stream=stream_name, 
        config=config
        )
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [7]:
# Instanciar Stream para a extração dos dados
try:
    extractor = BenditoAPIExtractor(
        identifier = source,
        token = token,
        writer = writer)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [9]:
#Executar a extração dos dados
try:
    query = "select * from integration_job_info"
    page_size = 200
    separator = ";"
    records = extractor.run(query=query, page_size=page_size, separator=separator)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [10]:
output = writer.get_output_file_path(source, stream_name,target_layer='raw') + '.csv'
os.makedirs(os.path.dirname(output), exist_ok=True)
records.to_csv(output, index=False)

## Transform

### processing

In [11]:
try:
    output = writer.get_output_file_path(source, stream_name, target_layer='processing') + '.csv'
    os.makedirs(os.path.dirname(output), exist_ok=True)
    records.to_csv(output, index=False)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

### staging

In [12]:
try:
    # Carregar os arquivos em processing
    processed_data_path = writer.get_output_file_path(source, stream_name, target_layer='processing') + '.csv'
    processed_data = pd.read_csv(processed_data_path)
    processed_data
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [13]:
staged_data = (lambda: _deepnote_execute_sql('select id_job,\n       id_customer,\n       integration_type,\n       customer_name,\n       status_id as id_status,\n       status_name as status,\n       job_type,\n       created_at,\n       updated_at,\n       records_to_process,\n       records_processed,\n       duration,\n       duration_pretty,\n       avg_process_time\nfrom processed_data', 'SQL_DEEPNOTE_DATAFRAME_SQL', audit_sql_comment='', sql_cache_mode='cache_disabled') if '_deepnote_execute_sql' in globals() else _dntk.execute_sql('select id_job,\n       id_customer,\n       integration_type,\n       customer_name,\n       status_id as id_status,\n       status_name as status,\n       job_type,\n       created_at,\n       updated_at,\n       records_to_process,\n       records_processed,\n       duration,\n       duration_pretty,\n       avg_process_time\nfrom processed_data', 'SQL_DEEPNOTE_DATAFRAME_SQL', audit_sql_comment='', sql_cache_mode='cache_disabled'))()
staged_data

Unnamed: 0,id_job,id_customer,integration_type,customer_name,id_status,status,job_type,created_at,updated_at,records_to_process,records_processed,duration,duration_pretty,avg_process_time
0,34603,13,Tiny,Teste Tiny,3,Finalizado,ImportClient,2024-09-12 14:51:22,2024-09-12 14:53:12,38,38,109,00 days 00 hours 01 minutes 49 seconds,2.87
1,34602,13,Tiny,Teste Tiny,3,Finalizado,ImportClient,2024-09-12 14:49:31,2024-09-12 14:51:13,38,38,101,00 days 00 hours 01 minutes 41 seconds,2.66
2,34601,13,Tiny,Teste Tiny,3,Finalizado,ImportClient,2024-09-12 14:35:31,2024-09-12 14:37:22,38,38,111,00 days 00 hours 01 minutes 51 seconds,2.92
3,34599,13,Tiny,Teste Tiny,3,Finalizado,ImportClient,2024-09-12 13:47:30,2024-09-12 13:51:05,62,62,214,00 days 00 hours 03 minutes 34 seconds,3.45
4,34596,481,Deltacon,MIX COMERCIO DE SUPRIMENTOS E UTENSILIOS LTDA,3,Finalizado,ImportProduct,2024-09-10 13:14:24,2024-09-10 13:34:37,206,206,1213,00 days 00 hours 20 minutes 13 seconds,5.89
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8986,65,21,Omie,CONFEITAR,3,Finalizado,ImportClient,2020-06-03 03:04:15,2020-06-03 03:20:17,2149,2149,961,00 days 00 hours 16 minutes 01 seconds,0.45
8987,20,25,Omie,IKASALIMP,3,Finalizado,ImportProduct,2020-05-29 18:32:34,2020-05-29 19:36:57,4917,4919,3862,00 days 01 hours 04 minutes 22 seconds,0.79
8988,18,21,Omie,CONFEITAR,3,Finalizado,ImportProduct,2020-05-28 21:15:10,2020-05-28 21:15:22,2,2,12,00 days 00 hours 00 minutes 12 seconds,6.00
8989,11,21,Omie,CONFEITAR,3,Finalizado,ImportProduct,2020-05-28 16:37:27,2020-05-28 18:04:05,5392,5392,5197,00 days 01 hours 26 minutes 37 seconds,0.96


In [14]:
try:
    output = writer.get_output_file_path(source, stream_table_name ,target_layer='staging') + '.csv'
    os.makedirs(os.path.dirname(output), exist_ok=True)
    staged_data.to_csv(output, index=False)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

## Load

In [15]:
# Correcting the instantiation of the PostgresLoader by ensuring parameter names match expected signature
try:
    loader = PostgresLoader(user=user, password=password, host=host, db_name=db_name)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [16]:
try:
    staged_data_path = writer.get_output_file_path(source, stream_table_name ,target_layer='staging') + '.csv'
    staged_data = pd.read_csv(staged_data_path)
    loader.load_data(dataframe=staged_data, target_table=stream_table_name, mode='replace', target_schema=schema)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

# customer_stats

In [17]:
stream_name = 'customer_stats'
stream_table_name = f'bdt__customer_stats'

## Extract

In [18]:
try:
    writer = DataWriter(
        source=source,
        stream=stream_name, 
        config=config
        )
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [19]:
# Instanciar Stream para a extração dos dados
try:
    stream = BenditoAPIStream(
        identifier = source,
        token = token,
        writer = writer)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

Accepted


NameError: name 'BenditoAPIStream' is not defined

In [None]:
#Executar a extração dos dados
try:
    query = "select * from v_customer_stats"
    page_size = 200
    separator = ";"
    records = stream.run(query=query, page_size=page_size, separator=separator)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [None]:
output = writer.get_output_file_path(source, stream_name,target_layer='raw') + '.csv'
os.makedirs(os.path.dirname(output), exist_ok=True)
records.to_csv(output, index=False)

## Transform

### processing

In [None]:
try:
    output = writer.get_output_file_path(source, stream_name,target_layer='processing') + '.csv'
    os.makedirs(os.path.dirname(output), exist_ok=True)
    records.to_csv(output, index=False)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

### staging

In [None]:
try:
    # Carregar os arquivos em processing
    processed_data_path = writer.get_output_file_path(source, stream_table_name,target_layer='processing') + '.csv'
    processed_data = pd.read_csv(processed_data_path)
    processed_data
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [None]:
staged_data = (lambda: _deepnote_execute_sql('SELECT\n    id_customer,\n    razao_social,\n    nome_fantasia,\n    customer_interno,\n    status,\n    criado_em,\n    encerrado_em,\n    valor_plano,\n    qtd_invoices,\n    qtd_pedidos_venda_confirmados,\n    vr_pedidos_venda_confirmados,\n    qtd_clientes,\n    qtd_produtos,\n    qtd_tabelas_preco,\n    qtd_politicas_comerciais,\n    usuarios_contratados,\n    usuarios_cadastrados,\n    usuarios_ativos,\n    usuarios_inativos,\n    qtd_usuarios_excedentes,\n    vr_usuarios_excedentes\nFROM\n    processed_data', 'SQL_DEEPNOTE_DATAFRAME_SQL', audit_sql_comment='', sql_cache_mode='cache_disabled') if '_deepnote_execute_sql' in globals() else _dntk.execute_sql('SELECT\n    id_customer,\n    razao_social,\n    nome_fantasia,\n    customer_interno,\n    status,\n    criado_em,\n    encerrado_em,\n    valor_plano,\n    qtd_invoices,\n    qtd_pedidos_venda_confirmados,\n    vr_pedidos_venda_confirmados,\n    qtd_clientes,\n    qtd_produtos,\n    qtd_tabelas_preco,\n    qtd_politicas_comerciais,\n    usuarios_contratados,\n    usuarios_cadastrados,\n    usuarios_ativos,\n    usuarios_inativos,\n    qtd_usuarios_excedentes,\n    vr_usuarios_excedentes\nFROM\n    processed_data', 'SQL_DEEPNOTE_DATAFRAME_SQL', audit_sql_comment='', sql_cache_mode='cache_disabled'))()
staged_data

Unnamed: 0,id_customer,razao_social,nome_fantasia,customer_interno,status,criado_em,encerrado_em,valor_plano,qtd_invoices,qtd_pedidos_venda_confirmados,...,qtd_clientes,qtd_produtos,qtd_tabelas_preco,qtd_politicas_comerciais,usuarios_contratados,usuarios_cadastrados,usuarios_ativos,usuarios_inativos,qtd_usuarios_excedentes,vr_usuarios_excedentes
0,1,BENDITA CRIATIVIDADE COMERCIO DE DECORACAO LTDA,BENDITA FEITURA,False,Ativo,02/06/2020 05:18:17,01/08/2024 20:41:07,2468.80,743,536,...,2297,5027,3,3,105,25,25,0,0,0.0
1,2,Mail Inc,,False,Ignorar,02/07/2020 19:23:21,,149.90,0,0,...,0,0,3,0,5,2,2,0,0,0.0
2,3,Macnario,Macnario Sistemas,True,Ativo,02/10/2020 16:01:17,,599.40,209,129,...,226,335,45,10,155,76,59,17,0,0.0
3,4,Mailmink,,False,Ignorar,02/13/2020 17:47:46,,149.90,0,0,...,1,1,3,0,5,2,2,0,0,0.0
4,5,GUIDUGLI DESIGN DE INTERIORES LTDA,GUIDUGLI DESIGN - GUIDECASA,False,Ignorar,02/21/2020 23:36:40,,149.90,0,0,...,1,1,3,0,5,2,2,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
659,739,Automaxx Borba Alves IndÃºstria de PeÃ§as,,False,Ativo,07/31/2024 17:19:49,,1108.45,0,0,...,380,0,0,0,20,5,5,0,0,0.0
660,741,Automaxx (base inativa),,False,Inativo,07/31/2024 20:14:32,,1108.45,0,0,...,0,0,0,0,17,4,4,0,0,0.0
661,742,Urs - Materiais Para Construcao LTDA,Rio Sul Distribuidora,False,Ativo,08/01/2024 17:59:18,,549.90,0,0,...,0,0,0,0,3,4,4,0,1,39.9
662,743,PIGATTO - COMERCIO DE PAPEIS ESPECIAIS LTDA,,False,Ativo,08/01/2024 18:03:57,,199.50,0,0,...,2167,504,0,0,5,4,4,0,0,0.0


In [None]:
try:
    output = writer.get_output_file_path(source, stream_table_name,target_layer='staging') + '.csv'
    os.makedirs(os.path.dirname(output), exist_ok=True)
    staged_data.to_csv(output, index=False)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

## Load

In [None]:
# Correcting the instantiation of the PostgresLoader by ensuring parameter names match expected signature
try:
    loader = PostgresLoader(user=user, password=password, host=host, db_name=db_name)
except Exception as e:
    notifier.pipeline_error(e)
    raise e

In [None]:
try:
    staged_data_path = writer.get_output_file_path(source, stream_table_name,target_layer='staging') + '.csv'
    staged_data = pd.read_csv(staged_data_path)
    loader.load_data(dataframe=staged_data, target_table='bdt__customer_stats', mode='replace', target_schema='public')
except Exception as e:
    notifier.pipeline_error(e)
    raise e

# Callbacks

In [None]:
notifier.pipeline_end()

Accepted


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=022588a7-e3ac-4acd-8e50-3c39b9590c40' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>