In [4]:
from sqlalchemy import create_engine, MetaData

url = 'data/ecommerce.db'
engine = create_engine(f'sqlite:///{url}')

In [7]:
metadata_obj = MetaData()
metadata_obj.reflect(bind=engine)
metadata_obj.tables.keys()

dict_keys(['Clientes', 'Fornecedores', 'Funcionarios'])

In [9]:
import pandas as pd

df = pd.read_sql_table('Clientes', engine)
df

Unnamed: 0,ID_Cliente,Nome,Email,Rua_Numero,Estado,Valor_gasto
0,1,lucas moura,lucas.moura@email.com,"sitio de cardoso, 49",sao paulo,29017
1,2,julia andrade,julia.andrade@email.com,"trecho mariane teixeira, 90",tocantins,7834
2,3,rafael dias,rafael.dias@email.com,"setor de duarte, 114",paraiba,4071
3,4,carla souza,carla.souza@email.com,"recanto ana livia lopes, 53",para,17512
4,5,felipe neto,felipe.neto@email.com,"jardim de monteiro, 11",sao paulo,24307
...,...,...,...,...,...,...
95,96,ana luiza correia,heitor40@correia.net,"passarela pedro das neves, 806",para,9550
96,97,augusto nascimento,luiz-fernando91@da.br,"nucleo teixeira, 1",mato grosso do sul,22149
97,98,dr. caue pires,azevedomelissa@bol.com.br,ladeira peixoto,parana,33236
98,99,livia novaes,caldeiramelissa@costa.com,"travessa lima, 82",ceara,42653


In [10]:
df = pd.read_sql_table('Fornecedores', engine)
df

Unnamed: 0,ID_Fornecedores,Nome,Contato,Telefone,Email,Rua,Bairro,Cidade,Estado,cep
0,1,technex solutions,lucas martins,11912345678,info@technexsolutions.com,"rua das inovacoes, 123",centro,sao paulo,sp,1000000
1,2,quantum devices,isabela oliveira,11923456789,contact@quantumdevices.net,"avenida tecnologica, 456",jardim futuro,guarulhos,sp,20000000
2,3,innovatetech corporation,rafael pereira,11934567890,support@innovatetechcorp.com,"alameda do progresso, 789",centro das inovacoes,santo andre,sp,30000000
3,4,electrogadget innovations,ana silva,11945678901,sales@electrogadgetinnovations.com,"rua eletronica, 234",vila progresso,sao caetano,sp,40000000
4,5,nexustech systems,pedro almeida,11956789012,info@nexustechsystems.com,"travessa da conexao, 567",parque das maravilhas,sao paulo,sp,88000000
5,6,quantumware technologies,camila santos,11967890123,inquiries@quantumwaretech.com,"rua quantica, 890",bairro techville,guarulhos,sp,90000000


In [11]:
df = pd.read_sql_table('Funcionarios', engine)
df

Unnamed: 0,ID_Funcionarios,Nome,Cargo,DataContratacao,Telefone,Email,Rua,Bairro,Cidade,Estado,cep
0,1,carla souza,vendas,2022-03-15,1134567890,carla.souza@email.com,"rua das flores, 123",vila esperanca,sao paulo,sp,1000001
1,2,paulo silva,vendas,2022-05-10,1123456789,paulo.silva@email.com,"avenida do sol, 456",jardim da harmonia,sao paulo,sp,20000001
2,3,marta rocha,gerencia,2022-07-20,1145678901,marta.rochaa@email.com,"praca da liberdade, 789",parque dos sabias,sao paulo,sp,1000001
3,4,sofia ramos,tecnico,2022-01-12,1198765432,sofia.ramos@email.com,"alameda dos pinheiros, 234",floresta dos pinheiros,sao paulo,sp,40000001
4,5,joao pereira,estoque,2022-09-05,1123456789,joao.pereira@email.com,"travessa dos girassois, 567",centro,sao paulo,sp,20000001
5,6,talita borges,financeiro,2022-04-02,1156789012,talita.borges@email.com,"rodovia do mar, 890",nova aurora,sao paulo,sp,90000001
6,7,larissa freitas,vendas,2023-01-10,1187654321,larissa.freitas@email.com,"beco das estrelas, 1234",centro,sao paulo,sp,20000001


In [12]:
import os
from dotenv import load_dotenv
load_dotenv()

GROQ_API_KEY = os.getenv('GROQ_API_KEY')

In [15]:
from llama_index.core import Settings
from llama_index.llms.groq import Groq
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

llama_model = 'llama-3.1-70b-versatile'
hf_embedings_model = 'BAAI/bge-m3'

Settings.llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
Settings.embed_model = HuggingFaceEmbedding(model_name=hf_embedings_model)

In [16]:
from llama_index.core import SQLDatabase
from llama_index.core.objects import SQLTableNodeMapping

sql_dtabase = SQLDatabase(
    engine=engine
)

table_node_map = SQLTableNodeMapping(
    sql_database=sql_dtabase
)

In [17]:
from llama_index.core.objects import SQLTableSchema

table_schema_obj = []

for table in metadata_obj.tables.keys():
    table_schema_obj.append(SQLTableSchema(table_name=table))


In [18]:
from llama_index.core.objects import ObjectIndex
from llama_index.core import VectorStoreIndex

obj_index = ObjectIndex.from_objects(table_schema_obj, table_node_map, VectorStoreIndex)

In [19]:
obj_retriever = obj_index.as_retriever(similarity_top_k=1)

In [22]:
from llama_index.core.indices.struct_store.sql_query import SQLTableRetrieverQueryEngine

query_engine = SQLTableRetrieverQueryEngine(sql_database=sql_dtabase, table_retriever=obj_retriever)

In [25]:
response = query_engine.query('Quais os estados mais frequentes na tabela de clientes?')
print(response)

Based on the query results, the top 3 most frequent states in the clients table are:

1. Paraná and Pará, both with a frequency of 7.
2. Rondônia, with a frequency of 6.

These three states account for the highest number of clients in the table.


In [27]:
print(response.metadata)

{'0d954639-0dbc-413c-88bc-35966812f873': {'sql_query': 'SELECT Estado, COUNT(*) as Frequencia FROM Clientes GROUP BY Estado ORDER BY Frequencia DESC;', 'result': [('parana', 7), ('para', 7), ('rondonia', 6), ('tocantins', 5), ('sergipe', 5), ('rio de janeiro', 5), ('maranhao', 5), ('goias', 5), ('ceara', 5), ('amapa', 5), ('roraima', 4), ('rio grande do norte', 4), ('piaui', 4), ('pernambuco', 4), ('distrito federal', 4), ('sao paulo', 3), ('minas gerais', 3), ('mato grosso do sul', 3), ('bahia', 3), ('santa catarina', 2), ('rio grande do sul', 2), ('paraiba', 2), ('mato grosso', 2), ('amazonas', 2), ('espirito santo', 1), ('alagoas', 1), ('acre', 1)], 'col_keys': ['Estado', 'Frequencia']}, 'sql_query': 'SELECT Estado, COUNT(*) as Frequencia FROM Clientes GROUP BY Estado ORDER BY Frequencia DESC;', 'result': [('parana', 7), ('para', 7), ('rondonia', 6), ('tocantins', 5), ('sergipe', 5), ('rio de janeiro', 5), ('maranhao', 5), ('goias', 5), ('ceara', 5), ('amapa', 5), ('roraima', 4), ('

In [29]:
response = query_engine.query('Quais as fornecedoras localizadas na cidadde de São Paulo?')
print(response)

Since the SQL response is an empty array `[]`, it means that there are no suppliers located in the city of São Paulo. Therefore, the response to the query would be:

"Nenhuma fornecedora foi encontrada na cidade de São Paulo."


In [30]:
response = query_engine.query('Quais funcionarios localizados na cidade de São Paulo?')
print(response)

Since the SQL response is an empty array `[]`, it means that there are no funcionarios (employees) located in the city of São Paulo. Therefore, the response to the query would be:

"Nenhum funcionário foi encontrado na cidade de São Paulo." (No employees were found in the city of São Paulo.)
