In [1]:
import os
import json
from dotenv import load_dotenv
from langchain_core.documents import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_pinecone import PineconeVectorStore
import pinecone
from pinecone import Pinecone, ServerlessSpec


For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  from langchain_pinecone.vectorstores import Pinecone, PineconeVectorStore


#### Carregar variáveis do .env

In [2]:
# Load the variables declared in the .env file, then read the Pinecone key
# from the process environment (None when the variable is not set).
load_dotenv()
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

#### Tratar o caso em que a chave "PINECONE_API_KEY" não é encontrada


In [3]:
# Fail fast when the key is missing or empty.
if PINECONE_API_KEY is None or PINECONE_API_KEY == "":
    raise ValueError("PINECONE_API_KEY não encontrada")

#### Inicializar Pinecone

In [4]:
# Name of the Pinecone index and the vector dimension it is created with.
INDEX_NAME = "turismo"
DIMENSION = 384

# Pinecone client built with the API key read from the environment.
pc = Pinecone(api_key=PINECONE_API_KEY)

# Create the index only when no existing index already has this name.
existing_indexes = [idx["name"] for idx in pc.list_indexes()]
if INDEX_NAME not in existing_indexes:
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric="cosine",
        spec=serverless_spec,
    )

# Handle to the index named INDEX_NAME.
index = pc.Index(INDEX_NAME)

#### Função para carregar JSONs como docs

In [5]:
def load_city_data(file_path: str):
    """Read a JSON file of places and wrap each entry in a ``Document``.

    The file must contain an iterable of objects with the keys ``nome``,
    ``descricao``, ``cidade`` and ``categoria``; a missing key raises
    ``KeyError``.

    Args:
        file_path: Path of the JSON file, opened as UTF-8.

    Returns:
        A list with one ``Document`` per entry. The text is
        ``"<nome> - <descricao>"`` and the metadata carries ``cidade``,
        ``categoria`` and ``nome``.
    """
    with open(file_path, "r", encoding="utf-8") as handle:
        entries = json.load(handle)

    return [
        Document(
            page_content=f"{entry['nome']} - {entry['descricao']}",
            metadata={
                "cidade": entry["cidade"],
                "categoria": entry["categoria"],
                "nome": entry["nome"],
            },
        )
        for entry in entries
    ]

#### Função para criar/recarregar o vector store

In [6]:
def create_vectorstore(documents):
    """Embed ``documents`` and upsert them into the Pinecone index ``INDEX_NAME``.

    Every document is given a deterministic vector ID (SHA-256 of its text and
    metadata). Calling this function again with the same documents therefore
    overwrites the vectors already stored instead of adding duplicates under
    newly generated IDs.

    Args:
        documents: LangChain ``Document`` objects; ``page_content`` and
            ``metadata`` are read from each one.

    Returns:
        The ``PineconeVectorStore`` produced by ``from_documents``.
    """
    import hashlib  # function-scope import: not part of the notebook's import cell

    def _stable_id(doc):
        # Serialize with sorted keys so the same document always hashes the same way.
        payload = json.dumps(
            {"page_content": doc.page_content, "metadata": doc.metadata},
            sort_keys=True,
            ensure_ascii=False,
            default=str,
        )
        return hashlib.sha256(payload.encode("utf-8")).hexdigest()

    # Materialize once so the input can be walked twice (IDs + upsert).
    documents = list(documents)

    # The embedding size of this model has to match the dimension the index
    # was created with (DIMENSION = 384).
    embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

    # Connect to the existing index and upsert the documents under stable IDs.
    return PineconeVectorStore.from_documents(
        documents=documents,
        embedding=embeddings_model,
        index_name=INDEX_NAME,
        ids=[_stable_id(doc) for doc in documents],
    )

#### Função principal para carregar dados e retornar o retriever

In [7]:
def get_rag_retriever(data_paths=("../data/rio.json", "../data/paris.json"), k=6):
    """Load the city JSON files, index their documents and return a retriever.

    Args:
        data_paths: JSON files passed one by one to ``load_city_data``.
            Defaults to the Rio and Paris files, in that order.
        k: Number of results requested per similarity search. Defaults to 6.

    Returns:
        The retriever obtained from ``as_retriever`` on the vector store
        returned by ``create_vectorstore``.
    """
    # Concatenate the documents of every file, keeping the order of data_paths.
    all_docs = []
    for path in data_paths:
        all_docs.extend(load_city_data(path))

    vectorstore = create_vectorstore(all_docs)
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )
    print(f"{len(all_docs)} documentos indexados e retriever pronto.")
    return retriever

In [8]:
# Build the retriever: loads the JSON files, indexes the documents and prints
# how many were indexed.
retriever = get_rag_retriever()

  embeddings_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")


18 documentos indexados e retriever pronto.
