In [1]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.ollama import OllamaEmbedding
import chromadb
from typing import List
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core import load_index_from_storage

In [2]:
# Set up the two pipeline stages: a reader over the local "documentos" folder
# and a sentence-based splitter configured with chunk_size=1200.
reader = SimpleDirectoryReader(input_dir="documentos")
node_parser = SentenceSplitter(chunk_size=1200)

# Load every document the reader finds, then split them into nodes for indexing.
docs = reader.load_data()
nodes = node_parser.get_nodes_from_documents(docs)

In [None]:
# Embedding client that talks to the Ollama server at localhost:11434.
# NOTE(review): "phi3:mini" looks like a general-purpose chat model rather than
# a dedicated embedding model — confirm this choice is intentional.
embed_model = OllamaEmbedding(
    base_url="http://localhost:11434",
    model_name="phi3:mini",
    ollama_additional_kwargs=dict(mirostat=0),
)

In [4]:
class LlamaIndexOllamaEmbeddingFunction:
    """Callable adapter around an embedding model exposing `get_text_embedding_batch`.

    Instances are passed as the `embedding_function` of a Chroma collection:
    calling the instance with a list of texts returns one embedding per text.
    """

    def __init__(self, embed_model, name):
        """Store the wrapped model and the name reported by `name()`.

        Args:
            embed_model: Object providing `get_text_embedding_batch(texts)`.
            name: Identifier returned by `name()`.
        """
        self.embed_model = embed_model
        self.inner_name = name

    def __call__(self, input: List[str]) -> List[List[float]]:
        """Embed a batch of texts.

        Args:
            input: Texts to embed.

        Returns:
            One embedding per input text, in the same order. An empty batch
            yields an empty list without contacting the model.

        Raises:
            RuntimeError: If the wrapped model fails; the original exception
                is attached as `__cause__`.
        """
        if len(input) == 0:
            return []
        try:
            return self.embed_model.get_text_embedding_batch(input)
        except Exception as e:
            # Do not fall back to empty vectors: they are not valid embeddings
            # and would only hide the real failure from the caller.
            raise RuntimeError(f"Error in embedding function: {e}") from e

    def name(self) -> str:
        """Return the name given at construction time."""
        return self.inner_name


# Wrap the Ollama embedding model so it can be handed to Chroma as a callable.
embedding_function = LlamaIndexOllamaEmbeddingFunction(
    embed_model=embed_model,
    name="llama-index-ollama-embedding",
)

In [5]:
# Open the on-disk Chroma database stored under ./chroma-db.
db = chromadb.PersistentClient(path="./chroma-db")

collection_name = "documentos"

# Fetch the collection if it already exists, otherwise create it.
try:
    chroma_collection = db.get_or_create_collection(
        name=collection_name,
        embedding_function=embedding_function,
    )
except Exception as e:
    print(f'erro ao carregar ou criar coleção: {e}')
    # Re-raise so execution stops here: swallowing the error would leave
    # `chroma_collection` undefined and make the next cell fail with an
    # unrelated NameError.
    raise

2025-09-02 14:10:53,018 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [6]:
# Expose the Chroma collection to LlamaIndex as a vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# The collection lives on disk, so embedding and inserting `nodes` on every run
# would append duplicate entries and redo all the embedding requests. Only
# insert when the collection is still empty; otherwise attach to what is stored.
# To re-index after the source documents change, clear the collection first.
nodes_to_insert = nodes if chroma_collection.count() == 0 else []

# Building the index with this storage_context also registers it there, which
# the later `load_index_from_storage(storage_context, ...)` call relies on.
index = VectorStoreIndex(
    nodes=nodes_to_insert,
    storage_context=storage_context,
    embed_model=embed_model,
)

2025-09-02 14:11:28,286 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:28,646 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:29,042 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:29,415 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:29,759 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:30,124 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:30,515 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:30,902 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:31,322 - INFO - HTTP Request: POST http://localhost:11434/api/embed "HTTP/1.1 200 OK"
2025-09-02 14:11:31,676 - INFO - HTTP Request: POST http://localhost:1143

In [8]:
# Rebuild the index handle from the storage context, reusing the same embedding model.
# NOTE(review): `storage_context` is created in memory in an earlier cell and nothing
# visible here persists it, so this presumably only works within the same kernel
# session — confirm whether reloading straight from the Chroma collection was intended.
index = load_index_from_storage(
    storage_context=storage_context,
    embed_model=embed_model,
)

2025-09-02 14:23:35,502 - INFO - Loading all indices.
