In [1]:
# Import necessary modules
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
import chromadb
import logging

In [2]:
# Enable debug logging
logging.basicConfig(level=logging.DEBUG)

# Define embedding function
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

# Load documents
documents = SimpleDirectoryReader("./dataset").load_data()

# Step 1: Initialize Chroma with PersistentClient and create collection
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("llama_index_test_collection")

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-base-en-v1.5
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/config_sentence_transformers.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/README.md HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/modules.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/sentence_bert_config.json HTTP/11" 200 0
DEBUG:urllib3.connectionpool:https://huggingface.co:443 "HEAD /BAAI/bge-base-en-v1.5/resolve/main/adapter_config.json HTTP/11" 404 0
DEBUG:urllib3.con

In [3]:
# Step 2: Set up ChromaVectorStore and load in data
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, embed_model=embed_model
)

# Verify that documents were added
print("Document count after initial load:", chroma_collection.count())


DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: Title Assets; RBA Assets; Banks Assets; RFCs; M...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 136      
1976-06-30 00:00:00 5.833      
1976-...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 851  0.5
1985-03-31 00:00:00 19.983  21.465 32....
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 983 51.001 1.044
1990-09-30 00:00:00 25.576 365...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 525 465.256 51.177 44.402 95.579 86.056 3.359
1...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 912 2.352
2000-06-30 00:00:00 55.273 765.268 63...
DEBUG:llama_index.core.node_parser.node_utils:> Adding chunk: 487 1372

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

DEBUG:chromadb.config:Starting component PersistentLocalHnswSegment


Document count after initial load: 678


In [4]:
# Step 3: Query data
from llama_index.llms.ollama import Ollama
query_engine = index.as_query_engine(llm=Ollama(model="llama3.2:1b-instruct-fp16", request_timeout=360.0))
response = query_engine.query("How to save money?")
display(Markdown(f"<b>{response}</b>"))

DEBUG:httpx:load_ssl_context verify=True cert=None trust_env=True http2=False
DEBUG:httpx:load_verify_locations cafile='C:\\Users\\sumit\\.conda\\envs\\llm\\Library\\ssl\\cacert.pem'
DEBUG:urllib3.connectionpool:Resetting dropped connection: us.i.posthog.com


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

DEBUG:llama_index.vector_stores.chroma.base:> Top 2 nodes:
DEBUG:llama_index.vector_stores.chroma.base:> [Node 813cac0e-0279-426a-862d-d65e4f94d23a] [Similarity score: 0.414281044187905] carer responsibilities or a disability, you should discuss ways your
employer can support you. To...
DEBUG:llama_index.vector_stores.chroma.base:> [Node f29e7124-3fd0-41a0-8092-f5b174911500] [Similarity score: 0.414281044187905] carer responsibilities or a disability, you should discuss ways your
employer can support you. To...
DEBUG:llama_index.core.indices.utils:> Top 2 nodes:
> [Node 813cac0e-0279-426a-862d-d65e4f94d23a] [Similarity score:             0.414281] carer responsibilities or a disability, you should discuss ways your
employer can support you. To...
> [Node f29e7124-3fd0-41a0-8092-f5b174911500] [Similarity score:             0.414281] carer responsibilities or a disability, you should discuss ways your
employer can support you. To...
DEBUG:httpx:load_ssl_context verify=True cert=None trus

<b>To start saving money, consider exploring ways to reduce expenses or increase income. One option is to review your budget to identify areas where you can cut back on unnecessary spending. You might also think about finding additional sources of income, such as taking on a side job or selling items you no longer need.

You could also try implementing cost-saving strategies like cooking at home instead of eating out, canceling subscription services you don't use, and shopping around for deals on everyday items.

It may be helpful to set up automatic transfers from your checking account to a dedicated savings account, so you can prioritize saving over other financial obligations.</b>