In [1]:
import os
from getpass import getpass

import pandas as pd
from haystack import Document
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.document_stores.types import DuplicatePolicy
from haystack.utils import Secret
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from pymongo import MongoClient

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Prompt for each secret without echoing it and expose it to this process
# through the environment; later cells read MONGO_CONNECTION_STRING from there.
for env_var, prompt in (
    ("MONGO_CONNECTION_STRING", "Masukkan MongoDB Connection String Anda: "),
    ("OPENAI_API_KEY", "Masukkan OpenAI API Key Anda: "),
):
    os.environ[env_var] = getpass(prompt)

In [3]:
# Load Dataset
# The data directory can be overridden with the SMARTSHOPPER_DATA_DIR environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original location is used.
data_dir = os.environ.get(
    "SMARTSHOPPER_DATA_DIR",
    "/Users/mhdfarhanali/Documents/Farhan SmartShopper/data",
)
file_path = os.path.join(data_dir, "common_info.csv")
df = pd.read_csv(file_path)

# The document-building cell indexes these two columns directly, so fail here
# with a clear message instead of a KeyError further down.
missing_columns = {"question", "answer"} - set(df.columns)
assert not missing_columns, f"Missing expected columns: {sorted(missing_columns)}"

print(f"Dataset loaded successfully — shape: {df.shape}")
display(df.head())

Dataset loaded successfully — shape: (40, 2)


Unnamed: 0,question,answer
0,How to buy a product?,You can buy products by adding them to your ca...
1,How long is the delivery process?,Delivery usually takes 2–5 working days depend...
2,How to request a refund?,You can request a refund within 7 days through...
3,Is Cash on Delivery available?,"Yes, Cash on Delivery (COD) is available for s..."
4,How can I track my order?,You can track your order from the “My Orders” ...


In [4]:
# Build Haystack Documents
# One Document per row: the body is the question, a blank line, then the answer.
# Each metadata field is read from the row and falls back to "" when the row
# has no such column.

documents = [
    Document(
        content=f"{row['question']}\n\n{row['answer']}",
        meta={
            field: row.get(field, "")
            for field in ("topic", "category", "source", "question")
        },
    )
    for _, row in df.iterrows()
]

print(f"Built {len(documents)} documents.")

Built 40 documents.


In [5]:
# Initialize MongoDB Atlas Document Store
# No connection string is passed here; MONGO_CONNECTION_STRING is set in the
# environment earlier in the notebook (presumably the store's default source
# for it — confirm against the installed integration version).

document_store = MongoDBAtlasDocumentStore(
    database_name="depato_store",
    collection_name="common_information",
    vector_search_index="vector_index_common",
    full_text_search_index=None,
)

# Constructing the store is not proof of connectivity, so issue one cheap query
# against the collection; a bad connection string or unreachable cluster
# surfaces here rather than after the success message.
document_store.count_documents()

print("Connected to MongoDB Atlas successfully!")

Connected to MongoDB Atlas successfully!


In [6]:
# Embed Documents
# NOTE(review): the Atlas vector search index must be defined with the same
# dimensionality this model produces — confirm against the index definition.

embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-mpnet-base-v2"
)

# Load the model before the first run() call.
embedder.warm_up()

print("Generating embeddings...")
results = embedder.run(documents=documents)
embedded_docs = results["documents"]

# Guard the write step: every input document must come back with an embedding.
assert len(embedded_docs) == len(documents), "Embedder returned a different number of documents."
assert all(doc.embedding is not None for doc in embedded_docs), "Some documents have no embedding."

Generating embeddings...


Batches: 100%|██████████| 2/2 [00:00<00:00,  8.56it/s]


In [7]:
# Write to MongoDB Atlas
# Re-running the notebook writes the same documents again. With the default
# duplicate policy the store rejects documents whose id already exists, so
# overwrite them instead; this keeps the cell safe to re-run and refreshes
# stored embeddings.

document_store.write_documents(embedded_docs, policy=DuplicatePolicy.OVERWRITE)
print("Successfully stored all documents in MongoDB Atlas (vector_index_common).")

Successfully stored all documents in MongoDB Atlas (vector_index_common).


In [8]:
# Verify Data in MongoDB
# Query the collection directly with pymongo, independently of Haystack, to
# confirm the documents landed where expected.

client = MongoClient(os.environ["MONGO_CONNECTION_STRING"])
db = client["depato_store"]
collection = db["common_information"]

count = collection.count_documents({})
# Report the name of the collection actually queried rather than a hard-coded label.
print(f"Total documents in '{collection.name}': {count}")

if count > 0:
    sample = collection.find_one()
    print("\nExample document:")
    print(sample["content"])
    print(sample["meta"])
else:
    print("No documents found — please check your embedding step.")

Total documents in 'common_data': 40

Example document:
How to buy a product?

You can buy products by adding them to your cart and completing the checkout process securely.
{'topic': '', 'category': '', 'source': '', 'question': 'How to buy a product?'}
