# Retriever


## Buat Haystack Pipeline untuk retrieve data dari mongodb atlas

In [1]:
# Setup Environment & Connection


import os
from getpass import getpass

if "MONGO_CONNECTION_STRING" not in os.environ:
    os.environ["MONGO_CONNECTION_STRING"] = getpass("Masukkan MongoDB Connection String Anda: ")

In [2]:
from haystack import Pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Connect to MongoDBAtlas Document Store

document_store = MongoDBAtlasDocumentStore(
    database_name="depato_store",
    collection_name="products",
    vector_search_index="vector_index", 
    full_text_search_index="search_index",
)

print("Connected to MongoDB Atlas")

Connected to MongoDB Atlas


In [4]:
# Build Retrieval Pipeline

pipeline = Pipeline()
embedder_model = "sentence-transformers/all-mpnet-base-v2"

pipeline.add_component("embedder", SentenceTransformersTextEmbedder(model=embedder_model))
pipeline.add_component("retriever", MongoDBAtlasEmbeddingRetriever(document_store=document_store, top_k=5))
pipeline.connect("embedder", "retriever")

print(f"Pipeline siap dengan model embedding: {embedder_model}")

Pipeline siap dengan model embedding: sentence-transformers/all-mpnet-base-v2


In [5]:
# Test Basic Retrieval (tanpa filter)

query = "comfortable dress for going out with friends"

result = pipeline.run({
    "embedder": {"text": query}
})

docs = result["retriever"]["documents"]

print(f"Retrieved {len(docs)} documents for query: '{query}'\n")
for i, doc in enumerate(docs[:3], start=1):
    print(f"{i}. {doc.meta.get('title')} — {doc.meta.get('brand')}")
    print(f"   Price: ${doc.meta.get('price')} | Material: {doc.meta.get('material')} | Category: {doc.meta.get('category')}")
    print(f"   Preview: {doc.content[:150]}...\n")

Batches: 100%|██████████| 1/1 [00:00<00:00,  9.81it/s]


Retrieved 5 documents for query: 'comfortable dress for going out with friends'

1. Rasta Imposta Flag Dress USA — Rasta Imposta
   Price: $24.33 | Material: Polyester | Category: Dresses/Jumpsuits
   Preview: Rasta Imposta Flag Dress USA
The usa flag dress is the best outfit to wear if you want to show off your patriotism...

2. Style J Sweet Grace Denim Skirt — Style J
   Price: $35.0 | Material: Cotton | Category: Bottoms
   Preview: Style J Sweet Grace Denim Skirt
Caual style long skirt, great price and high quality...

3. Style J Flowing Chambray Long Denim Skirt — Style J
   Price: $48.0 | Material: Cotton | Category: Bottoms
   Preview: Style J Flowing Chambray Long Denim Skirt
Caual style long skirt, great price and high quality...



In [6]:
# Single Filter Example

filters = {
    "field": "meta.category",
    "operator": "==",
    "value": "Dresses/Jumpsuits"
}

result = pipeline.run({
    "embedder": {"text": query},
    "retriever": {"filters": filters}
})

docs = result["retriever"]["documents"]

print(f"\nSingle Filter — Category: 'Dresses/Jumpsuits'")
print(f"Retrieved {len(docs)} documents.\n")
for i, doc in enumerate(docs[:3], start=1):
    print(f"{i}. {doc.meta.get('title')} — {doc.meta.get('brand')}")
    print(f"   Price: ${doc.meta.get('price')} | Material: {doc.meta.get('material')}\n")

Batches: 100%|██████████| 1/1 [00:00<00:00, 18.91it/s]



Single Filter — Category: 'Dresses/Jumpsuits'
Retrieved 5 documents.

1. Rasta Imposta Flag Dress USA — Rasta Imposta
   Price: $24.33 | Material: Polyester

2. Ivanka Trump Women's Lace Dress — nan
   Price: $98.6 | Material: Polyester

3. likemary Maxi Wrap Dress Printed V-Neck — likemary
   Price: $52.0 | Material: Viscose



In [8]:
# Multiple Filter Example

filters = {
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {"field": "meta.price", "operator": "<=", "value": 50}
    ]
}

result = pipeline.run({
    "embedder": {"text": query},
    "retriever": {"filters": filters}
})

docs = result["retriever"]["documents"]

print(f"\nMultiple Filter — Category + Price <= 50")
print(f"Retrieved {len(docs)} documents.\n")
for i, doc in enumerate(docs[:3], start=1):
    print(f"{i}. {doc.meta.get('title')} — ${doc.meta.get('price')}\n")

Batches: 100%|██████████| 1/1 [00:00<00:00, 19.48it/s]



Multiple Filter — Category + Price <= 50
Retrieved 5 documents.

1. Rasta Imposta Flag Dress USA — $24.33

2. A. Byer Juniors' Cable-Knit Fashion Sweater Dress — $34.99

3. Women's Sexy Sequin Costume Long Dress — $35.77



In [10]:
# Advanced Filter

filters = {
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {"field": "meta.price", "operator": "<=", "value": 50},
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.material", "operator": "==", "value": "Cotton"},
                {"field": "meta.material", "operator": "==", "value": "Polyester"}
            ]
        }
    ]
}

result = pipeline.run({
    "embedder": {"text": query},
    "retriever": {"filters": filters}
})

docs = result["retriever"]["documents"]

print(f"\nAdvanced Filter — Category + Price <= 50 + Material in [Cotton, Polyester]")
print(f"Retrieved {len(docs)} documents.\n")
for i, doc in enumerate(docs[:3], start=1):
    print(f"{i}. {doc.meta.get('title')} — {doc.meta.get('material')} (${doc.meta.get('price')})\n")

Batches: 100%|██████████| 1/1 [00:00<00:00, 21.65it/s]



Advanced Filter — Category + Price <= 50 + Material in [Cotton, Polyester]
Retrieved 5 documents.

1. Rasta Imposta Flag Dress USA — Polyester ($24.33)

2. A. Byer Juniors' Cable-Knit Fashion Sweater Dress — Polyester ($34.99)

3. Women's Sexy Sequin Costume Long Dress — Polyester ($35.77)

