# Shop Recommendation

In [1]:
from haystack import Pipeline, component
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.agents import Agent
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack_integrations.document_stores.mongodb_atlas import MongoDBAtlasDocumentStore
from haystack.utils import Secret
from haystack.components.builders import ChatPromptBuilder, PromptBuilder
from haystack.dataclasses import ChatMessage
from haystack.tools.tool import Tool
from haystack_experimental.chat_message_stores.in_memory import InMemoryChatMessageStore
from haystack_experimental.components.retrievers import ChatMessageRetriever
from haystack_experimental.components.writers import ChatMessageWriter
from haystack_integrations.components.retrievers.mongodb_atlas import MongoDBAtlasEmbeddingRetriever
from haystack.components.generators import OpenAIGenerator
from pymongo import MongoClient
from typing import List, Annotated
from getpass import getpass
import re, json, os, logging
from haystack import tracing
from haystack.tracing.logging_tracer import LoggingTracer

# Root logging stays at WARNING; only the "haystack" logger is raised to INFO
# so that pipeline progress messages show up in the cell output.
logging.basicConfig(
    format="%(levelname)s - %(name)s -  %(message)s",
    level=logging.WARNING,
)
logging.getLogger("haystack").setLevel(logging.INFO)

# Optional debugging aid: route Haystack traces to the logger, colouring the
# component-name tag. The content-tracing flag is explicitly switched off.
tracing.tracer.is_content_tracing_enabled = False
tracing.enable_tracing(LoggingTracer(tags_color_strings={"haystack.component.name": "\x1b[1;34m"}))

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load API keys & MongoDB credentials.
# Prompt only for values that are not already present in the environment, so a
# pre-configured kernel (or a non-interactive "Restart & Run All") is not blocked
# on input and existing credentials are not overwritten.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Masukkan OpenAI API Key Anda: ")
if not os.environ.get("MONGO_CONNECTION_STRING"):
    os.environ["MONGO_CONNECTION_STRING"] = getpass("Masukkan MongoDB Connection String Anda: ")

# In-memory store for the conversation history.
chat_message_store = InMemoryChatMessageStore()

# NOTE(review): no connection string is passed explicitly; presumably the store
# reads MONGO_CONNECTION_STRING from the environment — confirm against the
# mongodb-atlas integration docs.
document_store = MongoDBAtlasDocumentStore(
    database_name="depato_store",
    collection_name="products",
    vector_search_index="vector_index",
    full_text_search_index="search_index",
)

In [3]:
class ParaphraserPipeline:
    """Pipeline that rewrites a user query using the stored chat history as context.

    Wiring: ``memory_retriever`` -> ``prompt_builder.memories`` and
    ``prompt_builder.prompt`` -> ``generator.messages``.
    """

    def __init__(self, chat_message_store):
        """Assemble the retriever, prompt builder and chat generator around ``chat_message_store``."""
        pipe = Pipeline()

        pipe.add_component("memory_retriever", ChatMessageRetriever(chat_message_store))

        builder = ChatPromptBuilder(variables=["query", "memories"], required_variables=["query", "memories"])
        pipe.add_component("prompt_builder", builder)

        llm = OpenAIChatGenerator(model="gpt-4.1", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"]))
        pipe.add_component("generator", llm)

        for sender, receiver in (
            ("memory_retriever", "prompt_builder.memories"),
            ("prompt_builder.prompt", "generator.messages"),
        ):
            pipe.connect(sender, receiver)

        self.pipeline = pipe

    def run(self, query):
        """Return the text of the first generator reply for ``query``.

        The chat template (system + user message) is supplied at run time; the
        user message lists every retrieved memory followed by the query.
        """
        prompt_messages = [
            ChatMessage.from_system("You are a helpful assistant that paraphrases user queries."),
            ChatMessage.from_user(
                """
                Please paraphrase the query below using previous conversation context if available.
                history:
                {% for memory in memories %}
                    {{memory.text}}
                {% endfor %}
                query: {{query}}
                answer:
                """
            ),
        ]
        builder_inputs = {"query": query, "template": prompt_messages}
        outputs = self.pipeline.run(
            data={"prompt_builder": builder_inputs},
            include_outputs_from=["generator"],
        )
        first_reply = outputs["generator"]["replies"][0]
        return first_reply.text

In [4]:
class ChatHistoryPipeline:
    """Pipeline that renders the stored chat messages into one text block.

    Wiring: ``memory_retriever`` -> ``prompt_builder.memories``.
    """

    def __init__(self, chat_message_store):
        """Create the retriever and prompt builder on top of ``chat_message_store``."""
        history_template = """
                Previous Conversations:
                {% for memory in memories %}
                {{memory.text}}
                {% endfor %}
                """
        builder = PromptBuilder(
            variables=["memories"],
            required_variables=["memories"],
            template=history_template,
        )

        pipe = Pipeline()
        pipe.add_component("memory_retriever", ChatMessageRetriever(chat_message_store))
        pipe.add_component("prompt_builder", builder)
        pipe.connect("memory_retriever", "prompt_builder.memories")
        self.pipeline = pipe

    def run(self):
        """Run the pipeline without inputs and return the rendered prompt string."""
        outputs = self.pipeline.run({}, include_outputs_from=["prompt_builder"])
        builder_output = outputs["prompt_builder"]
        return builder_output["prompt"]

In [5]:
@component
class GetMaterials:
    """Haystack component that lists the ``name`` of every document in the ``materials`` collection."""

    def __init__(self):
        """Open a MongoDB client from ``MONGO_CONNECTION_STRING`` and select the ``depato_store`` database."""
        connection_string = os.environ["MONGO_CONNECTION_STRING"]
        self.client = MongoClient(connection_string)
        self.db = self.client.depato_store

    @component.output_types(materials=List[str])
    def run(self):
        """Return ``{"materials": [...]}`` built from each document's ``name`` field."""
        names = []
        for record in self.db.materials.find():
            names.append(record["name"])
        return {"materials": names}


@component
class GetCategories:
    """Haystack component that lists the ``name`` of every document in the ``categories`` collection."""

    def __init__(self):
        """Open a MongoDB client from ``MONGO_CONNECTION_STRING`` and select the ``depato_store`` database."""
        connection_string = os.environ["MONGO_CONNECTION_STRING"]
        self.client = MongoClient(connection_string)
        self.db = self.client.depato_store

    @component.output_types(categories=List[str])
    def run(self):
        """Return ``{"categories": [...]}`` built from each document's ``name`` field."""
        names = []
        for record in self.db.categories.find():
            names.append(record["name"])
        return {"categories": names}

In [6]:
# Jinja prompt template for the metadata-filter generator. It is rendered with
# `materials`, `categories` and `input`, and asks the LLM to answer with a fenced
# JSON filter object (or an empty object when nothing applies).
# Every example block is fenced consistently (opened with ```json and closed with
# ```), and the schema uses the key "field" throughout, so the model is not
# taught a misspelled key or an unterminated code block.
METADATA_FILTER_TEMPLATE = """
You are a json generator that have a job to generate json based on the input.
The return json should be in the format:
```json
{
    "operator": "AND",
    "conditions":[
        {"field": "meta.category", "operator":"==", "value": <category>},
        {"field": "meta.material", "operator":"==", "value": <material>},
        {"field": "meta.gender", "operator":"==", "value" : <male|female|unisex>},
        {"field": "meta.price", "operator":<"<="|">="|"==">, "value": <price>}
    ]
}
```
The json key above can be omiitted if the value is not provided in the input, so please make sure to only return the keys that are provided in the input.

For the material and category, you can only use the material and category that are provided below:
Materials: [ {% for material in materials %} {{ material }} {% if not loop.last %}, {% endif %} {% endfor %} ]

Categories: [ {% for category in categories %} {{ category }} {% if not loop.last %}, {% endif %} {% endfor %} ]

if the input does not contain any of the keys above, you should return an empty json object like this:
```json
{}
```
Sometimes the material and category can be negated, so you should also handle that by using the operator "!=" for material and category. 

Sometimes the material and category is not explicitly mentioned, you should analyze which material and category is the most suitable based on the input, and return the json with the material and category that you think is the most suitable.

Nestede conditions are allowed, for nested conditions, you can use "OR" and "AND" as the operator, and the conditions should be in the "conditions" array.

if user said the price around some value, please find the price between those value -10 and value +10.

The example of the result are expected to be like this:

1. Input: "can you give me a adress with cotton material?"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.material", "operator": "==", "value": "Cotton"},
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"}
    ]
}
```

2. Input: "Give me Shirt that is not made of cotton and has a price less than $100"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Tops"},
        {"field": "meta.material", "operator": "!=", "value": "Cotton"},
        {"field": "meta.price", "operator": "<=", "value": 100}
    ]
}
```
3. Input: "I want a dress that is not hot and has a price greater than $50"
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {"field": "meta.price", "operator": ">=", "value": 50},
        {
            "operator": "OR",
            "conditions": [
                {"field": "meta.material", "operator": "==", "value": "Cotton"},
                {"field": "meta.material", "operator": "==", "value": "Polyester"}
            ]
        }
    ]
}
```

4. Input i want tops that have price between $20 and $50
output:
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Tops"},
        {
            "operator": "AND",
            "conditions":[
                {"field": "meta.price", "operator": ">=", "value": 20},
                {"field": "meta.price", "operator": "<=", "value": 50}
            ]
        }
    ]
}
```
5. Input: I want the dress price around $50
output: 
```json
{
    "operator": "AND",
    "conditions": [
        {"field": "meta.category", "operator": "==", "value": "Dresses/Jumpsuits"},
        {
            "operator": "AND",
            "conditions":[
                {"field": "meta.price", "operator": ">=", "value": 40},
                {"field": "meta.price", "operator": "<=", "value": 60}
            ]
        }
    ]
}
```
6. Input: {{input}}
output:
"""

In [7]:
class MetaDataFilterPipeline:
    """Pipeline that asks an LLM to turn a query into a metadata-filter reply.

    Wiring: ``materials.materials`` and ``categories.categories`` feed the
    prompt builder, whose output is connected to the generator.
    """

    def __init__(self, template):
        """Build the pipeline; ``template`` is the prompt template given to the prompt builder."""
        pipe = Pipeline()
        pipe.add_component("materials", GetMaterials())
        pipe.add_component("categories", GetCategories())

        builder = PromptBuilder(template=template, required_variables=["input", "materials", "categories"])
        pipe.add_component("prompt_builder", builder)

        llm = OpenAIGenerator(model="gpt-4.1", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"]))
        pipe.add_component("generator", llm)

        for sender, receiver in (
            ("materials.materials", "prompt_builder.materials"),
            ("categories.categories", "prompt_builder.categories"),
            ("prompt_builder", "generator"),
        ):
            pipe.connect(sender, receiver)

        self.pipeline = pipe

    def run(self, query: str):
        """Return the first generator reply for ``query`` (passed to the template as ``input``)."""
        builder_inputs = {"input": query}
        outputs = self.pipeline.run({"prompt_builder": builder_inputs}, include_outputs_from=["generator"])
        replies = outputs["generator"]["replies"]
        return replies[0]

In [8]:
class RetrieveAndGenerateAnswerPipeline:
    """RAG pipeline: embed the query, retrieve products, and ask the LLM for a recommendation.

    Wiring: ``embedder`` -> ``retriever`` -> ``prompt_builder.documents`` and
    ``prompt_builder.prompt`` -> ``generator.messages``.
    """

    def __init__(self, document_store):
        """Build the pipeline on top of ``document_store``; the retriever is configured with ``top_k=10``."""
        self.pipeline = Pipeline()
        self.pipeline.add_component(
            "embedder",
            SentenceTransformersTextEmbedder(model="sentence-transformers/all-mpnet-base-v2"),
        )
        self.pipeline.add_component(
            "retriever",
            MongoDBAtlasEmbeddingRetriever(document_store=document_store, top_k=10),
        )
        self.pipeline.add_component(
            "prompt_builder",
            ChatPromptBuilder(variables=["query", "documents"], required_variables=["query", "documents"]),
        )
        self.pipeline.add_component(
            "generator",
            OpenAIChatGenerator(model="gpt-4.1", api_key=Secret.from_token(os.environ["OPENAI_API_KEY"])),
        )
        self.pipeline.connect("embedder", "retriever")
        self.pipeline.connect("retriever", "prompt_builder.documents")
        self.pipeline.connect("prompt_builder.prompt", "generator.messages")

    def run(self, query: str, filter: "dict | None" = None):
        """Answer ``query`` with product recommendations.

        Args:
            query: Text that is embedded for retrieval and shown to the LLM.
            filter: Metadata filters forwarded to the retriever. ``None`` (the
                default) means "no filters" and is sent as an empty dict.
                The name shadows the builtin but is kept because it is part
                of the public signature.

        Returns:
            The text of the first generator reply.
        """
        # A None sentinel replaces the former mutable `{}` default so that no
        # dict instance is shared between calls.
        filters = {} if filter is None else filter
        messages = [
            ChatMessage.from_system("You are a shop assistant that recommends products."),
            ChatMessage.from_user(
                """
                The query is: {{query}}
                {% if documents|length > 0 %}
                Products:
                {% for product in documents %}
                {{loop.index}}. {{product.meta.title}} — ${{product.meta.price}} | {{product.meta.material}} | {{product.meta.category}}
                {% endfor %}
                {% else %}
                No matching products.
                {% endif %}
                Answer:
                """
            ),
        ]
        res = self.pipeline.run(
            {
                "embedder": {"text": query},
                "retriever": {"filters": filters},
                "prompt_builder": {"query": query, "template": messages},
            },
            include_outputs_from=["generator"],
        )
        return res["generator"]["replies"][0].text

In [9]:
# Instantiate one pipeline of each kind. The paraphraser and the history
# pipeline are given the same chat message store.
paraphraser = ParaphraserPipeline(chat_message_store=chat_message_store)
history = ChatHistoryPipeline(chat_message_store=chat_message_store)
filter_pipeline = MetaDataFilterPipeline(METADATA_FILTER_TEMPLATE)
rag_pipeline = RetrieveAndGenerateAnswerPipeline(document_store=document_store)

# Define helper
def _parse_filters(raw):
    """Extract the JSON filter object from an LLM reply; return ``{}`` when none can be parsed."""
    text = raw or ""
    # Accept a fenced block with or without the "json" tag and with any
    # surrounding whitespace (including CRLF); otherwise try the bare reply.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", text, re.DOTALL)
    candidate = fenced.group(1) if fenced else text.strip()
    try:
        parsed = json.loads(candidate)
    except (json.JSONDecodeError, TypeError):
        # Malformed model output must not crash the tool call: fall back to an
        # unfiltered search and leave a trace in the log.
        logging.getLogger(__name__).warning("Could not parse metadata filters; searching without filters.")
        return {}
    # Only a JSON object is a usable filter; lists/strings/numbers are ignored.
    return parsed if isinstance(parsed, dict) else {}


def retrieve_and_generate(query: Annotated[str, "User query"]):
    """Paraphrase ``query``, derive metadata filters from it, and return the RAG answer.

    Steps: ``paraphraser.run`` -> ``filter_pipeline.run`` -> parse the JSON
    filters from the reply -> ``rag_pipeline.run`` with the paraphrased query.
    If the filter reply cannot be parsed, the search runs without filters.
    """
    q = paraphraser.run(query)
    raw = filter_pipeline.run(q)
    filters = _parse_filters(raw)
    return rag_pipeline.run(q, filters)

In [10]:
# Wrap `retrieve_and_generate` as a tool whose JSON schema declares a single
# required string argument, `query`.
retrieve_tool = Tool(
    function=retrieve_and_generate,
    name="retrieve_and_generate_recommendation",
    description="Retrieve product info and recommend items.",
    parameters=dict(
        type="object",
        properties=dict(query=dict(type="string")),
        required=["query"],
    ),
)

In [11]:
# Writer used to append chat messages to the chat message store.
chat_message_writer = ChatMessageWriter(chat_message_store)

# Tool-using agent configured with the "text" exit condition and at most
# 10 agent steps.
agent = Agent(
    system_prompt="""
    You are a helpful SmartShopper assistant. 
    - If user asks product-related questions → use retrieve_and_generate_recommendation.
    - If user asks general info → respond politely.
    - Always analyze conversation context first.
    """,
    tools=[retrieve_tool],
    chat_generator=OpenAIChatGenerator(
        model="gpt-4.1",
        api_key=Secret.from_token(os.environ["OPENAI_API_KEY"]),
    ),
    max_agent_steps=10,
    exit_conditions=["text"],
)
agent.warm_up()

In [12]:
# Interactive chat loop. Type "exit" to stop; EOF or a kernel interrupt while
# waiting for input also ends the loop cleanly.
while True:
    try:
        # Strip so that "exit " / " exit" are still recognised.
        query = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # No input available (non-interactive run) or the prompt was interrupted.
        break
    if not query:
        # Skip blank lines instead of sending an empty message to the agent.
        continue
    if query.lower() == "exit":
        break

    # Render the history BEFORE storing the new message, so the current query
    # is not part of the history text placed in the system message.
    hist = history.run()
    messages = [ChatMessage.from_system(hist), ChatMessage.from_user(query)]

    chat_message_writer.run([ChatMessage.from_user(query)])
    result = agent.run(messages=messages)
    reply = result["messages"][-1].text

    chat_message_writer.run([ChatMessage.from_assistant(reply)])
    print(f"AI: {reply}")

INFO - haystack.core.pipeline.pipeline -  Running component memory_retriever
INFO - haystack.core.pipeline.pipeline -  Running component prompt_builder
INFO - haystack.core.pipeline.pipeline -  Running component chat_generator
INFO - haystack.core.pipeline.pipeline -  Running component tool_invoker
INFO - haystack.core.pipeline.pipeline -  Running component memory_retriever
INFO - haystack.core.pipeline.pipeline -  Running component prompt_builder
INFO - haystack.core.pipeline.pipeline -  Running component generator
INFO - haystack.core.pipeline.pipeline -  Running component categories
INFO - haystack.core.pipeline.pipeline -  Running component materials
INFO - haystack.core.pipeline.pipeline -  Running component prompt_builder
INFO - haystack.core.pipeline.pipeline -  Running component generator
INFO - haystack.core.pipeline.base -  Warming up component embedder...
INFO - haystack.core.pipeline.pipeline -  Running component embedder
Batches: 100%|██████████| 1/1 [00:00<00:00,  8.36it/

AI: Here are some jackets made of polyester:

1. Patagonia Women's Better Sweater Fleece Jacket — $179.48
2. Nike Comp 12 Poly Jacket — $64.99
3. GM Chevrolet Unisex Bonded All-Season Jacket — $64.99
4. Trespass Packup Kids P-Way Jacket — $10.18
5. Kryptek Men's Waterproof Dalibor II Jacket, Mandrake — $92.99
6. SafetyShirtz SS360 Seattle Safety Hoody ANSI Class 3 — $49.99
7. Sport Tek Colorblock Raglan Jacket — $32.02

If you'd like more information about any specific jacket, just let me know!
