In [6]:
# %%
import sys, os
# Locate the directory one level above this file (script) or above the current
# working directory (notebook) and make it importable.
try:
    # ✅ Script mode: __file__ is defined, so anchor on this file's directory
    TOOLS_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
except NameError:
    # ✅ Notebook mode: __file__ is undefined, so anchor on the working directory
    TOOLS_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# The source root is the resolved parent directory itself.
SRC_PATH = TOOLS_PATH

# Prepend the source root once; re-running the cell must not add duplicates.
if SRC_PATH in sys.path:
    print(f"🔁 SRC path already in sys.path: {SRC_PATH}")
else:
    sys.path.insert(0, SRC_PATH)
    print(f"✅ SRC path added: {SRC_PATH}")

🔁 SRC path already in sys.path: /home/prashant-agrawal/projects/netflix_talk2data/src


In [7]:
# 2) Import LangChain and your tools
from langchain_openai import OpenAI
from langchain.agents import create_react_agent
from langchain_core.messages import HumanMessage
from langgraph.prebuilt import ToolNode
from langgraph.types import Command
from typing import Literal
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

In [8]:
# 🔁 Import all tools from registry
from tools.qdrant_tools_registry import qdrant_search_tool

In [16]:
# Tools exposed to the Qdrant search agent
qdrant_agent_tools = [qdrant_search_tool]

# Tool names and descriptions, in the same order as the tool list
tool_names = [t.name for t in qdrant_agent_tools]
tool_descriptions = [t.description for t in qdrant_agent_tools]

# Numbered, human-readable tool catalogue ("1. name - description"), one tool per line
tool_help_text = "\n".join(
    f"{position}. {name} - {description}"
    for position, (name, description) in enumerate(
        zip(tool_names, tool_descriptions), start=1
    )
)

# ReAct prompt for the Qdrant search agent.
#
# Template variables:
#   {tools}            - rendered catalogue of the available tools
#   {tool_names}       - comma-separated tool names the agent may pick in "Action:"
#   {input}            - the user's request
#   {agent_scratchpad} - running Thought/Action/Observation trace
#
# Literal braces in the examples are escaped as {{ }} so they are not treated
# as template variables.
#
# Fixes relative to the previous version:
#   * A "Tools:" section with the {tools} placeholder was added. Previously the
#     tool catalogue was bound as a partial variable but never rendered, so the
#     model never saw the tool description.
#   * The output instructions now consistently ask for a list of dictionaries,
#     matching the example and the "return an empty list []" rule.
qdrant_agent_prompt_template = PromptTemplate.from_template(
    """
Role:
You are the Qdrant Search Tool, a micro-service that combines high-fidelity vector embeddings with rich, structured metadata filtering to retrieve the most relevant records from a Qdrant collection.

Description & Purpose:
- Given a natural-language query, an optional dictionary of filters (exact matches or numeric ranges), and a desired result count k, your job is to:
    - Embed the query via OpenAI.
    - Translate filters into Qdrant payload conditions.
    - Execute a hybrid semantic + metadata search.
    - Return the top-k hits, each with its id, similarity score, and full payload metadata.

Tools:
{tools}

Inputs (Parameters):
- query (string): free-text search string.
- filters (dict): e.g.
    Exact: {{ "state": "delhi", "industry_sector": "saas" }}
    Range: {{ "year_founded": {{"gte": 2000, "lte": 2010}} }}
- k (integer): the number of top results to return.

Examples:
Pure semantic (no filters)
qdrant_search(
    query="emerging agritech startups",
    filters=None,
    k=5
)
# → returns top-5 agritech vectors by relevance

Metadata only
qdrant_search(
    query="",
    filters={{ "state": "karnataka", "industry_sector": "fintech" }},
    k=10
)
# → returns any fintech startups in Karnataka, ordered by vector‐default rank

Hybrid (semantic + filters + range)
qdrant_search(
    query="best B2B platforms",
    filters={{
        "state": "delhi",
        "year_founded": {{"gte": 2015}},
        "industry_sector": "saas"
    }},
    k=3
)
# → returns top-3 SaaS B2B startups in Delhi founded ≥2015

Guidelines & Constraints:
- Must apply both vector similarity and all payload filters.
- For textual filters use exact keyword match.
- For numeric filters support gte / lte semantics.
- If filters=None, perform a pure semantic lookup.
- Always return at most k results.
- Never omit an entry’s payload.
- Ensure consistent lower-casing of filter values and field names.
- Never return more than k results, even if multiple entries have the same score.
- If no results match, return an empty list [].

**Output Format:**
- The final answer MUST be a valid Python list of dictionaries (not a JSON string).
- Do NOT wrap the output in quotes or format as a string.
- Example:
    Final Answer: [
        {{
            "id": "company_123",
            "score": 0.92,
            "payload": {{
                "company_name": "Acme Corp",
                "industry_sector": "saas",
                ...
            }}
        }},
        ...
    ]

Format:
Question: the input query
Thought: think step-by-step about what to search for
Action: the tool to use, from [{tool_names}]
Action Input: JSON string or plain text input to the tool
Observation: result returned by the tool
... (repeat Thought/Action/Observation as needed)
Thought: I have gathered all necessary search results.
Final Answer: a Python list of result dictionaries (each with id, score and payload), or [] if nothing matched

Constraints:
- NEVER ask the user again
- ONLY use tools
- NEVER hallucinate missing data

Begin!

Question: {input}
{agent_scratchpad}
"""
)


# Pre-bind the tool catalogue and the comma-separated tool names into the prompt.
# NOTE(review): create_react_agent appears to re-bind `tools` / `tool_names`
# itself in current LangChain releases — confirm whether this partial is still needed.
formatted_prompt = qdrant_agent_prompt_template.partial(
    tool_names=", ".join(tool_names),
    tools=tool_help_text,
)

# 🔧 Chat model that drives the ReAct loop
llm = ChatOpenAI(model="gpt-4o")

# Assemble the ReAct agent from the model, the tool list and the bound prompt
qdrant_agent = create_react_agent(llm, qdrant_agent_tools, formatted_prompt)


In [19]:
import json

from langchain.agents import AgentExecutor

# Runtime wrapper that drives the Thought/Action/Observation loop.
# verbose=True prints the trace; handle_parsing_errors=True feeds malformed
# model output back to the model instead of raising.
executor = AgentExecutor(
    agent=qdrant_agent,
    tools=qdrant_agent_tools,
    verbose=True,
    handle_parsing_errors=True,
)

# Search request: free-text query, exact-match payload filters, and result count.
structured_input = {
    "query": "D2C companies with Sequoia or Accel as lead investors and more than 200 employees",
    "filters": {
        "product_categories": "d2c",
    },
    "k": 5,
}

# 3️⃣ Invoke!
# Serialise the request to JSON before it is interpolated into the prompt.
# Passing the dict directly renders its Python repr (single quotes, None/True),
# which is not the JSON the prompt asks the model to produce as Action Input.
result = executor.invoke({"input": json.dumps(structured_input)})
print(result["output"])





[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mThought: The task is to find D2C companies with Sequoia or Accel as lead investors, and more than 200 employees. We have filters for 'product_categories' as 'd2c', but there's no explicit filter for investors or employee count. Since these additional details aren't provided in the filters field, I'll proceed with available information. The query is also provided for semantic search.

Action: qdrant_search
Action Input: {
    "query": "D2C companies with Sequoia or Accel as lead investors and more than 200 employees",
    "filters": {"product_categories": "d2c"},
    "k": 5
}[0m
[DEBUG] Query: D2C companies with Sequoia or Accel as lead investors and more than 200 employees
[DEBUG] Filters: {'product_categories': 'd2c'}
[DEBUG] Top K: 5
[DEBUG] Raw results: [{'id': 321, 'score': 0.768844, 'payload': {'company_name': 'practically', 'legal_entity_type': 'proprietorship', 'state': 'tamil nadu', 'headquarters_city': 'mumbai', 'ye