In [1]:
import getpass
import hashlib
import json
import os

from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders import DirectoryLoader, JSONLoader
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_bus_json_with_metadata(path: str) -> "list[Document]":
    """Turn the bus JSON file into one ``Document`` per record.

    The file is expected to hold two top-level lists:

    * ``"districts"`` - each entry has a ``"name"`` and a list of
      ``"dropping_points"``, every one with its own ``"name"`` and ``"price"``.
    * ``"bus_providers"`` - each entry has a ``"name"`` and a list of
      ``"coverage_districts"``.

    Args:
        path: Location of the JSON file.

    Returns:
        Documents for every dropping point (in file order) followed by
        documents for every bus provider. Each document carries the same
        facts twice: as readable text in ``page_content`` and as structured
        ``metadata``.

    Raises:
        KeyError: If any of the keys listed above is missing.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # JSON is UTF-8 by specification; without an explicit encoding, open()
    # falls back to the platform locale and can mis-decode non-ASCII names.
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    documents = []

    # One document per (district, dropping point) pair.
    for district in data["districts"]:
        district_name = district["name"]

        for drop in district["dropping_points"]:
            doc_text = (
                "type: dropping_point\n"
                f"District: {district_name}\n"
                f"Dropping Point: {drop['name']}\n"
                f"Ticket Price: {drop['price']}"
            )

            metadata = {
                "type": "dropping_point",
                "district": district_name,
                "dropping_point": drop["name"],
                "price": drop["price"],
            }

            documents.append(Document(page_content=doc_text, metadata=metadata))

    # One document per bus provider.
    for provider in data["bus_providers"]:
        provider_name = provider["name"]
        # The district list is flattened to a comma-separated string and the
        # same string is reused for both the text and the metadata.
        coverage = ", ".join(provider["coverage_districts"])

        doc_text = (
            "type: bus provider\n"
            f"Bus Provider: {provider_name}\n"
            f"Coverage Districts: {coverage}"
        )

        metadata = {
            "type": "bus provider",
            "provider_name": provider_name,
            "coverage_districts": coverage,
        }

        documents.append(Document(page_content=doc_text, metadata=metadata))

    return documents

In [3]:
# Loader for the text attachments: files matching *.txt under ../data/attachment.
loader1 = DirectoryLoader("../data/attachment", glob="*.txt")
# NOTE: despite its name, loader2 is not a loader object - it is the list of
# Documents already built by load_bus_json_with_metadata.
loader2 = load_bus_json_with_metadata("../data/data.json")

In [4]:
# Merge both sources into a single list: documents read from the attachment
# files come first, followed by the Documents built from the JSON data.
text_document = [*loader1.load(), *loader2]

In [5]:
# Splitter configured with chunk_size=1000 and chunk_overlap=200.
# add_start_index=True makes each chunk's metadata carry a "start_index" entry.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True )

# Chunk every loaded document; `documents` is what gets embedded and indexed.
documents = text_splitter.split_documents(text_document)

In [6]:
# Read a local .env file (if one exists) into the process environment.
load_dotenv()

# Ask for the key interactively only when it is unset or empty, so the secret
# never needs to be written into the notebook itself.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

# Equivalent setup for OpenAI, currently disabled:
# if not os.getenv("OPENAI_API_KEY"):
#     os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [7]:
# Embedding model handed to the vector store as its embedding function.
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

# Chroma collection persisted on disk under ../data/bus_db.
# NOTE(review): the collection name is spelled "bus_infomation". It is a runtime
# identifier, so correcting the spelling would point at a different collection
# than any data already stored - confirm before changing it.
vector_store = Chroma(
    collection_name="bus_infomation",
    embedding_function=embeddings,
    persist_directory="../data/bus_db"
)

In [8]:
def _stable_chunk_id(doc) -> str:
    """Return a deterministic ID derived from a chunk's text and metadata."""
    fingerprint = json.dumps(
        {"content": doc.page_content, "metadata": doc.metadata},
        sort_keys=True,  # key order must not influence the hash
        default=str,  # tolerate metadata values that are not JSON-native
    )
    return hashlib.sha256(fingerprint.encode("utf-8")).hexdigest()


# The collection is persisted on disk, so adding chunks without explicit IDs
# stores a fresh copy of every chunk each time this cell is re-run, and the
# duplicates then crowd out distinct results in similarity searches.
# Content-derived IDs make re-ingestion overwrite the existing entries instead.
# Keying by ID also drops exact duplicates within this batch, which the store
# would otherwise reject as repeated IDs.
# NOTE(review): entries already stored under auto-generated IDs by earlier runs
# are not replaced by this - rebuild the collection once if that applies.
chunks_by_id = {_stable_chunk_id(doc): doc for doc in documents}

document_ids = vector_store.add_documents(
    documents=list(chunks_by_id.values()),
    ids=list(chunks_by_id.keys()),
)

In [9]:
# Sanity-check retrieval with a direct similarity search before building an agent.
query = "“What are the contact details of Hanif Bus?” "
retrieved_results = vector_store.similarity_search(query)
# Display only the text of each hit.
[doc.page_content for doc in retrieved_results]

['Hanif Privacy Policy\n\nHanif is committed to protecting the privacy and personal data of our customers. We collect personal details such as names, phone numbers, travel history, and booking preferences to provide efficient booking and travel services. This information helps us improve service quality, offer relevant promotions, and ensure smooth operational management.\n\nOfficial Address: Gabtoli / Mirpur region, Dhaka Contact Information: Customer Support: 16460, Counter: 01713-049540 Privacy Policy / Terms Link: https://hanifenterprisebd.com/privacy-policy?utm_source=chatgpt.com\n\nAll collected information is stored securely, with access limited to authorized personnel only. Data sharing with external entities occurs strictly when required for legal or operational needs. Security protocols include encryption, access restrictions, and regular audits.',
 'type: bus provider\nBus Provider: Hanif\nCoverage Districts: Dhaka, Khulna, Mymensingh, Comilla',
 'By utilizing Hanif services

In [10]:
# Wrap the vector store in a retriever using its default search settings,
# then display the object to inspect its configuration.
retriever = vector_store.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000002022FA18440>, search_kwargs={})

In [11]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    # NOTE: the docstring above doubles as the tool description given to the
    # model, so it is deliberately left word-for-word.

    # Only the two closest chunks are fetched.
    retrieved_docs = vector_store.similarity_search(query, k=2)

    # Render each hit as a "Source / Content" section for the model to read.
    sections = []
    for doc in retrieved_docs:
        sections.append(f"Source: {doc.metadata}\nContent: {doc.page_content}")
    serialized = "\n\n".join(sections)

    # First element is the text content; second is the raw documents (artifact).
    return serialized, retrieved_docs

In [12]:
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model

# The retrieval tool is the only tool exposed to the agent.
tools = [retrieve_context]

# Custom system instructions. Adjacent string literals are joined with no
# separator, so the first sentence must end with its own space; otherwise the
# model receives "...database.Use the tool...".
prompt = (
    "You have access to a tool that retrieves context from bus info database. "
    "Use the tool to help answer user queries."
)
model = init_chat_model("google_genai:gemini-2.5-flash-lite")
agent = create_agent(model, tools, system_prompt=prompt)

In [13]:
query = "Show all bus providers operating from Chittagong to Sylhet."

# Stream the run and print the newest message of every streamed item, which
# shows the user turn, the tool call and the tool result as they happen.
for snapshot in agent.stream(
    {"messages": [{"role": "user", "content": query}]}, stream_mode="values"
):
    snapshot["messages"][-1].pretty_print()


Show all bus providers operating from Chittagong to Sylhet.
Tool Calls:
  retrieve_context (cefcd2b8-d311-4086-9683-a1b7eacb734c)
 Call ID: cefcd2b8-d311-4086-9683-a1b7eacb734c
  Args:
    query: Bus providers operating from Chittagong to Sylhet
Name: retrieve_context

Source: {'type': 'bus provider', 'start_index': 0, 'coverage_districts': 'Chattogram, Khulna, Sylhet, Bogra', 'provider_name': 'Shyamoli'}
Content: type: bus provider
Bus Provider: Shyamoli
Coverage Districts: Chattogram, Khulna, Sylhet, Bogra

Source: {'provider_name': 'Ena', 'start_index': 0, 'coverage_districts': 'Chattogram, Sylhet, Barishal, Bogra', 'type': 'bus provider'}
Content: type: bus provider
Bus Provider: Ena
Coverage Districts: Chattogram, Sylhet, Barishal, Bogra


In [14]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest

@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    # The text of the most recent message in the state is the search query.
    latest_text = request.state["messages"][-1].text
    matches = vector_store.similarity_search(latest_text)

    # Concatenate the text of every hit, separated by blank lines.
    context_parts = [doc.page_content for doc in matches]
    docs_content = "\n\n".join(context_parts)

    # The returned string is the system prompt: fixed instructions followed by
    # the retrieved context.
    return (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{docs_content}"
    )


# NOTE: this rebinds `agent`. Unlike the earlier tool-based agent, this one has
# no tools; prompt_with_context is supplied as middleware instead.
agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [15]:
query = "Are there any buses from Dhaka to Rajshahi under 500 taka?"

# Stream the run and print the newest message of every streamed item.
for snapshot in agent.stream(
    {"messages": [{"role": "user", "content": query}]}, stream_mode="values"
):
    snapshot["messages"][-1].pretty_print()


Are there any buses from Dhaka to Rajshahi under 500 taka?

Yes, there is a bus from Dhaka to Rajshahi for 480 taka.
