In [11]:
import os
import json
import getpass
from dotenv import load_dotenv

from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, JSONLoader

In [12]:
def load_bus_json_with_metadata(path: str):
    documents = []

    with open(path, "r") as f:
        data = json.load(f)

    # ➤ Load districts and dropping points
    for district in data["districts"]:
        district_name = district["name"]

        for drop in district["dropping_points"]:
            doc_text = (
                f"District: {district_name}\n"
                f"Dropping Point: {drop['name']}\n"
                f"Price: {drop['price']}"
            )

            metadata = {
                "type": "dropping_point",
                "district": district_name,
                "dropping_point": drop["name"],
                "price": drop["price"]
            }

            documents.append(Document(page_content=doc_text, metadata=metadata))

    for provider in data["bus_providers"]:
        provider_name = provider["name"]
        coverage = provider["coverage_districts"]

        doc_text = (
            f"Bus Provider: {provider_name}\n"
            f"Coverage Districts: {', '.join(coverage)}"
        )

        metadata = {
            "type": "bus provider",
            "provider_name": provider_name,
            "coverage_districts": ", ".join(coverage)
        }

        documents.append(Document(page_content=doc_text, metadata=metadata))

    return documents

In [13]:
loader1 = DirectoryLoader("../data/attachment", glob="*.txt")
loader2 = load_bus_json_with_metadata("../data/data.json")
loader1

<langchain_community.document_loaders.directory.DirectoryLoader at 0x1c2ddebae90>

In [14]:
text_document = loader1.load()
text_document.extend(loader2)
text_document

[Document(metadata={'source': '..\\data\\attachment\\desh travel.txt'}, page_content='Desh Travel Privacy Policy\n\nDesh Travel is committed to protecting the privacy and personal data of our customers. We collect personal details such as names, phone numbers, travel history, and booking preferences to provide efficient booking and travel services. This information helps us improve service quality, offer relevant promotions, and ensure smooth operational management.\n\nOfficial Address: 70A Progoti Shoroni, Lift 4, Rupayan Millennium Square, Dhaka 1212, Bangladesh Contact Information: Email: info@deshtravelbd.com Privacy Policy / Terms Link: https://www.deshtravelbd.com/termsandcondition?utm_source=chatgpt.com\n\nAll collected information is stored securely, with access limited to authorized personnel only. Data sharing with external entities occurs strictly when required for legal or operational needs. Security protocols include encryption, access restrictions, and regular audits.\n\n

In [15]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200, add_start_index=True )

documents = text_splitter.split_documents(text_document)
documents

[Document(metadata={'source': '..\\data\\attachment\\desh travel.txt', 'start_index': 0}, page_content='Desh Travel Privacy Policy\n\nDesh Travel is committed to protecting the privacy and personal data of our customers. We collect personal details such as names, phone numbers, travel history, and booking preferences to provide efficient booking and travel services. This information helps us improve service quality, offer relevant promotions, and ensure smooth operational management.\n\nOfficial Address: 70A Progoti Shoroni, Lift 4, Rupayan Millennium Square, Dhaka 1212, Bangladesh Contact Information: Email: info@deshtravelbd.com Privacy Policy / Terms Link: https://www.deshtravelbd.com/termsandcondition?utm_source=chatgpt.com\n\nAll collected information is stored securely, with access limited to authorized personnel only. Data sharing with external entities occurs strictly when required for legal or operational needs. Security protocols include encryption, access restrictions, and r

In [16]:
load_dotenv()

if not os.getenv("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google API Key: ")

# if not os.getenv("OPENAI_API_KEY"):
#     os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [17]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")

vector_store = Chroma(
    collection_name="bus_infomation",
    embedding_function=embeddings,
    persist_directory="../data/bus_db"
)

In [18]:
for i in documents[20:]:
    print(i.metadata)

{'type': 'dropping_point', 'district': 'Rajshahi', 'dropping_point': 'Shah Makhdum', 'price': 480, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Rajshahi', 'dropping_point': 'Bagha', 'price': 500, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Sylhet', 'dropping_point': 'Zindabazar', 'price': 700, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Sylhet', 'dropping_point': 'Bimanbandar', 'price': 720, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Barishal', 'dropping_point': 'Khanjahan', 'price': 450, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Barishal', 'dropping_point': 'Bakerganj', 'price': 470, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Rangpur', 'dropping_point': 'Kachari', 'price': 480, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Rangpur', 'dropping_point': 'Cantonment', 'price': 500, 'start_index': 0}
{'type': 'dropping_point', 'district': 'Mymensingh', 'dropping_point': 'Shahbazpur', 'price':

In [None]:
document_ids = vector_store.add_documents(documents=documents)

['b8695309-cb39-4cb1-bfed-7cf2b67307db',
 '136733f5-0675-422e-b58a-9038884d4109',
 'a64304a1-0a27-4433-9792-05da00897b79',
 'fab519d0-3610-41ec-a9f5-c5a8da7f3100',
 'e1fdcdb0-2eba-41a6-a422-42bebd600a86',
 'aab3df78-c3e8-418f-9429-f42a05528d55',
 '89630da8-20c1-49e3-b7e0-3d88f59dc52c',
 '3dc2a198-edef-426a-86a1-135cef98b9c4',
 '21fa1e5e-e7b2-4387-95e8-74c0204698df',
 '7c2888d1-31f7-4b5b-bb8b-556f54e95ab4',
 'c1da947a-db66-4c09-ace9-615d7679b1e2',
 'c8c82f48-6391-4526-b8ef-a8a3b1c0a71b',
 '7a52fe6e-5771-4e23-b837-f7a09990739f',
 '128c32ff-03a4-4d27-8a41-74ec95773ad2',
 'c5344f75-55bb-450e-966d-7465ab38fb7d',
 '1107ca6f-4e7e-48b2-b6cd-5f1ce3018ffc',
 '6c3cfd95-e7bf-4c15-a0eb-38e2ae458f12',
 '9be9aa7d-30e2-4317-b112-4578668ec543',
 '8cc95011-5d91-4bc9-b02a-a7199f00c32d',
 '958797b0-5625-42e3-8828-5d686da91781',
 '6f4893d6-2ccd-4440-98ce-624e8de4b118',
 '726d01ba-caf1-43f9-967e-c0dc378382e4',
 '5734599a-9aa5-4858-a32e-ec1f327924ee',
 '56800c80-ab16-4fa0-94ca-72ba00470e22',
 '663571c1-bbab-

In [22]:
query = "“What are the contact details of Hanif Bus?” "
retireved_results=vector_store.similarity_search(query)
[i.page_content for i in retireved_results]

['Bus Provider: Hanif\nCoverage Districts: Dhaka, Khulna, Mymensingh, Comilla',
 'Hanif Privacy Policy\n\nHanif is committed to protecting the privacy and personal data of our customers. We collect personal details such as names, phone numbers, travel history, and booking preferences to provide efficient booking and travel services. This information helps us improve service quality, offer relevant promotions, and ensure smooth operational management.\n\nOfficial Address: Gabtoli / Mirpur region, Dhaka Contact Information: Customer Support: 16460, Counter: 01713-049540 Privacy Policy / Terms Link: https://hanifenterprisebd.com/privacy-policy?utm_source=chatgpt.com\n\nAll collected information is stored securely, with access limited to authorized personnel only. Data sharing with external entities occurs strictly when required for legal or operational needs. Security protocols include encryption, access restrictions, and regular audits.',
 'By utilizing Hanif services, you consent to the

In [23]:
retriever = vector_store.as_retriever()
retriever

VectorStoreRetriever(tags=['Chroma', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x000001C2DDFB8550>, search_kwargs={})

In [27]:
from langchain.tools import tool

@tool(response_format="content_and_artifact")
def retrieve_context(query: str):
    """Retrieve information to help answer a query."""
    retrieved_docs = vector_store.similarity_search(query, k=2)
    serialized = "\n\n".join(
        (f"Source: {doc.metadata}\nContent: {doc.page_content}")
        for doc in retrieved_docs
    )
    return serialized, retrieved_docs

In [30]:
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model

tools = [retrieve_context]
# If desired, specify custom instructions
prompt = (
    "You have access to a tool that retrieves context from bus info database."
    "Use the tool to help answer user queries."
)
model = init_chat_model("google_genai:gemini-2.5-flash-lite")
agent = create_agent(model, tools, system_prompt=prompt)

In [36]:
query = (
    "Show all bus providers operating from Chittagong to Sylhet."
)

for event in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    event["messages"][-1].pretty_print()


Show all bus providers operating from Chittagong to Sylhet.
Tool Calls:
  retrieve_context (f4e3384a-708a-45e3-91c1-8a969035a4ed)
 Call ID: f4e3384a-708a-45e3-91c1-8a969035a4ed
  Args:
    query: Bus providers operating from Chittagong to Sylhet
Name: retrieve_context

Source: {'provider_name': 'Ena', 'type': 'bus provider', 'start_index': 0, 'coverage_districts': 'Chattogram, Sylhet, Barishal, Bogra'}
Content: Bus Provider: Ena
Coverage Districts: Chattogram, Sylhet, Barishal, Bogra

Source: {'type': 'bus provider', 'start_index': 0, 'provider_name': 'Shyamoli', 'coverage_districts': 'Chattogram, Khulna, Sylhet, Bogra'}
Content: Bus Provider: Shyamoli
Coverage Districts: Chattogram, Khulna, Sylhet, Bogra


In [37]:
from langchain.agents.middleware import dynamic_prompt, ModelRequest

@dynamic_prompt
def prompt_with_context(request: ModelRequest) -> str:
    """Inject context into state messages."""
    last_query = request.state["messages"][-1].text
    retrieved_docs = vector_store.similarity_search(last_query)

    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)

    system_message = (
        "You are a helpful assistant. Use the following context in your response:"
        f"\n\n{docs_content}"
    )

    return system_message


agent = create_agent(model, tools=[], middleware=[prompt_with_context])

In [38]:
query = "WShow all bus providers operating from Chittagong to Sylhet."
for step in agent.stream(
    {"messages": [{"role": "user", "content": query}]},
    stream_mode="values",
):
    step["messages"][-1].pretty_print()


WShow all bus providers operating from Chittagong to Sylhet.

The following bus providers operate from Chittagong to Sylhet:

*   **Ena**
*   **Shyamoli**
*   **Desh Travel**
