In [None]:

# Install dependencies
!pip install --quiet  chromadb  gradio sentence-transformers

import os
import textwrap

import pandas as pd
import chromadb
from chromadb.config import Settings
from sentence_transformers import SentenceTransformer
from google.colab import userdata
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr

model_name = "Qwen/Qwen2.5-1.5B-Instruct"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)


csv_path = "/content/5000TravelQuestionsDataset.csv"

df = pd.read_csv(csv_path, encoding="latin1", header=None)

print("Columns:", df.columns)
df.head()


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.8/20.8 MB[0m [31m99.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m73.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m103.3/103.3 kB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.4/17.4 MB[0m [31m76.8 MB/s[0m eta [36m0:00

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Columns: Index([0, 1, 2], dtype='int64')


Unnamed: 0,0,1,2
0,What are the special things we (husband and me...,TTD,TTDSIG
1,What are the companies which organize shark fe...,TTD,TTDOTH
2,Is it safe for female traveller to go alone to...,TGU,TGUHEA
3,What are the best places around Cape Town for ...,TTD,TTDSIG
4,What are the best places to stay for a family ...,ACM,ACMOTH


In [None]:
# Limit how many rows you use while testing to keep it snappy
N_DOCS = 4000

df_small = df.head(N_DOCS)

documents = []

for i, row in df_small.iterrows():
    question_text = str(row[0])
    subcat = str(row[2]) if 2 in df.columns else ""

    combined_text = f"""
    Question: {question_text}
    Subcategory: {subcat}
    """.strip()

    documents.append({
        "id": f"travel_doc_{i}",
        "text": combined_text
    })

# Quick sanity check
for d in documents[:5]:
    print(d)


{'id': 'travel_doc_0', 'text': 'Question: What are the special things we (husband and me) can do during a 5 day stay at Cape Town?\n    Subcategory: TTDSIG'}
{'id': 'travel_doc_1', 'text': 'Question: What are the companies which organize shark feeding events for scuba divers?\n    Subcategory: TTDOTH'}
{'id': 'travel_doc_2', 'text': 'Question: Is it safe for female traveller to go alone to Cape Town?\n    Subcategory: TGUHEA'}
{'id': 'travel_doc_3', 'text': 'Question: What are the best places around Cape Town for safari?\n    Subcategory: TTDSIG'}
{'id': 'travel_doc_4', 'text': 'Question: What are the best places to stay for a family to stay away from nightlife?\n    Subcategory: ACMOTH'}


In [None]:
def split_into_chunks(text, max_chars=400):
    paragraphs = text.split("\n")
    chunks = []
    current = ""

    for p in paragraphs:
        if len(current) + len(p) + 1 > max_chars:
            if current:
                chunks.append(current.strip())
                current = ""
        current += " " + p
    if current:
        chunks.append(current.strip())

    return chunks

# Helper used in debug printing below
def wrap(text, width=80):
    return textwrap.fill(text, width)

# Test on first doc (if any)
if documents:
    print(split_into_chunks(documents[0]["text"]))


['Question: What are the special things we (husband and me) can do during a 5 day stay at Cape Town?     Subcategory: TTDSIG']


In [None]:

# Local embedding model for Chroma + retrieval
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

chroma_client = chromadb.Client(
    Settings(
        anonymized_telemetry=False
    )
)

collection = chroma_client.get_or_create_collection(name="rag_travel")

texts = []
ids = []
metadatas = []

for doc in documents:
    chunks = split_into_chunks(doc["text"], max_chars=400)
    for i, chunk in enumerate(chunks):
        chunk_id = f"{doc['id']}_chunk_{i}"
        texts.append(chunk)
        ids.append(chunk_id)
        metadatas.append({"source": doc["id"]})

print("Number of chunks:", len(texts))


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Number of chunks: 4000


In [None]:
# ----- 5.3 Embed locally (no OpenAI) -----
embeddings = embedding_model.encode(texts, convert_to_numpy=True).tolist()

# ----- 5.4 Add to Chroma -----
collection.add(
    documents=texts,
    metadatas=metadatas,
    ids=ids,
    embeddings=embeddings
)

print("Indexed", len(texts), "chunks into Chroma.")


Indexed 4000 chunks into Chroma.


In [None]:
def retrieve_relevant_chunks(query: str, k: int = 5):
    # Embed query with the same local model
    query_vec = embedding_model.encode([query], convert_to_numpy=True).tolist()[0]

    results = collection.query(
        query_embeddings=[query_vec],
        n_results=k,
        include=["documents", "metadatas", "distances"]
    )

    docs = results["documents"][0]
    metas = results["metadatas"][0]
    dists = results["distances"][0]

    retrieved = []
    for doc_text, meta, dist in zip(docs, metas, dists):
        retrieved.append({
            "text": doc_text,
            "source": meta["source"],
            "score": dist
        })
    return retrieved

# Quick test
test_query = "What can I do in sandiego?"
for r in retrieve_relevant_chunks(test_query, k=3):
    print("Score:", r["score"])
    print(wrap(r["text"]))
    print("-----")


Score: 0.9796572923660278
Question: Any beach parties to go to while in Varadero during end of October?
Subcategory: ENTFES
-----
Score: 1.0600512027740479
Question: What is the most convenient way to go from Vernazza to the beach in
Monterosso?     Subcategory: TRSOTH
-----
Score: 1.060908317565918
Question: What are the best beach areas to stay in SL?     Subcategory: ACMOTH
-----


In [None]:
def qwen_generate(system_prompt: str, user_prompt: str, max_new_tokens: int = 512, temperature: float = 0.7):
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user",   "content": user_prompt}
    ]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature
        )
    # strip the prompt tokens
    gen_ids = outputs[0][inputs.input_ids.shape[1]:]
    return tokenizer.decode(gen_ids, skip_special_tokens=True)

In [None]:
def answer_with_rag(question: str):
    retrieved = retrieve_relevant_chunks(question, k=5)
    context_block = "\n\n".join(
        f"[Source: {r['source']} | score={r['score']:.3f}]\n{r['text']}"
        for r in retrieved
    )

    system_prompt = (
        "You are a helpful travel assistant. "
        "Use the provided retrieved context to answer the user's question. "
        "Be specific, practical, and concise. If context is helpful, cite key facts inline like [Source: id]."
    )

    user_prompt = (
        f"User question:\n{question}\n\n"
        f"Retrieved context:\n{context_block}\n\n"
        "Answer using the context when helpful. If the context is not useful, rely on your general knowledge."
    )

    answer = qwen_generate(system_prompt, user_prompt, max_new_tokens=512, temperature=0.6)
    return answer, retrieved

In [None]:
_TRAVEL_KEYWORDS = {
    "travel","trip","trips","vacation","tour","tourist","tourism",
    "hotel","hostel","airbnb","flight","flights","airport","visa",
    "itinerary","sightseeing","attraction","attractions","landmark",
    "city","cities","museum","temple","park","beach","island",
    "hotel recommendations","things to do","where to stay"
}


In [None]:
def decide_use_rag(question: str) -> bool:
    q = question.lower()
    return any(kw in q for kw in _TRAVEL_KEYWORDS)


In [None]:
def agent_answer(question: str):
    if decide_use_rag(question):
        print(" Using RAG")
        answer, retrieved = answer_with_rag(question)
        mode = "RAG"
    else:
        print("Not  Using RAG")
        # Direct mode: general answer with Qwen (no RAG)
        system_prompt = "You are a helpful general AI assistant. Be clear and concise."
        user_prompt = f"User question:\n{question}"
        answer = qwen_generate(system_prompt, user_prompt, max_new_tokens=384, temperature=0.7)
        retrieved = []
        mode = "DIRECT"
    return answer, {"mode": mode, "retrieved": retrieved}

In [None]:
def gradio_chat_fn(message, history):
    answer, debug = agent_answer(message)
    # Optionally show mode:
    # answer += f"\n\n_(Mode: {debug['mode']})_"
    return answer

demo = gr.ChatInterface(
    fn=gradio_chat_fn,
    title="Travel Agentic RAG Assistant (Qwen)",
    description="Ask travel questions. The agent decides RAG vs. general knowledge using Qwen."
)
demo.launch(debug=True)

  self.chatbot = Chatbot(


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://4bc572bdc39e28b260.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


 Using RAG
Not  Using RAG
 Using RAG
 Using RAG
Not  Using RAG
 Using RAG
 Using RAG
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://4bc572bdc39e28b260.gradio.live




In [1]:
!apt-get install git


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
git is already the newest version (1:2.34.1-1ubuntu1.15).
0 upgraded, 0 newly installed, 0 to remove and 41 not upgraded.


In [2]:
!git config --global user.email "lokeshlok2@gmail.com"
!git config --global user.name "lokeshbalasubramaniam"


In [3]:
!git clone https://github.com/lokeshbalasubramaniam/Agentic-AI


Cloning into 'Agentic-AI'...


In [4]:
!cp TravelEnquriesChatbot.ipynb Agentic-AI/


cp: cannot stat 'TravelEnquriesChatbot.ipynb': No such file or directory


In [6]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
!ls


Agentic-AI  sample_data
