In [None]:
# Install required packages
!pip install -qU langchain==0.3.12 langchain-chroma langchain-community pypdf langchain-openai wikipedia langgraph

  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m63.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.2/304.2 kB[0m [31m19.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0

In [None]:
# Environment setup
import os
from google.colab import userdata
# Copy the OpenAI key out of the Colab secret store into the process
# environment. The embedding client created later is constructed without an
# explicit key, so it presumably reads this variable — confirm against the
# langchain-openai documentation.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

# PDF Load

In [None]:
# PDF Loading
# NOTE(review): `langchain.document_loaders` looks like the legacy import path;
# `langchain_community.document_loaders` may be the preferred one for the
# pinned langchain version — confirm before changing.
from langchain.document_loaders import PyPDFLoader

pdf_files = ["Meditation_Techniques.pdf", "how-to-meditate.pdf"]

# Read the PDFs one after another and gather everything into one flat list.
all_documents = []
for pdf_file in pdf_files:
    loader = PyPDFLoader(pdf_file)
    docs = loader.load()
    all_documents += docs

# Later cells refer to the combined list as `documents` (same list object).
documents = all_documents
print(f"Loaded {len(documents)} documents from {len(pdf_files)} PDFs.")

Loaded 39 documents from 2 PDFs.


In [None]:
# Sanity check: show how many documents were loaded from the PDFs.
len(documents)

39

# Text Split

In [None]:
# Text Splitting
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Break the loaded documents into overlapping chunks for embedding.
# Sizes are presumably measured in characters (the splitter's default length
# function) — confirm if a token-based budget is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

print(f"Split into {len(texts)} chunks.")

Split into 89 chunks.


# Embedding

In [None]:
# Embedding
from langchain_openai import OpenAIEmbeddings
# Embedding client used by the vector store cell below. No API key is passed
# here, so it presumably falls back to OPENAI_API_KEY from the environment —
# confirm against the langchain-openai documentation.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Vector Store

In [None]:
# Vectorstore (ChromaDB)
from langchain_chroma import Chroma as ch
# Directory (relative to the working directory) handed to Chroma for persistence.
persist_directory = "chroma_db"
# Build the store from the split chunks, embedding them with `embeddings`.
# NOTE(review): because a persist directory is used, re-running this cell may
# add the same chunks to the existing collection again, so retrieval could
# return duplicates — confirm, and consider reloading the store when the
# directory already exists.
db = ch.from_documents(
    documents=texts,
    embedding=embeddings,
    persist_directory=persist_directory
)

# Retriever

In [None]:
# Retriever: plain top-k similarity search returning the 4 closest chunks.
# No score threshold is applied with this configuration, despite what an
# earlier version of this comment said.
retriever = db.as_retriever(
    search_type="similarity",  # plain similarity search; no score threshold configured
    search_kwargs={"k": 4}
)

In [None]:
# RAG Prompt
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.chat_models import ChatOpenAI

# Prompt that restricts the model to the retrieved context and asks for an
# explicit "I don't know." when the context does not contain the answer.
custom_prompt_template = """
Use ONLY the pieces of information provided in the context to answer the user's question.
If the answer is not present in the context, explicitly return: I don't know.
Do NOT try to make up an answer or use outside knowledge.
Question: {question}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(custom_prompt_template)
llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

# RAG LCEL chain
# Input contract: a dict of the form {"question": "<user question>"}.
# Both prompt variables are derived from that dict:
#   - "context": the chunks the retriever returns for the question text
#   - "question": the question text itself
# The question is extracted explicitly. A bare RunnablePassthrough() would
# forward the whole input dict, so the prompt would render
# "Question: {'question': '...'}" instead of the question.
rag_chain = (
    {
        "context": lambda x: retriever.invoke(x["question"]),
        "question": lambda x: x["question"],
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
# Smoke test of the RAG chain; the chain expects a dict with a "question" key.
rag_chain.invoke({"question": "What is meditation?"})

'Meditation is commonly described as a training of mental attention that awakens us beyond the conditioned mind and habitual thinking, and reveals the nature of reality. It is understood as Natural Presence, which is a mindful, clear recognition of what is happening—here, now—and the open, allowing space that includes all experience.'

In [None]:
# Second smoke test of the RAG chain, this time with a how-to style question.
rag_chain.invoke({"question": "How to do breathing exercises?"})

"To do breathing exercises, you can follow these steps:\n\n1. **Proper Breathing**: Breathe in slowly and quietly, so that a tiny thread placed in front of your nose does not move. Breathe out even more slowly than you breathed in. If possible, leave a short pause between your exhalation and the next inhalation. You can hold your breath for a few seconds if it's comfortable.\n\n2. **Life-Energy in the Chakras**: Breathe in and hold your breath at the third eye for a couple of seconds, then at the heart center during the second inhalation.\n\n3. **One-Four-Two Breathing**: Inhale for one count while repeating a name or mantra, hold your breath for four counts repeating the same name four times, and exhale for two counts repeating the name or mantra twice.\n\n4. **Gradual Increase**: As you progress, you can try breathing in for four counts, holding for sixteen, and breathing out for eight, but do this gradually.\n\n5. **Alternate Nostril Breathing**: Press the right nostril with your th

# Agent 1

In [None]:
from pydantic import BaseModel
from langchain_openai import ChatOpenAI
from google.colab import userdata
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain.agents import create_tool_calling_agent, AgentExecutor
# from langchain_community.tools import WikipediaQueryRun
# from langchain_community.utilities import WikipediaAPIWrapper
from langchain.tools import Tool

# Define a RAG tool that takes a query and returns the RAG answer
# def is_rag_no_answer(result):
#     result_str = str(result).strip().lower()
#     return (
#         result_str in ["i don't know", "i do not know", "not present", "no relevant information", "sorry", ""]
#         or len(result_str) < 10
#     )

class RAGTool:
    """Callable wrapper that lets an agent use a question-answering chain as a tool.

    The wrapped chain is invoked with ``{"question": <query>}``. When the reply
    is a refusal ("I don't know" and similar) or is shorter than ten characters,
    the sentinel ``"__RAG_NO_ANSWER__"`` is returned instead, so the caller can
    tell "no answer" apart from a real answer.
    """

    # Value returned when the chain produced no usable answer.
    NO_ANSWER = "__RAG_NO_ANSWER__"
    # Refusal phrases, compared after lower-casing and trimming punctuation.
    _REFUSALS = frozenset(
        {"i don't know", "i do not know", "not present", "no relevant information", "sorry", ""}
    )
    # Replies shorter than this (after whitespace stripping) count as no answer.
    _MIN_LENGTH = 10

    def __init__(self, qa_chain):
        """Store the chain to query; it must provide ``invoke(dict)``."""
        self.qa_chain = qa_chain

    def __call__(self, input_dict) -> str:
        """Run the chain for a query and return its answer or the sentinel.

        Args:
            input_dict: Either a dict carrying the query under the ``"query"``
                key, or the query itself.

        Returns:
            The chain's result unchanged, or ``"__RAG_NO_ANSWER__"`` when the
            result is a refusal or too short to be an answer.
        """
        query = input_dict["query"] if isinstance(input_dict, dict) else input_dict
        # The chain always receives its input as {"question": ...}.
        result = self.qa_chain.invoke({"question": query})

        stripped = str(result).strip()
        # Trim surrounding punctuation/quotes and unify curly apostrophes so
        # that replies such as "I don't know." (the exact wording the RAG
        # prompt asks for, trailing period included) are recognised.
        normalized = stripped.lower().replace("\u2019", "'").strip(" \t\n.!?\"'")
        if normalized in self._REFUSALS or len(stripped) < self._MIN_LENGTH:
            return self.NO_ANSWER
        return result

# Expose the RAG chain to the agent as a tool named "RAG".
# The description is what the model reads when deciding how to use the tool.
# RAG is the only tool registered, so the description no longer tells the
# model to fall back to "other tools" when the no-answer sentinel comes back.
rag_tool = Tool(
    name="RAG",
    func=RAGTool(rag_chain),
    description="Use this tool to answer questions using the provided PDF documents. Always try this tool first. If it returns __RAG_NO_ANSWER__, the documents do not contain the answer."
)

# api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
# wiki_tool = WikipediaQueryRun(api_wrapper=api_wrapper)

# RAG is the only tool handed to the agent; the Wikipedia tool is commented out.
tools = [rag_tool]

# Chat model that drives the agent. This rebinds `llm`, which the RAG cell also
# assigned; unlike that earlier model, no temperature is set here.
key = userdata.get('OPENAI_API_KEY')
llm = ChatOpenAI(api_key=key, model="gpt-4o-mini")

# Schema handed to PydanticOutputParser below. Documented with comments rather
# than a docstring so the generated schema text stays unchanged.
class ResearchResponse(BaseModel):
    # Field meanings below are inferred from the names only — TODO confirm.
    topic: str  # presumably the subject of the question
    answer: str  # presumably the answer text
    sources: list[str]  # presumably references backing the answer
    tools_used: list[str]  # presumably names of the tools the agent called

# Parser for the ResearchResponse schema.
parser = PydanticOutputParser(pydantic_object=ResearchResponse)

# Agent prompt: system instructions, optional chat history, the user's query,
# and the scratchpad placeholder used for the agent's intermediate steps.
# The system message names exactly one tool, matching the single-entry `tools`
# list (it previously announced "two tools" while listing only RAG).
# NOTE(review): `format_instructions` is bound via .partial() but no message
# references {format_instructions}, so the parser's instructions do not appear
# to reach the model. Add the placeholder to the system message if structured
# ResearchResponse output is wanted.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            '''
            You are a research assistant with access to one tool:
            - "RAG": Answers questions using the provided PDF documents. Always try this tool first.
              If it returns "__RAG_NO_ANSWER__", then return I don't know.
            '''
        ),
        ("placeholder", "{chat_history}"),
        ("human", "{query}"),
        ("placeholder", "{agent_scratchpad}"),
    ]
).partial(format_instructions=parser.get_format_instructions())
# Build the tool-calling agent from the model, the prompt and the tool list.
agent = create_tool_calling_agent(
    llm=llm,
    prompt=prompt,
    tools=tools
)

# Executor that runs the agent with the same tools; verbose=True prints the
# intermediate trace (tool invocations and their results) while it runs.
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)



In [None]:
# Now, just call the agent_executor. The agent will decide which tool(s) to use.
# Read a question interactively and pass it to the agent under the "query" key,
# the variable the human message of the agent prompt expects.
# NOTE(review): input() blocks a non-interactive "Run All"; consider a constant
# query for reproducible runs.
user_query = input("What can I help you research? ")
response = agent_executor.invoke({"query": user_query})
# `response` is printed as returned; the captured output shows a dict holding
# the original query and the agent's final "output".
print(response)
# Sample query (presumably one the PDFs cannot answer — confirm):
#   explain the distance from the main sequence and its formula.

What can I help you research? give me breathing exerciese steps


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `RAG` with `breathing exercises steps`


[0m[36;1m[1;3mI don't know.[0m[32;1m[1;3mI don't know.[0m

[1m> Finished chain.[0m
{'query': 'give me breathing exerciese steps', 'output': "I don't know."}
