In [None]:
# From the previous notebook
# --------------- LANGCHAIN + OLLAMA ---------------
from langchain_community.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.chains import RetrievalQA

# Recap of the previous notebook: the same RAG pipeline in compact form.
# The annotated version of these exact steps follows later in this cell.

# Load the source text and split it into overlapping chunks.
loader = TextLoader("climate.txt")
raw_docs = loader.load()
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = splitter.split_documents(raw_docs)

# Embed the chunks with a local Ollama embedding model and index them in Chroma.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
vs = Chroma.from_documents(docs, embeddings, persist_directory="lc_chroma")
retriever = vs.as_retriever(search_type="mmr")

# Wire the retriever and the local LLM into a RetrievalQA chain.
llm = Ollama(model="llama3")
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# `Chain.run` is deprecated; `invoke` is the supported entry point (and the one
# used by the annotated version below). It returns a dict whose "result" key
# holds the answer text, which is what `run` used to return.
print("[LangChain]", qa.invoke({"query": "What causes the most CO2 emissions?"})["result"])
# --------------- LANGCHAIN + OLLAMA -------------------

# Load the required LangChain components
from langchain_community.llms import Ollama                    # Local LLM interface (e.g., llama3)
from langchain.embeddings import OllamaEmbeddings              # Embedding model via Ollama (e.g., mxbai-embed-large)
from langchain.vectorstores import Chroma                      # Vector store to store and search chunk embeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Splits long text into manageable chunks
from langchain.document_loaders import TextLoader              # Utility to load text files as documents
from langchain.chains import RetrievalQA                       # Retrieval-augmented QA pipeline

# ------------------- Perception: Load and Prepare Knowledge -------------------

# Load the raw document from file
loader = TextLoader("climate.txt")
raw_docs = loader.load()  # List of Document objects

# Split the document into smaller overlapping chunks for better semantic matching
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = splitter.split_documents(raw_docs)

# ------------------- Representation: Embed and Store Chunks -------------------

# Load a local embedding model via Ollama (can be changed to any compatible embedding model)
embeddings = OllamaEmbeddings(model="mxbai-embed-large")

# Build a Chroma vector store from the split chunks, persisted under "lc_chroma".
# NOTE(review): `from_documents` embeds and adds the chunks each time it is called;
# presumably re-running this cell accumulates duplicates in the persisted
# collection -- confirm, and clear the directory between runs if so.
vs = Chroma.from_documents(docs, embeddings, persist_directory="lc_chroma")

# Create a retriever using Max Marginal Relevance (MMR) to reduce redundancy;
# k=4 asks for four chunks per query.
retriever = vs.as_retriever(search_type="mmr", search_kwargs={"k": 4})

# ------------------- Reasoning: Setup the LLM -------------------

# Load the local language model (e.g., LLaMA 3 via Ollama)
llm = Ollama(model="llama3")

# Wrap everything in a RetrievalQA chain: retrieve relevant chunks, pass them to the LLM, and get an answer
qa = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# ------------------- Action: Ask a Question -------------------

# Run the full pipeline. `invoke` returns a dict ({"query": ..., "result": ...}),
# so print only the "result" entry: the answer text, not the whole dict.
print("[LangChain]", qa.invoke({"query": "What causes the most CO2 emissions?"})["result"])




This LangChain RAG setup behaves like an agent in a sandbox: it perceives its environment (a fixed document), chooses the most relevant information (via retrieval), and takes a single goal-driven action (answering the question). It’s not a fully autonomous agent — but it captures the core structure of one.

Let's make this more agentic by using a proper agent and giving it a choice between several tools.

In [None]:
from langchain.agents import Tool, initialize_agent
from langchain.agents.agent_types import AgentType
from langchain.llms import Ollama
from langchain.utilities import WikipediaAPIWrapper
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

# 1. Load and embed local document
loader = TextLoader("climate.txt")
docs = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(loader.load())
# NOTE(review): `from_documents` adds the chunks on every call; presumably re-running
# this cell accumulates duplicates under "lc_agent_chroma" -- confirm.
vs = Chroma.from_documents(docs, OllamaEmbeddings(model="mxbai-embed-large"), persist_directory="lc_agent_chroma")
retriever = vs.as_retriever(search_type="mmr", search_kwargs={"k": 4})

# 2. Build QA chain on top of retriever
llm = Ollama(model="llama3")
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# 3. Add Wikipedia as external tool
wiki_tool = WikipediaAPIWrapper()

# 4. Wrap both tools as LangChain Tool objects.
#    A Tool's `func` receives the tool input as a string and must return a string.
tools = [
    Tool(
        name="LocalDocQA",
        # `Chain.run` is deprecated: call `invoke` and hand back only the answer text.
        func=lambda query: rag_chain.invoke({"query": query})["result"],
        description="Use this tool to answer questions about climate change using the local document."
    ),
    Tool(
        name="WikipediaSearch",
        func=wiki_tool.run,
        description="Use this when the local document does not seem to contain the information."
    )
]

# 5. Initialize the agent
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=False,
    # If the model's reply does not follow the ReAct format, feed the parsing
    # error back to it as an observation instead of raising and aborting the cell.
    handle_parsing_errors=True,
)

# 6. Run the agent. `invoke` returns a dict; the final answer is under "output".
response = agent_executor.invoke({"input": "What causes the most CO2 emissions?"})["output"]
print("[Agentic LangChain]", response)

What happens in the background? Let's expose the call that the "agent" program makes, the responses of the LLM, and the loops that occur until the LLM surfaces the expected answer.

In [None]:
from langchain.agents import Tool, initialize_agent
from langchain.agents.agent_types import AgentType
from langchain.llms import Ollama
from langchain.utilities import WikipediaAPIWrapper
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma
from langchain.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader

# Load and prepare documents (same setup and same "lc_agent_chroma" directory as the previous agent cell)
loader = TextLoader("climate.txt")
docs = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100).split_documents(loader.load())
# NOTE(review): `from_documents` adds the chunks on every call; presumably this
# appends a second copy to the collection the previous cell already filled -- confirm.
vs = Chroma.from_documents(docs, OllamaEmbeddings(model="mxbai-embed-large"), persist_directory="lc_agent_chroma")
retriever = vs.as_retriever(search_type="mmr", search_kwargs={"k": 4})
llm = Ollama(model="llama3")

# Define tools
rag_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
wiki_tool = WikipediaAPIWrapper()

tools = [
    Tool(
        name="LocalDocQA",
        # `Chain.run` is deprecated: call `invoke` and hand back only the answer text.
        func=lambda query: rag_chain.invoke({"query": query})["result"],
        description="Use this tool to answer questions about climate change using the local document."
    ),
    Tool(
        name="WikipediaSearch",
        func=wiki_tool.run,
        description="Use this when the local document does not contain the needed information."
    )
]

# Initialize the agent; verbose=True prints each Thought / Action / Observation step
agent_executor = initialize_agent(
    tools=tools,
    llm=llm,
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    # Feed malformed LLM replies back to the model as an observation instead of
    # raising, so the loop stays visible rather than ending in a traceback.
    handle_parsing_errors=True,
)

# Not used by this cell; kept in case a later cell relies on the name.
from langchain.agents import AgentOutputParser

# Show the base ReAct-style prompt template
print("=== Prompt Template (ReAct Format with Placeholders) ===")
print(agent_executor.agent.llm_chain.prompt.template)

# Show the full rendered prompt as it would be sent to the LLM on the first step.
# The tool descriptions and tool names are already written into the template
# text when the agent is built, so the only variables left to fill are `input`
# and `agent_scratchpad`. Extra keyword arguments are at best ignored and can
# raise KeyError with formatter versions that reject unused arguments.
formatted_prompt = agent_executor.agent.llm_chain.prompt.format_prompt(
    input="What causes the most CO2 emissions?",
    agent_scratchpad="",  # Empty on the first step; omitting it raises KeyError
)
print("\n=== Rendered Prompt Sent to the LLM ===")
print(formatted_prompt.to_string())


# Run the agent. `invoke` returns a dict; the final answer is under "output".
response = agent_executor.invoke({"input": "What causes the most CO2 emissions?"})["output"]
print("[Agentic LangChain]", response)




## 🧠 LangSmith Integration

The following cells enable LangSmith observability for this notebook. We configure the environment,
wrap key functions with `@traceable`, and route calls through LangSmith so traces appear in your dashboard.

👉 You need a LangSmith account and an API key from https://smith.langchain.com


In [None]:

import os
from getpass import getpass

# Enable LangSmith tracing
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Do not paste a real key into the notebook: it would be saved with the file
# and end up in version control. Reuse LANGCHAIN_API_KEY if it is already
# exported in the environment; otherwise prompt for it without echoing.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass("LangSmith API key: ")


In [None]:

# `traceable` is exported from the `langsmith` package root; `langsmith.trace`
# is not an importable module.
from langsmith import traceable


@traceable(name="RAG Chain: Climate CO2 QA")
def run_rag(query):
    """Answer `query` with the RetrievalQA chain `qa` and return the answer text.

    The call is recorded as a LangSmith run named "RAG Chain: Climate CO2 QA".
    """
    # `Chain.run` is deprecated; `invoke` returns a dict with the answer under "result".
    return qa.invoke({"query": query})["result"]


@traceable(name="Agent Execution: Tool-augmented QA")
def run_agent(query):
    """Run the tool-using agent `agent_executor` on `query` and return its final answer.

    The call is recorded as a LangSmith run named "Agent Execution: Tool-augmented QA".
    """
    # `invoke` returns a dict; the agent's final answer is under "output".
    return agent_executor.invoke({"input": query})["output"]


# Run the agent and trace the call
response = run_agent("What causes the most CO2 emissions?")
print("[LangSmith Traced Agent]", response)


## Building a Custom Agent with LangGraph (Step-by-Step)

This section of the notebook demonstrates how to build a **simple, explainable agent** using **LangGraph**, mimicking the behavior of a LangChain `ZERO_SHOT_REACT_DESCRIPTION` agent — but with full control over its logic, flow, and execution.

The effect is the same as the previous example, so you are on familiar ground, but the interest is that LangGraph allows you to build your own agent and its structure, so you can go beyond pre-canned tools.

---

##  Step 1: Set Up Tools

This step loads and prepares everything needed to power the agent's capabilities.

**Functions and Components Used:**
- `TextLoader`: Loads the `climate.txt` document from disk.
- `RecursiveCharacterTextSplitter`: Splits the document into overlapping chunks for better semantic search.
- `OllamaEmbeddings`: Creates vector embeddings using a local model (e.g., `mxbai-embed-large`).
- `Chroma`: Stores those embeddings for efficient retrieval.
- `Ollama`: Runs the local language model (`llama3`) used for answering.
- `RetrievalQA`: A wrapper that connects the retriever and the LLM.
- `WikipediaAPIWrapper`: A tool that performs a Wikipedia search as a fallback.

 **Purpose:** These tools form the basis for all reasoning and information retrieval used later by the agent.


##  Step 2: Define the Agent's State

LangGraph requires a formal declaration of what data flows through your graph. This step defines that shared memory.

**Structure: `QAState`**
- `question`: The user’s query.
- `rag_answer`: Stores the answer if the RAG tool was used.
- `wiki_answer`: Stores the answer if the Wikipedia tool was used.
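- `answer`: The final answer to return. This key is declared as `Annotated[..., "output"]`; the `"output"` string is only a label for readers. LangGraph acts on reducer functions placed inside `Annotated`, not on plain strings, so `answer` is stored like the other keys (the last write wins).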

 **Purpose:** Controls the input, intermediate, and output values handled by each node in the graph.


##  Step 3: Routing Logic

This is the "agentic brain" — a custom Python function that decides *which tool to use* based on the question.

**Function: `tool_selector(state)`**
- Inspects `state["question"]`
- If the question contains the word `"climate"`, it returns `{"__next__": "rag"}`
- Otherwise, it returns `{"__next__": "wiki"}`

 **Purpose:** Simulates an intelligent decision-maker — your own custom routing logic instead of relying on a hardcoded or LLM-parsed policy.


##  Step 4: Define Graph Nodes

Each node is a callable function that:
- Accepts the current state
- Returns a dictionary with updated values

**Node Functions:**
- `rag_node(state)`: Calls the RAG tool and stores the output in `rag_answer`.
- `wiki_node(state)`: Calls Wikipedia and stores the output in `wiki_answer`.
- `merger(state)`: Consolidates the answers. If `rag_answer` is present, it uses it. Otherwise, it falls back to `wiki_answer`. The result is stored in `answer`.

 **Purpose:** These nodes act like "agent limbs" — executing concrete actions depending on what the brain (`tool_selector`) decided.


##  Step 5: Build the Graph

Here we assemble the agent as a directed graph of reasoning steps.

**Steps:**
- Instantiate the graph: `graph = StateGraph(QAState)`
- Add nodes: router, rag, wiki, merger
- Connect edges:
  - `router → rag`
  - `router → wiki`
  - `rag → merger`
  - `wiki → merger`
  - `merger → END`
- Compile the graph: `agent = graph.compile()`

 **Purpose:** This defines the possible paths an input can take — making the reasoning traceable and testable.


##  Step 6: Run the Agent

We finally invoke the agent with a natural language question.

**Invocation:**
```python
question = "What causes the most CO2 emissions?"
result = agent.invoke({"question": question})
```




In [None]:
# ---------------- LangGraph Agentic Example ----------------

from typing import TypedDict, Annotated, Optional, Union
from langchain_community.llms import Ollama
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain.utilities import WikipediaAPIWrapper
from langchain_core.runnables import RunnableLambda
from langgraph.graph import StateGraph, END

# === Step 1: Set up tools ===
# Read climate.txt, cut it into overlapping chunks, and index the chunks in a
# Chroma store persisted under "graph_chroma".
docs = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100,
).split_documents(TextLoader("climate.txt").load())
vs = Chroma.from_documents(
    documents=docs,
    embedding=OllamaEmbeddings(model="mxbai-embed-large"),
    persist_directory="graph_chroma",
)

# Retriever with default settings (no search_type / k override in this cell).
retriever = vs.as_retriever()

# Local LLM plus the two tools the graph nodes call: document QA and Wikipedia.
llm = Ollama(model="llama3")
rag_tool = RetrievalQA.from_chain_type(retriever=retriever, llm=llm)
wiki_tool = WikipediaAPIWrapper()

# === Step 2: Define state ===
class QAState(TypedDict):
    """State dictionary passed between the nodes of the QA graph."""

    # The user's question; the only key supplied when the graph is invoked.
    question: str
    # Answer written by the RAG node (absent or None when that node did not run).
    rag_answer: Union[str, None]
    # Answer written by the Wikipedia node (absent or None when that node did not run).
    wiki_answer: Union[str, None]
    # Final answer written by the merger node; "output" is attached as Annotated metadata.
    answer: Annotated[Union[str, None], "output"]

# === Step 3: Routing logic ===
def tool_selector(state: QAState):
    """Choose a tool for the question held in `state`.

    Returns {"__next__": "rag"} when the lower-cased question contains the
    substring "climate", and {"__next__": "wiki"} otherwise.
    """
    mentions_climate = "climate" in state["question"].lower()
    return {"__next__": "rag" if mentions_climate else "wiki"}

# === Step 4: Nodes ===
def rag_node(state: QAState):
    """Answer the question with the RetrievalQA tool and store it under "rag_answer".

    Returns a partial state update: {"rag_answer": <answer text>}.
    """
    # `Chain.run` is deprecated; `invoke` returns a dict whose "result" entry is
    # the same answer string that `run` used to return.
    rag_output = rag_tool.invoke({"query": state["question"]})
    return {"rag_answer": rag_output["result"]}

def wiki_node(state: QAState):
    """Look the question up with the Wikipedia tool and store the text under "wiki_answer".

    Returns a partial state update: {"wiki_answer": <tool output>}.
    """
    wiki_text = wiki_tool.run(state["question"])
    return {"wiki_answer": wiki_text}

def merger(state: QAState):
    """Pick the final answer: the RAG answer if it is truthy, else the Wikipedia answer.

    Returns a partial state update: {"answer": <chosen answer or None>}.
    """
    chosen = state.get("rag_answer")
    if not chosen:
        # No (or empty) RAG answer: fall back to whatever the Wikipedia node wrote.
        chosen = state.get("wiki_answer")
    return {"answer": chosen}

# === Step 5: Build graph ===
def _router_passthrough(state: QAState):
    """Router node body: re-emit the question unchanged.

    The branch decision is taken on the conditional edge leaving this node,
    so the node itself only has to return a valid update for a known state key.
    """
    return {"question": state["question"]}


def _pick_branch(state: QAState) -> str:
    """Return the node name ("rag" or "wiki") chosen by `tool_selector`."""
    return tool_selector(state)["__next__"]


graph = StateGraph(QAState)
graph.add_node("router", RunnableLambda(_router_passthrough))
graph.add_node("rag", rag_node)
graph.add_node("wiki", wiki_node)
graph.add_node("merger", merger)

graph.set_entry_point("router")
# Route with a conditional edge. Two plain `add_edge` calls out of "router"
# would run BOTH branches on every question, and a "__next__" key returned from
# a node is not something LangGraph routes on. With `add_conditional_edges`,
# the value returned by `_pick_branch` is looked up in the mapping and only
# that node runs.
graph.add_conditional_edges("router", _pick_branch, {"rag": "rag", "wiki": "wiki"})
graph.add_edge("rag", "merger")
graph.add_edge("wiki", "merger")
graph.add_edge("merger", END)

agent = graph.compile()

# === Step 6: Run it ===
# Invoke the compiled graph with an initial state that holds only the question,
# then print the "answer" entry of the state it returns.
question = "What causes the most CO2 emissions?"
result = agent.invoke(dict(question=question))
print("[LangGraph Agentic]", result["answer"])
