### Initialize the ChromaDB

In [2]:
# Install all required LangChain packages
import subprocess
import sys

packages = [
    "langchain",
    "langchain-chroma",
    "langchain-openai",
    "langchain-core",
    "python-dotenv",
    "chromadb"
]

print("Installing required packages...\n")

for package in packages:
    print(f"Installing {package}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
    print(f"✓ {package} installed successfully")

print("\n✓ All packages installed successfully!")
print("\nInstalled packages:")
for package in packages:
    print(f"  - {package}")


Installing required packages...

Installing langchain...
✓ langchain installed successfully
Installing langchain-chroma...
✓ langchain-chroma installed successfully
Installing langchain-openai...
✓ langchain-openai installed successfully
Installing langchain-core...
✓ langchain-core installed successfully
Installing python-dotenv...
✓ python-dotenv installed successfully
Installing chromadb...
✓ chromadb installed successfully

✓ All packages installed successfully!

Installed packages:
  - langchain
  - langchain-chroma
  - langchain-openai
  - langchain-core
  - python-dotenv
  - chromadb


In [3]:
# imports
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_text_splitters import CharacterTextSplitter

# load .env file
load_dotenv('../.env')

ModuleNotFoundError: No module named 'langchain_text_splitters'

In [2]:
# Get Embeddings Model
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Initialize ChromaDB as Vector Store
vector_store = Chroma(
    collection_name="test_collection",
    embedding_function=embeddings
)

### Split the File into LangChain Documents & Save to Vector Store

In [3]:
# Read in State of the Union Address File
with open("2024_state_of_the_union.txt") as f:
    state_of_the_union = f.read()

# Initialize Text Splitter
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)

# Create Documents (Chunks) From File
texts = text_splitter.create_documents([state_of_the_union])

# Save Document Chunks to Vector Store
ids = vector_store.add_documents(texts)

### Semantic Similarity Check with Vector Store

In [4]:
# Query the Vector Store
results = vector_store.similarity_search(
    'Who invaded Ukraine?',
    k=2
)

# Print Resulting Chunks
for res in results:
    print(f"* {res.page_content} [{res.metadata}]\n\n")

* And yes, my purpose tonight is to both wake up this Congress, and alert the American people that this is no ordinary moment either. 

Not since President Lincoln and the Civil War have freedom and democracy been under assault here at home as they are today. 

What makes our moment rare is that freedom and democracy are under attack, both at home and overseas, at the very same time. 

Overseas, Putin of Russia is on the march, invading Ukraine and sowing chaos throughout Europe and beyond. 

If anybody in this room thinks Putin will stop at Ukraine, I assure you, he will not. 

But Ukraine can stop Putin if we stand with Ukraine and provide the weapons it needs to defend itself. That is all Ukraine is asking. They are not asking for American soldiers. 

In fact, there are no American soldiers at war in Ukraine. And I am determined to keep it that way. 

But now assistance for Ukraine is being blocked by those who want us to walk away from our leadership in the world. [{}]


* But now 

### RAG Pipeline

In [5]:
# Create Document Parsing Function to String
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [6]:
# Set Chroma as the Retriever
retriever = vector_store.as_retriever()

In [7]:
# Initialize the LLM instance
llm = ChatOpenAI(model="gpt-4o-mini")

In [8]:
# Create the Prompt Template
prompt_template = """Use the context provided to answer the user's question below. If you do not know the answer based on the context provided, tell the user that you do not know the answer to their question based on the context provided and that you are sorry.
context: {context}

question: {query}

answer: """

# Create Prompt Instance from template
custom_rag_prompt = PromptTemplate.from_template(prompt_template)

In [9]:
# Create the RAG Chain
rag_chain = (
    {"context": retriever | format_docs, "query": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | StrOutputParser()
)

In [10]:
# Query the RAG Chain
rag_chain.invoke("According to the 2024 state of the union address, Who invaded Ukraine?")

'According to the 2024 State of the Union address, Putin of Russia invaded Ukraine.'

In [11]:
# Get an I don't know from the Model
rag_chain.invoke("What is the purpose of life?")

"I'm sorry, but I do not know the answer to your question based on the context provided."

# Agentic AI - Basic Starter Code

This section covers foundational concepts of building AI agents step-by-step:
1. **Tools**: Functions that agents can use
2. **Agent**: Decision-making entity that chooses which tools to use
3. **Tool Calling**: How agents invoke tools
4. **Reasoning Loop**: Agent thinking and acting iteratively

## Step 1: Understanding Tools

A **tool** is a function that an agent can call to perform specific tasks. Tools define:
- **Name**: Identifier for the tool
- **Description**: What the tool does (helps agent decide when to use it)
- **Parameters**: Input schema for the tool

In [None]:
from langchain_core.tools import tool
from typing import Union

# Example Tool 1: Calculator
@tool
def add_numbers(a: float, b: float) -> float:
    """Add two numbers together. Use this for addition operations."""
    return a + b

@tool
def multiply_numbers(a: float, b: float) -> float:
    """Multiply two numbers. Use this for multiplication operations."""
    return a * b

# Example Tool 2: Get Weather (simulated)
@tool
def get_weather(city: str) -> str:
    """Get the current weather for a city. Returns weather description."""
    # In real world, this would call a weather API
    weather_data = {
        "New York": "Sunny, 72°F",
        "London": "Rainy, 55°F",
        "Tokyo": "Cloudy, 68°F"
    }
    return weather_data.get(city, "Weather data not available")

# Example Tool 3: Get Current Time (simulated)
@tool
def get_current_time() -> str:
    """Get the current date and time."""
    from datetime import datetime
    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")

# Create a list of tools for the agent
tools = [add_numbers, multiply_numbers, get_weather, get_current_time]

print("✓ Created 4 tools:")
print("  1. add_numbers(a, b) - Addition")
print("  2. multiply_numbers(a, b) - Multiplication")
print("  3. get_weather(city) - Get weather for a city")
print("  4. get_current_time() - Get current time")

## Step 2: Initialize the Agent

An **agent** combines:
- **LLM (Language Model)**: Makes decisions about which tools to use
- **Tools**: The toolkit available to the agent
- **Agent Type**: How the agent processes information and decides actions

LangChain provides different agent architectures. We'll use `tool_calling` which is supported by GPT-4.

In [None]:
from langchain_core.messages import BaseMessage, HumanMessage
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# Define the system prompt for the agent
# This tells the agent its role and how to behave
system_prompt = """You are a helpful AI assistant with access to various tools.
When a user asks a question, analyze what they're asking and determine which tools to use.
Use the tools available to you to answer the user's question accurately.
Always explain your reasoning when using tools.
Be conversational and helpful."""

# Create a prompt template with placeholders for messages
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    MessagesPlaceholder(variable_name="messages"),
])

# Initialize the LLM (we already created this earlier)
# llm = ChatOpenAI(model="gpt-4o-mini")

# Bind tools to the LLM
# This tells the LLM about the available tools
llm_with_tools = llm.bind_tools(tools)

# Create the agent
agent = create_tool_calling_agent(llm_with_tools, tools, prompt)

# Create an AgentExecutor
# This executes the agent's decisions in a loop
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True, max_iterations=10)

print("✓ Agent created and ready to use!")
print("\nAgent Components:")
print("  - LLM Model: GPT-4o-mini")
print("  - Available Tools: 4 tools bound to agent")
print("  - Agent Type: Tool-calling agent")
print("  - Max Iterations: 10 (safety limit)")

## Step 3: Agent Reasoning Loop

The agent follows this loop:
1. **Observation**: Receives user query
2. **Thinking**: Analyzes what needs to be done
3. **Action**: Decides which tool(s) to use
4. **Observation**: Gets tool results
5. **Response**: Generates answer based on results

Let's see this in action with simple queries:

In [None]:
# Example 1: Simple math question
print("=" * 60)
print("EXAMPLE 1: Simple Math Question")
print("=" * 60)
result1 = agent_executor.invoke({
    "messages": [HumanMessage(content="What is 25 multiplied by 4?")]
})
print("\nFinal Answer:")
print(result1["output"])

In [None]:
# Example 2: Query requiring multiple steps
print("\n" + "=" * 60)
print("EXAMPLE 2: Multi-Step Reasoning")
print("=" * 60)
result2 = agent_executor.invoke({
    "messages": [HumanMessage(content="Add 10 and 5, then multiply the result by 3")]
})
print("\nFinal Answer:")
print(result2["output"])

In [None]:
# Example 3: Query using different tools
print("\n" + "=" * 60)
print("EXAMPLE 3: Using Different Tools")
print("=" * 60)
result3 = agent_executor.invoke({
    "messages": [HumanMessage(content="What time is it now and what's the weather in New York?")]
})
print("\nFinal Answer:")
print(result3["output"])

## Step 4: Understanding Agent Decision-Making

The agent's thinking process:
- **Perception**: Reads the user query
- **Tool Selection**: Evaluates which tools are relevant
- **Parameter Mapping**: Extracts correct parameters for tools
- **Sequential Execution**: If one tool depends on another, the agent chains them
- **Response Generation**: Synthesizes results into a natural answer

In [None]:
# Let's see what tools the agent has available
print("Available Tools Information:\n")
for tool in tools:
    print(f"Tool Name: {tool.name}")
    print(f"Description: {tool.description}")
    # Get the tool's parameters
    if hasattr(tool, 'args_schema'):
        print(f"Parameters: {tool.args_schema}")
    print("-" * 50)

## Step 5: Advanced - Creating Custom Tools

You can create domain-specific tools for your use case. Here are some practical examples:

In [None]:
# Example: Custom tools for a customer service agent
@tool
def search_knowledge_base(query: str) -> str:
    """Search our knowledge base for relevant articles and documentation."""
    # Simulate a knowledge base search
    knowledge_base = {
        "billing": "You can manage your billing at settings > billing",
        "password": "To reset your password, click 'forgot password' on login",
        "account": "You can update your account info in settings > profile",
    }
    
    for key, value in knowledge_base.items():
        if key in query.lower():
            return value
    return "No matching articles found. Please contact support."

@tool
def create_support_ticket(issue: str, email: str) -> str:
    """Create a support ticket for the user."""
    # Simulate ticket creation
    ticket_id = f"TKT-{hash(issue + email) % 10000}"
    return f"Support ticket created! Ticket ID: {ticket_id}. We'll respond to {email} within 24 hours."

@tool
def check_account_status(email: str) -> str:
    """Check the status of a user account."""
    # Simulate account status check
    return f"Account {email} is active and in good standing."

# Create a customer service agent
customer_service_tools = [search_knowledge_base, create_support_ticket, check_account_status]

# Bind these tools to the LLM
llm_with_customer_tools = llm.bind_tools(customer_service_tools)

# Create customer service agent
customer_service_agent = create_tool_calling_agent(
    llm_with_customer_tools, 
    customer_service_tools, 
    ChatPromptTemplate.from_messages([
        ("system", "You are a helpful customer service AI. Use available tools to help customers."),
        MessagesPlaceholder(variable_name="messages"),
    ])
)

customer_service_executor = AgentExecutor(
    agent=customer_service_agent, 
    tools=customer_service_tools, 
    verbose=True, 
    max_iterations=10
)

print("✓ Created specialized customer service agent with 3 custom tools")
print("  Tools available:")
print("  1. search_knowledge_base - Search documentation")
print("  2. create_support_ticket - Create support tickets")
print("  3. check_account_status - Check account status")

In [None]:
# Test the customer service agent
print("\n" + "=" * 60)
print("CUSTOMER SERVICE AGENT TEST")
print("=" * 60)
result_cs = customer_service_executor.invoke({
    "messages": [HumanMessage(content="I forgot my password, can you help?")]
})
print("\nFinal Answer:")
print(result_cs["output"])