### **Agentic Workflow**


**1. Installing the dependencies**

In [2]:
!pip install -qU langchain langchain-google-genai langchain-community langchain-chroma langchain-huggingface langgraph pymysql sentence-transformers
print("Dependencies installed successfully.")

Dependencies installed successfully.


**2. Config & Database Setup**

Here we set up the environment variables and connect to the *classicmodels* MySQL database hosted on an Amazon RDS instance.

In [3]:
# Setup Environment & Database
import os
import pymysql
from langchain_community.utilities import SQLDatabase
from langchain_google_genai import ChatGoogleGenerativeAI

In [4]:
# LangSmith tracing configuration.
# setdefault keeps any value that is already present in the environment
# (for example a shell export or a notebook secret) instead of overwriting it
# with the empty placeholder. Replace the "" placeholders only when nothing is
# configured outside the notebook.
os.environ.setdefault("LANGCHAIN_TRACING_V2", "")
os.environ.setdefault("LANGCHAIN_PROJECT", "")
os.environ.setdefault("LANGCHAIN_API_KEY", "")

In [5]:
# Database connectivity
# Credentials are read from environment variables so they never have to be
# typed into (and saved with) the notebook. Each lookup falls back to the
# original placeholder value when the variable is not set.
db_user = os.environ.get("DB_USER", "")
db_password = os.environ.get("DB_PASSWORD", "")
db_host = os.environ.get("DB_HOST", "")
db_name = os.environ.get("DB_NAME", "classicmodels")

In [6]:
# database connection
from urllib.parse import quote_plus

# URL-encode the credentials: characters such as "@", ":" or "/" inside a
# user name or password would otherwise be read as URI delimiters and corrupt
# the connection string. Empty or plain alphanumeric values are unchanged.
db = SQLDatabase.from_uri(
    f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_password)}@{db_host}/{db_name}"
)

# verifying connection
print("Database Connected:", db.dialect)
print(db.get_usable_table_names())
print(db.get_table_info())

Database Connected: mysql
['customers', 'employees', 'offices', 'orderdetails', 'orders', 'payments', 'productlines', 'products']

CREATE TABLE customers (
	`customerNumber` INTEGER NOT NULL, 
	`customerName` VARCHAR(50) NOT NULL, 
	`contactLastName` VARCHAR(50) NOT NULL, 
	`contactFirstName` VARCHAR(50) NOT NULL, 
	phone VARCHAR(50) NOT NULL, 
	`addressLine1` VARCHAR(50) NOT NULL, 
	`addressLine2` VARCHAR(50), 
	city VARCHAR(50) NOT NULL, 
	state VARCHAR(50), 
	`postalCode` VARCHAR(15), 
	country VARCHAR(50) NOT NULL, 
	`salesRepEmployeeNumber` INTEGER, 
	`creditLimit` DECIMAL(10, 2), 
	PRIMARY KEY (`customerNumber`), 
	CONSTRAINT customers_ibfk_1 FOREIGN KEY(`salesRepEmployeeNumber`) REFERENCES employees (`employeeNumber`)
)DEFAULT CHARSET=utf8mb4 ENGINE=InnoDB COLLATE utf8mb4_0900_ai_ci

/*
3 rows from customers table:
customerNumber	customerName	contactLastName	contactFirstName	phone	addressLine1	addressLine2	city	state	postalCode	country	salesRepEmployeeNumber	creditLimit
103	Atel

In [7]:
# LLM API key
# setdefault leaves a key that is already present in the environment untouched;
# the empty placeholder is only written when no key has been configured.
os.environ.setdefault("GOOGLE_API_KEY", "")

# LLM initialization (temperature=0 is passed to the chat model constructor)
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0)

**3. Vector Store for Dynamic Few-Shot Examples**

Here we create an agentic RAG-style tool. The agent can search this vector store to find similar SQL examples when it gets stuck.

In [8]:
# Create Example Retriever (RAG Tool)
from langchain_chroma import Chroma
from langchain_core.tools import tool
from langchain_huggingface import HuggingFaceEmbeddings

In [9]:
# Curated few-shot examples: each entry pairs a natural-language question
# ("input") with the SQL statement that answers it ("query").
examples = [
    dict(
        input="List all customers from the USA.",
        query="SELECT * FROM customers WHERE country = 'USA';",
    ),
    dict(
        input="Find the number of customers in each country.",
        query="SELECT country, COUNT(*) FROM customers GROUP BY country;",
    ),
    dict(
        input="Show customers whose credit limit is between 50000 and 100000.",
        query="SELECT customerName, creditLimit FROM customers WHERE creditLimit BETWEEN 50000 AND 100000;",
    ),
    dict(
        input="List all employees working in the Sales department.",
        query="SELECT firstName, lastName FROM employees WHERE jobTitle LIKE '%Sales%';",
    ),
    dict(
        input="Find employees working in the San Francisco office.",
        query="SELECT e.firstName, e.lastName FROM employees e JOIN offices o ON e.officeCode = o.officeCode WHERE o.city = 'San Francisco';",
    ),
    dict(
        input="List customer names along with their order numbers.",
        query="SELECT c.customerName, o.orderNumber FROM customers c JOIN orders o ON c.customerNumber = o.customerNumber;",
    ),
    dict(
        input="Find customers who have placed more than 5 orders.",
        query="SELECT customerNumber FROM orders GROUP BY customerNumber HAVING COUNT(*) > 5;",
    ),
]

In [10]:
# Vectorizing Examples
print("Embedding examples into ChromaDB...")

# Each example is embedded as a single "Question / SQL" document.
texts = [f"Question: {ex['input']}\nSQL: {ex['query']}" for ex in examples]
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Deterministic ids keep this cell idempotent: re-running it in the same kernel
# overwrites the existing entries of the "sql_examples" collection instead of
# adding a second copy of every example (duplicates would crowd the k=3
# results below with identical documents).
example_ids = [f"sql-example-{index}" for index in range(len(texts))]
vectorstore = Chroma.from_texts(
    texts,
    embeddings,
    ids=example_ids,
    collection_name="sql_examples",
)

# The retriever returns the 3 most similar examples per search.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

Embedding examples into ChromaDB...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [12]:
# Retrieval tool over the curated few-shot examples
@tool
def search_sql_examples(query: str) -> str:
    """
    Search for similar natural language questions and their corresponding SQL queries.
    Use this when you are unsure how to construct a complex query or want to follow best practices.
    """
    # NOTE: the docstring above is left verbatim on purpose; the @tool
    # decorator exposes it to the model as the tool description.
    matches = retriever.invoke(query)
    # One blank line between the retrieved "Question / SQL" documents.
    return "\n\n".join(match.page_content for match in matches)

print(" 'search_sql_examples' tool created successfully.")

 'search_sql_examples' tool created successfully.


**4. Define Agent Tools & System Prompt**

We create our own custom tools and let the agent call them as needed.

In [64]:
from langchain_community.agent_toolkits import SQLDatabaseToolkit

In [91]:
@tool
def get_db_schema(table_names: list[str]) -> str:
    """Get the schema for specific tables. Use this before writing any SQL."""
    # Only the requested tables are described, which keeps the token count low.
    try:
        return db.get_table_info(table_names=table_names)
    except ValueError as e:
        # A misspelled or unknown table name makes get_table_info raise.
        # Report it in the same "Error: ..." form that execute_sql uses so the
        # agent can correct the name and retry instead of aborting the run.
        return f"Error: {e}"

@tool
def execute_sql(query: str) -> str:
    """Execute a SQL query against the database and return results."""
    import re  # local import keeps this cell runnable on its own

    # The SQL is generated by the model and is therefore untrusted input.
    # Coarse guard: only statements whose first keyword is read-oriented are
    # forwarded to the database. This is defence in depth, not a full SQL
    # parser -- the database account itself should also be read-only.
    read_only_keywords = {"SELECT", "SHOW", "DESCRIBE", "DESC", "WITH"}
    first_keyword = re.match(r"[\s(]*([A-Za-z]+)", query or "")
    if first_keyword is None or first_keyword.group(1).upper() not in read_only_keywords:
        return "Error: only read-only statements (SELECT, SHOW, DESCRIBE, WITH) are allowed."

    try:
        return db.run(query)
    except Exception as e:
        # Deliberately broad: the error text is handed back to the agent so it
        # can repair the query rather than crash the conversation.
        return f"Error: {e}"

# Full tool set handed to the agent: the few-shot example search plus the two
# database tools defined in this cell.
manual_tools = [
    search_sql_examples,
    get_db_schema,
    execute_sql,
]

In [90]:
from langgraph.graph import MessagesState
from langchain_core.messages import SystemMessage

def sql_agent_middleware(state: MessagesState):
    """Build the LLM input for one model call: system prompt + conversation.

    Intended for use as the ``pre_model_hook`` of the ReAct agent.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.

    Returns:
        A state update with a single ``llm_input_messages`` key holding the
        system message followed by the existing messages.
    """
    # 1. Dynamic table injection
    table_list = db.get_usable_table_names()

    # 2. System message construction
    system_message = SystemMessage(content=f"""
        You are a SQL expert. Available tables: {table_list}
        Follow ReAct pattern: 1. Search, 2. Schema, 3. Execute.
        Only return final natural language answers.
    """)

    # Return the prompt under "llm_input_messages" rather than "messages".
    # A "messages" update is merged into the stored history, so every model
    # call would append another copy of the system message to the checkpointed
    # conversation (after the user turn, not before it). "llm_input_messages"
    # is used as the model input for this call only and leaves the stored
    # history untouched.
    return {"llm_input_messages": [system_message] + state["messages"]}

**5. Build & Run the Agent**

This uses `create_react_agent` (the LangGraph prebuilt ReAct agent) and `MemorySaver` to persist the conversation history across turns.

In [97]:
from langchain.agents import create_agent
from langgraph.checkpoint.memory import MemorySaver
# create_react_agent is the function actually called below; without this import
# the cell only works when the name is left over from an earlier kernel state.
from langgraph.prebuilt import create_react_agent

# In-memory checkpointer: conversation state is stored per thread_id.
memory = MemorySaver()

# Creating the agent with our manual tools and middleware function.
# NOTE: pre_model_hook is a create_react_agent parameter, so this cell keeps
# using create_react_agent even though LangGraph reports it as deprecated.
agent_executor = create_react_agent(
    llm,
    manual_tools,
    checkpointer=memory,
    pre_model_hook=sql_agent_middleware  # runs before every model call
)

/tmp/ipython-input-2936040577.py:7: LangGraphDeprecatedSinceV10: create_react_agent has been moved to `langchain.agents`. Please update your import to `from langchain.agents import create_agent`. Deprecated in LangGraph V1.0 to be removed in V2.0.
  agent_executor = create_react_agent(


In [92]:
import uuid

def _content_to_text(content) -> str:
    """Flatten message content (a string or a list of parts) into plain text."""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        if pieces:
            return "".join(pieces)
    # No recognisable text parts: fall back to the raw representation.
    return str(content)


def run_query(question: str, thread_id: str) -> None:
    """Send one user question to the agent and print the run as it unfolds.

    Args:
        question: Natural-language question for the agent.
        thread_id: Conversation identifier passed to the checkpointer; reusing
            it continues the same conversation.

    Prints the question, every tool call, a 100-character preview of each tool
    result, and finally the agent's answer. Returns nothing.
    """
    print(f"\n USER: {question}")
    config = {"configurable": {"thread_id": thread_id}}

    final_msg = None
    # Streaming the reasoning process for better visibility
    for event in agent_executor.stream(
        {"messages": [("user", question)]},
        config,
        stream_mode="values"
    ):
        last_msg = event["messages"][-1]
        final_msg = last_msg

        # Displaying intermediate tool usage (messages without tool calls,
        # or with an empty list, print nothing here)
        for tc in getattr(last_msg, "tool_calls", None) or []:
            print(f" AGENT CALLING TOOL: {tc['name']}")

        if last_msg.type == "tool":
            print(f" TOOL OUTPUT: {str(last_msg.content)[:100]}...")

    # The stream may yield nothing; avoid reading an unbound loop variable.
    if final_msg is None:
        print(" AGENT: (no response)")
        return

    # Content may be a plain string or a list of parts (strings and/or dicts
    # with a "text" key); print all text parts, not just the first one.
    print(f" AGENT: {_content_to_text(final_msg.content)}")

# Smoke test: a single question in a fresh conversation thread
session_id = f"{uuid.uuid4()}"
run_query(
    question="Which customers have placed the most orders?",
    thread_id=session_id,
)


 USER: Which customers have placed the most orders?
 AGENT CALLING TOOL: get_db_schema
 TOOL OUTPUT: 
CREATE TABLE customers (
	`customerNumber` INTEGER NOT NULL, 
	`customerName` VARCHAR(50) NOT NULL,...
 AGENT CALLING TOOL: execute_sql
 TOOL OUTPUT: [('Euro+ Shopping Channel', 26), ('Mini Gifts Distributors Ltd.', 17), ('Danish Wholesale Imports', ...
 AGENT: The customers who have placed the most orders are:

*   Euro+ Shopping Channel: 26 orders
*   Mini Gifts Distributors Ltd.: 17 orders
*   Danish Wholesale Imports: 5 orders
*   Australian Collectors, Co.: 5 orders
*   Dragon Souveniers, Ltd.: 5 orders


In [62]:

# Context-aware test: both questions share one thread_id, so the second one
# ("what about france?") can only be answered from the conversation history.
session_id = str(uuid.uuid4())  # Unique ID for this conversation

for question in (
    "How many customers do we have in usa",  # Test 1: simple question
    "what about france?",                    # Test 2: follow-up relying on memory
):
    run_query(question, session_id)


 USER: How many customers do we have in usa
 AGENT CALLING TOOL: get_db_schema
 TOOL OUTPUT: 
CREATE TABLE customers (
	`customerNumber` INTEGER NOT NULL, 
	`customerName` VARCHAR(50) NOT NULL,...
 AGENT CALLING TOOL: execute_sql
 TOOL OUTPUT: [(36,)]...
 AGENT: We have 36 customers in the USA.

 USER: what about france?
 AGENT CALLING TOOL: execute_sql
 TOOL OUTPUT: [(12,)]...
 AGENT: We have 12 customers in France.
