In [None]:
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Agentic RAG with MCP and ADK

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ayoisio/genai-on-google-cloud/blob/main/chapter-2/colabs/08_agentic_rag_mcp_adk.ipynb)

**Estimated Time**: 30-45 minutes

**Prerequisites**: Google Cloud project with billing enabled, Spanner API and Vertex AI API enabled

---

## Overview

This notebook demonstrates **Agentic RAG** - the evolution of RAG where an intelligent agent orchestrates retrieval and generation using the **Model Context Protocol (MCP)** and **Agent Development Kit (ADK)**.

### Key Concepts

- **MCP (Model Context Protocol)**: A standardized protocol for tools to communicate with LLMs
- **ADK (Agent Development Kit)**: Google's toolkit for building production-ready AI agents
- **Agentic RAG**: Agents that autonomously decide when and how to retrieve information

### What You'll Build

1. **Knowledge Graph Setup**: Spanner Graph with entities and relationships
2. **MCP Server**: Tools exposed via Model Context Protocol
3. **ADK Agent**: Intelligent agent that uses MCP tools for retrieval
4. **Interactive Q&A**: Agent-powered question answering system

### Learning Goals

By the end of this notebook, you will be able to:
- Understand the Agentic RAG pattern and its advantages
- Implement MCP servers for tool exposure
- Build ADK agents with custom tools
- Create autonomous retrieval workflows

---

**Original Author**: [Tristan Li](https://github.com/codingphun)

## Before you begin

1. In the Google Cloud console, on the project selector page, select or [create a Google Cloud project](https://cloud.google.com/resource-manager/docs/creating-managing-projects).
1. [Make sure that billing is enabled for your Google Cloud project](https://cloud.google.com/billing/docs/how-to/verify-billing-enabled#console).
1. [Make sure Cloud Spanner API is enabled](https://console.cloud.google.com/flows/enableapi?apiid=spanner.googleapis.com)

### Required roles

To get the permissions that you need to complete the tutorial, ask your administrator to grant you the [Owner](https://cloud.google.com/iam/docs/understanding-roles#owner) (`roles/owner`) IAM role on your project. For more information about granting roles, see [Manage access](https://cloud.google.com/iam/docs/granting-changing-revoking-access).


### Install Python Libraries

In [None]:
# Install the pinned agent (ADK, google-genai), MCP, Spanner, and LangChain packages used below.
# NOTE(review): json-repair is the only package in this cell without a version pin.
%pip install --upgrade --quiet google-adk==1.11.0 google-genai==1.31.0
%pip install --upgrade --quiet mcp==1.13.0
%pip install --quiet google-cloud-spanner==3.57.0
%pip install --quiet langchain-google-spanner==0.9.0
%pip install --quiet langchain-google-vertexai==2.0.28
%pip install --quiet json-repair networkx==3.5 langchain-text-splitters==0.3.9 langchain-experimental==0.3.4

### Authenticating your notebook environment
* If you are using **Colab** to run this notebook, run the cell below and continue.
* If you are using **Vertex AI Workbench**, check out the setup instructions [here](https://github.com/GoogleCloudPlatform/generative-ai/tree/main/setup-env).

In [None]:
import sys

# Only authenticate through Colab's helper when the Colab runtime module is loaded.
if "google.colab" in sys.modules:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()
# Show the interpreter version for troubleshooting.
print(sys.version)
# If using local jupyter instance, uncomment and run:
# !gcloud auth login

## Initialize and Import

In [None]:
# Notebook-wide configuration. GCP_PROJECT_ID is empty by default and must be
# filled in before running the cells below.
GCP_PROJECT_ID = ""  # @param {type:"string"}
REGION = "us-central1"  # @param {type:"string"}
MODEL_NAME = "gemini-2.5-flash"  # @param {type:"string"}
EMBEDDING_MODEL_NAME = "text-embedding-005"  # @param {type:"string"}
TASK_TYPE = "SEMANTIC_SIMILARITY"  # @param {type:"string"}
SPANNER_INSTANCE_ID = "graphrag-instance"  # @param {type:"string"}
SPANNER_DATABASE_ID = "graphrag"  # @param {type:"string"}
SPANNER_GRAPH_NAME = "wikigraph"  # @param {type:"string"}

In [None]:
# Point both the gcloud CLI and the GOOGLE_CLOUD_PROJECT environment variable
# at the project configured above.
!gcloud config set project {GCP_PROJECT_ID} --quiet
%env GOOGLE_CLOUD_PROJECT={GCP_PROJECT_ID}

### Import Packages

In [None]:
import os

from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_google_vertexai import VertexAI
from langchain_text_splitters import RecursiveCharacterTextSplitter

## Create Spanner Instance and Database

Before building the knowledge graph, we'll create the Google Cloud Spanner instance and database that will store it. The same database will also hold the accompanying embeddings, using Spanner's vector search support to enable efficient semantic search.


In [None]:
# Enable the Spanner API and create a one-node Enterprise edition instance.
# NOTE(review): the instance config is the literal regional-us-central1 and does
# not follow the REGION constant.
!gcloud config set project {GCP_PROJECT_ID}
!gcloud services enable spanner.googleapis.com
!gcloud spanner instances create {SPANNER_INSTANCE_ID} --config=regional-us-central1 --description="Graph RAG Instance" --nodes=1 --edition=ENTERPRISE

In [None]:
def create_database(project_id, instance_id, database_id):
    """Create a Spanner database together with its ``KgNode`` table.

    Args:
        project_id: Google Cloud project passed to the Spanner client.
        instance_id: ID of the Spanner instance that will host the database.
        database_id: ID to give the new database.

    Waits up to 60 seconds for the create operation to finish, then prints
    the created database's name and the instance path.
    """
    from google.cloud import spanner
    from google.cloud.spanner_admin_database_v1.types import spanner_database_admin

    OPERATION_TIMEOUT_SECONDS = 60

    # Table holding one row per text chunk: extracted names, chunk text, embedding.
    kg_node_ddl = """CREATE TABLE KgNode (
            DocId        INT64 NOT NULL,
            Name STRING(1024),
            DOC STRING(1024),
            DocEmbedding ARRAY<FLOAT64>
            ) PRIMARY KEY (DocId)"""

    client = spanner.Client(project_id)
    admin_api = client.database_admin_api
    instance_path = admin_api.instance_path(client.project, instance_id)

    create_request = spanner_database_admin.CreateDatabaseRequest(
        parent=instance_path,
        create_statement=f"CREATE DATABASE `{database_id}`",
        extra_statements=[kg_node_ddl],
    )
    pending_operation = admin_api.create_database(request=create_request)

    print("Waiting for operation to complete...")
    created_database = pending_operation.result(OPERATION_TIMEOUT_SECONDS)

    print(f"Created database {created_database.name} on instance {instance_path}")

In [None]:
from google.cloud import spanner

# Create the database (and its KgNode table) on the instance created above.
create_database(GCP_PROJECT_ID, SPANNER_INSTANCE_ID, SPANNER_DATABASE_ID)

## Create a Knowledge Graph With LangChain and Gemini

These texts extracted from Wikipedia are about [Larry Page](https://en.wikipedia.org/wiki/Larry_Page), co-founder of Google. These texts will be used to create a knowledge graph about Larry Page as well as embedding vectors for semantic search.

In [None]:
# Source passage about Larry Page; used below for both graph extraction and embeddings.
text = """Lawrence Edward Page (born March 26, 1973) is an American businessman and computer scientist best known for co-founding Google with Sergey Brin.
Lawrence Edward Page was chief executive officer of Google from 1997 until August 2001 when he stepped down in favor of Eric Schmidt,
and then again from April 2011 until July 2015 when he became CEO of its newly formed parent organization Alphabet Inc.
He held that post until December 4, 2019, when he and Brin stepped down from all executive positions and day-to-day roles within the company.
He remains an Alphabet board member, employee, and controlling shareholder. Lawrence Edward Page has an estimated net worth of $156 billion as of June 2024,
according to the Bloomberg Billionaires Index, and $145.2 billion according to Forbes, making him the fifth-richest person in the world.
He has also invested in flying car startups Kitty Hawk and Opener. Like his Google co-founder, Sergey Brin, Page attended Montessori schools until he entered high school.
They both cite the educational method of Maria Montessori as the major influence in how they designed Google's work systems.
Maria Montessori believed that the liberty of the child was of utmost importance. In some sense, I feel like music training led to the high-speed legacy of Google for me"""

We will use Gemini and LangChain's `LLMGraphTransformer` to parse the text and generate a knowledge graph.

LangChain uses Gemini to identify and extract key information from the text, such as people, countries, and their nationalities, and constructs a knowledge graph restricted to the node and relationship types we define.

In [None]:
# Wrap the raw passage in a single LangChain Document.
documents = [Document(page_content=text)]

# Gemini model (through Vertex AI) that performs the graph extraction.
llm = VertexAI(model_name=MODEL_NAME, project=GCP_PROJECT_ID, location=REGION)

# Restrict extraction to the node and relationship types listed here.
llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Organization", "Asset"],
    allowed_relationships=[
        "NATIONALITY",
        "LOCATED_IN",
        "WORKED_AT",
        "SPOUSE",
        "NET_WORTH",
        "INVESTMENT",
        "INFLUENCED_BY",
    ],
)
# Convert the documents into graph documents; these are stored in Spanner later.
graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(
    documents
)

## Store the Knowledge Graph in Spanner

Now that the Spanner database is created, we will store the knowledge graph in the Spanner Graph store.

In [None]:
from langchain_google_spanner import SpannerGraphStore

# Handle to the Spanner graph (instance, database, graph name) that stores the knowledge graph.
graph_store = SpannerGraphStore(
    instance_id=SPANNER_INSTANCE_ID,
    database_id=SPANNER_DATABASE_ID,
    graph_name=SPANNER_GRAPH_NAME,
)

In [None]:
# WARNING: cleanup() runs every time this cell is executed and COULD REMOVE DATA
# FROM YOUR DATABASE. Comment the call out if you need to keep data from
# previous iterations.
graph_store.cleanup()

# Store each extracted graph document in the Spanner graph store.
for graph_document in graph_documents_filtered:
    graph_store.add_graph_documents([graph_document])

#### Generate the embeddings

We will now generate embeddings for the vector search part of GraphRAG. For more information on how this works, see the [GraphRAG infrastructure using Vertex AI and Spanner Graph](https://cloud.google.com/architecture/gen-ai-graphrag-spanner) architecture guide.

In [None]:
import json
from google import genai
from langchain.text_splitter import RecursiveCharacterTextSplitter

def get_embedding(embtext, client):
    """Return the embedding vector for ``embtext``, or ``[]`` if the request fails.

    Args:
        embtext: Text to embed.
        client: Client object exposing ``models.embed_content``.

    Returns:
        The values of the first embedding in the response, or an empty list
        when the request raises (best-effort behaviour kept for callers).
    """
    try:
        response = client.models.embed_content(
            model=EMBEDDING_MODEL_NAME,
            contents=embtext,
            config=genai.types.EmbedContentConfig(task_type=TASK_TYPE),
        )
        return response.embeddings[0].values
    except Exception as exc:
        # Catch Exception instead of using a bare except so KeyboardInterrupt and
        # SystemExit still propagate, and report the failure rather than hiding it.
        print(f"Embedding request failed: {exc!r}")
        return []

# Build the [embedding, chunk text, person names] rows that are saved to Spanner later.
client = genai.Client(vertexai=True, project=GCP_PROJECT_ID, location=REGION)
documents = [Document(page_content=text)]
spanner_embedding_values = []
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500, chunk_overlap=100, separators=["\n\n", "\n", r"(?<=\. )", " ", ""]
)
splitted_text = splitter.split_documents(documents)
for chunk in splitted_text:
    chunk_content = chunk.page_content
    embedding = get_embedding(chunk_content, client)
    if not embedding:
        # get_embedding returns [] on failure. Storing a zero-length vector would
        # break the COSINE_DISTANCE lookup later, so skip the chunk instead.
        print(f"Skipping chunk without an embedding: {chunk_content[:10]}")
        continue

    # Ask Gemini for the person names mentioned in this chunk; they become the
    # "Name" value stored next to the chunk's embedding.
    user_prompt_content = (
        "Find person's names but ignore any pronoun in the following sentence \n"
        + chunk_content
    )
    response = client.models.generate_content(
        model=MODEL_NAME,
        contents=user_prompt_content,
        config=genai.types.GenerateContentConfig(
            temperature=0.1,
            response_mime_type="application/json",
            response_schema={
                "type": "OBJECT",
                "properties": {"nodes": {"type": "STRING"}},
            },
            thinking_config=genai.types.ThinkingConfig(
                thinking_budget=-1
            )
        ),
    )
    response_content = json.loads(response.candidates[0].content.parts[0].text)["nodes"]
    print(embedding[:10], chunk_content[:10], response_content)
    spanner_embedding_values.append([embedding, chunk_content, response_content])

#### Save the embeddings into Spanner

In [None]:
from google.cloud import spanner

# Client, instance, and database handles used to write the embedding rows.
spanner_client = spanner.Client(GCP_PROJECT_ID)
instance = spanner_client.instance(SPANNER_INSTANCE_ID)
database = instance.database(SPANNER_DATABASE_ID)

def insert_values(transaction):
    """Insert every row of ``spanner_embedding_values`` into the ``KgNode`` table.

    Args:
        transaction: Spanner transaction object providing ``execute_update``.

    Each entry of ``spanner_embedding_values`` is read as
    ``[embedding, chunk text, names]``; ``docID`` is the entry's 1-based
    position in the list. Prints the affected row count after every insert.
    """
    insert_sql = (
        "INSERT INTO KgNode (docID, Name, Doc, DocEmbedding) "
        "VALUES (@value1, @value2, @value3, @value4)"
    )
    value_types = {
        "value1": spanner.param_types.INT64,
        "value2": spanner.param_types.STRING,
        "value3": spanner.param_types.STRING,
        "value4": spanner.param_types.Array(spanner.param_types.FLOAT64),
    }

    for doc_id, row in enumerate(spanner_embedding_values, start=1):
        inserted = transaction.execute_update(
            insert_sql,
            params={
                "value1": doc_id,
                "value2": row[2],
                "value3": row[1],
                "value4": row[0],
            },
            param_types=value_types,
        )

        print(f"{inserted} record(s) inserted.")

# Execute insert_values inside a single read-write transaction.
database.run_in_transaction(insert_values)

## Build a MCP server and ADK Q&A agent

Now that the knowledge graph and embeddings are stored in Spanner, we will build an MCP server that retrieves the data and returns the result to an ADK Q&A agent.

#### Generate the MCP server code

***Be sure*** to add your project ID in the server code below. For production deployment, these parameters can be set through either environment variables or a configuration file. See [Environment-Specific Configurations](https://google.github.io/adk-docs/tools/mcp-tools/#cloud-run_1) for more detail.

In [None]:
%%writefile my_mcp_server.py

# my_mcp_server.py - Can be deployed to Cloud Run service using Streamable HTTP
import contextlib
import logging
from collections.abc import AsyncIterator
from typing import Any

import anyio
import click
import mcp.types as types
from mcp.server.lowlevel import Server
from mcp.server.streamable_http_manager import StreamableHTTPSessionManager
from starlette.applications import Starlette
from starlette.routing import Mount
from starlette.types import Receive, Scope, Send

import json
import os
from google.cloud import spanner
from google import genai
from langchain_google_spanner import SpannerGraphQAChain, SpannerGraphStore
from langchain_google_vertexai import ChatVertexAI

logger = logging.getLogger(__name__)

def create_mcp_server():
    """Create and configure the MCP server.

    Builds a low-level MCP ``Server`` that advertises a single tool,
    ``ask_graph``, and dispatches incoming tool calls to it.

    Returns:
        The configured ``Server`` instance.
    """
    app = Server("adk-mcp-streamable-server")

    async def ask_graph(query: str) -> str:
        """Answer ``query`` using the knowledge graph stored in Spanner.

        The query is embedded, the closest ``KgNode`` row is found by cosine
        distance, Gemini rewrites the query using that row's names, and the
        rewritten query is run through ``SpannerGraphQAChain``.

        NOTE(review): none of the client calls below are awaited, so they
        appear to run synchronously inside this coroutine.

        Returns:
            The ``"result"`` entry of the QA chain's response.
        """
        # Falls back to the GOOGLE_CLOUD_PROJECT environment variable; replace the
        # "" default with your project ID if you prefer to hardcode it.
        GCP_PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT", "")
        REGION = "us-central1"
        MODEL_NAME = "gemini-2.5-flash"
        EMBEDDING_MODEL_NAME = "text-embedding-005"
        TASK_TYPE = "SEMANTIC_SIMILARITY"
        SPANNER_INSTANCE_ID = "graphrag-instance"
        SPANNER_DATABASE_ID = "graphrag"
        SPANNER_GRAPH_NAME = "wikigraph"

        graph_store = SpannerGraphStore(
            instance_id=SPANNER_INSTANCE_ID,
            database_id=SPANNER_DATABASE_ID,
            graph_name=SPANNER_GRAPH_NAME,
        )

        # Initialize llm object
        llm = ChatVertexAI(model=MODEL_NAME, temperature=0)

        # Initialize GraphQAChain
        chain = SpannerGraphQAChain.from_llm(
            llm,
            graph=graph_store,
            allow_dangerous_requests=True,
            verbose=False,
            return_intermediate_steps=True,
        )

        # Embed the incoming query so it can be compared with the stored chunk embeddings.
        client = genai.Client(vertexai=True, project=GCP_PROJECT_ID, location=REGION)
        embed_response = client.models.embed_content(
            model=EMBEDDING_MODEL_NAME,
            contents=query,
            config=genai.types.EmbedContentConfig(task_type=TASK_TYPE),
        )
        q_emb = embed_response.embeddings[0].values

        # Look up the names stored with the chunk closest to the query.
        spanner_client = spanner.Client(GCP_PROJECT_ID)
        instance = spanner_client.instance(SPANNER_INSTANCE_ID)
        database = instance.database(SPANNER_DATABASE_ID)
        kgnodename = ""  # stays empty when KgNode returns no row
        with database.snapshot() as snapshot:
            results = snapshot.execute_sql(
                """SELECT DocId, NAME, Doc FROM KgNode ORDER BY COSINE_DISTANCE(DocEmbedding, @q_emb) limit 1""",
                params={"q_emb": q_emb},
                param_types={
                    "q_emb": spanner.param_types.Array(spanner.param_types.FLOAT64)
                },
            )
            for row in results:
                kgnodename = str(row[1])

        # Ask Gemini to rewrite person names in the query using the stored names.
        # The leading "\n" keeps the query from running straight into the
        # instruction text that follows it.
        user_prompt_content = (
            "Find and replace entities such as person's name, place, nationality in the following sentence \n"
            + query
            + "\nwith entities defined below \n"
            + kgnodename
            + "\n only replace matching person's name \n output the best replacement in a string"
        )
        rewrite_response = client.models.generate_content(
            model=MODEL_NAME,
            contents=user_prompt_content,
            config=genai.types.GenerateContentConfig(
                temperature=0.1,
                response_mime_type="application/json",
                response_schema={
                    "type": "OBJECT",
                    "properties": {"sentence": {"type": "STRING"}},
                },
                thinking_config=genai.types.ThinkingConfig(
                    thinking_budget=-1
                )
            ),
        )
        rewritten = json.loads(rewrite_response.candidates[0].content.parts[0].text)
        # The schema does not mark "sentence" as required, so fall back to the
        # caller's query when the key is missing or empty.
        rewritten_query = rewritten.get("sentence") or query

        qa_response = chain.invoke("query=" + rewritten_query)
        return qa_response["result"]

    @app.call_tool()
    async def call_tool(name: str, arguments: dict[str, Any]) -> list[types.ContentBlock]:
        """Handle tool calls from MCP clients."""

        # ask_graph is the only tool this server exposes.
        if name == "ask_graph":
            query = arguments.get("query", "No input provided")
            answer = await ask_graph(query)
            logger.info(answer)
            return [
                types.TextContent(
                    type="text",
                    text=answer
                )
            ]
        else:
            raise ValueError(f"Unknown tool: {name}")

    @app.list_tools()
    async def list_tools() -> list[types.Tool]:
        """Describe the tools this server exposes to MCP clients."""
        logger.info("List available tools.")

        return [
            types.Tool(
                name="ask_graph",
                description="Tool to call Knowledge Graph stored in the Spanner database",
                inputSchema={
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "The query string to process."
                        }
                    },
                    "required": ["query"]
                }
            )
        ]

    return app

def main(port: int = 8081, json_response: bool = False):
    """Serve the MCP server over Streamable HTTP with uvicorn.

    Args:
        port: TCP port that uvicorn listens on.
        json_response: Forwarded unchanged to ``StreamableHTTPSessionManager``.
    """
    logging.basicConfig(level=logging.INFO)

    # Stateless session manager: important for Cloud Run scalability.
    session_manager = StreamableHTTPSessionManager(
        app=create_mcp_server(),
        event_store=None,
        json_response=json_response,
        stateless=True,
    )

    async def handle_streamable_http(scope: Scope, receive: Receive, send: Send) -> None:
        """ASGI endpoint that hands each request to the session manager."""
        await session_manager.handle_request(scope, receive, send)

    @contextlib.asynccontextmanager
    async def lifespan(_starlette_app: Starlette) -> AsyncIterator[None]:
        """Keep the session manager running for the lifetime of the ASGI app."""
        async with session_manager.run():
            logger.info("MCP Streamable HTTP server started!")
            try:
                yield
            finally:
                logger.info("MCP server shutting down...")

    # ASGI application exposing the MCP endpoint under /mcp (debug stays off).
    routes = [Mount("/mcp", app=handle_streamable_http)]
    starlette_app = Starlette(debug=False, routes=routes, lifespan=lifespan)

    import uvicorn
    uvicorn.run(starlette_app, host="0.0.0.0", port=port)

# Start the server with main()'s defaults (port 8081) when run as a script.
# NOTE(review): the port is not read from the environment — confirm this fits
# the intended Cloud Run deployment.
if __name__ == "__main__":
    main()

#### Run the MCP server locally

Start a terminal and navigate to the folder where this notebook is stored. Run the following commands to set up the environment so that the MCP server code runs properly:


`gcloud auth application-default login`

`gcloud auth application-default set-quota-project your_project_id`

`gcloud config set project your_project_id`

`uv run my_mcp_server.py`

#### Setup ADK client code

With the MCP server running, we can now set up an agent with ADK that retrieves information stored in Spanner Graph through MCP tool calling.

In [None]:
import os
# Environment variables for the ADK client: use Vertex AI with the project and
# region configured at the top of the notebook.
os.environ["GOOGLE_GENAI_USE_VERTEXAI"] = "1"
os.environ["GOOGLE_CLOUD_PROJECT"] = GCP_PROJECT_ID
os.environ["GOOGLE_CLOUD_LOCATION"] = REGION

In [None]:
from google.adk.agents.llm_agent import LlmAgent
from google.adk.tools.mcp_tool.mcp_toolset import (
    MCPToolset,
    StreamableHTTPConnectionParams
)

# MCP toolset pointing at the locally running server started from my_mcp_server.py
# (port 8081, mounted under /mcp).
toolset = MCPToolset(
    connection_params=StreamableHTTPConnectionParams(
        url="http://localhost:8081/mcp",
        headers={'Accept': 'text/event-stream'}
        # Add "Authorization": "Bearer your-auth-token" here if the server requires authorization.
    )
)
# Agent that answers questions by calling the tools exposed through the MCP toolset.
root_agent = LlmAgent(
    model=MODEL_NAME,
    name="graph_rag_agent",
    description=(
        "Agent to answer questions from a MCP knowledge graph server"
    ),
    instruction=(
        """You are a helpful information retrieval agent that can answer user's query from a MCP
        knowledge graph server provided by the following tool call.
          - If the query can be answered from the graph, then call the ask_graph tool.
          - Always be courteous and dont assume anything.
          - If you dont know the answer, say I dont know the answer."""
    ),
    tools=[toolset]
)

In [None]:
from google.adk.agents import Agent
from google.adk.tools import google_search
from google.genai import types

from google.adk.sessions import InMemorySessionService
from google.adk.runners import Runner

# Identifiers used to create the in-memory session and to address it from the runner.
# NOTE(review): APP_NAME says "google_search_agent" although the agent in use is
# graph_rag_agent — confirm whether the name is intentional.
APP_NAME="google_search_agent"
USER_ID="user1234"
SESSION_ID="1234"

async def run_agent():
    """Send one sample question to ``root_agent`` and print its final response.

    Creates an in-memory session, runs the agent through a ``Runner``, and
    prints the text of each final-response event. Any exception is caught and
    reported with a printed message instead of being raised.
    """
    try:
        session_service = InMemorySessionService()
        # The session must exist before the runner is asked to use SESSION_ID;
        # the returned session object itself is not needed here.
        await session_service.create_session(
            app_name=APP_NAME, user_id=USER_ID, session_id=SESSION_ID
        )
        runner = Runner(agent=root_agent, app_name=APP_NAME, session_service=session_service)

        query = "What businesses does Larry Page invest in?"
        content = types.Content(role="user", parts=[types.Part(text=query)])
        async for event in runner.run_async(
            user_id=USER_ID, session_id=SESSION_ID, new_message=content
        ):
            # Guard against final events that carry no content or parts, which
            # would otherwise raise here and abort the loop via the except below.
            if event.is_final_response() and event.content and event.content.parts:
                final_response = event.content.parts[0].text
                print("Agent Response: ", final_response)
    except Exception as e:
        print(f"A general error occurred: {e}")

# Top-level await: relies on the notebook kernel running this cell inside an event loop.
await run_agent()

## Clean Up

*   Delete the Spanner instance

In [None]:
# Delete the Spanner instance created earlier; --quiet skips the confirmation prompt.
!gcloud spanner instances delete {SPANNER_INSTANCE_ID} --quiet

## What's next

* GraphRAG infrastructure for generative AI using [Vertex AI and Spanner Graph](https://cloud.google.com/architecture/gen-ai-graphrag-spanner).
* Dive deeper into [LangChain with Spanner](https://github.com/googleapis/langchain-google-spanner-python/tree/main).
* Learn more about [Spanner](https://cloud.google.com/spanner/docs/getting-started/python).
* Explore other [Spanner Graph Notebooks](https://github.com/cloudspannerecosystem/spanner-graph-notebook/blob/main/README.md).
* Learn more about [Agent Development Kit](https://google.github.io/adk-docs/).