In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Build with RAG Engine in Vertex AI

| | |
|-|-|
| Author(s) | [Laxmi Harikumar](https://github.com/laxmih-genai) |

## Overview

**Retrieval Augmented Generation (RAG)** improves large language models by allowing them to access and process external information sources during generation. This helps ground the model's responses in factual data and reduces hallucinations.

RAG Engine API



For more information, refer to the public documentation for [Vertex AI RAG Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/rag-overview).

This notebook provides a hands-on tutorial for the RAG Engine API, covering the following steps:

- Initialize the Vertex AI SDK
- Create a RAG corpus by specifying an embedding model (the corpus uses the default RAG-managed vector database)
- Upload a local PDF file to the corpus
- Import Alphabet PDFs from a public Google Cloud Storage bucket
- Import a scanned PDF
- Set up a retrieval tool
- Use your RAG retrieval tool to add context to Gemini's responses to user queries


## Get started

### Install Vertex AI SDK and other required packages


In [None]:
# Install (or upgrade) the Vertex AI SDK quietly into the user site-packages.
# The runtime restart in the next step is what makes the new version importable.
%pip install --upgrade --user --quiet google-cloud-aiplatform

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.5/6.5 MB[0m [31m55.9 MB/s[0m eta [36m0:00:00[0m
[0m

### Restart runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which restarts the current kernel.

The restart might take a minute or longer. After it's restarted, continue to the next step.

In [None]:
import IPython

# Ask the running kernel to shut itself down and start again (restart=True),
# so that the packages installed above are picked up by later cells.
IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

<div class="alert alert-block alert-warning">
<b>⚠️ The kernel is going to restart. Wait until it's finished before continuing to the next step. ⚠️</b>
</div>


### Authenticate your notebook environment (Colab only)

If you're running this notebook on Google Colab, run the cell below to authenticate your environment.

In [None]:
import sys

# The google.colab module is only loaded when the notebook runs on Colab,
# so its presence in sys.modules is used as the environment check.
running_in_colab = "google.colab" in sys.modules

if running_in_colab:
    from google.colab import auth as colab_auth

    # Authenticate the Colab user for the rest of the notebook.
    colab_auth.authenticate_user()



### Set Google Cloud project information and initialize Vertex AI SDK

To get started using Vertex AI, you must have an existing Google Cloud project and [enable the Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com).

Learn more about [setting up a project and a development environment](https://cloud.google.com/vertex-ai/docs/start/cloud-environment).

In [27]:
# Use the environment variable if the user doesn't provide Project ID.
import os

import vertexai

# Replace the placeholder with your own project ID, or leave it unchanged to
# fall back to the GOOGLE_CLOUD_PROJECT environment variable.
PROJECT_ID = "[your-project-id]"  # @param {type:"string", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")

# Fail early with a clear message rather than initializing the SDK with the
# literal string "None" when no project has been configured anywhere.
if not PROJECT_ID:
    raise ValueError(
        "No Google Cloud project configured: set PROJECT_ID in this cell or "
        "export the GOOGLE_CLOUD_PROJECT environment variable."
    )

# Region used for Vertex AI calls; defaults to us-central1 when
# GOOGLE_CLOUD_REGION is not set.
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

vertexai.init(project=PROJECT_ID, location=LOCATION)

### Import libraries

In [28]:
from IPython.display import Markdown
from vertexai.preview import rag
from vertexai.preview.generative_models import GenerativeModel, Tool

### Create a RAG Corpus

Configure the Embedding model

In [65]:
EMBEDDING_MODEL = "text-embedding-004"  # @param {type:"string", isTemplate: true}

# The config refers to the embedding model by its publisher model path.
embedding_model_config = rag.EmbeddingModelConfig(
    publisher_model=f"publishers/google/models/{EMBEDDING_MODEL}",
)

In [66]:
# Display the config object to confirm the publisher model path it carries.
embedding_model_config

EmbeddingModelConfig(publisher_model='publishers/google/models/text-embedding-004', endpoint=None, model=None, model_version_id=None)

In [67]:
# Display name passed to rag.create_corpus when the corpus is created.
CORPUS_DISPLAY_NAME = "rag-corpus-for-demo"

In [68]:
# Create the corpus with the chosen display name and embedding model
# configuration; later cells address it through rag_corpus.name.
rag_corpus = rag.create_corpus(
    embedding_model_config=embedding_model_config,
    display_name=CORPUS_DISPLAY_NAME,
)

### Check the corpus just created

In [69]:
# List the RAG corpora; the returned pager is shown as the cell output.
rag.list_corpora()

ListRagCorporaPager<rag_corpora {
  name: "projects/demos-vertex/locations/us-central1/ragCorpora/6917529027641081856"
  display_name: "rag-corpus-for-demo"
  create_time {
    seconds: 1733395040
    nanos: 536560000
  }
  update_time {
    seconds: 1733395040
    nanos: 536560000
  }
  rag_embedding_model_config {
    vertex_prediction_endpoint {
      endpoint: "projects/demos-vertex/locations/us-central1/publishers/google/models/text-embedding-004"
    }
  }
  rag_vector_db_config {
    rag_managed_db {
    }
    rag_embedding_model_config {
      vertex_prediction_endpoint {
        endpoint: "projects/demos-vertex/locations/us-central1/publishers/google/models/text-embedding-004"
      }
    }
  }
  corpus_status {
    state: ACTIVE
  }
  vector_db_config {
    rag_managed_db {
    }
    rag_embedding_model_config {
      vertex_prediction_endpoint {
        endpoint: "projects/demos-vertex/locations/us-central1/publishers/google/models/text-embedding-004"
      }
    }
  }
}
rag

### Upload a local file to the corpus

In [70]:
# Path of the PDF on the notebook runtime's filesystem.
local_pdf_path = "/content/contents/veo-imagen-blog.pdf"

# Upload that single file into the corpus with a display name and description.
rag_file = rag.upload_file(
    corpus_name=rag_corpus.name,
    path=local_pdf_path,
    display_name="veo-imagen-blog.pdf",
    description="Veo and Imagen3 announcement",
)

### Import a scanned PDF file to the corpus

Make sure to enable the advanced PDF parsing option (`use_advanced_pdf_parsing=True`) when importing scanned PDFs.

In [72]:
INPUT_GCS_BUCKET = "gs://rag-agent-demo/"

# Import from the bucket path above with advanced PDF parsing switched on.
# The chunking and rate-limit arguments are optional.
response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=[INPUT_GCS_BUCKET],
    chunk_size=1024,  # Optional
    chunk_overlap=100,  # Optional
    max_embedding_requests_per_min=900,  # Optional
    use_advanced_pdf_parsing=True,
)


In [73]:
# Collect the corpus's files into a list so they are shown as the cell output.
list(rag.list_files(corpus_name=rag_corpus.name))

[name: "projects/756696270058/locations/us-central1/ragCorpora/5188146770730811392/ragFiles/5317619296936551822"
 display_name: "veo-imagen-blog.pdf"
 description: "Veo and Imagen3 announcement"
 create_time {
   seconds: 1733421246
   nanos: 339103000
 }
 update_time {
   seconds: 1733421246
   nanos: 339103000
 }
 direct_upload_source {
 }
 file_status {
   state: ACTIVE
 },
 name: "projects/756696270058/locations/us-central1/ragCorpora/5188146770730811392/ragFiles/5317619445781233038"
 display_name: "contract_1.pdf"
 create_time {
   seconds: 1733421264
   nanos: 80022000
 }
 update_time {
   seconds: 1733421264
   nanos: 80022000
 }
 gcs_source {
   uris: "gs://rag-agent-demo/contract_1.pdf"
 }
 file_status {
   state: ACTIVE
 }]

### Import files from Google Cloud Storage

For this demo, we'll use a public GCS bucket containing earnings reports from Alphabet.

In [77]:
INPUT_GCS_BUCKET = "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/"

# Import from the bucket path above into the corpus.
# The chunking and rate-limit arguments are optional.
response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=[INPUT_GCS_BUCKET],
    max_embedding_requests_per_min=900,  # Optional
    chunk_size=1024,  # Optional
    chunk_overlap=100,  # Optional
)

In [79]:
# List the files
# (left commented out; uncomment the line below to list every file in the corpus)
# list(rag.list_files(corpus_name=rag_corpus.name))

### Optional: Perform direct context retrieval

### Create RAG Retrieval Tool

In [74]:
# Create a tool for the RAG Corpus
rag_retrieval_tool = Tool.from_retrieval(
    retrieval=rag.Retrieval(
        source=rag.VertexRagStore(
            rag_corpora=[rag_corpus.name],
            similarity_top_k=2,
            vector_distance_threshold=0.5,
        ),
    )
)

### Generate Content with Gemini using the RAG Retrieval Tool

In [75]:
# Load tool into Gemini model
rag_gemini_model = GenerativeModel(
    "gemini-1.5-flash-002",  # your self-deployed endpoint
    tools=[rag_retrieval_tool]
)

Question from the uploaded file

In [55]:
# Question aimed at the locally uploaded PDF.
user_question = "What is Google's video generation model?"
response = rag_gemini_model.generate_content(user_question)

# Render the answer text as Markdown in the cell output.
display(Markdown(response.text))

Google's video generation model is called Veo.  It's described as their most advanced video generation model, capable of generating high-quality, high-definition videos from text or image prompts in a variety of styles.  It's currently available in private preview on Vertex AI.


Question from the Alphabet docs in the Google Cloud Storage bucket

In [59]:
# Question aimed at the Alphabet documents imported from Cloud Storage.
user_question = "What is the total assets and total liabilities as of December 31 2021?"
response = rag_gemini_model.generate_content(user_question)

# Render the answer text as Markdown in the cell output.
display(Markdown(response.text))

As of December 31, 2021, Alphabet Inc. had total assets of $359,268 million and total liabilities of $107,633 million.


Question from the scanned PDF

In [78]:
# Question aimed at the scanned PDF.
user_question = "What is the price per unit of the office supplies"
response = rag_gemini_model.generate_content(user_question)

# Render the answer text as Markdown in the cell output.
display(Markdown(response.text))

The price per unit of the office supplies is $15.00.
