In [None]:
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# RAG Engine 簡介：使用 Vertex AI 建立可擴展且模組化的 RAG 系統

<table align="left">
  <td style="text-align: center">
    <a href="https://colab.research.google.com/github/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb">
      <img width="32px" src="https://www.gstatic.com/pantheon/images/bigquery/welcome_page/colab-logo.svg" alt="Google Colaboratory logo"><br> 在 Colab 中開啟
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/colab/import/https%3A%2F%2Fraw.githubusercontent.com%2Fmosil%2Fgenerative-ai-colab-tw%2Frefs%2Fheads%2Fmain%2Fgemini%2Fintro_rag_engine.ipynb">
      <img width="32px" src="https://lh3.googleusercontent.com/JmcxdQi-qOpctIvWKgPtrzZdJJK-J3sWE1RsfjZNwshCFgE_9fULcNpuXYTilIR2hjwN" alt="Google Cloud Colab Enterprise logo"><br> 在 Colab Enterprise 中開啟
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://console.cloud.google.com/vertex-ai/workbench/deploy-notebook?download_url=https%3A%2F%2Fraw.githubusercontent.com%2Fmosil%2Fgenerative-ai-colab-tw%2Frefs%2Fheads%2Fmain%2Fgemini%2Fintro_rag_engine.ipynb">
      <img src="https://www.gstatic.com/images/branding/gcpiconscolors/vertexai/v1/32px.svg" alt="Vertex AI logo"><br> 在 Vertex AI Workbench 中開啟
    </a>
  </td>
  <td style="text-align: center">
    <a href="https://github.com/mosil/generative-ai-colab-tw/blob/main/gemini/intro_rag_engine.ipynb">
      <img width="32px" src="https://www.svgrepo.com/download/217753/github.svg" alt="GitHub logo"><br> 在 GitHub 上檢視
    </a>
  </td>
</table>

<div style="clear: both;"></div>

<b>分享至：</b>

<a href="https://www.linkedin.com/sharing/share-offsite/?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/8/81/LinkedIn_icon.svg" alt="LinkedIn logo">
</a>

<a href="https://bsky.app/intent/compose?text=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/7/7a/Bluesky_Logo.svg" alt="Bluesky logo">
</a>

<a href="https://twitter.com/intent/tweet?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/5a/X_icon_2.svg" alt="X logo">
</a>

<a href="https://reddit.com/submit?url=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb" target="_blank">
  <img width="20px" src="https://redditinc.com/hubfs/Reddit%20Inc/Brand/Reddit_Logo.png" alt="Reddit logo">
</a>

<a href="https://www.facebook.com/sharer/sharer.php?u=https%3A//github.com/GoogleCloudPlatform/generative-ai/blob/main/gemini/rag-engine/intro_rag_engine.ipynb" target="_blank">
  <img width="20px" src="https://upload.wikimedia.org/wikipedia/commons/5/51/Facebook_f_logo_%282019%29.svg" alt="Facebook logo">
</a>            

| | |
|-|-|
| 作者 | [Holt Skinner](https://github.com/holtskinner) |
| 翻譯 | [Ahdaa](https://github.com/mosil) |

## 總覽

檢索擴增生成 (RAG) 透過允許大型語言模型 (LLM) 在生成期間存取和處理外部資訊來源來改善它們。這確保了模型的回答基於事實數據，並避免產生幻覺。

LLM 的一個常見問題是它們不理解私有知識，也就是您組織的資料。使用 RAG Engine，您可以用額外的私有資訊來豐富 LLM 的上下文，讓模型減少幻覺並更準確地回答問題。

透過將額外的知識來源與 LLM 已有的知識相結合，可以提供更好的上下文。改進的上下文以及查詢增強了 LLM 回應的品質。

以下概念是理解 Vertex AI RAG Engine 的關鍵。這些概念按檢索擴增生成 (RAG) 流程的順序排列。

1. **資料擷取**：從不同的資料來源擷取資料。例如，本地檔案、Google Cloud Storage 和 Google Drive。

2. **資料轉換**：為建立索引做準備而進行的資料轉換。例如，資料會被分割成語塊（chunks）。

3. **Embedding**：將單字或文字片段轉換成數值化表示。這些數字捕捉了文字的語義以及上下文特徵。相似或關聯度高的單字或文字往往擁有相似的 Embedding，這意味著它們在高維度的向量空間中彼此相當接近。

4. **資料索引**：RAG Engine 建立一個稱為語料庫（corpus）的索引。這個索引被建構成知識庫，讓搜尋得以最佳化。舉例來說，它就像是一本大型參考書的詳細目錄。

5. **檢索**：當使用者提出問題或提供 prompt 時，RAG Engine 中的檢索元件會從知識庫中，找出和使用者提出的查詢有關之資訊。

6. **生成**：檢索到的資訊會作為上下文，加入使用者的原始查詢中；它作為指南，讓生成式 AI 模型可以產生基於事實且相關的回應。

有關更多資訊，請參閱 [Vertex AI RAG Engine](https://cloud.google.com/vertex-ai/generative-ai/docs/rag-overview) 的官方文件。

## 開始使用

### 安裝 Vertex AI SDK 和 Google Gen AI SDK


In [None]:
# Install or upgrade the Vertex AI SDK and the Google Gen AI SDK for this kernel.
%pip install --upgrade --quiet google-cloud-aiplatform google-genai

### 重新啟動執行階段

若要在這個 Jupyter 執行階段中使用新安裝的套件，您必須重新啟動執行階段。您可以透過執行下方的儲存格來執行此動作，這個儲存格會重新啟動目前的 kernel。

重新啟動可能需要一分鐘或更長時間。重新啟動後，請繼續進行下一步。

In [None]:
import IPython

# Restart the current kernel so the newly installed packages can be used.
kernel = IPython.Application.instance().kernel
kernel.do_shutdown(True)

<div class="alert alert-block alert-warning">
<b>⚠️ 核心將會重新啟動。請等待它完成後再繼續下一步。⚠️</b>
</div>


### 驗證您的 notebook 環境（僅限 Colab）

如果您正在 Google Colab 上執行此 notebook，請執行下方的儲存格來驗證您的環境。

In [None]:
import sys

# Authenticate only when the `google.colab` module is already loaded, i.e. when
# this notebook is running on Google Colab; elsewhere the cell does nothing.
RUNNING_IN_COLAB = "google.colab" in sys.modules

if RUNNING_IN_COLAB:
    from google.colab import auth

    auth.authenticate_user()

### 設定 Google Cloud 專案資訊並初始化 Vertex AI SDK

若要開始使用 Vertex AI，您必須擁有現有的 Google Cloud 專案並[啟用 Vertex AI API](https://console.cloud.google.com/flows/enableapi?apiid=aiplatform.googleapis.com)。

瞭解更多關於[設定專案和開發環境](https://cloud.google.com/vertex-ai/docs/start/cloud-environment)的資訊。

In [None]:
# Fall back to the GOOGLE_CLOUD_PROJECT environment variable when no project ID
# is entered in the form field below.
import os

from google import genai
import vertexai

PROJECT_ID = "[your-project-id]"  # @param {type: "string", placeholder: "[your-project-id]", isTemplate: true}
if not PROJECT_ID or PROJECT_ID == "[your-project-id]":
    # Read the variable without wrapping it in str(): str(None) would turn a
    # missing variable into the literal project ID "None".
    PROJECT_ID = os.environ.get("GOOGLE_CLOUD_PROJECT")
    if not PROJECT_ID:
        raise ValueError(
            "No Google Cloud project configured: set PROJECT_ID in this cell or "
            "define the GOOGLE_CLOUD_PROJECT environment variable."
        )

# Region comes from GOOGLE_CLOUD_REGION, defaulting to "us-central1".
LOCATION = os.environ.get("GOOGLE_CLOUD_REGION", "us-central1")

# Point both SDKs at the same project and location.
vertexai.init(project=PROJECT_ID, location=LOCATION)
client = genai.Client(vertexai=True, project=PROJECT_ID, location=LOCATION)

### 匯入函式庫

In [None]:
from IPython.display import Markdown, display
from google.genai.types import GenerateContentConfig, Retrieval, Tool, VertexRagStore
from vertexai import rag

### 建立 RAG Corpus

In [None]:
# Google first-party embedding models are currently supported.
EMBEDDING_MODEL = "publishers/google/models/text-embedding-004"  # @param {type:"string", isTemplate: true}

# Assemble the configuration from the inside out:
# prediction endpoint -> embedding model config -> vector DB config.
embedding_endpoint = rag.VertexPredictionEndpoint(publisher_model=EMBEDDING_MODEL)
embedding_model_config = rag.RagEmbeddingModelConfig(
    vertex_prediction_endpoint=embedding_endpoint
)
vector_db_config = rag.RagVectorDbConfig(
    rag_embedding_model_config=embedding_model_config
)

rag_corpus = rag.create_corpus(
    display_name="my-rag-corpus",
    backend_config=vector_db_config,
)

### 檢查剛建立的語料庫

In [None]:
# List the RAG corpora so the corpus created above can be checked in the cell output.
rag.list_corpora()

### 上傳本機檔案到語料庫

In [None]:
%%writefile test.md

Retrieval-Augmented Generation (RAG) is a technique that enhances the capabilities of large language models (LLMs) by allowing them to access and incorporate external data sources when generating responses. Here's a breakdown:

**What it is:**

* **Combining Retrieval and Generation:**
    * RAG combines the strengths of information retrieval systems (like search engines) with the generative power of LLMs.
    * It enables LLMs to go beyond their pre-trained data and access up-to-date and specific information.
* **How it works:**
    * When a user asks a question, the RAG system first retrieves relevant information from external data sources (e.g., databases, documents, web pages).
    * This retrieved information is then provided to the LLM as additional context.
    * The LLM uses this augmented context to generate a more accurate and informative response.

**Why it's helpful:**

* **Access to Up-to-Date Information:**
    * LLMs are trained on static datasets, so their knowledge can become outdated. RAG allows them to access real-time or frequently updated information.
* **Improved Accuracy and Factual Grounding:**
    * RAG reduces the risk of LLM "hallucinations" (generating false or misleading information) by grounding responses in verified external data.
* **Enhanced Contextual Relevance:**
    * By providing relevant context, RAG enables LLMs to generate more precise and tailored responses to specific queries.
* **Increased Trust and Transparency:**
    * RAG can provide source citations, allowing users to verify the information and increasing trust in the LLM's responses.
* **Cost Efficiency:**
    * Rather than constantly retraining large language models, RAG allows for the introduction of new data in a more cost effective way.

In essence, RAG bridges the gap between the vast knowledge of LLMs and the need for accurate, current, and contextually relevant information.


In [None]:
# Upload the local Markdown file into the corpus; the file name doubles as
# its display name.
LOCAL_FILE_PATH = "test.md"

rag_file = rag.upload_file(
    corpus_name=rag_corpus.name,
    path=LOCAL_FILE_PATH,
    display_name=LOCAL_FILE_PATH,
    description="my test file",
)

### 從 Google Cloud Storage 匯入檔案

請記得為您的 Google Cloud Storage bucket，授予「檢視者」存取權給「Vertex RAG 資料服務代理程式」（格式為 `service-{project_number}@gcp-sa-vertex-rag.iam.gserviceaccount.com`）。

在本範例中，我們會使用包含 Alphabet 收益報告的公開 GCS bucket。

In [None]:
# Public bucket with the sample PDFs imported by this cell.
INPUT_GCS_BUCKET = "gs://cloud-samples-data/gen-app-builder/search/alphabet-investor-pdfs/"

# Optional chunking settings applied to the imported files.
gcs_chunking = rag.ChunkingConfig(chunk_size=1024, chunk_overlap=100)

response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=[INPUT_GCS_BUCKET],
    transformation_config=rag.TransformationConfig(chunking_config=gcs_chunking),  # Optional
    max_embedding_requests_per_min=900,  # Optional
)

### 從 Google Drive 匯入檔案

符合資格的路徑可以格式化為：

- `https://drive.google.com/drive/folders/{folder_id}`
- `https://drive.google.com/file/d/{file_id}`。

請記得為您的 Drive 資料夾/檔案，授予「檢視者」存取權給「Vertex RAG 資料服務代理程式」（格式為 `service-{project_number}@gcp-sa-vertex-rag.iam.gserviceaccount.com`）。

In [None]:
# NOTE: "{folder_id}" is a literal placeholder (this is not an f-string);
# replace it with a real Drive folder ID before running this cell.
DRIVE_PATHS = ["https://drive.google.com/drive/folders/{folder_id}"]

# Optional chunking settings applied to the imported files.
drive_chunking = rag.ChunkingConfig(chunk_size=512, chunk_overlap=50)

response = rag.import_files(
    corpus_name=rag_corpus.name,
    paths=DRIVE_PATHS,
    transformation_config=rag.TransformationConfig(chunking_config=drive_chunking),  # Optional
)

### 可選：執行直接上下文檢索

In [None]:
# Direct context retrieval: query the corpus and print the raw retrieval response.
corpus_resource = rag.RagResource(
    rag_corpus=rag_corpus.name,
    # Optional: supply IDs from `rag.list_files()`.
    # rag_file_ids=["rag-file-1", "rag-file-2", ...],
)
retrieval_config = rag.RagRetrievalConfig(
    top_k=10,  # Optional
    filter=rag.Filter(vector_distance_threshold=0.5),  # Optional
)

response = rag.retrieval_query(
    rag_resources=[corpus_resource],
    rag_retrieval_config=retrieval_config,
    text="什麼是 RAG 以及它為何有用？",
)
print(response)

# Optional: The retrieved context can be passed to any SDK or model generation API to generate final results.
# context = " ".join([context.text for context in response.contexts.contexts]).replace("\n", "")

### 建立 RAG 檢索工具

In [None]:
# Create a retrieval tool backed by the RAG corpus.
corpus_store = VertexRagStore(
    rag_corpora=[rag_corpus.name],
    similarity_top_k=10,
    vector_distance_threshold=0.5,
)
rag_retrieval_tool = Tool(retrieval=Retrieval(vertex_rag_store=corpus_store))

### 使用 RAG 檢索工具，透過 Gemini 生成內容

In [None]:
# Model identifier passed as `model=` to `client.models.generate_content` below.
MODEL_ID = "gemini-2.0-flash-001"

In [None]:
# Attach the RAG retrieval tool to the request, then render the reply as Markdown.
generation_config = GenerateContentConfig(tools=[rag_retrieval_tool])

response = client.models.generate_content(
    model=MODEL_ID,
    contents="什麼是 RAG？",
    config=generation_config,
)

display(Markdown(response.text))

### 使用 RAG 檢索工具，透過 Llama3 生成內容

In [None]:
from vertexai import generative_models

# Build the retrieval tool for the Llama model. It gets its own name because it
# is a vertexai `generative_models.Tool`, not the google.genai `Tool` bound to
# `rag_retrieval_tool` for the Gemini cell; rebinding that name here could break
# a re-run of the Gemini cell.
llama_rag_retrieval_tool = generative_models.Tool.from_retrieval(
    retrieval=rag.Retrieval(
        source=rag.VertexRagStore(
            rag_resources=[rag.RagResource(rag_corpus=rag_corpus.name)],
            rag_retrieval_config=rag.RagRetrievalConfig(
                top_k=10,  # Optional
                filter=rag.Filter(
                    vector_distance_threshold=0.5,  # Optional
                ),
            ),
        ),
    )
)

llama_model = generative_models.GenerativeModel(
    # Your self-deployed Llama3 endpoint. The {project}, {location} and
    # {endpoint_resource_id} parts are literal placeholders to replace.
    "projects/{project}/locations/{location}/endpoints/{endpoint_resource_id}",
    tools=[llama_rag_retrieval_tool],
)

In [None]:
# Send the question to the Llama model and render its answer as Markdown.
response = llama_model.generate_content("什麼是 RAG？")
answer_markdown = Markdown(response.text)

display(answer_markdown)