# Vertex SDK LLM Usage

**Comprehensive Tutorial for using Vertex GenAI SDK for Gemini**

# Authenticating

In [None]:
from google.colab import auth as google_auth
google_auth.authenticate_user()

# Installing the package

In [1]:
!pip install google-cloud-aiplatform --upgrade --user
!pip install langchain langchain-core langchain-google-vertexai
!pip install faiss-cpu

Collecting langchain
  Downloading langchain-0.1.14-py3-none-any.whl (812 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m812.8/812.8 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core
  Downloading langchain_core-0.1.40-py3-none-any.whl (276 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m276.8/276.8 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-google-vertexai
  Downloading langchain_google_vertexai-0.1.2-py3-none-any.whl (47 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.8/47.8 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.30 (from langchain)
  Downloading langchain_community-0.0.31-py3-none-any.whl (1.9 

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.0/27.0 MB[0m [31m44.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: faiss-cpu
Successfully installed faiss-cpu-1.8.0


#### ! ^^^^ Do not forget to click the "Restart runtime" button above.

# Authenticating again

In [1]:
from google.colab import auth as google_auth
google_auth.authenticate_user()

In [2]:
PROJECT_ID = "<YOUR PROJECT ID>"  # @param {type:"string"}
LOCATION = "us-central1"  # @param {type:"string"}

In [3]:
from google.cloud import aiplatform
# from vertexai.preview.language_models import TextGenerationModel, ChatModel
from vertexai.generative_models import GenerativeModel, Part, ChatSession, Image, FunctionDeclaration, Tool, Content
from langchain_google_vertexai import VertexAI
import http.client
import typing
import urllib.request

from google.cloud import aiplatform, storage
from vertexai.preview.language_models import TextGenerationModel, ChatModel, TextEmbeddingModel
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import pprint
import re
import io

from langchain.llms import VertexAI
from langchain import hub
from langchain.text_splitter import TextSplitter, CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.vectorstores import FAISS
from langchain.document_loaders import TextLoader, GCSFileLoader
from langchain.embeddings import VertexAIEmbeddings
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain
from langchain.chains.mapreduce import MapReduceChain
from langchain.chains import ReduceDocumentsChain, MapReduceDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

aiplatform.init(project=PROJECT_ID, location=LOCATION)

# Text generation ( no stream )

In [4]:
from google.cloud import aiplatform
# from vertexai.preview.language_models import TextGenerationModel, ChatModel
from vertexai.generative_models import GenerativeModel, Part, ChatSession, Image, FunctionDeclaration, Tool, Content

aiplatform.init(project=PROJECT_ID, location=LOCATION)

In [5]:
model = GenerativeModel("gemini-1.0-pro")

response = model.generate_content(
    "What do you know about the universe?",
    # Optional:
    generation_config={
        'temperature': 0,
        'top_p': 0,
        'max_output_tokens': 1000
    }
)

response.text

'**Origin and Evolution:**\n\n* The universe is estimated to be 13.8 billion years old.\n* It originated from a singularity, an infinitely dense and hot point.\n* It expanded rapidly in the Big Bang, forming the first atoms and galaxies.\n* Over time, galaxies clustered together to form larger structures, such as superclusters and filaments.\n\n**Structure and Composition:**\n\n* The universe is vast and mostly empty space.\n* It is composed of approximately 68% dark energy, 27% dark matter, and 5% ordinary matter.\n* Dark energy is a mysterious force that causes the expansion of the universe to accelerate.\n* Dark matter is an invisible substance that interacts only through gravity.\n* Ordinary matter includes stars, planets, galaxies, and all living things.\n\n**Galaxies:**\n\n* Galaxies are vast collections of stars, gas, and dust.\n* They come in various shapes and sizes, including spirals, ellipticals, and irregulars.\n* Our galaxy, the Milky Way, is a spiral galaxy with an estima

# Text generation ( streaming )

In [8]:
model = GenerativeModel("gemini-1.0-pro")

response = model.generate_content(
    "What do you know about the universe?",
    stream = True,
    # Optional:
    generation_config={
        'temperature': 0,
        'max_output_tokens': 1000
    }
)

for resp in response:
  print(resp.text)

**Origin and Evolution
:**

* The universe is estimated to be 13.8 billion years old.
* It originated from a singularity, an infinitely dense and hot point.

* It expanded rapidly in the Big Bang, forming the first atoms and galaxies.
* Over time, galaxies clustered together to form larger structures, such as
 superclusters and filaments.

**Structure and Composition:**

* The universe is vast and mostly empty space.
* It is composed of approximately 68% dark energy, 27% dark matter, and 5% ordinary matter.
* Dark energy is a mysterious force that
 causes the expansion of the universe to accelerate.
* Dark matter is an invisible substance that interacts only through gravity.
* Ordinary matter includes stars, planets, galaxies, and all living things.

**Galaxies:**

* Galaxies are vast collections
 of stars, gas, and dust.
* They come in various shapes and sizes, including spirals, ellipticals, and irregulars.
* Our galaxy, the Milky Way, is a spiral galaxy with an estimated 100-400 bil

# Chat

In [None]:
model = GenerativeModel("gemini-1.0-pro")
chat = model.start_chat()

parameters = {
    "temperature": 0.2,
    "max_output_tokens": 256,
    "top_p": 0.95,
    "top_k": 40,
    }

def send_msg(msg):
  return chat.send_message(msg, stream=False).text

prompt = "Hello my name is hasan"
send_msg(prompt)

In [None]:
send_msg("so what is your name")

In [None]:
send_msg("what can you do for me ?")

# Multimodal prompt

Send images / files in prompt along with text

In [None]:
# create helper function
def load_image_from_url(image_url: str) -> Image:
    with urllib.request.urlopen(image_url) as response:
        response = typing.cast(http.client.HTTPResponse, response)
        image_bytes = response.read()
    return Image.from_bytes(image_bytes)

# Load images from Cloud Storage URI
landmark1 = load_image_from_url(
    "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark1.png"
)
landmark2 = load_image_from_url(
    "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark2.png"
)
landmark3 = load_image_from_url(
    "https://storage.googleapis.com/cloud-samples-data/vertex-ai/llm/prompts/landmark3.png"
)

In [None]:
# Pass multimodal prompt
model = GenerativeModel("gemini-1.0-pro-vision")
response = model.generate_content(
    [
        landmark1,
        "city: Rome, Landmark: the Colosseum",
        landmark2,
        "city: Beijing, Landmark: Forbidden City",
        landmark3,
    ]
)
print(response.text)

# Function calling

Create a FC prompt for API calling
- The model doesn't actually call the API
- The model only parses text to return parameters to call an external API

In [None]:
prompt = "What is the distance between Warsaw and Krakow"

In [None]:
# Initialize Gemini model
model = GenerativeModel("gemini-1.0-pro")

# Specify a function declaration and parameters for an API request
get_distance_details_func = FunctionDeclaration(
    name="get_distance_between_locations",
    description="Get the route details from Google Maps and then calculate distance",
    # Function parameters are specified in OpenAPI JSON schema format
    parameters={
        "type": "object",
        "properties": {
                        "src_location": {"type": "string", "description": "Source Location"},
                        "tgt_location": {"type": "string", "description": "Target Location"}
                      },
    },
)

# Define a tool that includes the above get_current_weather_func
distance_tool = Tool(
    function_declarations=[get_distance_details_func],
)

In [None]:
# Define the user's prompt in a Content object that we can reuse in model calls
user_prompt_content = Content(
    role="user",
    parts=[
        Part.from_text(prompt),
    ],
)

# Send the prompt and instruct the model to generate content using the Tool that you just created
response = model.generate_content(
    user_prompt_content,
    generation_config={"temperature": 0},
    tools=[distance_tool],
)
response_function_call_content = response.candidates[0].content

In [None]:
# Parse response to get parameters parsed by model from the prompt
src_location = response_function_call_content.parts[0].function_call.args["src_location"]
tgt_location = response_function_call_content.parts[0].function_call.args["tgt_location"]

src_location, tgt_location

# Langchain integration

Text: QnA over texts with Refine chain

In [None]:
# Initialize Gemini model
model = VertexAI( max_output_tokens=2048, model="gemini-1.0-pro", top_p = 1, temperature = 0)
embeddings = VertexAIEmbeddings()

def chunk_text(input_texts, separator="", chunk_size=100, overlap=20):
  ## Create split of texts on a chunk
  text_splitter = CharacterTextSplitter(separator="", chunk_size=chunk_size, chunk_overlap=overlap)
  chunks = text_splitter.create_documents(input_texts)

  return chunks

def create_vector_store(chunks):
  ## Create vector store from embeddings
  db = FAISS.from_documents(chunks, embeddings)

  return db

# def query_vector_store(query, db, top_n_docs=5):
#   docs = db.similarity_search(query, k=top_n_docs)

#   return docs

def query_vector_store(query, db, top_n_docs, multiquery):
  if multiquery:
    retriever_from_llm = MultiQueryRetriever.from_llm(
      retriever=db.as_retriever(search_kwargs={"k": top_n_docs}), llm=model
    )
    docs = retriever_from_llm.get_relevant_documents(query=query)
  else:
    retriever = db.as_retriever(search_kwargs={"k": top_n_docs})
    docs = retriever.get_relevant_documents(query)

  return docs

def create_with_refine(input_chunks, title):
  prompt_template = """Write a content with the following:
  {text}
  CONCISE SUMMARY:"""
  prompt = PromptTemplate.from_template(prompt_template)

  refine_template = (
      """
      You are a QnA bot meant to answer questions ONLY from the contexts provided below and NO PRIOR knowledge

      Question:
      -----------
      {query}
      -----------

      We have provided a response version of the response up to a certain point: {existing_answer}
      You have the opportunity to refine the existing response response with below contexts
      (only if needed) with some more context as specified below.

      Rules:
      ------------
      Given the new context and the query, refine the response further as necessary for answering the query but
      never include the context as is in the response, you should ONLY and ONLY use the knowledge provided in contexts.
      DONT add any information which is not present in contexts.
      Make sure that the content is always relevant to the original query.
      If the context provided isn't useful, dont make any change to the response, just return the current response.
      ------------

      Context:
      ------------
      {text}
      ------------
      """
  ).replace('{query}', title)

  refine_prompt = PromptTemplate.from_template(refine_template)
  chain = load_summarize_chain(
      llm=model,
      chain_type="refine",
      question_prompt=prompt,
      refine_prompt=refine_prompt,
      return_intermediate_steps=True,
      input_key="input_documents",
      output_key="output_text"
  )
  result = chain({"input_documents": input_chunks}, return_only_outputs=True)

  return result['intermediate_steps'], result['output_text']

def run_query(query, db, top_n_docs=5, multiquery=False):
  docs = query_vector_store(query, db, top_n_docs, multiquery)

  ## Create article using top N docs
  intermediate_steps, final_text = create_with_refine(docs, query)

  return intermediate_steps, final_text

In [None]:
top_n_docs = 15 # @param {type:"integer"} ## Number of similar chunks used to create content

message = "What are some of the pros and cons of Python as a programming language?"
model.invoke(message)

In [None]:
document = ["""
Information retrieval using AI and LLMs
Vertex AI Search brings together the power of deep information retrieval, state-of-the-art natural language processing, and the latest in large language model (LLM) processing to understand user intent and return the most relevant results for the user.

With Vertex AI Search, you can build a Google-quality search app on your own data and embed a search bar in your web pages or app.

With Recommendations, you can build a recommendations app on your own data that will suggest content similar to the content that the user is currently viewing.

Note: The generic recommendations feature is a Preview offering, subject to the "Pre-GA Offerings Terms" of the GCP Service Specific Terms. Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the launch stage descriptions. Further, by using this feature, you agree to the Generative AI Preview terms and conditions ("Preview Terms"). For this feature, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).
An easy experience to get started
Vertex AI Search makes it easy to get started with high-quality search or recommendations based on data that you provide. As part of the setup experience, you can:

Use your existing Google Account or sign up for one.
Use your existing Google Cloud project or create one.
Create an app and attach a data store to it. Provide data to search or recommend by entering the URLs for your website content, importing your data from BigQuery or Cloud Storage, or importing FHIR R4 data from Cloud Healthcare API, or uploading through RESTful CRUD APIs. Syncing data from Jira, Salesforce, or Confluence is available in Preview with allowlist.
Embed JavaScript widgets and API samples to integrate search or recommendations into your website or applications.
Data stores and apps
With Vertex AI Search, you create a search or recommendations app and attach it to a data store. You import your data into a data store and index your data. Apps and data stores have a one-to-one relationship.

There are various kinds of data stores that you can create, based on the type of data you use. Each data store can contain one type of data:

Website data: You can provide domains such as yourexamplewebsite.com/faq and yourexamplewebsite.com/events and enable search or recommendations over the content at those domains.
Structured data: A data store with structured data enables semantic search or recommendations over structured data such as a BigQuery table or NDJSON files. For example, you can enable search or recommendations over a product catalog for your ecommerce experience, a movie catalog for movie search or recommendations, or a directory of doctors for provider search or recommendations.
Unstructured data: An unstructured data store enables semantic search or recommendations over data such as documents and images. For example, a financial institution can enable search or recommendations over their private corpus of financial research publications, or a biotech company can enable search or recommendations over their private repository of medical research.
Healthcare data: A healthcare data store enables semantic search over healthcare FHIR R4 data imported from Cloud Healthcare API. For example, a healthcare provider can search over a patient's clinical history using exploratory queries.
"""]

In [None]:
## Create chunks from array of texts
separator = " "
chunks = chunk_text(document, separator, chunk_size=150, overlap=20)
len(chunks)

In [None]:
## Create vector store from chunks
db = create_vector_store(chunks)

In [None]:
query = "What type of data can data stores contain in Vertex AI Search ? Give an elaborated answer" # @param {type:"string"} ## Query

## Create response using top N docs
intermediate_steps, final_text = run_query(query, db, top_n_docs, multiquery=False)

print(final_text)

In [None]:
# query = "What type of data can data stores contain in Vertex AI Search ? Give an elaborated answer" # @param {type:"string"} ## Query

# ## Create response using top N docs
# intermediate_steps, final_text = run_query(query, db, top_n_docs, multiquery=True)

# print(final_text)

In [None]:
query = "What does Vertex search support in terms of Healthcare data ?" # @param {type:"string"} ## Query

## Create response using top N docs
intermediate_steps, final_text = run_query(query, db, top_n_docs)

print(final_text)

In [None]:
query = "Which all services are currently in preview in Vertex Search ?" # @param {type:"string"} ## Query

## Create response using top N docs
intermediate_steps, final_text = run_query(query, db, top_n_docs)

print(final_text)

# Langchain integration

Text: **Summarize** texts with Refine chain

In [None]:
# Initialize Gemini model
model = VertexAI( max_output_tokens=2048, model="gemini-1.0-pro", top_p = 1, temperature = 0)
embeddings = VertexAIEmbeddings()

def chunk_text(input_texts, separator="", chunk_size=100, overlap=20):
  ## Create split of texts on a chunk
  text_splitter = CharacterTextSplitter(separator="", chunk_size=chunk_size, chunk_overlap=overlap)
  chunks = text_splitter.create_documents(input_texts)

  return chunks

def summarize_with_refine(input_chunks):
  prompt_template = """Write a content with the following:
  {text}
  CONCISE SUMMARY:"""
  prompt = PromptTemplate.from_template(prompt_template)

  refine_template = """
  You are a Summarization bot meant to summarize the contexts provided below and NO PRIOR knowledge

  We have provided a version of the summary up to a certain point: {existing_answer}
  You have the opportunity to refine the existing summary with below contexts
  with some more context as specified below.

  Rules:
  ------------
  1. Given the new context, refine the summary further BUT never include the context as is in the response,
  you should ONLY and ONLY use the knowledge provided in contexts.
  2. DONT add any information which is not present in contexts.
  3. Make sure that the content is always inline with the contexts and DONT ADD any information from your prior knowledge.
  ------------

  Context:
  ------------
  {text}
  ------------
  """

  refine_prompt = PromptTemplate.from_template(refine_template)
  chain = load_summarize_chain(
      llm=model,
      chain_type="refine",
      question_prompt=prompt,
      refine_prompt=refine_prompt,
      return_intermediate_steps=True,
      input_key="input_documents",
      output_key="output_text"
  )
  result = chain({"input_documents": input_chunks}, return_only_outputs=True)

  return result['intermediate_steps'], result['output_text']

In [None]:
document = ["""
Information retrieval using AI and LLMs
Vertex AI Search brings together the power of deep information retrieval, state-of-the-art natural language processing, and the latest in large language model (LLM) processing to understand user intent and return the most relevant results for the user.

With Vertex AI Search, you can build a Google-quality search app on your own data and embed a search bar in your web pages or app.

With Recommendations, you can build a recommendations app on your own data that will suggest content similar to the content that the user is currently viewing.

Note: The generic recommendations feature is a Preview offering, subject to the "Pre-GA Offerings Terms" of the GCP Service Specific Terms. Pre-GA products and features may have limited support, and changes to pre-GA products and features may not be compatible with other pre-GA versions. For more information, see the launch stage descriptions. Further, by using this feature, you agree to the Generative AI Preview terms and conditions ("Preview Terms"). For this feature, you can process personal data as outlined in the Cloud Data Processing Addendum, subject to applicable restrictions and obligations in the Agreement (as defined in the Preview Terms).
An easy experience to get started
Vertex AI Search makes it easy to get started with high-quality search or recommendations based on data that you provide. As part of the setup experience, you can:

Use your existing Google Account or sign up for one.
Use your existing Google Cloud project or create one.
Create an app and attach a data store to it. Provide data to search or recommend by entering the URLs for your website content, importing your data from BigQuery or Cloud Storage, or importing FHIR R4 data from Cloud Healthcare API, or uploading through RESTful CRUD APIs. Syncing data from Jira, Salesforce, or Confluence is available in Preview with allowlist.
Embed JavaScript widgets and API samples to integrate search or recommendations into your website or applications.
Data stores and apps
With Vertex AI Search, you create a search or recommendations app and attach it to a data store. You import your data into a data store and index your data. Apps and data stores have a one-to-one relationship.

There are various kinds of data stores that you can create, based on the type of data you use. Each data store can contain one type of data:

Website data: You can provide domains such as yourexamplewebsite.com/faq and yourexamplewebsite.com/events and enable search or recommendations over the content at those domains.
Structured data: A data store with structured data enables semantic search or recommendations over structured data such as a BigQuery table or NDJSON files. For example, you can enable search or recommendations over a product catalog for your ecommerce experience, a movie catalog for movie search or recommendations, or a directory of doctors for provider search or recommendations.
Unstructured data: An unstructured data store enables semantic search or recommendations over data such as documents and images. For example, a financial institution can enable search or recommendations over their private corpus of financial research publications, or a biotech company can enable search or recommendations over their private repository of medical research.
Healthcare data: A healthcare data store enables semantic search over healthcare FHIR R4 data imported from Cloud Healthcare API. For example, a healthcare provider can search over a patient's clinical history using exploratory queries.
"""]

In [None]:
## Create chunks from array of texts
separator = " "
chunks = chunk_text(document, separator, chunk_size=1000, overlap=20)
len(chunks)

In [None]:
intermediate_steps, summary = summarize_with_refine(chunks)

In [None]:
summary