<a href="https://colab.research.google.com/github/run-llama/llama_index/blob/main/docs/docs/examples/vector_stores/VertexAIVectorSearchDemo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ! pip install llama-index-readers-file
# ! pip install --quiet llama-index-embeddings-vertex
# ! pip install --quiet llama-index-storage-docstore-firestore
# ! pip install --quiet llama-index-llms-vertex

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SimpleNodeParser

# load the blogs in using the reader
blogs = SimpleDirectoryReader("./data").load_data()

# chunk up the blog posts into nodes
parser = SimpleNodeParser.from_defaults(chunk_size=1024, chunk_overlap=20)
nodes = parser.get_nodes_from_documents(blogs)

In [None]:
from llama_index.embeddings.vertex import VertexTextEmbedding

embed_model = VertexTextEmbedding(
    project="rthallam-demo-project", location="us-central1"
)
embed_model.dict()

{'model_name': 'textembedding-gecko@003',
 'embed_batch_size': 10,
 'num_workers': None,
 'embed_mode': <VertexEmbeddingMode.RETRIEVAL_MODE: 'retrieval'>,
 'additional_kwargs': {},
 'class_name': 'VertexTextEmbedding'}

In [None]:
embed_text_result = embed_model.get_text_embedding("Hello World!")
embed_query_result = embed_model.get_query_embedding("Hello World!")
from llama_index.core.base.embeddings.base import SimilarityMode

embed_model.similarity(
    embed_text_result, embed_query_result, SimilarityMode.DOT_PRODUCT
)

0.7372278621095343

In [None]:
# Project settings
# NOTE(review): the project id below is specific to the original author's
# environment; change it (or use the Colab form field) before running.
PROJECT_ID = "rthallam-demo-project"  # @param {type:"string"}
REGION = "us-central1"  # @param {type:"string"}
# LOCATION is an alias of REGION; later cells use the two names interchangeably.
LOCATION = REGION
# IPython shell capture: `!cmd` returns the command output as a list of lines,
# so the first element is taken as the project number on the next statement.
# NOTE(review): PROJECT_NUMBER[0] assumes gcloud printed at least one line and
# that the line is the number (not an error message) - confirm gcloud is
# installed and authenticated.
PROJECT_NUMBER = (
    ! gcloud projects describe $PROJECT_ID --format="value(projectNumber)"
)
PROJECT_NUMBER = PROJECT_NUMBER[0]

# Cloud storage buckets
GCS_BUCKET_URI = "gs://llamaindex-vertexai"  # @param {type:"string"}
# Bucket name is the URI with the "gs://" scheme stripped.
GCS_BUCKET_NAME = GCS_BUCKET_URI.replace("gs://", "")

# Vertex AI Vector Search
# The VS_* values below are passed to MatchingEngineIndex.create_tree_ah_index,
# MatchingEngineIndexEndpoint.create and endpoint.deploy_index further down.
# Parameter descriptions:
# https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndex#google_cloud_aiplatform_MatchingEngineIndex_create_tree_ah_index
VS_INDEX_NAME = "llamaindex-doc-index"  # @param {type:"string"}
VS_INDEX_ENDPOINT_NAME = "llamaindex-doc-endpoint"  # @param {type:"string"}
VS_CONTENTS_DELTA_URI = f"{GCS_BUCKET_URI}/index/embeddings"
# presumably must equal the embedding model's output size - TODO confirm
VS_DIMENSIONS = 768
VS_APPROX_NEIGHBORS = 150
VS_INDEX_UPDATE_METHOD = "STREAM_UPDATE"
VS_INDEX_SHARD_SIZE = "SHARD_SIZE_SMALL"
VS_LEAF_NODE_EMB_COUNT = 500
VS_LEAF_SEARCH_PERCENT = 80
VS_DISTANCE_MEASURE_TYPE = "DOT_PRODUCT_DISTANCE"
VS_MACHINE_TYPE = "e2-standard-16"
VS_MIN_REPLICAS = 1
VS_MAX_REPLICAS = 1
VS_DESCRIPTION = (
    "Index for llamaindex Vertex integration"  # @param {type:"string"}
)

# When False, the create_* / deploy_index helpers only look up existing
# resources and never create new ones.
CREATE_RESOURCES = True  # @param {type:"boolean"}

In [None]:
# @title Utility function to create resources
import hashlib
import uuid

from google.cloud import storage
from google.cloud import aiplatform
from google.cloud.aiplatform import (
    MatchingEngineIndex,
    MatchingEngineIndexEndpoint,
)
from google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint import (
    Namespace,
)


def create_bucket(bucket_name):
    """Return the Cloud Storage bucket, creating it first when allowed.

    Args:
        bucket_name: Bucket name without the ``gs://`` prefix.

    Returns:
        The bucket object. This is the existing bucket, the newly created
        bucket, or - when the bucket is missing and ``CREATE_RESOURCES`` is
        false - a local handle to a bucket that does not exist yet.
    """
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)

    if bucket.exists():
        print(f"Bucket {bucket.name} exists")
    elif CREATE_RESOURCES:
        # Return the bucket that was actually created rather than the
        # pre-creation handle.
        bucket = storage_client.create_bucket(bucket_name)
        print(f"Bucket {bucket.name} created")
    else:
        # Previously this case was reported as "exists", which was wrong.
        print(
            f"Bucket {bucket.name} does not exist and CREATE_RESOURCES flag set to {CREATE_RESOURCES}. Skip creating resources"
        )

    return bucket


def create_index():
    """Look up the Vector Search index named ``VS_INDEX_NAME`` or create it.

    Returns:
        The first existing index whose display name matches, otherwise a newly
        created tree-AH index when ``CREATE_RESOURCES`` is true, otherwise
        ``None``.
    """
    matches = MatchingEngineIndex.list(filter=f"display_name={VS_INDEX_NAME}")
    resource_names = [match.resource_name for match in matches]

    # Reuse an existing index when one is found.
    if len(resource_names) != 0:
        vs_index = MatchingEngineIndex(index_name=resource_names[0])
        print(
            f"Vector Search index {vs_index.display_name} exists with resource name {vs_index.resource_name}"
        )
        return vs_index

    # Nothing found and creation is disabled.
    if not CREATE_RESOURCES:
        print(
            f"CREATE_RESOURCES flag set to {CREATE_RESOURCES}. Skip creating resources"
        )
        return None

    print(f"Creating Vector Search index {VS_INDEX_NAME} ...")
    index_settings = dict(
        display_name=VS_INDEX_NAME,
        dimensions=VS_DIMENSIONS,
        approximate_neighbors_count=VS_APPROX_NEIGHBORS,
        distance_measure_type=VS_DISTANCE_MEASURE_TYPE,
        leaf_node_embedding_count=VS_LEAF_NODE_EMB_COUNT,
        leaf_nodes_to_search_percent=VS_LEAF_SEARCH_PERCENT,
        description=VS_DESCRIPTION,
        shard_size=VS_INDEX_SHARD_SIZE,
        index_update_method=VS_INDEX_UPDATE_METHOD,
        project=PROJECT_ID,
        location=REGION,
    )
    vs_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
        **index_settings
    )
    print(
        f"Vector Search index {vs_index.display_name} created with resource name {vs_index.resource_name}"
    )
    return vs_index


def create_index_endpoint():
    """Look up the index endpoint named ``VS_INDEX_ENDPOINT_NAME`` or create it.

    Returns:
        The first existing endpoint whose display name matches, otherwise a
        newly created public endpoint when ``CREATE_RESOURCES`` is true,
        otherwise ``None``.
    """
    matches = MatchingEngineIndexEndpoint.list(
        filter=f"display_name={VS_INDEX_ENDPOINT_NAME}"
    )
    resource_names = [match.resource_name for match in matches]

    # Reuse an existing endpoint when one is found.
    if len(resource_names) != 0:
        vs_endpoint = MatchingEngineIndexEndpoint(
            index_endpoint_name=resource_names[0]
        )
        print(
            f"Vector Search index endpoint {vs_endpoint.display_name} exists with resource name {vs_endpoint.resource_name}"
        )
        return vs_endpoint

    # Nothing found and creation is disabled.
    if not CREATE_RESOURCES:
        print(
            f"CREATE_RESOURCES flag set to {CREATE_RESOURCES}. Skip creating resources"
        )
        return None

    print(
        f"Creating Vector Search index endpoint {VS_INDEX_ENDPOINT_NAME} ..."
    )
    endpoint_settings = dict(
        display_name=VS_INDEX_ENDPOINT_NAME,
        public_endpoint_enabled=True,
        description=VS_DESCRIPTION,
        project=PROJECT_ID,
        location=REGION,
    )
    vs_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
        **endpoint_settings
    )
    print(
        f"Vector Search index endpoint {vs_endpoint.display_name} created with resource name {vs_endpoint.resource_name}"
    )
    return vs_endpoint


def deploy_index(index, endpoint):
    """Deploy ``index`` to ``endpoint`` unless the index is already deployed.

    Args:
        index: The Vector Search index, or ``None`` when index creation was
            skipped.
        endpoint: The index endpoint to deploy to, or ``None`` when endpoint
            creation was skipped.

    Returns:
        The endpoint the index is deployed at. When the index already has a
        deployment, this is the endpoint of its first deployment, which is not
        necessarily ``endpoint``. Returns ``None`` when nothing was deployed.
    """
    # create_index() / create_index_endpoint() return None when
    # CREATE_RESOURCES is false and nothing exists; there is nothing to deploy.
    if index is None or endpoint is None:
        print(
            "Vector Search index or index endpoint is not available. Skip deploying index"
        )
        return None

    deployments = index.deployed_indexes

    if len(deployments) != 0:
        vs_deployed_index = MatchingEngineIndexEndpoint(
            index_endpoint_name=deployments[0].index_endpoint
        )
        print(
            f"Vector Search index {index.display_name} is already deployed at endpoint {vs_deployed_index.display_name}"
        )
        return vs_deployed_index

    if not CREATE_RESOURCES:
        print(
            f"CREATE_RESOURCES flag set to {CREATE_RESOURCES}. Skip creating resources"
        )
        return None

    print(
        f"Deploying Vector Search index {index.display_name} at endpoint {endpoint.display_name} ..."
    )
    # Deployed index id = index name plus the last group of a deterministic
    # UUID derived from that name, with "-" replaced by "_".
    uuid_suffix = create_uuid(VS_INDEX_NAME).split("-")[-1]
    deployed_index_id = f"{VS_INDEX_NAME}_{uuid_suffix}".replace("-", "_")
    vs_deployed_index = endpoint.deploy_index(
        index=index,
        deployed_index_id=deployed_index_id,
        display_name=VS_INDEX_NAME,
        machine_type=VS_MACHINE_TYPE,
        min_replica_count=VS_MIN_REPLICAS,
        max_replica_count=VS_MAX_REPLICAS,
    )
    print(
        f"Vector Search index {index.display_name} is deployed at endpoint {vs_deployed_index.display_name}"
    )
    return vs_deployed_index


def create_uuid(name: str):
    """Return a deterministic UUID string built from the MD5 digest of ``name``.

    The same input always yields the same UUID string.
    """
    digest = hashlib.md5(name.encode("UTF-8")).digest()
    return str(uuid.UUID(bytes=digest))

In [None]:
import vertexai

vertexai.init(project=PROJECT_ID, location=LOCATION)

In [None]:
# Create bucket if not exists
bucket = create_bucket(GCS_BUCKET_NAME)

Bucket llamaindex-vertexai exists


In [None]:
# Create vector search index if not exists else return index resource name
vs_index = create_index()
vs_index = MatchingEngineIndex(index_name=vs_index.resource_name)

Vector Search index llamaindex-doc-index exists with resource name projects/560224572293/locations/us-central1/indexes/6244298057953837056


In [None]:
# Create vector search index endpoint if not exists else return index endpoint resource name
vs_endpoint = create_index_endpoint()

Vector Search index endpoint llamaindex-doc-endpoint exists with resource name projects/560224572293/locations/us-central1/indexEndpoints/151948108912132096


In [None]:
# Deploy index to the index endpoint
deploy_index(vs_index, vs_endpoint)

Vector Search index llamaindex-doc-index is already deployed at endpoint llamaindex-doc-endpoint


<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7fc398a077f0> 
resource name: projects/560224572293/locations/us-central1/indexEndpoints/151948108912132096

In [None]:
from llama_index.core import StorageContext, Settings, VectorStoreIndex
from llama_index.vector_stores.vertexaivectorsearch import VertexAIVectorStore
from llama_index.embeddings.vertex import (
    VertexTextEmbedding,
    VertexEmbeddingMode,
)
from llama_index.storage.docstore.firestore import FirestoreDocumentStore
from llama_index.storage.kvstore.firestore import FirestoreKVStore

In [None]:
print(f'export PROJECT_ID="{PROJECT_ID}"')
print(f'export REGION="{LOCATION}"')
print(f'export INDEX_ID="{vs_index.resource_name}"')
print(f'export ENDPOINT_ID="{vs_endpoint.resource_name}"')
print(f'export GCS_BUCKET_NAME="{GCS_BUCKET_NAME}"')

export PROJECT_ID="rthallam-demo-project"
export REGION="us-central1"
export INDEX_ID="projects/560224572293/locations/us-central1/indexes/6244298057953837056"
export ENDPOINT_ID="projects/560224572293/locations/us-central1/indexEndpoints/151948108912132096"
export GCS_BUCKET_NAME="llamaindex-vertexai"


In [None]:
# setup storage
vector_store = VertexAIVectorStore(
    project_id=PROJECT_ID,
    region=LOCATION,
    index_id=vs_index.resource_name,
    endpoint_id=vs_endpoint.resource_name,
    gcs_bucket_name=GCS_BUCKET_NAME,
)

kv_store = FirestoreKVStore()
doc_store = FirestoreDocumentStore(kv_store)

storage_context = StorageContext.from_defaults(
    docstore=doc_store, vector_store=vector_store
)

# setup the index/query process, ie the embedding model (and completion if used)
Settings.embed_model = VertexTextEmbedding(
    project=PROJECT_ID,
    location=LOCATION,
    model_name="textembedding-gecko@003",
    embed_mode=VertexEmbeddingMode.RETRIEVAL_MODE,
)

In [None]:
from llama_index.core.schema import TextNode
import uuid
import hashlib


# NOTE(review): a helper with the same name and the same logic is already
# defined in the "Utility function to create resources" cell; this definition
# shadows it. Consider keeping only one.
def create_uuid(text: str):
    """Return a deterministic UUID string built from the MD5 hash of ``text``."""
    encoded = text.encode("UTF-8")
    md5_hex = hashlib.md5(encoded).hexdigest()
    return str(uuid.UUID(md5_hex))


# Sample product catalogue used to build TextNodes. Each record has a free-text
# "description" (which becomes the node text) plus "price", "color" and
# "season" fields (which become node metadata).
# Descriptions rely on implicit string concatenation, so every fragment must
# carry its own separating space.
record_data = [
    {
        # Fixed: the first fragment lacked a trailing space ("jeans.Made").
        "description": "A versatile pair of dark-wash denim jeans. "
        "Made from durable cotton with a classic straight-leg cut, these jeans"
        " transition easily from casual days to dressier occasions.",
        "price": 65.00,
        "color": "blue",
        "season": ["fall", "winter", "spring"],
    },
    {
        "description": "A lightweight linen button-down shirt in a crisp white."
        " Perfect for keeping cool with breathable fabric and a relaxed fit.",
        "price": 34.99,
        "color": "white",
        "season": ["summer", "spring"],
    },
    {
        "description": "A soft, chunky knit sweater in a vibrant forest green. "
        "The oversized fit and cozy wool blend make this ideal for staying warm "
        "when the temperature drops.",
        "price": 89.99,
        "color": "green",
        "season": ["fall", "winter"],
    },
    {
        "description": "A classic crewneck t-shirt in a soft, heathered blue. "
        "Made from comfortable cotton jersey, this t-shirt is a wardrobe essential "
        "that works for every season.",
        "price": 19.99,
        "color": "blue",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A flowing midi-skirt in a delicate floral print. "
        "Lightweight and airy, this skirt adds a touch of feminine style "
        "to warmer days.",
        "price": 45.00,
        "color": "white",
        "season": ["spring", "summer"],
    },
    {
        "description": "A pair of tailored black trousers in a comfortable stretch "
        "fabric. Perfect for work or dressier events, these trousers provide a"
        " sleek, polished look.",
        "price": 59.99,
        "color": "black",
        "season": ["fall", "winter", "spring"],
    },
    {
        "description": "A cozy fleece hoodie in a neutral heather grey.  "
        "This relaxed sweatshirt is perfect for casual days or layering when the "
        "weather turns chilly.",
        "price": 39.99,
        "color": "grey",
        "season": ["fall", "winter", "spring"],
    },
    {
        "description": "A bright yellow raincoat with a playful polka dot pattern. "
        "This waterproof jacket will keep you dry and add a touch of cheer to "
        "rainy days.",
        "price": 75.00,
        "color": "yellow",
        "season": ["spring", "fall"],
    },
    {
        "description": "A pair of comfortable khaki chino shorts. These versatile "
        "shorts are a summer staple, perfect for outdoor adventures or relaxed"
        " weekends.",
        "price": 34.99,
        "color": "khaki",
        "season": ["summer"],
    },
    {
        "description": "A bold red cocktail dress with a flattering A-line "
        "silhouette. This statement piece is made from a luxurious satin fabric, "
        "ensuring a head-turning look.",
        "price": 125.00,
        "color": "red",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A pair of classic white sneakers crafted from smooth "
        "leather. These timeless shoes offer a clean and polished look, perfect "
        "for everyday wear.",
        "price": 79.99,
        "color": "white",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A chunky cable-knit scarf in a rich burgundy color. "
        "Made from a soft wool blend, this scarf will provide warmth and a touch "
        "of classic style to cold-weather looks.",
        "price": 45.00,
        "color": "burgundy",
        "season": ["fall", "winter"],
    },
    {
        "description": "A lightweight puffer vest in a vibrant teal hue. "
        "This versatile piece adds a layer of warmth without bulk, transitioning"
        " perfectly between seasons.",
        "price": 65.00,
        "color": "teal",
        "season": ["fall", "spring"],
    },
    {
        "description": "A pair of high-waisted leggings in a sleek black."
        " Crafted from a moisture-wicking fabric with plenty of stretch, "
        "these leggings are perfect for workouts or comfortable athleisure style.",
        "price": 49.99,
        "color": "black",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A denim jacket with a faded wash and distressed details. "
        "This wardrobe staple adds a touch of effortless cool to any outfit.",
        "price": 79.99,
        "color": "blue",
        "season": ["fall", "spring", "summer"],
    },
    {
        "description": "A woven straw sunhat with a wide brim. This stylish "
        "accessory provides protection from the sun while adding a touch of "
        "summery elegance.",
        "price": 32.00,
        "color": "beige",
        "season": ["summer"],
    },
    {
        "description": "A graphic tee featuring a vintage band logo. "
        "Made from a soft cotton blend, this casual tee adds a touch of "
        "personal style to everyday looks.",
        "price": 24.99,
        "color": "white",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A pair of well-tailored dress pants in a neutral grey. "
        "Made from a wrinkle-resistant blend, these pants look sharp and "
        "professional for workwear or formal occasions.",
        "price": 69.99,
        "color": "grey",
        "season": ["fall", "winter", "summer", "spring"],
    },
    {
        "description": "A pair of classic leather ankle boots in a rich brown hue."
        " Featuring a subtle stacked heel and sleek design, these boots are perfect"
        " for elevating outfits in cooler seasons.",
        "price": 120.00,
        "color": "brown",
        "season": ["fall", "winter", "spring"],
    },
    # NOTE(review): the record below was already commented out; presumably it
    # is held back on purpose - confirm, or delete it.
    # {
    #     "description": "A vibrant swimsuit with a bold geometric pattern. This fun "
    #     "and eye-catching piece is perfect for making a splash by the pool or at "
    #     "the beach.",
    #     "price": 55.00,
    #     "color": "multicolor",
    #     "season": ["summer"],
    # },
]

# Build one TextNode per catalogue record: the description becomes the node
# text, the remaining fields become metadata, and the node id is a
# deterministic UUID derived from the description.
# NOTE(review): this rebinds `nodes`, which the first cell of the notebook
# assigned to the chunks parsed from ./data.
nodes = []
for record in record_data:
    page_content = record.get("description")
    # Validate before hashing: create_uuid() calls .encode() on its argument,
    # so a missing or non-string description must be skipped first.
    if not isinstance(page_content, str):
        continue
    # Build the metadata without mutating (or copying) the source record.
    metadata = {
        key: value for key, value in record.items() if key != "description"
    }
    nodes.append(
        TextNode(
            id_=create_uuid(page_content),
            text=page_content,
            metadata=metadata,
        )
    )

In [None]:
nodes[0].text

'A versatile pair of dark-wash denim jeans.Made from durable cotton with a classic straight-leg cut, these jeans transition easily from casual days to dressier occasions.'

In [None]:
storage_context.docstore.add_documents(nodes)

In [None]:
len(storage_context.docstore.docs)

109

In [None]:
index = VectorStoreIndex(nodes, storage_context=storage_context)

Upserting datapoints MatchingEngineIndex index: projects/560224572293/locations/us-central1/indexes/6244298057953837056
MatchingEngineIndex index Upserted datapoints. Resource name: projects/560224572293/locations/us-central1/indexes/6244298057953837056


In [None]:
from llama_index.llms.vertex import Vertex

vertex_gemini = Vertex(model="gemini-pro", temperature=0, additional_kwargs={})
Settings.llm = vertex_gemini

In [None]:
vector_index = VectorStoreIndex.from_vector_store(vector_store)
query_engine = vector_index.as_query_engine()
vector_response = query_engine.query(
    "What is the Google Cloud revenues in 2022?"
)
print(vector_response.response)

Google Cloud revenues in 2022 were $26.28 billion.


In [None]:
vector_response = query_engine.query("What are Google's R&D expenses in 2022?")
print(vector_response.response)

Google's research and development expenses in 2022 were $31.2 billion. This represents a 17% increase from the previous year. The company's R&D expenses are primarily focused on artificial intelligence, cloud computing, and hardware.


In [None]:
vars(vector_response)

{'response': "Google's research and development expenses in 2022 were $31.2 billion. This represents a 17% increase from the previous year. The company's R&D expenses are primarily focused on artificial intelligence, cloud computing, and hardware.",
 'source_nodes': [NodeWithScore(node=TextNode(id_='b674578d-347c-48ad-a659-4b24c0a6dead', embedding=[0.03281565383076668, -0.027546964585781097, 0.011594679206609726, 0.02160458266735077, 0.10328283905982971, 0.04076341167092323, 0.01853843219578266, -0.047991152852773666, 0.013566440902650356, -0.004668778739869595, -5.800629151053727e-05, 0.019096558913588524, -0.0012207955587655306, 0.022969670593738556, 0.025282833725214005, -0.016517166048288345, -0.039272062480449677, 0.0002738218172453344, 0.02130301482975483, -0.002971512032672763, 0.010454706847667694, 0.0012866107281297445, -0.04000452905893326, -0.03143702819943428, -0.04003188759088516, -0.029545437544584274, -0.04830791801214218, -0.018948303535580635, -0.05820021405816078, 0.0

In [None]:
vector_response.metadata

{'7efb5bf2-5af0-432d-b18b-2a1702aadbbe': {'page_label': '16',
  'file_name': 'Google_10-K_2022.pdf',
  'file_path': '/home/jupyter/llama_index/llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/tests/data/Google_10-K_2022.pdf',
  'file_type': 'application/pdf',
  'file_size': 4006440,
  'creation_date': '2024-04-26',
  'last_modified_date': '2024-04-26'},
 'bc6af642-d06f-4bec-aec3-4fee8329f18c': {'page_label': '12',
  'file_name': 'Google_10-K_2022.pdf',
  'file_path': '/home/jupyter/llama_index/llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch/tests/data/Google_10-K_2022.pdf',
  'file_type': 'application/pdf',
  'file_size': 4006440,
  'creation_date': '2024-04-26',
  'last_modified_date': '2024-04-26'}}

---

In [None]:
# Delete datapoints from Vertex AI Vector Store
import json
from google.cloud import aiplatform_v1


def delete_from_matching_engine(
    vs_index, vs_endpoint, delete=False, dimensions=None, max_datapoints=5000
):
    """List the datapoints of a deployed index and optionally delete them.

    Datapoints are discovered by querying the endpoint with an all-zero vector
    and asking for up to ``max_datapoints`` neighbours, so at most that many
    datapoints are listed or removed per call.

    Args:
        vs_index: Index object; its first deployed index is queried and
            ``remove_datapoints`` is called on it.
        vs_endpoint: Index endpoint object used for ``find_neighbors``.
        delete: When false (default) only print what was found (dry run).
        dimensions: Length of the zero query vector. Defaults to the
            notebook-level ``VS_DIMENSIONS``.
        max_datapoints: Maximum number of neighbours to request.

    Returns:
        None. Progress and the removal response are printed.
    """
    if dimensions is None:
        dimensions = VS_DIMENSIONS

    neighbors = vs_endpoint.find_neighbors(
        deployed_index_id=vs_index.deployed_indexes[0].deployed_index_id,
        queries=[[0.0] * dimensions],
        num_neighbors=max_datapoints,
        return_full_datapoint=True,
    )
    # One result list per query; guard against an empty response.
    matches = neighbors[0] if neighbors else []

    for neighbor in matches:
        entry = {
            namespace.name: namespace.allow_tokens
            for namespace in neighbor.restricts
        }
        node_payload = entry.get("_node_content")
        if not node_payload:
            # Datapoint carries no serialized node; still eligible for
            # deletion, but there is no text to show.
            print(f"Datapoint {neighbor.id} has no _node_content restrict")
            continue
        node_content = json.loads(node_payload[0])
        print(node_content["text"])

    datapoint_ids = [neighbor.id for neighbor in matches]

    if not delete:
        print(
            f"Found {len(datapoint_ids)} datapoints (dry run, nothing deleted)"
        )
        return

    if not datapoint_ids:
        # Avoid sending an empty removal request.
        print("No datapoints to delete")
        return

    print(f"Deleting {len(datapoint_ids)} datapoints")
    response = vs_index.remove_datapoints(datapoint_ids=datapoint_ids)
    print(response)


delete_from_matching_engine(vs_index, vs_endpoint, delete=True)

A versatile pair of dark-wash denim jeans.Made from durable cotton with a classic straight-leg cut, these jeans transition easily from casual days to dressier occasions.
A lightweight linen button-down shirt in a crisp white. Perfect for keeping cool with breathable fabric and a relaxed fit.
A soft, chunky knit sweater in a vibrant forest green. The oversized fit and cozy wool blend make this ideal for staying warm when the temperature drops.
A classic crewneck t-shirt in a soft, heathered blue. Made from comfortable cotton jersey, this t-shirt is a wardrobe essential that works for every season.
A flowing midi-skirt in a delicate floral print. Lightweight and airy, this skirt adds a touch of feminine style to warmer days.
A pair of tailored black trousers in a comfortable stretch fabric. Perfect for work or dressier events, these trousers provide a sleek, polished look.
A cozy fleece hoodie in a neutral heather grey.  This relaxed sweatshirt is perfect for casual days or layering when

In [None]:
print(vs_endpoint.private_service_access_network)
print(vs_endpoint.public_endpoint_domain_name)


1087190516.us-central1-560224572293.vdb.vertexai.goog


In [None]:
from google.cloud import aiplatform_v1

# Set variables for the current deployed index.
# Derive them from the endpoint/index objects created earlier in this notebook
# instead of hard-coding values that only exist in one project.
API_ENDPOINT = vs_endpoint.public_endpoint_domain_name
INDEX_ENDPOINT = vs_endpoint.resource_name
DEPLOYED_INDEX_ID = vs_index.deployed_indexes[0].deployed_index_id

# Id of the datapoint to look up.
# NOTE(review): this id comes from the original author's index; replace it
# with an id that exists in yours.
DATAPOINT_ID = "8b2faa25-0bcd-41b3-8612-b745b5a2adf1"

# Configure Vector Search client
client_options = {"api_endpoint": API_ENDPOINT}
vector_search_client = aiplatform_v1.MatchServiceClient(
    client_options=client_options,
)

# Build FindNeighborsRequest object. The query references the datapoint by id
# only; no feature vector is supplied.
datapoint = aiplatform_v1.IndexDatapoint(datapoint_id=DATAPOINT_ID)
query = aiplatform_v1.FindNeighborsRequest.Query(
    datapoint=datapoint,
    # The number of nearest neighbors to be retrieved
    neighbor_count=1,
)
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    # Request can have multiple queries
    queries=[query],
    return_full_datapoint=True,
)

# Execute the request
response = vector_search_client.find_neighbors(request)

# Handle the response
print(response)

nearest_neighbors {
  id: "8b2faa25-0bcd-41b3-8612-b745b5a2adf1"
  neighbors {
    datapoint {
      datapoint_id: "8b2faa25-0bcd-41b3-8612-b745b5a2adf1"
      feature_vector: 0.0508964248
      feature_vector: 0.000485334109
      feature_vector: -0.0424281061
      feature_vector: -0.0081797773
      feature_vector: 0.0729037821
      feature_vector: 0.0293532051
      feature_vector: 0.0218644273
      feature_vector: -0.020121431
      feature_vector: 0.0132044675
      feature_vector: 0.0502230786
      feature_vector: -0.0423855744
      feature_vector: 0.0398046114
      feature_vector: -0.0360235535
      feature_vector: 0.00263720495
      feature_vector: -0.0220741685
      feature_vector: -0.0560635589
      feature_vector: 0.0160678215
      feature_vector: 0.020458106
      feature_vector: -0.000417721691
      feature_vector: 0.0169399362
      feature_vector: 0.0189459175
      feature_vector: 0.00547863776
      feature_vector: 0.000317752972
      feature_vector: -0.00

In [None]:
from llama_index.vector_stores.vertexaivectorsearch.utils import get_datapoint

result = get_datapoint(
    vs_index, vs_endpoint, "5dd452b3-5eee-412c-aaad-129fb7efb577"
)
len(result.nearest_neighbors)

NotFound: 404 5dd452b3-5eee-412c-aaad-129fb7efb577 entity does not exist in the dataset

In [None]:
metadata = {
    m.namespace: m.allow_list
    for m in response.nearest_neighbors[0].neighbors[0].datapoint.restricts
}
node_json = metadata.get("_node_content", None)
print(json.loads(node_json[0])["text"])

Table of Contents Alphabet Inc.
Research and Development
The following table presents R&D expenses (in millions, except percentages):
 Year Ended December 31,
 2021 2022
Research and development expenses $ 31,562 $ 39,500 
Research and development expenses as a percentage of revenues 12 % 14 %
R&D expenses increased $7.9 billion from 2021 to 2022 primarily driven by an increase in compensation expenses of
$5.4 billion, largely resulting from a 21% increase in average headcount, and an increase in third-party service fees of
$704 million.
Sales and Marketing
The following table presents sales and marketing expenses (in millions, except percentages):
 Year Ended December 31,
 2021 2022
Sales and marketing expenses $ 22,912 $ 26,567 
Sales and marketing expenses as a percentage of revenues 9 % 9 %
Sales and marketing expenses increased $3.7 billion from 2021 to 2022, primarily driven by an increase in
compensation expenses of $1.8 billion, largely resulting from a 19% increase in average 

In [None]:
import json

node_json = [
    metadata.allow_list
    for metadata in response.nearest_neighbors[0]
    .neighbors[0]
    .datapoint.restricts
    if metadata.namespace == "_node_content"
][0][0]
json.loads(node_json)
type(node_json)

str

In [None]:
from llama_index.core.schema import TextNode

TextNode.from_json(node_json)

TypeError: the JSON object must be str, bytes or bytearray, not Repeated

In [None]:
import os

# Read the connection settings from the environment; an unset variable
# becomes "".
# NOTE: this rebinds PROJECT_ID, REGION and GCS_BUCKET_NAME, which the project
# settings cell earlier in the notebook already assigned.
PROJECT_ID = os.getenv("PROJECT_ID", "")
REGION = os.getenv("REGION", "")
INDEX_ID = os.getenv("INDEX_ID", "")
ENDPOINT_ID = os.getenv("ENDPOINT_ID", "")
GCS_BUCKET_NAME = os.getenv("GCS_BUCKET_NAME", "")

# Print the values that were actually read. The labels previously showed
# LOCATION / vs_index / vs_endpoint, which hid the fact that REGION, INDEX_ID
# and ENDPOINT_ID were empty.
print(f"PROJECT_ID={PROJECT_ID}")
print(f"REGION={REGION}")
print(f"INDEX_ID={INDEX_ID}")
print(f"ENDPOINT_ID={ENDPOINT_ID}")
print(f"GCS_BUCKET_NAME={GCS_BUCKET_NAME}")


def set_all_env_vars() -> bool:
    """Report whether every required setting holds a non-empty value.

    Despite its name, this function sets nothing; it only inspects the
    module-level PROJECT_ID, REGION, INDEX_ID, ENDPOINT_ID and GCS_BUCKET_NAME.
    """
    required = (PROJECT_ID, REGION, INDEX_ID, ENDPOINT_ID, GCS_BUCKET_NAME)
    for value in required:
        if not value:
            return False
    return True


set_all_env_vars()

PROJECT_ID=
REGION=us-central1
INDEX_ID=projects/560224572293/locations/us-central1/indexes/6244298057953837056
ENDPOINT_ID=projects/560224572293/locations/us-central1/indexEndpoints/151948108912132096
GCS_BUCKET_NAME=


False

In [None]:
os.environ["INDEX_ID"]

KeyError: 'INDEX_ID'

In [None]:
# Show the settings read from the environment. Each label is paired with the
# variable of the same name (previously REGION / INDEX_ID / ENDPOINT_ID showed
# LOCATION, vs_index and vs_endpoint values instead).
print(f"PROJECT_ID={PROJECT_ID}")
print(f"REGION={REGION}")
print(f"INDEX_ID={INDEX_ID}")
print(f"ENDPOINT_ID={ENDPOINT_ID}")
print(f"GCS_BUCKET_NAME={GCS_BUCKET_NAME}")

PROJECT_ID=
REGION=us-central1
INDEX_ID=projects/560224572293/locations/us-central1/indexes/6244298057953837056
ENDPOINT_ID=projects/560224572293/locations/us-central1/indexEndpoints/151948108912132096
GCS_BUCKET_NAME=
