## Vector Search : Create index

In [2]:
%pip install --upgrade --quiet google-cloud-aiplatform

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Google Cloud project and region used by every later cell
# (gcloud config, aiplatform.init and the genai client).
PROJECT_ID = "ai-hangsik"
REGION = "us-central1"

In [5]:
# Create Application Default Credentials through an interactive browser login,
# then make PROJECT_ID the active gcloud project.
!gcloud auth application-default login
!gcloud config set project {PROJECT_ID}

Your browser has been opened to visit:

    https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=764086051850-6qr4p6gpi6hn506pt8ejuq83di341hur.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A8085%2F&scope=openid+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fuserinfo.email+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fcloud-platform+https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fsqlservice.login&state=CVbRrZINX8lyiUV3Gsi4KWAjShXJlo&access_type=offline&code_challenge=HmoPpo-XO49ABthpe734rnEh9ZIN43IR01Gn7fQHa2Q&code_challenge_method=S256


Credentials saved to file: [/Users/hangsik/.config/gcloud/application_default_credentials.json]

These credentials will be used by any library that requests Application Default Credentials (ADC).

Quota project "ai-hangsik" was added to ADC which can be used by Google client libraries for billing and quota. Note that some services may still bill the project owning the resource.
Updated property [core/project].


In [78]:
# Initialize the Vertex AI (AI Platform) SDK

from datetime import datetime

from google.cloud import aiplatform
from typing import Optional
import embedding as embed_utils


# Bind the SDK to the project/region chosen above so later aiplatform calls
# do not need to repeat them.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [None]:
# Create Index

# Month-day-hour-minute timestamp used as a suffix for resource names.
UID = datetime.now().strftime("%m%d%H%M")
INDEX_NAME = f"vs-index-{UID}"
DIMENSIONS = 3072  # for gemini-embedding-001
DIMESSIONS = DIMENSIONS  # misspelled legacy name kept as an alias for any existing references

# Tree-AH index with streaming updates, so datapoints can be upserted directly
# after creation (see the "Streaming Data Update" section).
vs_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=INDEX_NAME,
    dimensions=DIMENSIONS,
    approximate_neighbors_count=10,
    leaf_node_embedding_count=100,
    leaf_nodes_to_search_percent=20,
    index_update_method="STREAM_UPDATE",
    distance_measure_type=aiplatform.matching_engine.matching_engine_index_config.DistanceMeasureType.COSINE_DISTANCE,
)

print(f"Created Index: {vs_index.resource_name}")

Creating MatchingEngineIndex
Create MatchingEngineIndex backing LRO: projects/721521243942/locations/us-central1/indexes/4991547494614695936/operations/3174590184524087296
MatchingEngineIndex created. Resource name: projects/721521243942/locations/us-central1/indexes/4991547494614695936
To use this MatchingEngineIndex in another session:
index = aiplatform.MatchingEngineIndex('projects/721521243942/locations/us-central1/indexes/4991547494614695936')
Created Index: projects/721521243942/locations/us-central1/indexes/4991547494614695936


In [16]:
# Create the IndexEndpoint that will serve the index over a public endpoint.
vs_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    display_name=f"vs-index-{UID}",
    public_endpoint_enabled=True,
)

Creating MatchingEngineIndexEndpoint
Create MatchingEngineIndexEndpoint backing LRO: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496/operations/7646664614502989824
MatchingEngineIndexEndpoint created. Resource name: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496
To use this MatchingEngineIndexEndpoint in another session:
index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496')


In [17]:
# Deploy the index onto the endpoint under an id derived from the same UID suffix.
# The call is left as the cell's last expression so the returned endpoint is displayed.
DEPLOYED_INDEX_ID = f"vs_index_deployed_{UID}"
vs_index_endpoint.deploy_index(
    index=vs_index,
    deployed_index_id=DEPLOYED_INDEX_ID,
)

Deploying index MatchingEngineIndexEndpoint index_endpoint: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496
Deploy index MatchingEngineIndexEndpoint index_endpoint backing LRO: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496/operations/2421081671869661184
MatchingEngineIndexEndpoint index_endpoint Deployed index. Resource name: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496


<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x13475c320> 
resource name: projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496

## Streaming Data Update

In [45]:
import json

def jsonl_to_array(file_path):
  """Load a JSON Lines file into a list of parsed records.

  Each non-blank line of the file is parsed as one JSON document. Blank or
  whitespace-only lines (for example a trailing newline at the end of the
  file) are skipped instead of raising a decode error.

  Args:
    file_path: Path of the UTF-8 encoded JSONL file to read.

  Returns:
    A list with one parsed JSON value per non-blank line, in file order.

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  records = []
  with open(file_path, 'r', encoding='utf-8') as f:
    for raw_line in f:
      stripped = raw_line.strip()
      if not stripped:
        # Nothing to parse on an empty line.
        continue
      records.append(json.loads(stripped))
  return records

# Read every record of the JSONL dataset into memory, then preview the first one.
file_path = "vector_search_dataset.json"
datapoints = jsonl_to_array(file_path)
datapoints[0]

{'datapoint_id': '1',
 'feature_vector': [-0.028963137418031693,
  0.012049556709825993,
  -0.006816131062805653,
  -0.07344743609428406,
  -0.009694629348814487,
  -0.01742573454976082,
  -0.02613888308405876,
  0.012300304137170315,
  0.0022439893800765276,
  0.012580711394548416,
  -0.008340196684002876,
  -0.0017689877422526479,
  -0.012250063940882683,
  0.02446151152253151,
  0.12362102419137955,
  -0.007907714694738388,
  0.004604454152286053,
  0.008389596827328205,
  -0.009573840536177158,
  -0.0024745287373661995,
  -0.02634391002357006,
  -0.0009189480915665627,
  -0.00220539141446352,
  -0.008932769298553467,
  -0.003138359170407057,
  -0.00407725665718317,
  0.021254248917102814,
  0.00808217003941536,
  0.040438391268253326,
  0.007199564483016729,
  -0.013137942180037498,
  0.007316088769584894,
  0.0026240115985274315,
  0.009585423395037651,
  -0.0022709572222083807,
  0.0011993589578196406,
  0.02767503820359707,
  0.01929563470184803,
  0.009222270920872688,
  0.0207

In [46]:
# Number of datapoint records loaded from the dataset file.
len(datapoints)

52

In [None]:
# Stream the loaded datapoints into the index, sending at most
# BATCH_SIZE datapoints per upsert request.
BATCH_SIZE = 1000
for start in range(0, len(datapoints), BATCH_SIZE):
    batch = datapoints[start : start + BATCH_SIZE]
    vs_index.upsert_datapoints(datapoints=batch)

Upserting datapoints MatchingEngineIndex index: projects/721521243942/locations/us-central1/indexes/4991547494614695936
MatchingEngineIndex index Upserted datapoints. Resource name: projects/721521243942/locations/us-central1/indexes/4991547494614695936


### gRPC without metadata response

In [48]:
# Reuse the stored embedding of the dataset record at index 3 as the query vector.
query_emb = datapoints[3]['feature_vector']
query_emb

[-0.004924905952066183,
 -0.0019802474416792393,
 0.017497165128588676,
 -0.08207350224256516,
 -0.014875589869916439,
 0.0029676659032702446,
 -0.017053915187716484,
 -0.0069853560999035835,
 0.026978811249136925,
 0.007630724459886551,
 0.020013580098748207,
 -0.004325113259255886,
 -0.03118353709578514,
 0.019262420013546944,
 0.11868955194950104,
 -0.01146419532597065,
 0.0029286746867001057,
 -0.02326570823788643,
 -0.0027040597051382065,
 0.0017042981926351786,
 -0.012187711894512177,
 -0.012715554796159267,
 0.01630852185189724,
 -0.0034158010967075825,
 -0.008125458844006062,
 -0.014405077323317528,
 0.04114973172545433,
 0.011082475073635578,
 0.03690311685204506,
 -0.00993526540696621,
 -0.0017735337605699897,
 0.0019486336968839169,
 0.02819550223648548,
 0.006040961015969515,
 0.006679041776806116,
 0.027221621945500374,
 0.03107691928744316,
 -0.0095354113727808,
 -0.006400377955287695,
 0.031096231192350388,
 -0.0070350198075175285,
 0.0047868057154119015,
 0.020483959466

In [None]:
# run query

# https://docs.cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndexEndpoint#google_cloud_aiplatform_MatchingEngineIndexEndpoint_find_neighbors
response = vs_index_endpoint.find_neighbors(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query_emb],
    num_neighbors=10,
    return_full_datapoint=True,
)

# show the results
# Only one query vector was sent, so the matches are read from response[0].
for neighbor in response[0]:
    print(neighbor)
    print(neighbor.distance)

### gRPC with metadata response

In [93]:
import time
from google import genai
import embedding as embed_utils

# Embedding model and the user utterance to search for.
MODEL = "gemini-embedding-001"
QUERY = "오징어 있어?"

# Gemini client that goes through Vertex AI in the configured project/region.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=REGION)

# Embed the query with the local `embedding` helper module.
# NOTE(review): the helper's implementation is not shown here; the result is
# later used as the feature vector of the search request — confirm it returns
# a flat list of floats of length `output_dimensionality`.
embed_query = embed_utils.gemini_embedding_func(
    client=client,
    model=MODEL,
    task_type="SEMANTIC_SIMILARITY",
    output_dimensionality=3072,
    contents=QUERY,
)

embed_query

Latency (ns): 1476.42 ms


[-0.01404335256665945,
 0.007914500311017036,
 0.02732064574956894,
 -0.08660509437322617,
 0.00309509108774364,
 -0.004622259642928839,
 -0.01878415420651436,
 0.011810422874987125,
 0.03722843900322914,
 0.00508031714707613,
 -0.022307518869638443,
 -0.005781945306807756,
 -0.01802007108926773,
 -0.009525507688522339,
 0.12462907284498215,
 -0.020976843312382698,
 0.0020834356546401978,
 0.0041082496754825115,
 0.023111512884497643,
 -0.022340286523103714,
 0.002599147381260991,
 -0.006419892888516188,
 0.002548451302573085,
 -0.014159339480102062,
 -0.008541242219507694,
 -0.014932975172996521,
 0.04534873366355896,
 0.012309001758694649,
 0.036453016102313995,
 -0.0036659948527812958,
 -0.012003879062831402,
 0.025348760187625885,
 0.005796291399747133,
 0.00357604562304914,
 0.02114669233560562,
 -0.0019091065041720867,
 0.022663364186882973,
 0.004417792893946171,
 0.007788562681525946,
 0.03218068554997444,
 -0.006164534017443657,
 0.001550922286696732,
 -0.018275663256645203,
 

In [94]:
from google.cloud import aiplatform_v1

# Set variables for the current deployed index.
# NOTE(review): these identifiers are hard-coded to one specific deployment, and
# DEPLOYED_INDEX_ID replaces the value computed in the deploy cell. Update all
# three whenever the index or endpoint is re-created.
API_ENDPOINT = "542644810.us-central1-721521243942.vdb.vertexai.goog"
INDEX_ENDPOINT = "projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496"
DEPLOYED_INDEX_ID = "vs_index_deployed_11082314"

# Configure Vector Search client against the index's own public endpoint.
client_options = {"api_endpoint": API_ENDPOINT}
vector_search_client = aiplatform_v1.MatchServiceClient(
    client_options=client_options,
)

# The timed section covers request construction, the call and result handling.
start_time = time.perf_counter_ns()

# Build FindNeighborsRequest object
datapoint = aiplatform_v1.IndexDatapoint(feature_vector=embed_query)
query = aiplatform_v1.FindNeighborsRequest.Query(
    datapoint=datapoint,
    neighbor_count=5,
)
request = aiplatform_v1.FindNeighborsRequest(
    index_endpoint=INDEX_ENDPOINT,
    deployed_index_id=DEPLOYED_INDEX_ID,
    # Request can have multiple queries
    queries=[query],
    # Needed so each neighbor carries its datapoint (and embedding metadata).
    return_full_datapoint=True,
)

# Execute the request
response = vector_search_client.find_neighbors(request)

# Handle the response: flatten the neighbors of every query into one list of
# {distance, text} records; "text" is read from the datapoint's embedding metadata.
search_results = [
    {
        "distance": f"{neighbor.distance:.4f}",
        "text": neighbor.datapoint.embedding_metadata.get('text'),
    }
    for query_result in response.nearest_neighbors
    for neighbor in query_result.neighbors
]

end_time = time.perf_counter_ns()
latency = end_time - start_time  # nanoseconds
# Convert ns -> ms for display; the label names the unit actually printed.
print(f"VS Search Latency: {latency*1e-6:.2f} ms")

search_results



VS Search Latency (ns): 1013.34 ms


[{'distance': '0.1616', 'text': '오징어 게임 시즌 2 예고편 있어'},
 {'distance': '0.2097', 'text': '오늘 새로 업데이트된 콘텐츠 뭐 있어'},
 {'distance': '0.2150', 'text': '오늘의 주우재 가을 코디 영상 볼래'},
 {'distance': '0.2192', 'text': 'TV야 볼만한 거 추천해 줘'},
 {'distance': '0.2237', 'text': '오징어 게임 시즌 2 지금 바로 재생해 줘'}]

In [95]:
# Generative model used to rewrite the user's query.
MODEL = "gemini-2.5-flash-lite"

# Prompt (in Korean): asks the model to rewrite the user's question with minimal
# changes, using the similar questions returned by Vector Search as reference.
PROMPT = f"""
    당신은 사용자의 질문을 이해해서 정확한 질문의 의도를 바탕으로 사용자의 질문을 재작성해주는 AI 어시스턴트입니다.
    사용자의 질문 : {QUERY} 과 검색된 유사한 질문들을 참고하여 최대한 사용자의 질문을 반영한 명확한 질문으로 재작성해 주세요.
    유사한 질문들 : {search_results}    

    답변은 아래와 같이 사용자의 질문을 최소화해서 변경 후 재작성 해주세요.
    답변예제 : "최신 개봉 영화 예고편 모음 틀어줘" 
"""
start_time = time.perf_counter_ns()

response = client.models.generate_content(
    model=MODEL,
    contents=PROMPT,
)

end_time = time.perf_counter_ns()

latency = end_time - start_time  # nanoseconds
# Convert ns -> ms for display; the label names the unit actually printed.
print(f"{MODEL} Execution Latency: {latency*1e-6:.2f} ms \n")

print(response.text)

gemini-2.5-flash-lite Execution Latency (ns): 547.68 ms 

"오징어 게임 시즌 2 예고편 보여줘"


### REST API example

In [None]:
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" \
"https://542644810.us-central1-721521243942.vdb.vertexai.goog/v1/projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496:findNeighbors" \
-d '{deployedIndexId:"vs_index_deployed_11082314", "queries":[{datapoint:{"featureVector":[0.1,0.2]}}], returnFullDatapoint:true}'

## End of Document