## Vector Search : Create index

In [None]:
# Install (or upgrade) the Vertex AI SDK in the kernel's environment.
%pip install --upgrade --quiet google-cloud-aiplatform

In [None]:
# Google Cloud project and region used by the gcloud configuration, the
# Vertex AI SDK initialization and the Gen AI client in this notebook.
PROJECT_ID = "ai-hangsik"
REGION = "us-central1"

In [None]:
# Log in for Application Default Credentials, then set the active gcloud project.
!gcloud auth application-default login
!gcloud config set project {PROJECT_ID}

In [None]:
# Imports and Vertex AI SDK initialization

# Standard library
from datetime import datetime
from typing import Optional

# Third-party
from google.cloud import aiplatform

# Embedding helpers (appears to be a project-local module)
import embedding as embed_utils

# Set the project and region for the SDK calls made in later cells.
aiplatform.init(
    project=PROJECT_ID,
    location=REGION,
)

In [None]:
# Create Index

# Timestamp suffix (month/day/hour/minute) used in the resource names of this run.
UID = datetime.now().strftime("%m%d%H%M")
INDEX_NAME = f"vs-index-dot-{UID}"

# Embedding size the index is created with; 3072 for gemini-embedding-001.
DIMENSIONS = 3072
# Backward-compatible alias for the original, misspelled constant name.
DIMESSIONS = DIMENSIONS

vs_index = aiplatform.MatchingEngineIndex.create_tree_ah_index(
    display_name=INDEX_NAME,
    dimensions=DIMENSIONS,
    approximate_neighbors_count=10,
    leaf_node_embedding_count=50,
    leaf_nodes_to_search_percent=30,
    # Streaming updates: datapoints are added later via upsert_datapoints().
    index_update_method="STREAM_UPDATE",
    # Dot-product distance, matching the "dot" in the index name.
    distance_measure_type=aiplatform.matching_engine.matching_engine_index_config.DistanceMeasureType.DOT_PRODUCT_DISTANCE,
)

print(f"Created Index: {vs_index.resource_name}")

In [None]:
# Create the index endpoint, named with the same run suffix, with its public
# endpoint enabled.
vs_index_endpoint = aiplatform.MatchingEngineIndexEndpoint.create(
    display_name=f"vs-index-dot-{UID}",
    public_endpoint_enabled=True,
)

In [None]:
# Deploy the index onto the endpoint under a run-specific deployed-index ID.
DEPLOYED_INDEX_ID = f"vs_index_dot_deployed_{UID}"
vs_index_endpoint.deploy_index(
    index=vs_index,
    deployed_index_id=DEPLOYED_INDEX_ID,
)

## Streaming Data Update

In [None]:
import json

def jsonl_to_array(file_path):
  """Load a JSON Lines file into a list of parsed records.

  Args:
    file_path: Path to a UTF-8 text file holding one JSON document per line.

  Returns:
    A list with one parsed object per non-blank line, in file order.
    An empty file yields an empty list.

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  with open(file_path, 'r', encoding='utf-8') as f:
    # Skip blank / whitespace-only lines: json.loads() would raise on them,
    # so a stray empty line in the file must not abort the whole load.
    return [json.loads(line) for line in f if line.strip()]

# Read the prepared datapoints (one JSON record per line) and preview the first one.
file_path = "vector_search_dataset.json"
datapoints = jsonl_to_array(file_path)
datapoints[0]

In [None]:
# Number of datapoints loaded from the dataset file.
len(datapoints)

In [None]:
# Stream the datapoints into the index, at most 1000 per upsert request.
UPSERT_BATCH_SIZE = 1000
for batch_start in range(0, len(datapoints), UPSERT_BATCH_SIZE):
    batch = datapoints[batch_start : batch_start + UPSERT_BATCH_SIZE]
    vs_index.upsert_datapoints(datapoints=batch)

### gRPC without metadata response

In [None]:
# Use the stored vector of the datapoint at index 3 as the query embedding.
query_emb = datapoints[3]["feature_vector"]
query_emb

In [None]:
# Query the deployed index through the SDK endpoint object.
# API reference:
# https://docs.cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.MatchingEngineIndexEndpoint#google_cloud_aiplatform_MatchingEngineIndexEndpoint_find_neighbors
response = vs_index_endpoint.find_neighbors(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=[query_emb],
    num_neighbors=10,
    return_full_datapoint=True,
)

# A single query was sent, so its neighbors are in response[0].
for neighbor in response[0]:
    print(neighbor)
    print(neighbor.distance)

### gRPC with metadata response

In [None]:
import time
from google import genai
import embedding as embed_utils

# Embedding model and the natural-language query to search with.
MODEL = "gemini-embedding-001"
QUERY = "오징어 있어?"

# Gen AI client that goes through Vertex AI for the configured project/region.
client = genai.Client(vertexai=True, project=PROJECT_ID, location=REGION)

# Embed the query text with the embedding helper module.
embed_query = embed_utils.gemini_embedding_func(
    client=client,
    model=MODEL,
    task_type="SEMANTIC_SIMILARITY",
    output_dimensionality=3072,
    contents=QUERY,
)

embed_query

In [None]:
from google.cloud import aiplatform_v1

# Connection settings for the deployed index queried in this cell.
# NOTE(review): these values are hard-coded, and DEPLOYED_INDEX_ID replaces the
# value assigned in the deploy cell — confirm this is the intended target.
API_ENDPOINT="542644810.us-central1-721521243942.vdb.vertexai.goog"
INDEX_ENDPOINT="projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496"
DEPLOYED_INDEX_ID="vs_index_deployed_11082314"

# Configure the Vector Search (Match service) client against the index's API endpoint.
client_options = {
  "api_endpoint": API_ENDPOINT
}
vector_search_client = aiplatform_v1.MatchServiceClient(
  client_options=client_options,
)

# The timed section covers request construction, the call and result extraction.
start_time = time.perf_counter_ns()

# Build the FindNeighborsRequest: one query asking for the 5 nearest neighbors.
datapoint = aiplatform_v1.IndexDatapoint(
  feature_vector=embed_query
)
query = aiplatform_v1.FindNeighborsRequest.Query(
  datapoint=datapoint,
  neighbor_count=5
)
request = aiplatform_v1.FindNeighborsRequest(
  index_endpoint=INDEX_ENDPOINT,
  deployed_index_id=DEPLOYED_INDEX_ID,
  # A request can carry multiple queries; only one is sent here.
  queries=[query],
  # Ask for the full datapoint so embedding_metadata is available below.
  return_full_datapoint=True,
)

# Execute the request
response = vector_search_client.find_neighbors(request)

# Flatten the per-query neighbor lists into distance/text records.
search_results = [
    {
        "distance": f"{neighbor.distance:.4f}",
        "text": neighbor.datapoint.embedding_metadata.get('text'),
    }
    for query_result in response.nearest_neighbors
    for neighbor in query_result.neighbors
]

end_time = time.perf_counter_ns()
# perf_counter_ns() returns nanoseconds; multiply by 1e-6 to report milliseconds.
latency = (end_time - start_time)
print(f"VS Search Latency: {latency * 1e-6:.2f} ms")

search_results



In [None]:
# Generation model used to rewrite the user's query.
MODEL = "gemini-2.5-flash-lite"

# Prompt (in Korean): asks the model to rewrite the user's question into a clearer
# one with minimal changes, using the retrieved similar questions as context, and
# gives one example answer.
PROMPT = f"""
    당신은 사용자의 질문을 이해해서 정확한 질문의 의도를 바탕으로 사용자의 질문을 재작성해주는 AI 어시스턴트입니다.
    사용자의 질문 : {QUERY} 과 검색된 유사한 질문들을 참고하여 최대한 사용자의 질문을 반영한 명확한 질문으로 재작성해 주세요.
    유사한 질문들 : {search_results}    

    답변은 아래와 같이 사용자의 질문을 최소화해서 변경 후 재작성 해주세요.
    답변예제 : "최신 개봉 영화 예고편 모음 틀어줘" 
"""

# Time only the generate_content call.
start_time = time.perf_counter_ns()

response = client.models.generate_content(
    model=MODEL,
    contents=PROMPT,
)

end_time = time.perf_counter_ns()

# perf_counter_ns() returns nanoseconds; multiply by 1e-6 to report milliseconds.
latency = (end_time - start_time)
print(f"{MODEL} Execution Latency: {latency * 1e-6:.2f} ms \n")

print(response.text)

### REST API example

In [None]:
# Call findNeighbors on the index endpoint over REST, authenticating with a gcloud access token.
# NOTE(review): featureVector holds only two values — presumably a placeholder; replace it with a real embedding.
!curl -X POST -H "Authorization: Bearer $(gcloud auth print-access-token)" \
"https://542644810.us-central1-721521243942.vdb.vertexai.goog/v1/projects/721521243942/locations/us-central1/indexEndpoints/1450636268059754496:findNeighbors" \
-d '{deployedIndexId:"vs_index_deployed_11082314", "queries":[{datapoint:{"featureVector":[0.1,0.2]}}], returnFullDatapoint:true}'

## End of Document