# Clean-up Vector Search (formerly Matching Engine) Indexes and Index Endpoints

In [1]:
# Naming convention applied to all cloud resources: PREFIX embeds VERSION.
# Both values are marked TODO, i.e. they are meant to be reviewed per run.
VERSION = "v1"  # TODO
PREFIX = f"ndr-{VERSION}"  # TODO

print(f"PREFIX = {PREFIX}")

PREFIX = ndr-v1


In [2]:
# staging GCS
# Ask gcloud for the active project; `!cmd` captures the command's output
# lines, and the first captured line is taken as the project ID.
# NOTE(review): assumes gcloud prints nothing (e.g. warnings) before the
# project ID -- confirm, otherwise PROJECT_ID will hold the wrong text.
GCP_PROJECTS             = !gcloud config get-value project
PROJECT_ID               = GCP_PROJECTS[0]

# GCS bucket and paths
# The bucket name is derived from PREFIX (set in the first cell) and PROJECT_ID.
BUCKET_NAME              = f'{PREFIX}-{PROJECT_ID}-bucket'
BUCKET_URI               = f'gs://{BUCKET_NAME}'

# Download the shared notebook config from the bucket, print it, then run it.
# `config.n` is the captured output joined into one newline-separated string.
config = !gsutil cat {BUCKET_URI}/config/notebook_env.py
print(config.n)
# NOTE(review): exec() runs whatever text the bucket returned in this kernel.
# Per the printed output it re-assigns PROJECT_ID, PREFIX, VERSION, BUCKET_NAME
# and BUCKET_URI and defines LOCATION, PROJECT_NUM, etc. If `gsutil cat` fails,
# its error text is what gets exec'd -- only run against a trusted bucket.
exec(config.n)


PROJECT_ID               = "hybrid-vertex"
PROJECT_NUM              = "934903580331"
LOCATION                 = "us-central1"

REGION                   = "us-central1"
BQ_LOCATION              = "US"
VPC_NETWORK_NAME         = "ucaip-haystack-vpc-network"

VERTEX_SA                = "934903580331-compute@developer.gserviceaccount.com"

PREFIX                   = "ndr-v1"
VERSION                  = "v1"

APP                      = "sp"
MODEL_TYPE               = "2tower"
FRAMEWORK                = "tfrs"
DATA_VERSION             = "v1"
TRACK_HISTORY            = "5"

BUCKET_NAME              = "ndr-v1-hybrid-vertex-bucket"
BUCKET_URI               = "gs://ndr-v1-hybrid-vertex-bucket"
SOURCE_BUCKET            = "spotify-million-playlist-dataset"

DATA_GCS_PREFIX          = "data"
DATA_PATH                = "gs://ndr-v1-hybrid-vertex-bucket/data"
VOCAB_SUBDIR             = "vocabs"
VOCAB_FILENAME           = "vocab_dict.pkl"

CANDIDATE_PREFIX         = "candidates"
TRAIN_DIR_PREFIX      

In [3]:
import os
import sys
import time
import numpy as np

# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' 

# google cloud SDKs
from google.cloud import storage
from google.cloud import aiplatform as vertex_ai

In [4]:
# Configure the Vertex AI SDK with the project and location from the config.
vertex_ai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

# Cloud Storage client for the same project.
storage_client = storage.Client(
    project=PROJECT_ID,
)

## Index Endpoints

In [5]:
!gcloud beta ai index-endpoints list \
  --project=$PROJECT_ID \
  --region=$LOCATION

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
---
createTime: '2023-09-25T18:17:22.236805Z'
deployedIndexes:
- automaticResources:
    maxReplicaCount: 2
    minReplicaCount: 2
  createTime: '2023-09-25T18:37:23.100220Z'
  deploymentGroup: default
  id: deployedann_v1
  index: projects/934903580331/locations/us-central1/indexes/1713892337098162176
  indexSyncTime: '2023-09-27T11:42:02.532637Z'
  privateEndpoints:
    matchGrpcAddress: 10.41.2.5
description: endpoint for ann index
displayName: ann_index_endpoint_v1
encryptionSpec: {}
etag: AMEw9yN_mR5UxH4GBdKr2IVK3OQLg5Lnp-tIwgotn0cJh51YL1CjOhG6EPwlP7R5UYrC
name: projects/934903580331/locations/us-central1/indexEndpoints/7571386602446913536
network: projects/934903580331/global/networks/ucaip-haystack-vpc-network
updateTime: '2023-09-25T18:17:22.946019Z'
---
createTime: '2023-09-25T17:53:18.383235Z'
deployedIndexes:
- automaticResources:
    maxReplicaCount: 2
    minReplicaCount: 2
  createTime: '2023-09-25T18:29:41.2

In [36]:
# Full resource name of the index endpoint to remove. The `# XXX` marker
# suggests this is a per-run value to be replaced with a `name` taken from the
# endpoint listing -- confirm before running the delete cells.
TO_DELETE = "projects/934903580331/locations/us-central1/indexEndpoints/1049611392061014016"  # XXX

# SDK object for that endpoint; the following cells operate on it.
my_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint(
    index_endpoint_name=TO_DELETE,
)

# Display the object so its resource name can be checked against TO_DELETE.
my_index_endpoint

<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7f6e0ae3a7d0> 
resource name: projects/934903580331/locations/us-central1/indexEndpoints/1049611392061014016

In [37]:
# Inspect which indexes are deployed to the endpoint before removing it
# (the recorded run shows an empty list).
my_index_endpoint.deployed_indexes

[]

In [38]:
# Undeploy every index from the endpoint ahead of deleting it. In the recorded
# run `deployed_indexes` was already empty at this point.
my_index_endpoint.undeploy_all()

<google.cloud.aiplatform.matching_engine.matching_engine_index_endpoint.MatchingEngineIndexEndpoint object at 0x7f6e0ae3a7d0> 
resource name: projects/934903580331/locations/us-central1/indexEndpoints/1049611392061014016

In [39]:
# Delete the index endpoint (destructive). The recorded output shows the SDK
# running this as a long-running operation and reporting completion.
# NOTE(review): the SDK documents `force=True` as undeploying any remaining
# indexes first -- confirm against the installed google-cloud-aiplatform version.
my_index_endpoint.delete(force=True)

Deleting MatchingEngineIndexEndpoint : projects/934903580331/locations/us-central1/indexEndpoints/1049611392061014016
Delete MatchingEngineIndexEndpoint  backing LRO: projects/934903580331/locations/us-central1/operations/7749232264642297856
MatchingEngineIndexEndpoint deleted. . Resource name: projects/934903580331/locations/us-central1/indexEndpoints/1049611392061014016


## Vector Search (Matching Engine) Indexes

In [6]:
# List the indexes in PROJECT_ID / LOCATION; the `name` field of an entry is
# the full resource name used when deleting an index below.
!gcloud ai indexes list \
  --project=$PROJECT_ID \
  --region=$LOCATION

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
---
createTime: '2023-09-26T14:37:03.773840Z'
description: testing ann index for Merlin deployment
displayName: ann_index_pipe-v2-v1
encryptionSpec: {}
etag: AMEw9yNDevUG5nrLgk71-oji6rvQg0ym6H53FTXibd82ZhmCTc4I1fYH1Z3A3lJfhE75
indexStats:
  shardsCount: 1
  vectorsCount: '2243497'
indexUpdateMethod: BATCH_UPDATE
metadata:
  config:
    algorithmConfig:
      treeAhConfig:
        leafNodeEmbeddingCount: '500'
        leafNodesToSearchPercent: 7
    approximateNeighborsCount: 50
    dimensions: 128
    distanceMeasureType: DOT_PRODUCT_DISTANCE
    shardSize: SHARD_SIZE_MEDIUM
metadataSchemaUri: gs://google-cloud-aiplatform/schema/matchingengine/metadata/nearest_neighbor_search_1.0.0.yaml
name: projects/934903580331/locations/us-central1/indexes/93722371151626240
updateTime: '2023-09-26T15:52:37.566023Z'
---
createTime: '2023-09-26T14:36:58.716251Z'
description: testing bf index for Merlin deployment
displayName: bf_index_pi

In [28]:
# Full resource name of the index to remove. The `# XXXX` marker suggests this
# is a per-run value to be replaced with a `name` taken from the index
# listing -- confirm before running the delete cell.
INDEX_TO_DELETE = "projects/934903580331/locations/us-central1/indexes/5215441047378722816"  # XXXX

# SDK object for that index; the next cell deletes through it.
my_index = vertex_ai.MatchingEngineIndex(
    index_name=INDEX_TO_DELETE,
)

# Display the object so its resource name can be checked against INDEX_TO_DELETE.
my_index

<google.cloud.aiplatform.matching_engine.matching_engine_index.MatchingEngineIndex object at 0x7ff24be02510> 
resource name: projects/934903580331/locations/us-central1/indexes/5215441047378722816

In [29]:
# Delete the index referenced by `my_index` (destructive). The recorded output
# shows the SDK running this as a long-running operation and reporting completion.
# NOTE(review): presumably the index must not be deployed to any endpoint when
# this runs -- check the `deployedIndexes` field in the listing first.
my_index.delete()

Deleting MatchingEngineIndex : projects/934903580331/locations/us-central1/indexes/5215441047378722816
Delete MatchingEngineIndex  backing LRO: projects/934903580331/locations/us-central1/indexes/5215441047378722816/operations/9220730666327474176
MatchingEngineIndex deleted. . Resource name: projects/934903580331/locations/us-central1/indexes/5215441047378722816


In [30]:
# List the indexes again after the delete, to check which ones remain in
# PROJECT_ID / LOCATION.
!gcloud ai indexes list \
  --project=$PROJECT_ID \
  --region=$LOCATION

Using endpoint [https://us-central1-aiplatform.googleapis.com/]
---
createTime: '2023-09-25T16:46:07.946644Z'
deployedIndexes:
- deployedIndexId: deployedann_v1
  indexEndpoint: projects/934903580331/locations/us-central1/indexEndpoints/7571386602446913536
description: testing ann index for Merlin deployment
displayName: ann_index_v1-v1
encryptionSpec: {}
etag: AMEw9yPJQQ9wltdwLcsWPLKHZLPsvyNStp1RIj5mD_GgxxZdR_6vxlsxxx3swUmlsxY=
indexStats:
  shardsCount: 1
  vectorsCount: '2243058'
indexUpdateMethod: BATCH_UPDATE
metadata:
  config:
    algorithmConfig:
      treeAhConfig:
        leafNodeEmbeddingCount: '500'
        leafNodesToSearchPercent: 7
    approximateNeighborsCount: 50
    dimensions: 128
    distanceMeasureType: DOT_PRODUCT_DISTANCE
    shardSize: SHARD_SIZE_MEDIUM
metadataSchemaUri: gs://google-cloud-aiplatform/schema/matchingengine/metadata/nearest_neighbor_search_1.0.0.yaml
name: projects/934903580331/locations/us-central1/indexes/1713892337098162176
updateTime: '2023-09