# Create Matching Engine serving index

**TODO:**
* add section headers and instructions
* organize workflow

In [1]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


In [2]:
# Model directory this notebook reads from:
# gs://jt-merlin-scaling/test-e2e-pipe-v10/run-20230308-171530/model_dir

# --- identifiers ---
BUCKET = 'jt-merlin-scaling'
EXPERIMENT_NAME = 'test-e2e-pipe-v10'
EXPERIMENT_RUN_NAME ='run-20230308-171530'
VERSION = 'jtv1'

# --- derived GCS locations ---
BUCKET_URI = f'gs://{BUCKET}'
RUN_DIR_PATH = f'{EXPERIMENT_NAME}/{EXPERIMENT_RUN_NAME}'
RUN_DIR_GCS_PATH = f'{BUCKET_URI}/{RUN_DIR_PATH}'

# Echo the resolved values, one per line.
print(
    f"BUCKET_URI: {BUCKET_URI}",
    f"EXPERIMENT_NAME: {EXPERIMENT_NAME}",
    f"EXPERIMENT_RUN_NAME: {EXPERIMENT_RUN_NAME}",
    f"RUN_DIR_GCS_PATH: {RUN_DIR_GCS_PATH}",
    sep="\n",
)

BUCKET_URI: gs://jt-merlin-scaling
EXPERIMENT_NAME: test-e2e-pipe-v10
EXPERIMENT_RUN_NAME: run-20230308-171530
RUN_DIR_GCS_PATH: gs://jt-merlin-scaling/test-e2e-pipe-v10/run-20230308-171530


In [3]:
import os
import sys
from google.cloud import aiplatform as vertex_ai

# Point the Vertex AI SDK at the project and region resolved above.
vertex_ai.init(
    project=PROJECT_ID,
    location=LOCATION,
)

In [8]:
# Report the SDK version from the module the kernel actually imported.
# Spawning `python3` in a subshell may resolve to a different interpreter or
# environment than the one running this notebook.
print(f"aiplatform SDK version: {vertex_ai.__version__}")

aiplatform SDK version: 1.23.0


In [6]:
! gsutil ls $RUN_DIR_GCS_PATH/model_dir/candidate_embeddings

gs://jt-merlin-scaling/test-e2e-pipe-v10/run-20230308-171530/model_dir/candidate_embeddings/candidate_embeddings.json


In [7]:
# GCS directory holding the candidate embeddings; passed to the index
# builders below as `contents_delta_uri`.
EMBEDDINGS_INITIAL_URI = f'{RUN_DIR_GCS_PATH}/model_dir/candidate_embeddings/'

print(
    f"EMBEDDINGS_INITIAL_URI: {EMBEDDINGS_INITIAL_URI}"
)

EMBEDDINGS_INITIAL_URI: gs://jt-merlin-scaling/test-e2e-pipe-v10/run-20230308-171530/model_dir/candidate_embeddings/


## create ANN & Brute Force Indexes

In [9]:
# --- settings passed to both index builders ---
DIMENSIONS = 128  # must match output dimensions
DISTANCE_MEASURE = "DOT_PRODUCT_DISTANCE"

# --- settings passed only to the tree-AH (ANN) index builder ---
APPROX_NEIGHBORS = 50
LEAF_NODE_EMB_COUNT = 500
LEAF_NODES_SEARCH_PERCENT = 7

# Value recorded in the "data_regime" label of both indexes.
DATA_REGIME = 'full-65m'

# Display names; the brute-force index reuses the ANN name with a "_bf" suffix.
ANN_DISPLAY_NAME = f"merlin_{DIMENSIONS}dim_{VERSION}"
BF_DISPLAY_NAME = f"{ANN_DISPLAY_NAME}_bf"

In [12]:
# Create the tree-AH (approximate nearest neighbor) index from the candidate
# embeddings. With sync=False the call does not block on the backing
# long-running operation; call `tree_ah_index.wait()` before using the index.
tree_ah_index = vertex_ai.MatchingEngineIndex.create_tree_ah_index(
    display_name=ANN_DISPLAY_NAME,
    description="merlin index - local",
    contents_delta_uri=EMBEDDINGS_INITIAL_URI,
    dimensions=DIMENSIONS,
    distance_measure_type=DISTANCE_MEASURE,
    approximate_neighbors_count=APPROX_NEIGHBORS,
    leaf_node_embedding_count=LEAF_NODE_EMB_COUNT,
    leaf_nodes_to_search_percent=LEAF_NODES_SEARCH_PERCENT,
    # Labels record which experiment run and data regime the index came from.
    # The values are already strings, so no f-string wrapping is needed.
    labels={
        "experiment_name": EXPERIMENT_NAME,
        "experiment_run": EXPERIMENT_RUN_NAME,
        "data_regime": DATA_REGIME,
    },
    sync=False,
)

Creating MatchingEngineIndex
Create MatchingEngineIndex backing LRO: projects/934903580331/locations/us-central1/indexes/8417482790253101056/operations/6046357171493404672


In [13]:
# Create the brute-force index over the same embeddings; the recall section
# later compares the ANN index results against it. With sync=False the call
# does not block on the backing long-running operation; call
# `brute_force_index.wait()` before using the index.
brute_force_index = vertex_ai.MatchingEngineIndex.create_brute_force_index(
    display_name=BF_DISPLAY_NAME,
    contents_delta_uri=EMBEDDINGS_INITIAL_URI,
    dimensions=DIMENSIONS,
    distance_measure_type=DISTANCE_MEASURE,
    # Labels record which experiment run and data regime the index came from.
    # The values are already strings, so no f-string wrapping is needed.
    labels={
        "experiment_name": EXPERIMENT_NAME,
        "experiment_run": EXPERIMENT_RUN_NAME,
        "data_regime": DATA_REGIME,
    },
    sync=False,
)

Creating MatchingEngineIndex
Create MatchingEngineIndex backing LRO: projects/934903580331/locations/us-central1/indexes/1905277729075363840/operations/6048608971307089920
MatchingEngineIndex created. Resource name: projects/934903580331/locations/us-central1/indexes/1905277729075363840
To use this MatchingEngineIndex in another session:
index = aiplatform.MatchingEngineIndex('projects/934903580331/locations/us-central1/indexes/1905277729075363840')
MatchingEngineIndex created. Resource name: projects/934903580331/locations/us-central1/indexes/8417482790253101056
To use this MatchingEngineIndex in another session:
index = aiplatform.MatchingEngineIndex('projects/934903580331/locations/us-central1/indexes/8417482790253101056')


## Create Matching Engine endpoint(s)
* both the ANN and brute force indices can be deployed to a single endpoint
* alternatively, we can create separate endpoints, one for each index

### index endpoint config:

In [14]:
# VPC network the index endpoints are created on (passed as `network=`).
VPC_NETWORK = "ucaip-haystack-vpc-network" # TODO: update this
VPC_NETWORK_FULL = f"projects/{PROJECT_NUM}/global/networks/{VPC_NETWORK}"

# One endpoint display name per index, derived from the index display names.
ANN_ENDPOINT_DISPLAY_NAME = f'{ANN_DISPLAY_NAME}_endpoint'
BF_ENDPOINT_DISPLAY_NAME = f'{BF_DISPLAY_NAME}_endpoint'

print(
    f"VPC_NETWORK_FULL: {VPC_NETWORK_FULL}",
    f"ANN_ENDPOINT_DISPLAY_NAME: {ANN_ENDPOINT_DISPLAY_NAME}",
    f"BF_ENDPOINT_DISPLAY_NAME: {BF_ENDPOINT_DISPLAY_NAME}",
    sep="\n",
)

VPC_NETWORK_FULL: projects/934903580331/global/networks/ucaip-haystack-vpc-network
ANN_ENDPOINT_DISPLAY_NAME: merlin_128dim_jtv1_endpoint
BF_ENDPOINT_DISPLAY_NAME: merlin_128dim_jtv1_bf_endpoint


In [15]:
# Create the index endpoint that will host the ANN index. With sync=False the
# call does not block on the backing long-running operation.
my_ann_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint.create(
    display_name=ANN_ENDPOINT_DISPLAY_NAME,
    description="index endpoint for ANN index",
    network=VPC_NETWORK_FULL,
    sync=False,
)

# To reuse an existing endpoint instead of creating one (the SDK is imported
# as `vertex_ai` in this notebook):
# my_ann_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint('projects/934903580331/locations/us-central1/indexEndpoints/8097410557360996352')

Creating MatchingEngineIndexEndpoint
Create MatchingEngineIndexEndpoint backing LRO: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640/operations/1414967904696270848


In [16]:
# Create the index endpoint that will host the brute-force index. With
# sync=False the call does not block on the backing long-running operation.
my_bf_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint.create(
    display_name=BF_ENDPOINT_DISPLAY_NAME,
    # Previously copy-pasted as "index endpoint for ANN index".
    description="index endpoint for brute force index",
    network=VPC_NETWORK_FULL,
    sync=False,
)

# To reuse an existing endpoint instead of creating one (the SDK is imported
# as `vertex_ai` in this notebook):
# my_bf_index_endpoint = vertex_ai.MatchingEngineIndexEndpoint('projects/934903580331/locations/us-central1/indexEndpoints/1972515064137121792')

Creating MatchingEngineIndexEndpoint
Create MatchingEngineIndexEndpoint backing LRO: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008/operations/5549272362622386176
MatchingEngineIndexEndpoint created. Resource name: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008
To use this MatchingEngineIndexEndpoint in another session:
index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008')
MatchingEngineIndexEndpoint created. Resource name: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640
To use this MatchingEngineIndexEndpoint in another session:
index_endpoint = aiplatform.MatchingEngineIndexEndpoint('projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640')


In [17]:
# Both endpoints were created with sync=False. Block until their creation
# operations have finished so that `resource_name` is available when the
# notebook is executed top to bottom.
my_ann_index_endpoint.wait()
my_bf_index_endpoint.wait()

ANN_INDEX_ENDPOINT_NAME = my_ann_index_endpoint.resource_name
BF_INDEX_ENDPOINT_NAME = my_bf_index_endpoint.resource_name

print(f"ANN_INDEX_ENDPOINT_NAME: {ANN_INDEX_ENDPOINT_NAME}")
print(f"BF_INDEX_ENDPOINT_NAME: {BF_INDEX_ENDPOINT_NAME}")

ANN_INDEX_ENDPOINT_NAME: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640
BF_INDEX_ENDPOINT_NAME: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008


## Deploy Indexes to endpoints

In [None]:
# Optional helper, left disabled: lists the indexes in the project/region,
# e.g. to look up the resource name of an index created in an earlier session.
# Uncomment to run.
# !gcloud ai indexes list \
#   --project=$PROJECT_ID \
#   --region=$LOCATION

In [18]:
# get index resource names
tree_ah_resource_name = tree_ah_index.resource_name
brute_force_index_resource_name = brute_force_index.resource_name

# if neededing to create index obj in session
# tree_ah_resource_name = f'projects/{PROJECT_NUM}/locations/us-central1/indexes/8930963516517515264'
# brute_force_index_resource_name = f'projects/{PROJECT_NUM}/locations/us-central1/indexes/8006881167976431616'

tree_ah_index = vertex_ai.MatchingEngineIndex(index_name=tree_ah_resource_name)
brute_force_index = vertex_ai.MatchingEngineIndex(index_name=brute_force_index_resource_name)

In [19]:
# Sanity check: show the display name of the ANN index object.
tree_ah_index.display_name

'merlin_128dim_jtv1'

In [21]:
# Resource names of the two indexes.
ANN_INDEX_NAME = tree_ah_index.resource_name
BF_INDEX_NAME = brute_force_index.resource_name

# IDs used for the deployments of each index (passed as `deployed_index_id`).
DEPLOYED_ANN_INDEX_ID = f"deployed_{ANN_DISPLAY_NAME}"
DEPLOYED_BF_INDEX_ID = f"deployed_{BF_DISPLAY_NAME}"

print(
    f"ANN_INDEX_NAME: {ANN_INDEX_NAME}",
    f"BF_INDEX_NAME: {BF_INDEX_NAME}",
    f"DEPLOYED_ANN_INDEX_ID: {DEPLOYED_ANN_INDEX_ID}",
    f"DEPLOYED_BF_INDEX_ID: {DEPLOYED_BF_INDEX_ID}",
    sep="\n",
)

ANN_INDEX_NAME: projects/934903580331/locations/us-central1/indexes/8417482790253101056
BF_INDEX_NAME: projects/934903580331/locations/us-central1/indexes/1905277729075363840
DEPLOYED_ANN_INDEX_ID: deployed_merlin_128dim_jtv1
DEPLOYED_BF_INDEX_ID: deployed_merlin_128dim_jtv1_bf


### Deploy ANN index

In [22]:
# Deploy the ANN index to its endpoint. The returned object exposes
# `.deployed_indexes` and is the handle used for `.match()` queries below.
deployed_ann_index = my_ann_index_endpoint.deploy_index(
    deployed_index_id=DEPLOYED_ANN_INDEX_ID,
    index=tree_ah_index,
)

# Display the deployment details.
deployed_ann_index.deployed_indexes

Deploying index MatchingEngineIndexEndpoint index_endpoint: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640
Deploy index MatchingEngineIndexEndpoint index_endpoint backing LRO: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640/operations/782775107004137472
MatchingEngineIndexEndpoint index_endpoint Deployed index. Resource name: projects/934903580331/locations/us-central1/indexEndpoints/8956225895677296640


[id: "deployed_merlin_128dim_jtv1"
index: "projects/934903580331/locations/us-central1/indexes/8417482790253101056"
create_time {
  seconds: 1679379546
  nanos: 893955000
}
private_endpoints {
  match_grpc_address: "10.41.2.5"
}
index_sync_time {
  seconds: 1679379762
  nanos: 554035000
}
automatic_resources {
  min_replica_count: 2
  max_replica_count: 2
}
deployment_group: "default"
]

### Deploy Bruteforce index

In [23]:
# Deploy the brute-force index to its endpoint. The returned object exposes
# `.deployed_indexes` and is the handle used for `.match()` queries below.
deployed_bf_index = my_bf_index_endpoint.deploy_index(
    deployed_index_id=DEPLOYED_BF_INDEX_ID,
    index=brute_force_index,
)

# Display the deployment details.
deployed_bf_index.deployed_indexes

Deploying index MatchingEngineIndexEndpoint index_endpoint: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008
Deploy index MatchingEngineIndexEndpoint index_endpoint backing LRO: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008/operations/6472510286233337856
MatchingEngineIndexEndpoint index_endpoint Deployed index. Resource name: projects/934903580331/locations/us-central1/indexEndpoints/6149357427918635008


[id: "deployed_merlin_128dim_jtv1_bf"
index: "projects/934903580331/locations/us-central1/indexes/1905277729075363840"
create_time {
  seconds: 1679380267
  nanos: 211308000
}
private_endpoints {
  match_grpc_address: "10.41.2.5"
}
index_sync_time {
  seconds: 1679380482
  nanos: 199078000
}
automatic_resources {
  min_replica_count: 2
  max_replica_count: 2
}
deployment_group: "default"
]

## Test: deployed query_model --> Matching Engine

### define deployed model endpoint

In [39]:
# Resource name of the already-deployed model endpoint that turns a test
# instance into an embedding (see `endpoint.predict` below).
ENDPOINT_URI = 'projects/934903580331/locations/us-central1/endpoints/714467053775355904'

endpoint = vertex_ai.Endpoint(endpoint_name=ENDPOINT_URI)

### get test instance

In [40]:
import pickle as pkl

# Load the saved test instance. The context manager closes the file even if
# unpickling raises; the explicit open()/close() pair did not.
# NOTE(review): pickle.load can execute arbitrary code, so only load this file
# from a trusted source.
with open('merlin_last5_test_instance.pkl', 'rb') as filehandler:
    TEST_INSTANCE = pkl.load(filehandler)

TEST_INSTANCE

{'collaborative': 'false',
 'album_name_pl': ["There's Really A Wolf",
  'Late Nights: The Album',
  'American Teen',
  'Crazy In Love',
  'Pony'],
 'artist_genres_pl': ["'hawaiian hip hop', 'rap'",
  "'chicago rap', 'dance pop', 'pop', 'pop rap', 'r&b', 'southern hip hop', 'trap', 'urban contemporary'",
  "'pop', 'pop r&b'",
  "'dance pop', 'pop', 'r&b'",
  "'chill r&b', 'pop', 'pop r&b', 'r&b', 'urban contemporary'"],
 'artist_name_pl': ['Russ', 'Jeremih', 'Khalid', 'BeyoncÃ©', 'William Singe'],
 'artist_pop_can': 82.0,
 'description_pl': '',
 'duration_ms_songs_pl': [237506.0, 217200.0, 219080.0, 226400.0, 121739.0],
 'n_songs_pl': 8.0,
 'name': 'Lit Tunes ',
 'num_albums_pl': 8.0,
 'num_artists_pl': 8.0,
 'track_name_pl': ['Losin Control',
  'Paradise',
  'Location',
  'Crazy In Love - Remix',
  'Pony'],
 'track_pop_pl': [79.0, 58.0, 83.0, 71.0, 57.0],
 'duration_ms_seed_pl': 51023.1,
 'pid': 1,
 'track_uri_pl': ['spotify:track:4cxMGhkinTocPSVVKWIw0d',
  'spotify:track:1wNEBPo3nsbG

### get test instance embeddings

In [42]:
import time

# Send the test instance to the deployed model endpoint and time the round trip.
start = time.time()
playlist_emb = endpoint.predict(instances=[TEST_INSTANCE])

# Arguments are evaluated left to right, so the elapsed time is taken first,
# immediately after the prediction returns.
print(
    f"query conversion: {round((time.time() - start),4)} seconds",
    f"Vector Dimensions: {len(playlist_emb.predictions[0])}\n",
    sep="\n",
)

print(f"embeddings: {playlist_emb.predictions}")

query conversion: 0.5507 seconds
Vector Dimensions: 128

embeddings: [[0.04306147247552872, 0.0, 0.1541330963373184, 0.02622287534177303, 0.03811377659440041, 0.01116169989109039, 0.0, 0.0, 0.04474653676152229, 0.0, 0.0, 0.0, 0.02140465378761292, 0.0, 0.0, 0.0, 0.001586958067491651, 0.0, 0.0222462136298418, 0.0830300822854042, 0.01280351355671883, 0.02744066342711449, 0.03303935006260872, 0.0, 0.0, 0.05789442732930183, 0.0, 0.0, 0.0, 0.02518848516047001, 0.1041594371199608, 0.1695009768009186, 0.01560135744512081, 0.0, 0.0, 0.0, 0.08916763216257095, 0.0, 0.0, 0.02710841223597527, 0.0, 0.04701695591211319, 0.0, 0.02980164624750614, 0.117186039686203, 0.0, 0.0, 0.0, 0.0, 0.07672543078660965, 0.0794282853603363, 0.0142751494422555, 0.0, 0.01396751310676336, 0.0, 0.01476635225117207, 0.005098144058138132, 0.007774507626891136, 0.0, 0.0, 0.004853374324738979, 0.0, 0.0, 0.02901207469403744, 0.01740282960236073, 0.04868501424789429, 0.1304978728294373, 0.0, 0.0, 0.0, 0.03922000154852867, 0.00

### retrieve nearest neighbors

#### calculate retrieval latencies

In [43]:
%%timeit

ANN_response = deployed_ann_index.match(
    deployed_index_id=DEPLOYED_ANN_INDEX_ID,
    queries=playlist_emb.predictions,
    # queries=test_instance,
    num_neighbors=50
)
# ANN_response

6.41 ms ± 38.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [45]:
%%timeit 
BF_response = deployed_bf_index.match(
    deployed_index_id=DEPLOYED_BF_INDEX_ID,
    queries=playlist_emb.predictions,
    # queries=test_instance,
    num_neighbors=50
)
# BF_response

77.2 ms ± 1.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


#### get nearest neighbors from each index

> Calculate recall by determining how many neighbors were correctly retrieved as compared to the brute-force index

In [44]:
# Fetch the 50 nearest neighbors of the test embedding from the ANN index;
# the result is consumed by the recall computation below.
ANN_response = deployed_ann_index.match(
    queries=playlist_emb.predictions,
    deployed_index_id=DEPLOYED_ANN_INDEX_ID,
    num_neighbors=50,
)

In [46]:
# Fetch the 50 nearest neighbors of the test embedding from the brute-force
# index; the recall computation below uses these as the reference set.
BF_response = deployed_bf_index.match(
    queries=playlist_emb.predictions,
    deployed_index_id=DEPLOYED_BF_INDEX_ID,
    num_neighbors=50,
)

### compute recall

In [47]:
# Recall of the ANN index relative to the brute-force index: the fraction of
# brute-force neighbors that the ANN index also returned.
recalled_neighbors = 0

# Each response holds one neighbor list per query; compare them pairwise.
for tree_ah_neighbors, brute_force_neighbors in zip(ANN_response, BF_response):
    tree_ah_neighbor_ids = [neighbor.id for neighbor in tree_ah_neighbors]
    brute_force_neighbor_ids = [neighbor.id for neighbor in brute_force_neighbors]

    # Count the IDs present in both result sets for this query.
    recalled_neighbors += len(
        set(tree_ah_neighbor_ids).intersection(brute_force_neighbor_ids)
    )

# Denominator: every neighbor the brute-force index returned, across all queries.
total_bf_neighbors = sum(len(neighbors) for neighbors in BF_response)

# Fail with a clear message instead of an opaque ZeroDivisionError when the
# brute-force response is empty.
if total_bf_neighbors == 0:
    raise ValueError("BF_response contains no neighbors; recall is undefined")

recall = recalled_neighbors / total_bf_neighbors

print("Recall: {}".format(recall))

Recall: 0.96
