In [None]:
#plan

'''
1. Install dependencies.
2. Get the data from BigQuery.
3. Create the two-tower model.
4. Deploy the query model to an endpoint.
5. Upload the candidate model for batch prediction, with a changed signature that outputs id and embedding.
6. Run batch prediction to get the embeddings of all candidates and transform them into the Matching Engine format (id, embedding).
7. Save them to GCS in JSON format.
8. Create and deploy the Vertex AI index endpoint.
9. Get the user_id embedding using the deployed query endpoint.
10. Feed it to the index endpoint to get recommendations.
'''

In [2]:
# Google Cloud project id; reused below for the BigQuery client and aiplatform.init.
PROJECT_ID = "tokyo-country-452614-f7"

# Point the gcloud CLI at the same project ({PROJECT_ID} is interpolated by IPython).
!gcloud config set project {PROJECT_ID}

Updated property [core/project].


In [3]:
from google.cloud import aiplatform
import tensorflow as tf
import pandas as pd
import numpy as np
import tensorflow_recommenders as tfrs
from google.cloud import bigquery

2025-03-31 11:35:35.986584: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-31 11:35:36.733689: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-31 11:35:36.733802: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-31 11:35:36.891477: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-31 11:35:37.212253: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-31 11:35:37.217373: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [4]:
# Region used for the Vertex AI resources created in this notebook.
REGION = "us-central1"
# BigQuery dataset and table holding the (user, book) interaction pairs.
DATASET = "book_database"
# Original (misspelled) name, kept as an alias so existing references keep working.
DETASET = DATASET
TABLE = "pairs_2"

Load Data into Python

In [6]:
# Pull every (user, book) interaction row from BigQuery into a DataFrame.
# The project is passed explicitly, as in the later export cell, so the client
# does not depend on the ambient gcloud default.
client = bigquery.Client(project=PROJECT_ID)

# The table reference is assembled from the config constants so the project,
# dataset and table are defined in exactly one place.
query = f"""
SELECT User_ID,ISBN,Book_Title,Book_Author,Year_Of_Publication FROM `{PROJECT_ID}.{DETASET}.{TABLE}`
"""

df = client.query(query).to_dataframe()


In [7]:
# Preview the first rows to confirm the expected columns came back.
df.head()

Unnamed: 0,User_ID,ISBN,Book_Title,Book_Author,Year_Of_Publication
0,75081,553204467,Yesterday Today and Forever,Jeane Dixon,1976
1,75081,876040628,Meditation: Gateway to Light,Elsie Sechrist,1972
2,75081,60905204,The Findhorn Garden,Findhorn Community,1976
3,75081,316380318,The Best Short Stories by Black Writers : 1899...,Langston Hughes,1969
4,38357,140036954,Strange meeting,Susan Hill,1974


In [8]:
print(df.isnull().values.any())  # True if any cell is null; the recorded run printed False

False


In [9]:
# Column dtypes as loaded; the recorded run shows every column as object.
print(df.dtypes)

User_ID                object
ISBN                   object
Book_Title             object
Book_Author            object
Year_Of_Publication    object
dtype: object


In [10]:
# Row count, non-null counts and memory footprint of the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 164015 entries, 0 to 164014
Data columns (total 5 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   User_ID              164015 non-null  object
 1   ISBN                 164015 non-null  object
 2   Book_Title           164015 non-null  object
 3   Book_Author          164015 non-null  object
 4   Year_Of_Publication  164015 non-null  object
dtypes: object(5)
memory usage: 6.3+ MB


Ensure all columns are string

In [12]:
# Cast every feature column to str; the StringLookup layers defined below are
# built from (and fed with) these string values.
for _column in ("User_ID", "ISBN", "Book_Title", "Book_Author", "Year_Of_Publication"):
    df[_column] = df[_column].astype(str)

Create Vocabularies

In [17]:
def _sorted_unique(column):
    """Return the distinct values of ``df[column]`` as an ascending list."""
    distinct = list(df[column].unique())
    distinct.sort()
    return distinct


# One vocabulary per feature; each is handed to a StringLookup layer below.
user_ids = _sorted_unique("User_ID")
book_ids = _sorted_unique("ISBN")
title_vocab = _sorted_unique("Book_Title")
author_vocab = _sorted_unique("Book_Author")
year_vocab = _sorted_unique("Year_Of_Publication")

#Define Embedding Dimension (Hyperparameter)

In [35]:
# Width of every embedding table and of both towers' output vectors.
# The Matching Engine index created later is configured with the same dimension (64).
embedding_dimension = 64

User Model

In [60]:
class UserModel(tf.keras.Model):
    """Query tower: turns a batch of user-id strings into dense user vectors."""

    def __init__(self, user_ids):
        """Create the lookup, embedding and projection layers.

        Args:
            user_ids: vocabulary of known user-id strings for the lookup layer.
        """
        super().__init__()
        # One row more than the vocabulary: with mask_token=None, StringLookup
        # still reserves an index for out-of-vocabulary ids (its default).
        table_rows = len(user_ids) + 1
        self.lookup_layer = tf.keras.layers.StringLookup(mask_token=None, vocabulary=user_ids)
        self.embedding = tf.keras.layers.Embedding(table_rows, embedding_dimension)
        self.dense = tf.keras.layers.Dense(embedding_dimension, activation="relu")

    def call(self, inputs):
        """Map user-id strings -> integer indices -> embeddings -> ReLU projection."""
        return self.dense(self.embedding(self.lookup_layer(inputs)))

Book Model

In [61]:
class BookModel(tf.keras.Model):
    """Candidate tower: embeds a book from its ISBN, title, author and year.

    Each feature has its own StringLookup vocabulary and its own embedding
    table. The four embeddings are concatenated and projected to
    ``embedding_dimension`` with a ReLU dense layer.

    NOTE: models trained or saved before this fix looked up every feature in
    the ISBN table, so they must be retrained and re-exported for the change to
    take effect.
    """

    def __init__(self,book_ids, title_vocab, author_vocab, year_vocab):
        """Create one lookup layer and one embedding table per feature.

        Args:
            book_ids: vocabulary of ISBN strings.
            title_vocab: vocabulary of book-title strings.
            author_vocab: vocabulary of author strings.
            year_vocab: vocabulary of publication-year strings.
        """
        super().__init__()

        self.book_lookup = tf.keras.layers.StringLookup(vocabulary=book_ids, mask_token=None)
        self.title_lookup = tf.keras.layers.StringLookup(vocabulary=title_vocab, mask_token=None)
        self.author_lookup = tf.keras.layers.StringLookup(vocabulary=author_vocab, mask_token=None)
        self.year_lookup = tf.keras.layers.StringLookup(vocabulary=year_vocab, mask_token=None)

        # Each table has len(vocab) + 1 rows: one extra row for the
        # out-of-vocabulary index that StringLookup emits.
        self.book_embedding = tf.keras.layers.Embedding(len(book_ids)+1, embedding_dimension)
        self.title_embedding = tf.keras.layers.Embedding(len(title_vocab)+1, embedding_dimension)
        self.author_embedding = tf.keras.layers.Embedding(len(author_vocab)+1, embedding_dimension)
        self.year_embedding = tf.keras.layers.Embedding(len(year_vocab)+1, embedding_dimension)
        self.dense = tf.keras.layers.Dense(embedding_dimension,activation = "relu")

    def call(self,inputs):
        """Embed a batch of books.

        Args:
            inputs: dict with string tensors under the keys "ISBN",
                "Book_Title", "Book_Author" and "Year_Of_Publication".

        Returns:
            dict with "id" (the input ISBNs, passed through unchanged so batch
            prediction output can be keyed by book) and "embedding" (the
            projected book vectors).
        """
        book_id = inputs["ISBN"]
        book_index = self.book_lookup(book_id)
        title_index = self.title_lookup(inputs["Book_Title"])
        author_index = self.author_lookup(inputs["Book_Author"])
        year_index = self.year_lookup(inputs["Year_Of_Publication"])

        # Every feature is looked up in its OWN table. Previously all four
        # indices were fed to book_embedding, which left the other tables
        # untrained and made unrelated features share rows of the ISBN table.
        book_embed = self.book_embedding(book_index)
        title_embed = self.title_embedding(title_index)
        author_embed = self.author_embedding(author_index)
        year_embed = self.year_embedding(year_index)

        combined_embed = tf.concat([book_embed,title_embed,author_embed,year_embed],axis=1)
        final_embed = self.dense(combined_embed)
        return {"id" : book_id, "embedding" : final_embed}
        

In [None]:
# Two Tower Model

In [62]:
class TwoTowerModel(tfrs.models.Model):
    """Retrieval model that pairs the user (query) tower with the book (candidate) tower."""

    def __init__(self, user_ids, book_ids, title_vocab, author_vocab, year_vocab):
        """Build both towers from their vocabularies and attach the retrieval task.

        Args:
            user_ids: vocabulary for the user tower.
            book_ids: ISBN vocabulary for the book tower.
            title_vocab: title vocabulary for the book tower.
            author_vocab: author vocabulary for the book tower.
            year_vocab: publication-year vocabulary for the book tower.
        """
        super().__init__()
        self.user_model = UserModel(user_ids)
        self.book_model = BookModel(book_ids, title_vocab, author_vocab, year_vocab)
        self.task = tfrs.tasks.Retrieval()

    def compute_loss(self,features,training=False):
        """Return the retrieval loss for one batch of interaction rows.

        Args:
            features: dict of string tensors keyed by "User_ID", "ISBN",
                "Book_Title", "Book_Author" and "Year_Of_Publication".
            training: accepted for the Keras/TFRS interface; not used here.
        """
        book_feature_names = ("ISBN", "Book_Title", "Book_Author", "Year_Of_Publication")
        book_inputs = {name: features[name] for name in book_feature_names}

        query_vectors = self.user_model(features["User_ID"])
        # The book tower returns {"id", "embedding"}; only the vectors feed the loss.
        candidate_vectors = self.book_model(book_inputs)["embedding"]

        return self.task(query_vectors, candidate_vectors)
        

In [63]:
# Training pipeline: one dict-of-strings example per DataFrame row.
tf_data = tf.data.Dataset.from_tensor_slices(
    {
        column: df[column].values
        for column in ("User_ID", "ISBN", "Book_Title", "Book_Author", "Year_Of_Publication")
    }
)
# Shuffle with a 10k-element buffer, batch by 256, and prefetch.
tf_data = tf_data.shuffle(buffer_size=10000).batch(256).prefetch(tf.data.AUTOTUNE)

In [64]:
# Build the two-tower model from the vocabularies computed above.
model = TwoTowerModel(user_ids,book_ids,title_vocab, author_vocab, year_vocab)
# Adam with learning rate 0.001; the loss comes from TwoTowerModel.compute_loss.
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001))
# Five passes over the shuffled, batched interaction dataset.
model.fit(tf_data,epochs=5)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x7fe238667af0>

In [65]:
# Pull the two trained towers out of the combined model so each can be exported alone.
# NOTE: the names query_model / candidate_model are rebound later in this notebook
# to the objects returned by upload_model(...).
query_model = model.user_model
candidate_model = model.book_model

In [58]:
# Create the export directory. exist_ok makes the cell safe to re-run, and plain
# Python avoids relying on IPython automagic or a POSIX shell.
import os

os.makedirs("books", exist_ok=True)

In [66]:
# Export each tower to its own directory under books/ (the recorded output
# shows assets written to books/query_model and books/candidate_model).
query_model.save("books/query_model")
candidate_model.save("books/candidate_model")

INFO:tensorflow:Assets written to: books/query_model/assets


INFO:tensorflow:Assets written to: books/query_model/assets














INFO:tensorflow:Assets written to: books/candidate_model/assets


INFO:tensorflow:Assets written to: books/candidate_model/assets


In [8]:
# Reload both exports from disk to check them before uploading.
loaded_query_model = tf.saved_model.load("books/query_model")
loaded_candidate_model = tf.saved_model.load("books/candidate_model")

In [13]:
# Serving signature of the query tower; the recorded output shows a string input
# named input_1 and a (None, 64) float output named output_1.
print(loaded_query_model.signatures)

_SignatureMap({'serving_default': <ConcreteFunction (*, input_1: TensorSpec(shape=(None,), dtype=tf.string, name='input_1')) -> Dict[['output_1', TensorSpec(shape=(None, 64), dtype=tf.float32, name='output_1')]] at 0x7F0AEA0ACB80>})


In [14]:
# Serving signature of the candidate tower; the recorded output shows four string
# inputs (ISBN, Book_Title, Book_Author, Year_Of_Publication) and the outputs
# "id" and "embedding".
print(loaded_candidate_model.signatures)

_SignatureMap({'serving_default': <ConcreteFunction (*, Book_Author: TensorSpec(shape=(None,), dtype=tf.string, name='book_author'), Book_Title: TensorSpec(shape=(None,), dtype=tf.string, name='book_title'), ISBN: TensorSpec(shape=(None,), dtype=tf.string, name='isbn'), Year_Of_Publication: TensorSpec(shape=(None,), dtype=tf.string, name='year_of_publication')) -> Dict[['embedding', TensorSpec(shape=(None, 64), dtype=tf.float32, name='embedding')], ['id', TensorSpec(shape=(None,), dtype=tf.string, name='id')]] at 0x7F0AE86FC220>})


In [15]:
# Smoke test: embed a single user id with the reloaded query tower.
test_user_id = tf.constant(["42"])

user_embedding_ot = loaded_query_model(test_user_id)
print(user_embedding_ot)

tf.Tensor(
[[0.26124555 0.23750721 0.72505647 0.3894252  0.23232184 0.50466865
  0.5880144  0.5964332  0.49082732 0.4587138  0.4789254  0.4529028
  0.5956879  0.17543778 0.6506428  0.2970649  0.18927327 0.500794
  0.61343503 0.37366652 0.5785155  0.39873856 0.5460372  0.49394974
  0.32079205 0.29255486 0.3975669  0.5584178  0.510144   0.31482852
  0.46396828 0.41620603 0.6453707  0.42953345 0.42790645 0.49637657
  0.47070307 0.0468829  0.26100796 0.20228162 0.41510168 0.6292526
  0.6452517  0.5317269  0.15060513 0.35874826 0.38904995 0.53180027
  0.40575382 0.5617276  0.4925018  0.41196015 0.5534923  0.49143255
  0.29370987 0.20485757 0.31414306 0.48874715 0.43116254 0.246707
  0.4087636  0.45340723 0.49837047 0.6135304 ]], shape=(1, 64), dtype=float32)


In [16]:
# Smoke test: embed a single book with the reloaded candidate tower.
# Every feature is wrapped in a one-element string tensor (a batch of size 1).
test_book_ip = {
    feature: tf.constant([value])
    for feature, value in [
        ("ISBN", "0330293907"),
        ("Book_Title", "Book of Travellers Tales"),
        ("Book_Author", "Eric Newby"),
        ("Year_Of_Publication", "1985"),
    ]
}

book_embedding_ot = loaded_candidate_model(test_book_ip)

# The candidate tower returns the id alongside the vector.
print(book_embedding_ot["id"])
print(book_embedding_ot["embedding"])

tf.Tensor([b'0330293907'], shape=(1,), dtype=string)
tf.Tensor(
[[1.2270631  0.73145807 2.195582   1.292615   1.1155738  1.5110795
  2.0170941  2.09883    1.9978099  1.1053503  1.0045266  1.1028476
  1.2476037  1.1757245  1.1957431  0.92618465 0.77474535 0.7898278
  0.7028116  1.057468   1.8146853  0.69048387 1.0737748  0.5594988
  1.291427   1.1648805  1.4420921  1.7755616  1.8319212  0.8528392
  0.71825016 0.83830404 1.5096476  1.6935405  1.2154536  1.50477
  1.1262057  0.7687188  1.3263558  0.1250931  0.91325074 2.3440735
  1.0618215  1.3831617  0.70299715 0.6843505  1.2896416  0.8580639
  0.83263296 0.84633994 1.7370698  1.7622304  1.6075944  0.89473784
  1.0568725  1.5099087  0.5255393  1.6575549  0.45758986 0.71861446
  0.6356787  1.0942835  1.2138654  1.1128423 ]], shape=(1, 64), dtype=float32)


Upload & Deploy Model for batch and online prediction

In [17]:
# Enable the Vertex AI API (aiplatform.googleapis.com) via the gcloud CLI.
!gcloud services enable aiplatform.googleapis.com 

1. upload query and candidate model to GCS Bucket

In [22]:
# Staging bucket, in gs:// form, as passed to aiplatform.init below.
# NOTE: a later cell rebinds BUCKET_NAME to the bare name "book-recsys-bucket".
BUCKET_NAME = "gs://book-recsys-bucket"

# Local export directories of the two towers.
QUERY_MODEL = "books/query_model"
CANDIDATE_MODEL = "books/candidate_model"
# Prebuilt serving container used for both uploaded models.
# NOTE(review): the image tag is TF 2.11 — confirm it is compatible with the
# TensorFlow version that exported the models.
serving_image = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-11:latest"

In [20]:
# Set the SDK defaults (project, region, staging bucket) for the aiplatform calls below.
aiplatform.init(project = PROJECT_ID, location=REGION, staging_bucket = BUCKET_NAME)

In [21]:
def upload_model(model_name,model_path,serving_container_image):
    """Upload a model to Vertex AI via ``aiplatform.Model.upload``.

    Args:
        model_name: display name for the uploaded model.
        model_path: artifact location, passed as ``artifact_uri``.
        serving_container_image: image URI, passed as ``serving_container_image_uri``.

    Returns:
        The object returned by ``aiplatform.Model.upload``.
    """
    return aiplatform.Model.upload(
        serving_container_image_uri=serving_container_image,
        artifact_uri=model_path,
        display_name=model_name,
    )

In [23]:
# Upload the query tower. NOTE: this rebinds query_model from the Keras tower
# to the object returned by upload_model.
query_model = upload_model("query_model",QUERY_MODEL,serving_image)

Creating Model
Create Model backing LRO: projects/738666983022/locations/us-central1/models/2790645173690826752/operations/4638816570921975808
Model created. Resource name: projects/738666983022/locations/us-central1/models/2790645173690826752@1
To use this Model in another session:
model = aiplatform.Model('projects/738666983022/locations/us-central1/models/2790645173690826752@1')


In [24]:
# Upload the candidate tower. NOTE: this rebinds candidate_model from the Keras
# tower to the object returned by upload_model; the batch prediction job below
# reads its resource_name.
candidate_model = upload_model("candidate_model",CANDIDATE_MODEL,serving_image)

Creating Model
Create Model backing LRO: projects/738666983022/locations/us-central1/models/7149003713078624256/operations/3836964731017494528
Model created. Resource name: projects/738666983022/locations/us-central1/models/7149003713078624256@1
To use this Model in another session:
model = aiplatform.Model('projects/738666983022/locations/us-central1/models/7149003713078624256@1')


In [25]:
# Create the online-prediction endpoint that will host the query tower.
endpoint = aiplatform.Endpoint.create(display_name = "query_model_endpoint")

Creating Endpoint
Create Endpoint backing LRO: projects/738666983022/locations/us-central1/endpoints/7454131383925997568/operations/8926243416178688000
Endpoint created. Resource name: projects/738666983022/locations/us-central1/endpoints/7454131383925997568
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/738666983022/locations/us-central1/endpoints/7454131383925997568')


In [26]:
# Deploy the uploaded query model to the endpoint, routing all traffic to it.
query_model.deploy(endpoint = endpoint,
                  machine_type = "n1-standard-4",
                  traffic_percentage=100,
                  deployed_model_display_name = "query_model_deployment")

Deploying model to Endpoint : projects/738666983022/locations/us-central1/endpoints/7454131383925997568
Deploy Endpoint model backing LRO: projects/738666983022/locations/us-central1/endpoints/7454131383925997568/operations/7372501544735866880
Endpoint model deployed. Resource name: projects/738666983022/locations/us-central1/endpoints/7454131383925997568


<google.cloud.aiplatform.models.Endpoint object at 0x7f0b76e40d60> 
resource name: projects/738666983022/locations/us-central1/endpoints/7454131383925997568

In [27]:
# Print the endpoint's resource name; the numeric id at its end is reused later
# when the endpoint is re-opened to fetch a user embedding.
print(f"Query Model deployed to endpoint: {endpoint.resource_name}")

Query Model deployed to endpoint: projects/738666983022/locations/us-central1/endpoints/7454131383925997568


In [None]:
#now get data from bigquery table and submit for batch prediction

In [32]:
# GCS folder that receives the batch prediction results.
GCS_OUTPUT_PREFIX = "gs://book-recsys-bucket/batch-outputs/"
# NOTE(review): bigquery_source is not referenced anywhere else in the visible
# notebook, and it names table books_2 while the export query below reads
# `books` — confirm which table is intended.
bigquery_source = "bq://tokyo-country-452614-f7.book_database.books_2"


In [38]:
from google.cloud import bigquery, storage
import json
import decimal


# Bare bucket name (no gs:// scheme), as expected by storage_client.bucket(...).
# NOTE: this rebinds BUCKET_NAME, which an earlier cell set to "gs://book-recsys-bucket".
BUCKET_NAME = "book-recsys-bucket"
GCS_FILE_PATH = "batch_inputs/books_data.jsonl"  # Path in GCS

# Initialize BigQuery Client
bq_client = bigquery.Client(project=PROJECT_ID)

# Define BigQuery SQL Query
# The columns are aliased to the input names of the candidate model's serving signature.
query = """
SELECT 
    book_id AS ISBN, 
    book_title AS Book_Title, 
    book_author AS Book_Author, 
    book_year AS Year_Of_Publication
FROM `tokyo-country-452614-f7.book_database.books`
"""

# Run Query
query_job = bq_client.query(query)
rows = query_job.result()

# Make BigQuery values JSON-serialisable: Decimals become strings.
def convert_types(obj):
    """Return ``str(obj)`` for ``decimal.Decimal`` values and ``obj`` unchanged otherwise.

    ``json.dumps`` cannot serialise ``Decimal``; converting to ``str`` (not
    float) keeps the exact textual value.
    """
    return str(obj) if isinstance(obj, decimal.Decimal) else obj

def _row_to_json(row):
    """Serialise one BigQuery row as a JSON object string, stringifying Decimals."""
    return json.dumps({key: convert_types(value) for key, value in dict(row).items()})


# JSONL payload: one JSON object per query row, newline-separated.
jsonl_data = list(map(_row_to_json, rows))
jsonl_string = "\n".join(jsonl_data)

# Handle on the destination object in GCS.
storage_client = storage.Client()
bucket = storage_client.bucket(BUCKET_NAME)
blob = bucket.blob(GCS_FILE_PATH)

# Upload the whole payload in a single request.
blob.upload_from_string(jsonl_string, content_type="application/json")

print(f"Data successfully uploaded to GCS: gs://{BUCKET_NAME}/{GCS_FILE_PATH}")


Data successfully uploaded to GCS: gs://book-recsys-bucket/batch_inputs/books_data.jsonl


In [39]:
# Run the uploaded candidate model over the exported JSONL file; results are
# written as JSONL under GCS_OUTPUT_PREFIX.
# gcs_source is the same object the export cell uploaded
# (gs://<BUCKET_NAME>/<GCS_FILE_PATH>), written out literally here.
batch_prediction_job = aiplatform.BatchPredictionJob.create(
    job_display_name = "book_recsys_predictions",
    model_name = candidate_model.resource_name,
    gcs_source = "gs://book-recsys-bucket/batch_inputs/books_data.jsonl",
    gcs_destination_prefix = GCS_OUTPUT_PREFIX,
    instances_format = "jsonl",
    predictions_format = "jsonl",
    machine_type = "n1-standard-4",
)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/738666983022/locations/us-central1/batchPredictionJobs/2293920746935681024
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/738666983022/locations/us-central1/batchPredictionJobs/2293920746935681024')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/2293920746935681024?project=738666983022
BatchPredictionJob projects/738666983022/locations/us-central1/batchPredictionJobs/2293920746935681024 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/738666983022/locations/us-central1/batchPredictionJobs/2293920746935681024 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/738666983022/locations/us-central1/batchPredictionJobs/2293920746935681024 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/738666983022/locations/us-central1/batchPredictionJobs/

Convert batch output to a Matching Engine index-compatible format

In [41]:
!pip install -q jsonlines


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [50]:
import jsonlines

# One client and one bucket handle serve both the download and the upload.
storage_client = storage.Client()
batch_bucket = storage_client.bucket(BUCKET_NAME)

embeddings = []
# Result files of one specific batch prediction run. The timestamped folder is
# hard-coded and must be updated after every new batch job.
prefix = "batch-outputs/prediction-candidate_model-2025_03_31T08_08_37_420Z/prediction.results"

blobs = batch_bucket.list_blobs(prefix=prefix)

for blob in blobs:
    # download_as_bytes replaces the deprecated download_as_string alias.
    file_jsonl = blob.download_as_bytes()
    for line in file_jsonl.splitlines():
        if not line.strip():
            continue  # tolerate blank lines in a result shard
        prediction = json.loads(line)["prediction"]
        # Keep only the fields the index needs: the book id and its vector.
        embeddings.append({"id": prediction["id"], "embedding": prediction["embedding"]})
print(len(embeddings))

# Fail fast on a wrong prefix instead of uploading an empty file and spending a
# long index build on it.
if not embeddings:
    raise RuntimeError("no predictions found under gs://" + BUCKET_NAME + "/" + prefix)

# Written with one JSON object per line despite the .json extension.
with jsonlines.open('./embeddings.json','w') as writer:
    writer.write_all(embeddings)

# The index is later built from the gs://<BUCKET_NAME>/embeddings folder.
bucket = batch_bucket
blob = bucket.blob('embeddings/embeddings.json')

blob.upload_from_filename('./embeddings.json')

271356


In [None]:
#create matching engine index

In [54]:
from google.cloud import aiplatform_v1beta1
from google.protobuf import struct_pb2
import time
import os.path

# Display name under which the Matching Engine index is created.
index_name = "book_recommendation"
# Regional API host and the parent resource path used by the index clients.
ENDPOINT = f"{REGION}-aiplatform.googleapis.com"
PARENT = f"projects/{PROJECT_ID}/locations/{REGION}"

index_client = aiplatform_v1beta1.IndexServiceClient(
    client_options={"api_endpoint": ENDPOINT}
)

def create_index():
    """Create the Matching Engine index over the exported book embeddings.

    The index is created under the display name ``index_name`` (the same name
    the find-or-create logic looks up), is configured for 64-dimensional
    vectors with an empty tree-AH algorithm config, and is populated from
    ``gs://<BUCKET_NAME>/embeddings``. The call blocks until the create
    operation finishes, polling every two minutes.

    Returns:
        str: resource name of the created index.
    """
    print("creating index...")

    # Empty struct: no tree-AH fields are set explicitly.
    treeAhConfig = struct_pb2.Struct(fields = {})

    algorithmconfig = struct_pb2.Struct(
        fields = {"treeAhConfig" : struct_pb2.Value(struct_value = treeAhConfig)}
    )

    config = struct_pb2.Struct(
        fields = {
            # Must equal the width of the tower outputs (64 in this notebook).
            "dimensions" : struct_pb2.Value(number_value=64),
            "algorithmConfig" : struct_pb2.Value(struct_value = algorithmconfig),
            "approximateNeighborsCount" : struct_pb2.Value(number_value = 150),
        }
    )

    metadata = struct_pb2.Struct(
        fields = {
            "config" : struct_pb2.Value(struct_value=config),
            # BUCKET_NAME must be the bare bucket name here; the scheme is prepended.
            "contentsDeltaUri" : struct_pb2.Value(string_value = "gs://" + BUCKET_NAME + "/embeddings"),
            "isCompleteOverwrite" : struct_pb2.Value(bool_value=True),
        }
    )

    ann_index = {
        # Use the shared constant so creation and lookup always agree on the name.
        "display_name" : index_name,
        "description" : "recommends book based on DL and two tower model",
        "metadata" : struct_pb2.Value(struct_value=metadata),
    }

    print("creating index with configs....")
    operation = index_client.create_index(parent=PARENT,index=ann_index)

    while not operation.done():
        print("creating...")
        time.sleep(120)
    return operation.result().name
    
    
def update_index(name):
    """Rebuild an existing index from the embeddings folder and wait for it to finish.

    Sends an update whose metadata points at ``gs://<BUCKET_NAME>/embeddings``
    with ``isCompleteOverwrite`` set. It then blocks until the long-running
    operation completes, polling every two minutes, so later steps do not run
    against an index that is still being rebuilt.

    Args:
        name: full resource name of the index to update.

    Returns:
        str: resource name of the updated index.
    """
    print("update index if already exists...")

    metadata = struct_pb2.Struct(
        fields = {
            "contentsDeltaUri" : struct_pb2.Value(string_value = "gs://" + BUCKET_NAME + "/embeddings"),
            "isCompleteOverwrite" : struct_pb2.Value(bool_value=True),
        }
    )

    ann_index = {"name" : name, "metadata" : struct_pb2.Value(struct_value = metadata),}

    index_update = index_client.update_index(index = ann_index)

    while not index_update.done():
        print("updating...")
        time.sleep(120)
    # result() re-raises the operation's error if the update failed.
    return index_update.result().name
    


# Find-or-create: reuse (and refresh) the index whose display name equals
# index_name, otherwise create it.
indexes = index_client.list_indexes(parent=PARENT)

index_resource_name = None
index_existing = False

for index in indexes:
    # Compare against the wanted display name. The previous check compared the
    # index's display name with its own resource name, which never matched, so
    # every run created a new index.
    if index.display_name == index_name:
        index_existing = True
        index_resource_name = index.name
        break

if index_existing:
    update_index(index_resource_name)

else:
    index_resource_name = create_index()

print("created index is : " + index_resource_name)

creating index...
creating index with configs....
creating...
creating...
creating...
creating...
creating...
creating...
creating...
creating...
creating...
creating...
creating...
creating...
created index is : projects/738666983022/locations/us-central1/indexes/3600207888640901120


In [None]:
#create matching engine index endpoint

In [55]:
# Full resource path of the VPC network the index endpoint is attached to.
# The path is built from the numeric project number (PROJECT_NUMBER), not PROJECT_ID.
PROJECT_NUMBER = "738666983022"
NETWORK_NAME = "my-first-vpc-network"
VPC_NETWORK_NAME = f"projects/{PROJECT_NUMBER}/global/networks/{NETWORK_NAME}"
print(VPC_NETWORK_NAME)

projects/738666983022/global/networks/my-first-vpc-network


In [56]:
# Find-or-create the index endpoint by display name.
endpoint_name = "index_endpoint_book_recommendation"

index_endpoint_client = aiplatform_v1beta1.IndexEndpointServiceClient(client_options = dict(api_endpoint=ENDPOINT))

request = aiplatform_v1beta1.ListIndexEndpointsRequest(parent=PARENT,)

endpoints = index_endpoint_client.list_index_endpoints(request=request)
endpoint_existing = False
INDEX_ENDPOINT_NAME = None

# The loop variable is deliberately NOT called `endpoint`: that name holds the
# query-model prediction endpoint created earlier and must not be overwritten
# with an index endpoint.
for existing_endpoint in endpoints:
    if existing_endpoint.display_name == endpoint_name:
        endpoint_existing = True
        INDEX_ENDPOINT_NAME = existing_endpoint.name
        break

if not endpoint_existing:
    index_endpoint  = {
        "display_name" : endpoint_name,
        "network" : VPC_NETWORK_NAME,
    }

    r = index_endpoint_client.create_index_endpoint(
        parent = PARENT,
        index_endpoint = index_endpoint
    )

    # result() blocks until the endpoint exists (and raises if creation failed);
    # one call is enough.
    INDEX_ENDPOINT_NAME = r.result().name

print(INDEX_ENDPOINT_NAME)


projects/738666983022/locations/us-central1/indexEndpoints/1890663020518965248


In [None]:
#deploy matching engine index on endpoint

In [59]:
# Deploy the index on the index endpoint unless it is already deployed there.
index = index_resource_name
index_endpoint = INDEX_ENDPOINT_NAME
# Defined unconditionally so it exists even when the deployment is skipped.
DEPLOYED_INDEX_ID = "books_deployed"

# Read the endpoint's current state. The earlier `endpoints` listing was taken
# before the endpoint may have been created, so it can be stale.
current_endpoint = index_endpoint_client.get_index_endpoint(name=index_endpoint)
# Skip only when OUR deployment id is present, not when any index is deployed.
index_deployed = any(
    deployed.id == DEPLOYED_INDEX_ID for deployed in current_endpoint.deployed_indexes
)

if not index_deployed:
    deploy_ann_index = {
        "id" : DEPLOYED_INDEX_ID,
        "display_name" : DEPLOYED_INDEX_ID,
        "index" : index,
    }

    r = index_endpoint_client.deploy_index(index_endpoint = index_endpoint, deployed_index = deploy_ann_index)

    while not r.done():
        time.sleep(120)
        print("deploying index to endpoint...")

    # done() is also True for a failed operation; result() re-raises the error
    # so that "index deployed..." is never printed after a failure.
    r.result()


print("index deployed...")

deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
deploying index to endpoint...
index deployed...


Get query Embedding

In [64]:
# Numeric id of the query-model endpoint (taken from the resource name printed
# when the endpoint was created).
query_endpoint_id = "7454131383925997568"
# NOTE: this rebinds `endpoint` to a handle on the existing prediction endpoint.
endpoint = aiplatform.Endpoint(endpoint_name=f"projects/{PROJECT_ID}/locations/{REGION}/endpoints/{query_endpoint_id}")

# One instance; the key "input_1" is the input name shown in the query model's
# serving signature.
input_data = {
    "instances" : [
        {"input_1" : "98391"}
    ]
}

response = endpoint.predict(instances = input_data["instances"])

# First (and only) prediction: the user's embedding vector.
user_embedding = response.predictions[0]

print(user_embedding)
print(len(user_embedding))

[0.64109391, 0.645832956, 0.410251439, 1.0308249, 0.264123559, 0.421879381, 0.695008755, 0.321870267, 0.0, 0.361802727, 0.396711528, 0.384601206, 0.270285606, 0.535059631, 0.701950312, 0.327647716, 0.704599857, 0.745073855, 0.878816843, 0.281253278, 0.432675809, 1.10554576, 0.684552968, 0.550030887, 0.499848247, 0.293901414, 0.50178206, 0.0682981, 0.159686118, 0.859260321, 0.281290382, 0.744888186, 0.407783151, 0.208564728, 0.0, 0.365311742, 0.26200819, 0.598406613, 0.631515443, 0.760341763, 0.580571413, 0.66568, 0.0, 0.276215434, 0.882964671, 0.541433334, 0.61374265, 0.711462259, 0.840345383, 0.383537114, 0.361310244, 0.162519723, 0.0, 0.348087072, 0.950048685, 0.563564181, 0.630755544, 0.529509068, 0.472181201, 0.445267975, 0.599867702, 0.174079955, 0.725564241, 0.562317073]
64


Get Book Recommendations

In [65]:
from google.cloud import aiplatform_v1
import grpc

# Set variables for the current deployed index.
# Both values are written out literally; they match the index endpoint name and
# deployed index id printed/used by the earlier cells.
INDEX_ENDPOINT = 'projects/738666983022/locations/us-central1/indexEndpoints/1890663020518965248'
DEPLOYED_INDEX_ID = 'books_deployed'

# Configure Vector Search client
# NOTE(review): the target is a hard-coded address, presumably the private match
# service address of the deployed index inside the VPC — confirm, and update it
# whenever the index is redeployed.
channel = grpc.insecure_channel(target='10.56.0.14:10000')
transport = aiplatform_v1.services.match_service.transports.grpc.MatchServiceGrpcTransport(channel=channel)
vector_search_client = aiplatform_v1.MatchServiceClient(transport=transport)

# Build FindNeighborsRequest object
# The query vector is the user embedding fetched from the query endpoint above.
datapoint = aiplatform_v1.IndexDatapoint(
  feature_vector=user_embedding
)

# NOTE: this rebinds `query`, which earlier cells used for SQL strings.
query = aiplatform_v1.FindNeighborsRequest.Query(
  datapoint=datapoint,

  # The number of nearest neighbors to be retrieved
  neighbor_count=10
)
request = aiplatform_v1.FindNeighborsRequest(
  index_endpoint=INDEX_ENDPOINT,
  deployed_index_id=DEPLOYED_INDEX_ID,
  # Request can have multiple queries
  queries=[query],
  # False: the recorded response carries datapoint ids and distances only.
  return_full_datapoint=False,
)

# Execute the request
response = vector_search_client.find_neighbors(request)

# Handle the response
print(response)

nearest_neighbors {
  neighbors {
    datapoint {
      datapoint_id: "0966037103"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 41.51947784423828
  }
  neighbors {
    datapoint {
      datapoint_id: "0778320820"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 41.32643508911133
  }
  neighbors {
    datapoint {
      datapoint_id: "0525940979"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 40.98165512084961
  }
  neighbors {
    datapoint {
      datapoint_id: "0758200374"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 40.95263671875
  }
  neighbors {
    datapoint {
      datapoint_id: "1576737365"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 40.94483947753906
  }
  neighbors {
    datapoint {
      datapoint_id: "0684871556"
      crowding_tag {
        crowding_attribute: "0"
      }
    }
    distance: 40.855281

In [None]:
# Each datapoint_id returned above is an ISBN from the pairs_2 table; look it up there to display the recommended book.