# E2E recsys with matching engine and TFRS

Simple example, goal being:
    1) Train a Two-Tower model using movielens data
    2) Deploy the query model endpoint
    3) Save movie embeddings to json, for use in matching engine
    
First, create a user-managed notebook inside the already-created peered VPC network used for Matching Engine. Select TensorFlow Enterprise 2.6 with a T4 GPU.
![](./create-workbench.png)

##### Be sure to create the notebook in the peered network
![](./network-create.png)
    
The next notebook will connect matching engine with the query endpoint for a simple recommender system

Run the below pip install one time to install tensorflow-recommenders

In [None]:
!echo Y | pip uninstall tensorflow 
!pip install tensorflow-recommenders --user

# Train a 2 tower model

In [None]:
from typing import Dict, Text

import json

import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

# silence log records at WARNING level and below (see the logging.disable call further down)
import logging

from google.cloud import aiplatform_v1beta1
from google.protobuf import struct_pb2

import pandas as pd


# Globally suppress log records of severity WARNING and lower.
logging.disable(logging.WARNING)

# Width of the user and movie embedding vectors.
DIMENSIONS = 64


def _rating_ids(record):
    """Keep only the movie and user ids of a rating, parsed from string to number."""
    return {
        "movie_id": tf.strings.to_number(record["movie_id"]),
        "user_id": tf.strings.to_number(record["user_id"]),
    }


def _movie_id(record):
    """Reduce a movie record to its id, parsed from string to number."""
    return tf.strings.to_number(record["movie_id"])


# Ratings data, reduced to the basic id features.
ratings = tfds.load('movielens/100k-ratings', split="train").map(_rating_ids)
# All the available movies, reduced to their ids.
movies = tfds.load('movielens/100k-movies', split="train").map(_movie_id)

# Build a model.
class Model(tfrs.Model):
    """Two-tower retrieval model with one embedding tower for users and one for movies."""

    def __init__(self):
        """Create both embedding towers and the retrieval task that scores them."""
        super().__init__()

        # Query tower: maps a user id to a DIMENSIONS-wide embedding.
        user_embedding = tf.keras.layers.Embedding(
            input_dim=2000, output_dim=DIMENSIONS)
        self.user_model = tf.keras.Sequential([user_embedding])

        # Candidate tower: maps a movie id to a DIMENSIONS-wide embedding.
        movie_embedding = tf.keras.layers.Embedding(
            input_dim=2000, output_dim=DIMENSIONS)
        self.item_model = tf.keras.Sequential([movie_embedding])

        # Retrieval task whose top-K metrics are evaluated against the
        # embeddings of the whole movie dataset, computed 128 movies at a time.
        all_movie_embeddings = movies.batch(128).map(self.item_model)
        top_k_metrics = tfrs.metrics.FactorizedTopK(candidates=all_movie_embeddings)
        self.task = tfrs.tasks.Retrieval(metrics=top_k_metrics)

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """Return the retrieval-task loss for a batch of (user_id, movie_id) features.

        Args:
            features: mapping that provides the "user_id" and "movie_id" tensors.
            training: accepted for interface compatibility; not used here.

        Returns:
            The value produced by the retrieval task for the two embedding batches.
        """
        query_vectors = self.user_model(features["user_id"])
        candidate_vectors = self.item_model(features["movie_id"])
        return self.task(query_vectors, candidate_vectors)


# Instantiate the two-tower model and attach an Adagrad optimizer (learning rate 0.5).
model = Model()
optimizer = tf.keras.optimizers.Adagrad(0.5)
model.compile(optimizer=optimizer)

# Seed TensorFlow, shuffle the ratings once (same order on every pass), and
# carve the result into an 80k training slice and a 20k test slice.
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)
train, test = shuffled.take(80_000), shuffled.skip(80_000).take(20_000)

# Train for five epochs on batches of 1024 ratings.
train_batches = train.batch(1024)
model.fit(train_batches, epochs=5)

# Evaluate on the held-out slice; the metrics dict is the cell's output.
test_batches = test.batch(1024)
model.evaluate(test_batches, return_dict=True)

### Set your variables

In [None]:
import os

PROJECT = 'wortz-project-352116'
NETWORK_NAME = 'me-network' #same as VPC peered network

### Create a bucket to store our embeddings and models
BUCKET = 'gs://end-to-end-two-tower' # TODO - change for each user
EMBEDDINGS = os.path.join(BUCKET, 'embeddings')
QUERY_MODEL = os.path.join(BUCKET, 'query_model')
REGION = 'us-central1'

## Get an access token and the project number, then build the PARENT resource path
PROJECT_ID = PROJECT
AUTH_TOKEN = !gcloud auth print-access-token
PROJECT_NUMBER = !gcloud projects list --filter="PROJECT_ID:'{PROJECT_ID}'" --format='value(PROJECT_NUMBER)'
PROJECT_NUMBER = PROJECT_NUMBER[0]


PARENT = "projects/{}/locations/{}".format(PROJECT_ID, REGION)
PARENT

In [None]:
# run one time to create your bucket
# !gsutil mb -l $REGION $BUCKET

In [None]:
# Save the query/user model

model.user_model.save(QUERY_MODEL)

In [None]:
# Make sure it saved
!gsutil ls $QUERY_MODEL

In [None]:
from google.cloud import aiplatform

# Register the saved query (user) tower as a Vertex AI model served by the
# prebuilt TF 2.6 CPU container. Project and region are passed explicitly, the
# same way the endpoint is created, so the upload does not depend on whatever
# defaults the environment happens to supply.
model_gcp = aiplatform.Model.upload(
    display_name="Movielens User Query Model",
    artifact_uri=QUERY_MODEL,
    serving_container_image_uri='us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-6:latest',
    description="Top of the query tower, meant to return an embedding for each user instance",
    project=PROJECT,
    location=REGION,
)

In [None]:
#validate the model type output
model_gcp

In [None]:
# Create the Vertex AI endpoint, in the configured project and region, that the
# query model is deployed onto in a later cell.
endpoint = aiplatform.Endpoint.create(
    display_name="Movielens Model Endpoint",
    project=PROJECT,
    location=REGION,
)

In [None]:
# Deploy the uploaded query model onto the endpoint: n1-standard-4 machines,
# no accelerator, between 1 and 2 replicas.
deployment = model_gcp.deploy(
    endpoint=endpoint,
    deployed_model_display_name="Movielens User Query Model",
    machine_type="n1-standard-4",
    min_replica_count=1,
    max_replica_count=2,
    accelerator_type=None,
    accelerator_count=0,
    sync=False,  # NOTE(review): presumably returns before the deployment finishes - confirm
)


In [None]:
deployment

### Finally, save the embeddings for the movie dataset - this is a transform from the item model

## Write embeddings to local storage
Following this format for Matching Engine
https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/matching_engine/sdk_matching_engine_for_indexing.ipynb

This step can be repeated as new products come in - they will be scored but think of what info will not be available (OOB tokens)

In [None]:
# Run the item model over the movie ids 1000 at a time, pairing each batch of
# ids with its embeddings, then unbatch so the dataset yields one
# (movie_id, embedding) pair per movie.
movie_embs = movies.batch(1000).map(lambda x: [x, model.item_model(x)]).unbatch() #process 1000 at a time then flatten it back

In [None]:
### Quick check on bad records

In [None]:
# Write one JSON object per line to local disk: {"id": ..., "embedding": [...]}
with open("movie_embeddings.json", 'w') as out_file:
    for movie_id, movie_emb in movie_embs:
        # str() is applied to the numpy scalars directly so each number is
        # rendered exactly as its numpy string form.
        id_text = str(movie_id.numpy())
        vector_text = ",".join(map(str, movie_emb.numpy()))
        out_file.write('{"id":"%s","embedding":[%s]}\n' % (id_text, vector_text))

You should now see .jsonl data as required by matching engine
![](jsonl.png)

### Upload the data to GCS
Only remove if you have issues uploading the json file

In [None]:
# !gsutil rm $EMBEDDINGS/movie_embeddings.json

In [None]:
!gsutil cp movie_embeddings.json $EMBEDDINGS/movie_embeddings.json

# Next we will deploy our movie indices with Matching Engine
* Create an index (from the `json` files)
* Create an endpoint
* Deploy the index to the endpoint so you can perform vector search

In [None]:


# Regional Vertex AI API host, built from REGION.
api_endpoint_me = "{}-aiplatform.googleapis.com".format(REGION)

# Index-service client pointed at that regional host.
index_client = aiplatform_v1beta1.IndexServiceClient(
    client_options=dict(api_endpoint=api_endpoint_me)
)


# Display name for the index; it records the embedding width.
DISPLAY_NAME = f"Movielens Movie: {DIMENSIONS} DIMENSIONS"

Set the Nearest Neighbor Options

See here for tips on [tuning the index](https://cloud.google.com/vertex-ai/docs/matching-engine/using-matching-engine#tuning_the_index)

Other best practices from our PM team:
```
Start from leafNodesToSearchPercent=5 and approximateNeighborsCount=10 * k

use default values for others.

measure performance and recall and change those 2 parameters accordingly.
```

In [None]:
def _number(value):
    """Wrap a number in a protobuf Value."""
    return struct_pb2.Value(number_value=value)


def _text(value):
    """Wrap a string in a protobuf Value."""
    return struct_pb2.Value(string_value=value)


def _nested(struct):
    """Wrap a protobuf Struct in a protobuf Value."""
    return struct_pb2.Value(struct_value=struct)


# Tree-AH algorithm settings.
treeAhConfig = struct_pb2.Struct(fields={
    "leafNodeEmbeddingCount": _number(20),
    "leafNodesToSearchPercent": _number(7),
})

algorithmConfig = struct_pb2.Struct(fields={"treeAhConfig": _nested(treeAhConfig)})

# Nearest-neighbour settings: embedding width, neighbour count, distance measure.
config = struct_pb2.Struct(fields={
    "dimensions": _number(DIMENSIONS),
    "approximateNeighborsCount": _number(10),
    "distanceMeasureType": _text("DOT_PRODUCT_DISTANCE"),
    "algorithmConfig": _nested(algorithmConfig),
})

# Index metadata: the config plus the GCS location of the embedding files.
metadata = struct_pb2.Struct(fields={
    "config": _nested(config),
    "contentsDeltaUri": _text(EMBEDDINGS),
})

# Index description handed to create_index in the next cell.
ann_index = dict(
    display_name=DISPLAY_NAME,
    description=f"Movielens {DIMENSIONS}",
    metadata=_nested(metadata),
)

In [None]:
ann_index = index_client.create_index(parent=PARENT, index=ann_index)

In [None]:
# Wait for the index-creation operation to complete, checking once a minute.
# This will take ~20 min.
import time

while not ann_index.done():
    print("Poll the operation to create index...")
    time.sleep(60)

In [None]:
ann_index

In [None]:
ann_index.result()

### Save the resource name of the index

In [None]:
INDEX_RESOURCE_NAME = ann_index.result().name
INDEX_RESOURCE_NAME

Debugging tool in case you run into issues. Example usage below.
`!gcloud beta ai operations describe 4122851463774863360 --index=7253099976438317056 --project=$PROJECT`

## Create Index Endpoint and Deploy Index

In [None]:
VPC_NETWORK_NAME = "projects/{}/global/networks/{}".format(PROJECT_NUMBER, NETWORK_NAME)
VPC_NETWORK_NAME

In [None]:
index_endpoint = {
    "display_name": "index_endpoint_for_demo",
    "network": VPC_NETWORK_NAME,
}

In [None]:
# Index-endpoint service client, using the same regional API host as the index client.
index_endpoint_client = aiplatform_v1beta1.IndexEndpointServiceClient(
    client_options=dict(api_endpoint=api_endpoint_me)
)

# Request creation of the index endpoint; the returned object is kept so the
# following cells can call .result() on it.
ann_index_en = index_endpoint_client.create_index_endpoint(
    parent=PARENT, index_endpoint=index_endpoint
)

In [None]:
ann_index_en.result()

In [None]:
INDEX_ENDPOINT_NAME = ann_index_en.result().name
INDEX_ENDPOINT_NAME

In [None]:
import time

DEPLOYED_INDEX_ID = 'movielens_deployed'

# Description of the deployment: which index to serve and under which id.
deploy_ann_index = {
    "id": DEPLOYED_INDEX_ID,
    "display_name": DEPLOYED_INDEX_ID,
    "index": INDEX_RESOURCE_NAME,
}
r = index_endpoint_client.deploy_index(
    index_endpoint=INDEX_ENDPOINT_NAME, deployed_index=deploy_ann_index
)

# The deployment runs as a long-running operation. Wait for it here, the same
# way index creation is polled, so the query cells below are not run against
# an index that is not deployed yet.
while not r.done():
    print("Poll the operation to deploy index...")
    time.sleep(60)

# Raises if the deployment failed; otherwise shows the deployment result.
r.result()

# Connect Matching Engine and The User Model Into a Recommendation System

This will bring it all together by incorporating the prediction endpoint 

In [None]:
# Attach to the already-created index endpoint by its resource name - IMPORTANT
# when working with endpoints/indices that were created earlier.
ME_index_endpoint = aiplatform.MatchingEngineIndexEndpoint(INDEX_ENDPOINT_NAME)


In [None]:
# Smoke-test the deployed index. `emb_627` is only defined in a later cell, so
# fetch a query embedding here, and pass the deployed index id that the working
# match() call further down also supplies.
probe_embedding = endpoint.predict([[627.0]]).predictions[0]
ME_index_endpoint.match(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=probe_embedding,
    num_neighbors=3,
)

In [None]:
USER = 627.0 # pick any user id present in the ratings data to see watch history and recommendations
NUM_NEIGH=3 # number of neighbours requested from the index further down

emb_627 = endpoint.predict([[USER]]) # prediction from the deployed query model
emb_627 = emb_627.predictions # keep only the predictions payload
emb_627 # we should get the embedding for USER, DIMENSIONS values long

In [None]:
#### Get watch history

In [None]:
! wget https://files.grouplens.org/datasets/movielens/ml-100k/u.item

In [None]:
# Quick sidetour - create movie lookup dictionary (movie_id -> title)
movie_names = pd.read_csv('u.item', delimiter='|',
                          encoding='latin-1',
                          usecols=(0, 1),
                          names=['movie_id', 'title'])
# Key the lookup by the movie_id column. A plain to_dict()['title'] is keyed by
# row position (0, 1, ...) instead, so looking a title up by movie id would
# return a different movie or raise KeyError.
movielookup = movie_names.set_index('movie_id')['title'].to_dict()

In [None]:
# List every movie the chosen user has rated. The header is printed once,
# rather than being repeated (with stray indentation) for each movie.
print("Movies watched:")
for i, watched_movie in enumerate(ratings.filter(lambda x: x['user_id']==USER)):
    # The id tensor holds a float; convert it to a plain int for the lookup.
    key = int(watched_movie['movie_id'].numpy())
    print(f"  {i}: {movielookup[key]}")

In [None]:
# Use the first entry of the prediction payload as the query.
query_vector = emb_627[0]

# Query the deployed index. The id comes from DEPLOYED_INDEX_ID, the constant
# used when the index was deployed, instead of a second hard-coded copy.
ann_response = ME_index_endpoint.match(
    deployed_index_id=DEPLOYED_INDEX_ID,
    queries=query_vector,
    num_neighbors=NUM_NEIGH
)

print("Recommended movie IDs:", ann_response)

In [None]:
# look at the recommended movies vs the viewed for that user
# The header is printed once, rather than being repeated for every neighbour.
print("Movies recommended:")
for i, neighbor in enumerate(ann_response[0]):
    # Ids were written to the embeddings file as float strings such as "627.0".
    key = int(float(neighbor.id))
    print(f"  {i}: {movielookup[key]} (distance: {neighbor.distance})")


In [None]:
# `mn_inst` is not defined anywhere in this notebook; show the ids of the
# neighbours returned for the query instead.
[neighbor.id for neighbor in ann_response[0]]

#### Appendix

In [None]:
!wc -l movie_embeddings.json

In [None]:
a= {"id":"627.0","embedding":[-0.14212863,0.12457584,-0.7434126,0.17593558,-0.026925895,0.02495948,1.0722023,-0.31858772,-0.046656977,-0.4895339,-0.061055277,-0.3349046,-0.15609045,-0.19707416,0.16084102,-0.27045664,-0.3428139,0.48958114,0.6361803,0.30314636,0.26163542,-0.41117418,-0.49810714,0.2877337,-0.6315446,-0.40054193,-0.19141906,-0.41936785,-0.35436887,-0.06877242,-0.044330686,-0.014343675,0.5349128,-0.7423113,-0.0022975504,-0.44584844,0.010413095,1.2816586,-0.09251402,0.87896764,-0.06194441,-0.05595126,-0.51902133,-0.74573165,0.16893004,0.047940448,0.29358983,0.4754887,0.15502426,0.44330248,-0.022351682,-0.27851883,0.57796776,-0.47142428,0.064454705,-0.11905501,0.26168153,-0.05122369,-0.06797159,0.2765007,0.06892009,0.4566458,-0.12807411,0.35002083]}


In [None]:
b = {"id":"555.0","embedding":[-0.533472,0.88814735,-0.24613492,-0.013929918,0.03658565,-0.14172664,0.80844355,-0.5363197,0.08592684,1.0273799,-0.2758104,0.24798381,-0.29079348,0.18664293,-1.084568,0.5093116,-0.16913137,0.05772318,0.5618666,0.76637805,-0.3082445,0.4029161,-0.19377074,1.4662749,-0.8224506,-0.27167782,0.49119562,0.13797675,-0.2449953,0.31923363,0.17741793,0.39652658,0.40723267,0.6747394,-0.0496943,0.3108281,0.4267428,-0.2918449,-1.067363,0.78338695,0.09410989,0.5394794,0.6590967,-0.20916262,0.24139857,0.8387299,1.0063992,1.4427522,0.5820984,1.3107247,0.23732755,-0.16565211,0.027624682,-0.29418015,0.02249343,0.14499913,-0.6506447,0.016585588,0.09415283,-0.19091803,0.73259926,0.10929288,-0.43171406,-1.7558013]}

In [None]:
len(a['embedding'])

In [None]:
for _ in movies.take(10):
    print(_)

In [None]:
import pandas as pd

# Movie id / title table read from the MovieLens u.item file.
movie_names = pd.read_csv('u.item', delimiter='|',
                          encoding='latin-1',
                          usecols=(0, 1),
                          names=['movie_id', 'title'])
# Key the lookup by the movie_id column. A plain to_dict()['title'] is keyed by
# row position (0, 1, ...) instead, so looking a title up by movie id would
# return a different movie or raise KeyError.
movielookup = movie_names.set_index('movie_id')['title'].to_dict()

In [None]:
# DataFrame has no `drop_index` attribute, so the original expression raised
# AttributeError. Build the movie_id -> title mapping by indexing on movie_id.
movie_lookup = movie_names.set_index('movie_id')['title'].to_dict()
movie_lookup

### Cleaning up
To clean up all Google Cloud resources used in this project, you can delete the Google Cloud project you used for the tutorial. You can also manually delete resources that you created by running the following code.

In [None]:
INDEX_RESOURCE_NAME
# 7352179168240467968

In [None]:
index_endpoint_client

In [None]:
# Undeploy the index and wait for that operation to finish before deleting
# anything: the deletes below are issued against resources that must no longer
# be serving the deployed index.
undeploy_op = index_endpoint_client.undeploy_index(
    index_endpoint=INDEX_ENDPOINT_NAME, deployed_index_id=DEPLOYED_INDEX_ID
)
undeploy_op.result()

index_client.delete_index(name=INDEX_RESOURCE_NAME)

index_endpoint_client.delete_index_endpoint(name=INDEX_ENDPOINT_NAME)

In [None]:
endpoint_resource_name = endpoint.resource_name
endpoint_resource_name

In [None]:
deployment_resource_name = deployment.resource_name
deployment_resource_name
# Delete our model endpoint and the uploaded model.
# The original call referenced an undefined `gcp_model` (the upload is stored
# in `model_gcp`) and passed it where Endpoint.delete expects its `force` flag.
endpoint.delete(force=True)  # force=True undeploys any deployed model first
model_gcp.delete()