# Deploying Merlin Query Tower with Vertex AI

* Create custom prediction routine (CPR)
* Upload query model to Vertex AI Model Registry
* Test registered models predictions

In [1]:
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
PROJECT_NUM = !gcloud projects list --filter="$PROJECT_ID" --format="value(PROJECT_NUMBER)"
PROJECT_NUM = PROJECT_NUM[0]
LOCATION = 'us-central1'

print(f"PROJECT_ID: {PROJECT_ID}")
print(f"PROJECT_NUM: {PROJECT_NUM}")
print(f"LOCATION: {LOCATION}")

PROJECT_ID: hybrid-vertex
PROJECT_NUM: 934903580331
LOCATION: us-central1


In [2]:
from google.cloud import aiplatform as vertex_ai
import os
import time

BUCKET = 'jt-merlin-scaling'
BUCKET_URI = 'gs://jt-merlin-scaling'

vertex_ai.init(project=PROJECT_ID, location=LOCATION)

In [3]:
# !gcloud auth application-default login

## Build serving app

In [4]:
REPO_DOCKER_PATH_PREFIX = 'src'
SERVING_SUB_DIR = 'serving'
SERVING_APPLICATION_DIR = 'app'
SERVING_DOCKERNAME = 'merlin-retriever'

### write pred files to local dir

In [5]:
# Make the training subfolder
! rm -rf {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}
! mkdir {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}
! mkdir {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/{SERVING_APPLICATION_DIR}
! touch {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/{SERVING_APPLICATION_DIR}/__init__.py

#### requirements

In [6]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/requirements.txt
uvicorn[standard]==0.15.0
gunicorn==20.1.0
fastapi==0.68.1
uvloop==0.15.2
fastapi-utils
google-cloud-aiplatform
git+https://github.com/NVIDIA-Merlin/models.git
nvtabular==1.3.3
gcsfs
google-cloud-storage

Writing src/serving/requirements.txt


#### predictor

In [7]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/{SERVING_APPLICATION_DIR}/predictor.py
import nvtabular as nvt
import pandas as pd
import os
import json
import merlin.models.tf as mm
from nvtabular.loader.tf_utils import configure_tensorflow
configure_tensorflow()
import tensorflow as tf
import time
import logging


# These are helper functions that ensure the dictionary input is in a certain order and types are preserved
# this is to get scalar values to appear first in the dict to not confuse pandas with lists https://github.com/pandas-dev/pandas/issues/46092
reordered_keys = [
    'collaborative', 
    'album_name_pl', 
    'artist_genres_pl', 
    'artist_name_pl', 
    'artist_pop_can', 
    'description_pl', 
    'duration_ms_songs_pl', 
    'n_songs_pl', 
    'name', 
    'num_albums_pl', 
    'num_artists_pl', 
    'track_name_pl', 
    'track_pop_pl', 
    'duration_ms_seed_pl', 
    'pid', 
    'track_uri_pl'
]

float_num_fix = ['n_songs_pl','num_albums_pl','num_artists_pl','duration_ms_seed_pl']
float_list_fix = ['track_pop_pl', 'duration_ms_songs_pl']
    
def fix_list_num_dtypes(num_list):
    "this fixes lists of ints to list of floats converted in json input"
    return [float(x) for x in num_list]

def fix_num_dtypes(num):
    "this fixes ints and casts to floats"
    return float(num)

def fix_types(k, v):
    if k in float_num_fix:
        return fix_num_dtypes(v)
    if k in float_list_fix:
        return fix_list_num_dtypes(v)
    else:
        return v

def create_pandas_instance(inputs):
    """
    Helper function to reorder the input to have a sclar first for pandas
    And fix the types converted when data is imported by fastAPI
    """
    if type(inputs) == list:
        header = inputs[0]
        reordered_header_dict = {k: fix_types(k,header[k]) for k in reordered_keys}
        pandas_instance = pd.DataFrame.from_dict(reordered_header_dict, orient='index').T
        if len(inputs) > 1:
            for ti in inputs[1:]:
                reordered_dict = {k: fix_types(k,ti[k]) for k in reordered_keys}
                pandas_instance = pandas_instance.append(pd.DataFrame.from_dict(reordered_dict, orient='index').T)
    else:
        reordered_dict = {k: fix_types(k,inputs[k]) for k in reordered_keys}
        pandas_instance = pd.DataFrame.from_dict(reordered_dict, orient='index').T
    return pandas_instance

class Predictor():
    """Interface of the Predictor class for Custom Prediction Routines.
    The Predictor is responsible for the ML logic for processing a prediction request.
    Specifically, the Predictor must define:
    (1) How to load all model artifacts used during prediction into memory.
    (2) The logic that should be executed at predict time.
    When using the default PredictionHandler, the Predictor will be invoked as follows:
      predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input)))
    """
    def __init__(self):
        return
    
    def load(self, artifacts_uri):
        """Loads the model artifact.
        Args:
            artifacts_uri (str):
                Required. The value of the environment variable AIP_STORAGE_URI.
        """
        logging.info("loading model and workflow")
        logging.info(f"artifacts_uri: {artifacts_uri}")
        start = time.process_time()
        
        self.model = tf.keras.models.load_model(f"{artifacts_uri}/query-tower")
        self.workflow = nvt.Workflow.load(f"{artifacts_uri}/workflow")
        self.workflow = self.workflow.remove_inputs(
            [
                'track_pop_can', 
                'track_uri_can', 
                'duration_ms_can', 
                'track_name_can', 
                'artist_name_can',
                'album_name_can',
                'album_uri_can',
                'artist_followers_can', 
                'artist_genres_can',
                'artist_name_can', 
                'artist_pop_can',
                'artist_pop_pl',
                'artist_uri_can', 
                'artists_followers_pl'
            ]
        )
        self.loader = None # will load this after first load
        self.n_rows = 0
        logging.info(f"loading took {time.process_time() - start} seconds")
        
        return self

    
    def predict(self, prediction_input):
        """Preprocesses the prediction input before doing the prediction.
        Args:
            prediction_input (Any):
                Required. The prediction input that needs to be preprocessed.
        Returns:
            The preprocessed prediction input.
        """
        # handle different input types, can take a dict or list of dicts
        self.n_rows = len(prediction_input)
        
        # pandas convert
        start = time.process_time()
        pandas_instance = create_pandas_instance(prediction_input[0])
        logging.info(f"Pandas conversion took {time.process_time() - start} seconds")
        
        # nvtabular data loading
        start = time.process_time()
        transformed_inputs = nvt.Dataset(pandas_instance)
        logging.info(f"NVT data loading took {time.process_time() - start} seconds")
        
        # workflow transformation
        start = time.process_time()
        transformed_instance = self.workflow.transform(transformed_inputs)
        logging.info(f"Workflow transformation took {time.process_time() - start} seconds")

        # tensorflow data loader
        start = time.process_time()
        batch = mm.sample_batch(transformed_instance, batch_size=1, include_targets=False, shuffle=False)
        logging.info(f"TF Dataloader took {time.process_time() - start} seconds")
        
        # model predict
        start = time.process_time()
        output = self.model(batch)
        logging.info(f"Prediction took {time.process_time() - start} seconds")
        
        return output

Writing src/serving/app/predictor.py


#### main

In [8]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/{SERVING_APPLICATION_DIR}/main.py
from fastapi import FastAPI, Request

import json
import numpy as np
import os
import logging
from fastapi_utils.timing import add_timing_middleware, record_timing

from google.cloud import storage
from .predictor import Predictor

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

predictor_instance = Predictor()
ARTIFACT_DIR = "gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir"
loaded_predictor = predictor_instance.load(artifacts_uri = ARTIFACT_DIR)  # os.environ['AIP_STORAGE_URI'])

app = FastAPI()
add_timing_middleware(app, record=logger.info, prefix="app", exclude="untimed")

@app.get(os.environ['AIP_HEALTH_ROUTE'], status_code=200)
def health():
    return {}


@app.post(os.environ['AIP_PREDICT_ROUTE'])
async def predict(request: Request):
    body = await request.json()
    instances = body["instances"]
    outputs = loaded_predictor.predict(instances)
    # outputs = loaded_predictor.predict(preprocessed_inputs)

    return {"predictions": outputs.numpy().tolist()}

Writing src/serving/app/main.py


In [9]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/{SERVING_APPLICATION_DIR}/prestart.sh
#!/bin/bash
export PORT=$AIP_HTTP_PORT

Writing src/serving/app/prestart.sh


In [10]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/{SERVING_SUB_DIR}/instances.json
{"instances": {"collaborative": "false", "album_name_pl": ["There's Really A Wolf", "Late Nights: The Album", "American Teen", "Crazy In Love", "Pony"], "album_uri_can": "spotify:album:5l83t3mbVgCrIe1VU9uJZR", "artist_followers_can": 4339757.0, "artist_genres_can": "'hawaiian hip hop', 'rap'", "artist_genres_pl": ["'hawaiian hip hop', 'rap'", "'chicago rap', 'dance pop', 'pop', 'pop rap', 'r&b', 'southern hip hop', 'trap', 'urban contemporary'", "'pop', 'pop r&b'", "'dance pop', 'pop', 'r&b'", "'chill r&b', 'pop', 'pop r&b', 'r&b', 'urban contemporary'"], "artist_name_can": "Russ", "artist_name_pl": ["Russ", "Jeremih", "Khalid", "Beyonc\u00c3\u00a9", "William Singe"], "artist_pop_can": 82.0, "artist_pop_pl": [82.0, 80.0, 90.0, 87.0, 65.0], "artist_uri_can": "spotify:artist:1z7b1Pr1rSlvWRzsW3HOrS", "artists_followers_pl": [4339757.0, 5611842.0, 15046756.0, 30713126.0, 603837.0], "description_pl": "", "duration_ms_can": 237322.0, "duration_ms_songs_pl": [237506.0, 217200.0, 219080.0, 226400.0, 121739.0], "n_songs_pl": 8.0, "name": "Lit Tunes ", "num_albums_pl": 8.0, "num_artists_pl": 8.0, "track_name_can": "We Just Havent Met Yet", "track_name_pl": ["Losin Control", "Paradise", "Location", "Crazy In Love - Remix", "Pony"], "track_pop_can": 57.0, "track_pop_pl": [79.0, 58.0, 83.0, 71.0, 57.0], "duration_ms_seed_pl": 51023.1, "pid": 1, "track_uri_can": "spotify:track:0VzDv4wiuZsLsNOmfaUy2W", "track_uri_pl": ["spotify:track:4cxMGhkinTocPSVVKWIw0d", "spotify:track:1wNEBPo3nsbGCZRryI832I", "spotify:track:152lZdxL1OR0ZMW6KquMif", "spotify:track:2f4IuijXLxYOeBncS60GUD", "spotify:track:4Lj8paMFwyKTGfILLELVxt"]}}

Writing src/serving/instances.json


In [11]:
# !pwd
# !tree /home/jupyter/merlin-on-vertex

### dockerfile

In [12]:
! rm -rf {REPO_DOCKER_PATH_PREFIX}/Dockerfile.{SERVING_DOCKERNAME}

In [13]:
%%writefile {REPO_DOCKER_PATH_PREFIX}/Dockerfile.{SERVING_DOCKERNAME}

FROM nvcr.io/nvidia/merlin/merlin-tensorflow:22.09

WORKDIR / 

COPY /serving/requirements.txt /requirements.txt

RUN pip install -r /requirements.txt

COPY /serving/app /app

EXPOSE 80
    
CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port $AIP_HTTP_PORT"]

Writing src/Dockerfile.merlin-retriever


## Copy serving assets to `MODEL_DIR`

In [17]:
MODEL_DIR_old = 'gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir'
MODEL_DIR = 'gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2'

In [15]:
!gsutil ls $MODEL_DIR

gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/


In [19]:
!gsutil cp -r $MODEL_DIR_old/candidate-embeddings $MODEL_DIR/candidate-embeddings

Copying gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir/candidate-embeddings/candidate_embeddings.json [Content-Type=application/json]...
/ [1 files][  4.1 GiB/  4.1 GiB]                                                
Operation completed over 1 objects/4.1 GiB.                                      


In [21]:
# !gsutil cp -r $MODEL_DIR_old/query-tower $MODEL_DIR/query-tower
# !gsutil cp -r $MODEL_DIR_old/candidate-tower $MODEL_DIR/candidate-tower
# !gsutil cp -r $MODEL_DIR_old/workflow $MODEL_DIR/workflow

### copy Dockerfile

In [22]:
!gsutil cp ./src/Dockerfile.$SERVING_DOCKERNAME $MODEL_DIR/

Copying file://./src/Dockerfile.merlin-retriever [Content-Type=application/octet-stream]...
/ [1 files][  270.0 B/  270.0 B]                                                
Operation completed over 1 objects/270.0 B.                                      


### copy serving application

In [23]:
!gsutil -m cp -r ./$REPO_DOCKER_PATH_PREFIX/$SERVING_SUB_DIR $MODEL_DIR/

Copying file://./src/serving/requirements.txt [Content-Type=text/plain]...
Copying file://./src/serving/instances.json [Content-Type=application/json]...  
Copying file://./src/serving/app/__init__.py [Content-Type=text/x-python]...    
Copying file://./src/serving/app/predictor.py [Content-Type=text/x-python]...   
Copying file://./src/serving/app/main.py [Content-Type=text/x-python]...        
Copying file://./src/serving/app/prestart.sh [Content-Type=text/x-sh]...        
Copying file://./src/serving/app/.ipynb_checkpoints/__init__-checkpoint.py [Content-Type=text/x-python]...
/ [7/7 files][  8.5 KiB/  8.5 KiB] 100% Done                                    
Operation completed over 7 objects/8.5 KiB.                                      


### copy workflow

* easier for prediction container if a model's related workflow artifacts are stored in the `model_dir`

In [28]:
WORKFLOW_DIR = "gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed"

In [29]:
!gsutil -m cp -r $WORKFLOW_DIR $MODEL_DIR/workflow

Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.album_name_pl.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.artist_genres_can.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.artist_genres_pl.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.artist_name_can.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.artist_name_pl.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.collaborative.parquet [Content-Type=application/octet-stream]...
Copying gs://jt-merlin-scaling/nvt-last5-v1full/nvt-analyzed/categories/unique.description_pl.parquet [Content-Type=applicatio

In [24]:
# check model_dir
!gsutil ls $MODEL_DIR

gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/Dockerfile.merlin-retriever
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/candidate-embeddings/
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/candidate-tower/
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/query-tower/
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/serving/
gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2/workflow/


## Build Serving Image

In [27]:
SERVING_VERSION = 'v11'

In [28]:
# Docker definitions for training
IMAGE_NAME = f'merlin-vertex-serv-{SERVING_VERSION}'
IMAGE_URI = f'gcr.io/{PROJECT_ID}/{IMAGE_NAME}'

DOCKERNAME = f'{SERVING_DOCKERNAME}'
MACHINE_TYPE ='e2-highcpu-32'
FILE_LOCATION = './src'

print(f"IMAGE_NAME: {IMAGE_NAME}")
print(f"IMAGE_URI: {IMAGE_URI}")
print(f"DOCKERNAME: {DOCKERNAME}")

IMAGE_NAME: merlin-vertex-serv-v11
IMAGE_URI: gcr.io/hybrid-vertex/merlin-vertex-serv-v11
DOCKERNAME: merlin-retriever


### submit to Cloud Build

In [29]:
! gcloud builds submit --config src/cloudbuild.yaml \
    --substitutions _DOCKERNAME=$DOCKERNAME,_IMAGE_URI=$IMAGE_URI,_FILE_LOCATION=$FILE_LOCATION \
    --timeout=2h \
    --machine-type=$MACHINE_TYPE

Creating temporary tarball archive of 62 file(s) totalling 1.6 MiB before compression.
Some files were not included in the source upload.

Check the gcloud log [/home/jupyter/.config/gcloud/logs/2022.11.11/18.33.54.534693.log] to see which files and the contents of the
default gcloudignore file used (see `$ gcloud topic gcloudignore` to learn
more).

Uploading tarball of [.] to [gs://hybrid-vertex_cloudbuild/source/1668191634.654956-7861da43623d4552a36a65c4c1cf6618.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/hybrid-vertex/locations/global/builds/aa73ebb1-a7cd-412e-8f84-a81f15abe095].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/aa73ebb1-a7cd-412e-8f84-a81f15abe095?project=934903580331 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "aa73ebb1-a7cd-412e-8f84-a81f15abe095"

FETCHSOURCE
Fetching storage object: gs://hybrid-vertex_cloudbuild/source/1668191634.654956-7861da43623d4552a36a65c4c1cf

# Deploy Query Model to Vertex

## Upload to Model Regsitry

* `parent_model` [src](https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/models.py#L2831) code

In [30]:
MODEL_DEPLOY_VERSION = "v11"

In [31]:
MODEL_DISPLAY_NAME=f"mm-qtower-{MODEL_DEPLOY_VERSION}"
MODEL_ARTIFACT_URI = 'gs://jt-merlin-scaling/pipes-2tower-merlin-tf-v5/run-v1-20221110-024710/model-dir2'

In [32]:
start = time.process_time()

model = vertex_ai.Model.upload(
        display_name=MODEL_DISPLAY_NAME,
        artifact_uri=MODEL_ARTIFACT_URI,
        serving_container_image_uri=IMAGE_URI,
        serving_container_predict_route='/predict',
        serving_container_health_route='/health',
        serving_container_command=["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port $AIP_HTTP_PORT"],
        serving_container_args='--gpus all',
        # parent_model=PARENT_MODEL,
        sync=True,
    )

print(f"Created model in {round((time.process_time() - start),2)} seconds\n")

Creating Model
Create Model backing LRO: projects/934903580331/locations/us-central1/models/4560312437131182080/operations/458563762589270016
Model created. Resource name: projects/934903580331/locations/us-central1/models/4560312437131182080@1
To use this Model in another session:
model = aiplatform.Model('projects/934903580331/locations/us-central1/models/4560312437131182080@1')
Created model in 0.34 seconds



In [33]:
# existing model resource
# MODEL_URI = 'projects/934903580331/locations/us-central1/models/4060694353469767680@1'
# MODEL_URI = "projects/934903580331/locations/us-central1/models/3574305593713754112" # 50 epoch
# model = vertex_ai.Model(MODEL_URI)

model

<google.cloud.aiplatform.models.Model object at 0x7fb26cb6e490> 
resource name: projects/934903580331/locations/us-central1/models/4560312437131182080

## Deploy to Vertex AI Endpoint

### deployment config

In [34]:
# service account for predictions
VERTEX_SA = '934903580331-compute@developer.gserviceaccount.com'

# model and endpoint display names
# MODEL_DISPLAY_NAME=f"mm-retrieval-{VERSION_serving}"
DEPLOYED_MODEL_DISPLAY_NAME=f"nb-deployed-{MODEL_DISPLAY_NAME}"
ENDPOINT_DISPLAY_NAME = f"mm-endpoint-{MODEL_DEPLOY_VERSION}"

# Endpoint resource config
DEPLOY_COMPUTE="n1-standard-4"
DEPLOY_GPU="NVIDIA_TESLA_T4"
DEPLOY_NGPU=1
MIN_NODES=1
MAX_NODES=1
TRAFFIC=100

### create model endpoint

In [35]:
start = time.process_time()

endpoint = vertex_ai.Endpoint.create(
    display_name=ENDPOINT_DISPLAY_NAME,
    project=PROJECT_ID,
    location=LOCATION,
)

print(f"Created endpoint in {round((time.process_time() - start),2)} seconds\n")

print(endpoint)

Creating Endpoint
Create Endpoint backing LRO: projects/934903580331/locations/us-central1/endpoints/6231227063722835968/operations/926375173882380288
Endpoint created. Resource name: projects/934903580331/locations/us-central1/endpoints/6231227063722835968
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/934903580331/locations/us-central1/endpoints/6231227063722835968')
Created endpoint in 0.14 seconds

<google.cloud.aiplatform.models.Endpoint object at 0x7fb26cbea550> 
resource name: projects/934903580331/locations/us-central1/endpoints/6231227063722835968


### deploy model to endpoint

In [36]:
start = time.process_time()

model.deploy(
    endpoint=endpoint,
    deployed_model_display_name=MODEL_DISPLAY_NAME,
    machine_type=DEPLOY_COMPUTE,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    traffic_percentage=TRAFFIC,
    accelerator_type=DEPLOY_GPU,
    accelerator_count=DEPLOY_NGPU,
    service_account=VERTEX_SA,
    sync=True
)

print(f"Deployed model to endpoint in {round((time.process_time() - start),2)} seconds\n")

print(model)

Deploying model to Endpoint : projects/934903580331/locations/us-central1/endpoints/6231227063722835968
Deploy Endpoint model backing LRO: projects/934903580331/locations/us-central1/endpoints/6231227063722835968/operations/7595080322111242240
Endpoint model deployed. Resource name: projects/934903580331/locations/us-central1/endpoints/6231227063722835968
Deployed model to endpoint in 0.99 seconds

<google.cloud.aiplatform.models.Model object at 0x7fb26cb6e490> 
resource name: projects/934903580331/locations/us-central1/models/4560312437131182080


#### check endpoint

In [149]:
print(endpoint.gca_resource.deployed_models[0])

id: "5064566061815300096"
model: "projects/934903580331/locations/us-central1/models/4060694353469767680"
display_name: "mm_qtower_test_v7"
create_time {
  seconds: 1668150467
  nanos: 265666000
}
dedicated_resources {
  machine_spec {
    machine_type: "n1-standard-4"
    accelerator_type: NVIDIA_TESLA_T4
    accelerator_count: 1
  }
  min_replica_count: 1
  max_replica_count: 1
}
service_account: "934903580331-compute@developer.gserviceaccount.com"
model_version_id: "1"



## Test Deployed Endpoint

### create sample test instance

In [199]:
TEST_INSTANCE = {
    'collaborative': 'false',
    'album_name_pl': [
        "There's Really A Wolf", 'Late Nights: The Album','American Teen', 'Crazy In Love', 'Pony'
    ], 
    'artist_genres_pl': [
        "'hawaiian hip hop', 'rap'",
       "'chicago rap', 'dance pop', 'pop', 'pop rap', 'r&b', 'southern hip hop', 'trap', 'urban contemporary'",
       "'pop', 'pop r&b'", "'dance pop', 'pop', 'r&b'",
       "'chill r&b', 'pop', 'pop r&b', 'r&b', 'urban contemporary'"
    ], 
    'artist_name_pl': [
        'Russ', 'Jeremih', 'Khalid', 'Beyonc\xc3\xa9','William Singe'
    ], 
    'artist_pop_can': 82.0, 
    'description_pl': '', 
    'duration_ms_songs_pl': [
        237506.0, 217200.0, 219080.0, 226400.0, 121739.0
    ], 
    'n_songs_pl': 8.0, 
    'name': 'Lit Tunes ', 
    'num_albums_pl': 8.0, 
    'num_artists_pl': 8.0, 
    'track_name_pl': [
        'Losin Control', 'Paradise', 'Location','Crazy In Love - Remix', 'Pony'
    ], 
    'track_pop_pl': [
        79.0, 58.0, 83.0, 71.0, 57.0
    ],
    'duration_ms_seed_pl': 51023.1,
    'pid': 1,
    'track_uri_pl': [
        'spotify:track:4cxMGhkinTocPSVVKWIw0d',
        'spotify:track:1wNEBPo3nsbGCZRryI832I',
        'spotify:track:152lZdxL1OR0ZMW6KquMif',
        'spotify:track:2f4IuijXLxYOeBncS60GUD',
        'spotify:track:4Lj8paMFwyKTGfILLELVxt'
    ]
}

### make prediction request

In [200]:
# endpoint.predict(instances=[[TEST_INSTANCE, TEST_INSTANCE]])

In [202]:
start = time.process_time()

playlist_emb = endpoint.predict(instances=[TEST_INSTANCE])

print(f"neighbors retrieved in {round((time.process_time() - start),2)} seconds")
print(f"Vector Dimensions: {len(playlist_emb.predictions[0])}\n")

playlist_emb.predictions

neighbors retrieved in 0.12 seconds
Vector Dimensions: 128



[[0.0,
  0.0,
  0.4964520633220673,
  0.5604552030563354,
  0.0,
  0.5495439171791077,
  0.0,
  0.1598391383886337,
  0.0,
  0.8183088898658752,
  2.817295074462891,
  0.2966776490211487,
  0.165576845407486,
  0.5191221833229065,
  1.430994272232056,
  0.0,
  0.3241373300552368,
  0.0,
  0.0,
  0.5781844258308411,
  0.0,
  0.0,
  1.366185665130615,
  0.0,
  0.1283302456140518,
  0.165629118680954,
  0.0,
  1.512356758117676,
  0.0,
  0.0,
  0.8049782514572144,
  4.366167068481445,
  0.0,
  0.9294438362121582,
  0.1463523507118225,
  0.0,
  2.721257209777832,
  0.0,
  0.0,
  0.0,
  0.3107767999172211,
  0.0,
  0.0,
  0.0,
  0.08193261176347733,
  0.1614352613687515,
  0.0,
  1.387633681297302,
  1.532091498374939,
  0.0,
  0.0,
  0.0,
  2.248701333999634,
  0.0,
  0.09303939342498779,
  1.476220369338989,
  0.5754297971725464,
  2.056629180908203,
  0.0,
  0.7716501355171204,
  0.0,
  0.0826091468334198,
  1.439555644989014,
  0.0,
  1.09518039226532,
  0.0,
  0.0,
  0.0,
  0.900723338

### prediction latency

In [158]:
%%timeit
endpoint.predict(instances=[[TEST_INSTANCE, TEST_INSTANCE]])

4.06 s ± 419 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
endpoint.predict(instances=[[TEST_INSTANCE, TEST_INSTANCE]])