In [17]:
# !pip install google-cloud-aiplatform[prediction]>=1.16.0 fastapi nvtabular git+https://github.com/NVIDIA-Merlin/models.git --user

# Building a custom Vertex AI endpoint for Merlin Query Tower

**IMPORTANT** Make sure you are running this notebook in a DLVM (e.g. tensorflow enterprise 2.8) to build the image

________
**This will not work in the training container**
________

Your output should look like this - you are going to use the query model endpoint to create a custom container

![](img/merlin-bucket.png)


In [3]:
from datetime import datetime


# GCP project that hosts the Artifact Registry repository and the serving image.
PROJECT = 'wortz-project-352116'  # <--- TODO: CHANGE THIS
# NOTE(review): LOCATION duplicates REGION and is not referenced by the cells visible below — confirm it is still needed.
LOCATION = 'us-central1'
# Region used for the Artifact Registry repo, docker auth and the image name.
REGION = 'us-central1'
# path = 'gs://two-tower-models' #TODO change to your model directory
# GCS bucket holding the model and workflow artifacts.
BUCKET = 'gs://spotify-jsw-mpd-2023'
# Artifact Registry (docker format) repository that receives the serving image.
REPOSITORY = 'merlin-spotify-cpr'
# Artifact root handed to Vertex AI; same value as BUCKET.
ARTIFACT_URI = f'{BUCKET}'
# NOTE(review): MODEL_DIR and PREFIX are not referenced by the cells visible below — confirm they are still needed.
MODEL_DIR = f'{BUCKET}/query_model_merlin'
PREFIX = 'merlin-spotify'

### Run one time to locally copy the workflow for the image

In [8]:
# !mkdir workflow
# !gsutil cp -r $BUCKET/workflow workflow

### Set up repo and configure Docker (one-time)

In [9]:
# Create the repo if needed for the artifacts

! gcloud beta artifacts repositories create {REPOSITORY} \
    --repository-format=docker \
    --location=$REGION

[1;31mERROR:[0m (gcloud.beta.artifacts.repositories.create) ALREADY_EXISTS: the repository already exists


In [10]:
! gcloud auth configure-docker {REGION}-docker.pkg.dev --quiet


{
  "credHelpers": {
    "gcr.io": "gcloud",
    "us.gcr.io": "gcloud",
    "eu.gcr.io": "gcloud",
    "asia.gcr.io": "gcloud",
    "staging-k8s.gcr.io": "gcloud",
    "marketplace.gcr.io": "gcloud",
    "us-central1-docker.pkg.dev": "gcloud"
  }
}
Adding credentials for: us-central1-docker.pkg.dev
gcloud credential helpers already registered correctly.


In [11]:
! rm -rf app
! mkdir app

### Dependency file
The first few packages are for the server that handles traffic.
NVTabular was downgraded for this example; this may not be necessary in future versions.

In [12]:
%%writefile app/requirements.txt
uvicorn[standard]==0.15.0
gunicorn==20.1.0
fastapi==0.68.1
google-cloud-aiplatform
merlin-models
nvtabular
gcsfs
google-cloud-storage

Writing app/requirements.txt


### Predictor module and class
This is an adaptation of the CPR examples

This was locally tested and created shortly after model creation in the [tensorflow-predict](tensorflow-predict.ipynb) notebook

In [13]:
%%writefile app/dataset_to_tensors.py

try:
    import cudf
except ImportError:
    cudf = None
import pandas as pd
import tensorflow as tf
from typing import Dict
from merlin.io import Dataset
import itertools


def cupy_array_to_tensor(array):
    """Hand a GPU array to TensorFlow through DLPack as a single-column tensor.

    The array is reshaped to ``(-1, 1)`` before being exported with ``toDlpack()``.
    """
    single_column = array.reshape(-1, 1)
    capsule = single_column.toDlpack()
    return tf.experimental.dlpack.from_dlpack(capsule)

def numpy_array_to_tensor(array):
    """Convert an array to a TensorFlow tensor laid out as one column, shape ``(-1, 1)``."""
    single_column = array.reshape(-1, 1)
    return tf.convert_to_tensor(single_column)

def cudf_series_to_tensor(col) -> tf.Tensor:
    """Convert a cudf.Series to TensorFlow tensor(s) with DLPack.

    Returns a single tensor for ordinary columns. For list-typed columns
    (``cudf.ListDtype``) it returns a ``(values, row_lengths)`` pair, where the
    row lengths are the differences between consecutive list offsets.
    """
    if not isinstance(col.dtype, cudf.ListDtype):
        return cupy_array_to_tensor(col.values)

    leaves = col.list.leaves.values
    boundaries = col.list._column.offsets.values
    lengths = boundaries[1:] - boundaries[:-1]
    return cupy_array_to_tensor(leaves), cupy_array_to_tensor(lengths)

def pandas_series_to_tensor(col) -> tf.Tensor:
    """Convert a pandas Series to TensorFlow tensor(s).

    A non-empty column whose first value is list-like is flattened and returned as a
    ``(values, row_lengths)`` pair; any other column is returned as a single tensor.
    """
    holds_lists = len(col) and pd.api.types.is_list_like(col.values[0])
    if not holds_lists:
        return numpy_array_to_tensor(col.values)

    flattened = pd.Series(itertools.chain.from_iterable(col)).values
    lengths = col.map(len).values
    return numpy_array_to_tensor(flattened), numpy_array_to_tensor(lengths)
        
    
def dataset_to_tensors(dataset: Dataset) -> Dict[str, tf.Tensor]:
    """Materialize a Dataset and convert every column to TensorFlow tensor(s).

    The dataset is computed into a single dataframe; a pandas frame uses the
    pandas converter, anything else is handled by the cudf converter.
    """
    frame = dataset.to_ddf().compute()
    convert = pandas_series_to_tensor if isinstance(frame, pd.DataFrame) else cudf_series_to_tensor

    tensors = {}
    for name in frame.columns:
        tensors[name] = convert(frame[name])
    return tensors

Writing app/dataset_to_tensors.py


#### Get the workflow if needed

In [14]:
!mkdir workflow
!gsutil cp -r gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/* workflow/

mkdir: cannot create directory ‘workflow’: File exists
Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/metadata.json...
Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/workflow.pkl...  
Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/categories/unique.album_name_pl.parquet...
Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/categories/unique.artist_genres_can.parquet...
/ [4 files][ 16.8 MiB/ 16.8 MiB]                                                
==> NOTE: You are performing a sequence of gsutil operations that may
run significantly faster if you instead use gsutil -m cp ... Please
see the -m section under "gsutil help options" for further information
about when gsutil -m can be advantageous.

Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/categories/unique.artist_genres_pl.parquet...
Copying gs://spotify-jsw-mpd-2023/workflow/2t-spotify-workflow/categories/unique.artist_name_can.parquet...
Copying gs://spoti

In [15]:
%%writefile app/predictor.py
# These are helper functions that ensure the dictionary input is in a certain order and types are preserved
# this is to get scalar values to appear first in the dict to not confuse pandas with lists https://github.com/pandas-dev/pandas/issues/46092
import nvtabular as nvt
import pandas as pd
import os
import json
from typing import Dict

import tensorflow as tf
import time
import logging
from merlin.models.tf.loader import Loader
from dataset_to_tensors import *
from merlin.table.tensor_table import TensorTable
from merlin.table.tensorflow_column import TensorflowColumn

from merlin.table.conversions import convert_col, df_from_tensor_table, tensor_table_from_df
from merlin.table import TensorTable, df_from_tensor_table
from merlin.core.dispatch import df_from_dict, dict_from_df

# Column order used to build each request row. The scalar 'pl_collaborative_src' is
# deliberately first so pandas is not handed a list-valued entry first
# (https://github.com/pandas-dev/pandas/issues/46092).
#
# Fixes relative to the previous version of this list:
#   * a missing comma after 'artist_followers_can' silently concatenated it with
#     'pl_duration_ms_new' into one bogus key, which made every request fail with KeyError;
#   * 'artist_pop_can' was listed twice (harmless for the dict built from this list, so the
#     second occurrence is simply dropped).
# NOTE(review): the notebook's test payload also carries 'artist_pop_can_1' — confirm whether
# the duplicate entry was meant to be that column.
reordered_keys = [
    'pl_collaborative_src',
    'album_name_pl',
    'artist_genres_pl',
    'artist_name_pl',
    'artist_pop_can',
    'num_pl_songs_new',
    'pl_name_src',
    'num_pl_albums_new',
    'num_pl_artists_new',
    'track_name_pl',
    'track_pop_can',
    'artist_followers_can',
    'pl_duration_ms_new',
    'pid',
    'track_uri_pl',
]

# Scalar request fields that fix_types casts to float.
# NOTE(review): none of these names (nor those in float_list_fix) appear in reordered_keys,
# so with the current key list fix_types passes every value through unchanged — confirm
# whether these should be updated to the current feature names.
float_num_fix = [
    'n_songs_pl',
    'num_albums_pl',
    'num_artists_pl',
    'duration_ms_seed_pl',
]
# List-valued request fields whose elements fix_types casts to float.
float_list_fix = ['track_pop_pl', 'duration_ms_songs_pl']


def fix_list_num_dtypes(num_list):
    """Return a new list with every element of ``num_list`` cast to float.

    JSON parsing can turn whole-number floats into ints; this restores floats.
    """
    return list(map(float, num_list))


def fix_num_dtypes(num):
    """Cast a single value to float."""
    return float(num)


def fix_types(k, v):
    """Return ``v`` with the float cast that applies to field name ``k``, if any.

    Scalar fields listed in ``float_num_fix`` and list fields listed in
    ``float_list_fix`` are converted; every other field is returned untouched.
    """
    if k in float_num_fix:
        converted = fix_num_dtypes(v)
    elif k in float_list_fix:
        converted = fix_list_num_dtypes(v)
    else:
        converted = v
    return converted

def create_pandas_instance(inputs):
    """
    Build a pandas DataFrame from the request payload.

    Each instance is re-keyed in ``reordered_keys`` order (scalar first, so pandas
    accepts the mix of scalars and lists) and passed through ``fix_types`` to repair
    the types converted when the data is parsed from JSON by FastAPI.

    Args:
        inputs: a single instance dict, or a list of instance dicts. Every instance
            must provide all keys in ``reordered_keys``.
    Returns:
        pd.DataFrame with the instances stacked in request order. As before, the
        per-instance index is not reset when several instances are stacked.
    Raises:
        ValueError: if ``inputs`` is an empty list.
        KeyError: if an instance lacks one of ``reordered_keys``.
    """
    instances = inputs if isinstance(inputs, list) else [inputs]
    if not instances:
        raise ValueError("create_pandas_instance requires at least one instance")

    # orient='index' + transpose turns each key into a column of the resulting frame.
    frames = [
        pd.DataFrame.from_dict(
            {k: fix_types(k, instance[k]) for k in reordered_keys},
            orient='index',
        ).T
        for instance in instances
    ]
    if len(frames) == 1:
        return frames[0]
    # DataFrame.append was removed in pandas 2.0 and copied the frame on every call;
    # a single concat keeps the same row order and index values.
    return pd.concat(frames)


def table_to_tensors(table: TensorTable, schema=None) -> Dict[str, tf.Tensor]:
    """Convert a TensorTable into a dictionary of TensorFlow-backed columns.

    When ``schema`` is provided, every column the schema marks as not ragged and that
    carries offsets is reshaped into a fixed-size array before conversion.
    ``table`` is modified in place.

    Args:
        table: the table to convert.
        schema: optional schema, indexed by column name, exposing ``is_ragged``.
    Returns:
        The result of ``table.to_dict()`` after all columns are converted to TensorflowColumn.
    Raises:
        ValueError: if a column the schema marks as not ragged has rows of differing lengths.
    """
    for col in table.columns:
        if schema and not schema[col].is_ragged:
            values = table[col].values
            offsets = table[col].offsets
            # No offsets: nothing to reshape for this column
            # (presumably a scalar column — confirm against TensorTable).
            if offsets is None:
                continue

            # Per-row list length = difference between consecutive offsets.
            row_lengths = offsets[1:] - offsets[:-1]
            if not all(row_lengths == row_lengths[0]):
                raise ValueError(
                        f"ColumnSchema for list column '{col}' describes a fixed size list. "
                        "Found a ragged list output. If this dataframe contains a ragged list, "
                        "Please check the 'schema' has a column shape defined to reflect this. "
                    )
            # Fold the flat values into (n_rows, row_length, ...trailing dims).
            values_list = values.reshape((len(row_lengths), int(row_lengths[0])) + values.shape[1:])
            # Rebuild the column with its own type, now without offsets.
            column_type =type(table[col])
            table[col] = column_type(values_list)

    
    # convert columns from array to TensorFlow types
    for column in table.columns:
        table[column] = convert_col(table[column], TensorflowColumn)

    # convert TensorTable to dictionary
    return table.to_dict()
    


class Predictor():
    """Custom Prediction Routine predictor for the Merlin query tower.

    The Predictor is responsible for the ML logic for processing a prediction request:
    (1) ``load`` brings the Keras query-tower model and the NVTabular workflow into memory.
    (2) ``predict`` preprocesses the request with the workflow and runs the model.

    Unlike the default CPR PredictionHandler flow
    (``postprocess(predict(preprocess(prediction_input)))``), preprocessing and prediction
    both happen inside ``predict``; this class has no separate preprocess/postprocess.
    """
    def __init__(self):
        # Populated by load(); declared here so the attributes always exist.
        self.model = None
        self.workflow = None
        self.schema = None
        self.loader = None
        self.n_rows = 0

    def load(self, artifacts_uri):
        """Loads the model artifact and the preprocessing workflow.

        Args:
            artifacts_uri (str):
                Required. The value of the environment variable AIP_STORAGE_URI.
                The model is read from ``<artifacts_uri>/query-tower``.
        Returns:
            self, so the call can be chained.

        The workflow directory defaults to ``/workspace/google/train_transformed`` and
        can be overridden with the ``WORKFLOW_DIR`` environment variable.
        """
        logging.info("loading model and workflow")
        # perf_counter measures wall-clock time, which is what the messages report;
        # process_time would exclude time spent waiting on I/O.
        start_init = time.perf_counter()

        #test_bucket = 'gs://jt-merlin-scaling'
        test_bucket = '/workspace/google'
        self.model = tf.keras.models.load_model(os.path.join(artifacts_uri, "query-tower"))
        workflow_dir = os.environ.get(
            "WORKFLOW_DIR", os.path.join(test_bucket, "train_transformed")
        )
        self.workflow = nvt.Workflow.load(workflow_dir)
        # Candidate-side columns are not part of a query request, so they are removed
        # from the workflow inputs.
        self.workflow = self.workflow.remove_inputs(
            [
                'track_pop_can',
                'track_uri_can',
                'duration_ms_can',
                'track_name_can',
                'artist_name_can',
                'album_name_can',
                'album_uri_can',
                'artist_followers_can',
                'artist_genres_can',
                'artist_pop_can',
                'artist_pop_pl',
                'artist_uri_can',
                'artists_followers_pl',
            ]
        )
        # Schema of the transformed columns; predict() uses it to decide which list
        # columns are fixed-size. It replaces the previously undefined `user_schema`.
        self.schema = self.workflow.output_schema

        self.loader = None # will load this after first load
        self.n_rows = 0
        #logging.info(f"loading took {time.process_time() - start_init} seconds")
        print(f"loading took {time.perf_counter() - start_init} seconds")

        return self

    def predict(self, prediction_input):
        """Preprocesses the prediction input and performs the prediction.

        Args:
            prediction_input (dict | list[dict]):
                Required. One instance, or a list of instances, keyed by feature name.
        Returns:
            Tuple ``(transformed_instance, output, batch)``: the workflow-transformed
            dataframe, the model output, and the tensor dictionary fed to the model.
        """
        # handle different input types, can take a dict or list of dicts
        self.n_rows = len(prediction_input) if isinstance(prediction_input, list) else 1
        start = time.perf_counter()
        instance_df = create_pandas_instance(prediction_input)
        # cudf is None when it could not be imported (CPU-only container); in that case
        # the pandas frame is passed on as-is instead of failing on `None.from_pandas`.
        if cudf is not None:
            instance_df = cudf.from_pandas(instance_df)
        print(f"Pandas conversion took {time.perf_counter() - start} seconds")
        start = time.perf_counter()
        transformed_instance = self.workflow.transform(instance_df)
        print(f"Workflow transformation took {time.perf_counter() - start} seconds")

        start = time.perf_counter()
        table = TensorTable.from_df(transformed_instance)
        batch = table_to_tensors(table, schema=self.schema)
        print(f"converting to dict_tensors took {time.perf_counter() - start} seconds")
        start = time.perf_counter()
        output = self.model(batch)
        print(f"Generating query embeddings took {time.perf_counter() - start} seconds")
        return transformed_instance, output, batch

Writing app/predictor.py


In [16]:
%%writefile app/main.py
from fastapi import FastAPI, Request

import json
import numpy as np
import os
import logging


from google.cloud import storage
from predictor import Predictor

app = FastAPI()

# The predictor is created and loaded once at import time, so every request
# served by this process reuses the same model and workflow.
predictor_instance = Predictor()
loaded_predictor = predictor_instance.load(artifacts_uri=os.environ['AIP_STORAGE_URI'])


@app.get(os.environ['AIP_HEALTH_ROUTE'], status_code=200)
def health():
    """Health probe: responds with an empty JSON object and HTTP 200."""
    return dict()


@app.post(os.environ['AIP_PREDICT_ROUTE'])
async def predict(request: Request):
    """Run the predictor on the request's ``instances`` and return the model output.

    The predictor returns a tuple; its second element (the model output) is
    converted to nested Python lists for the JSON response.
    """
    payload = await request.json()
    result = loaded_predictor.predict(payload["instances"])
    embeddings = result[1]
    return {"predictions": embeddings.numpy().tolist()}

Writing app/main.py


In [17]:
%%writefile app/prestart.sh
#!/bin/bash
export PORT=$AIP_HTTP_PORT

Writing app/prestart.sh


In [18]:
#make it a package
!touch app/__init__.py

In [19]:
%%writefile Dockerfile

# Serving image for the query tower, built on the NVIDIA Merlin TensorFlow 23.04 container.
FROM nvcr.io/nvidia/merlin/merlin-tensorflow:23.04
WORKDIR /app 

# Install Python dependencies before copying the app code, so this layer is reused
# when only the app sources change.
COPY ./app/requirements.txt /requirements.txt
RUN pip install -r /requirements.txt
#DEBUG CHANGES!!
# Bake the locally downloaded NVTabular workflow into the image at /workflow.
# NOTE(review): predictor.py loads the workflow from /workspace/google/train_transformed,
# not from /workflow — confirm which location is intended.
RUN mkdir /workflow
# RUN mkdir /docker_model
# ADD local_model /docker_model
ADD workflow /workflow
#END DEBUG!


COPY ./app /app
# NOTE(review): uvicorn below listens on $AIP_HTTP_PORT (8080 in the docker run examples),
# not on port 80 — confirm whether this EXPOSE should change.
EXPOSE 80
    
# Shell form is needed so $AIP_HTTP_PORT is expanded at container start.
CMD ["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port $AIP_HTTP_PORT"]

Overwriting Dockerfile


In [20]:
SERVER_IMAGE = "merlin-prediction-cpr"  # @param {type:"string"} 
REMOTE_IMAGE_NAME=f"{REGION}-docker.pkg.dev/{PROJECT}/{REPOSITORY}/{SERVER_IMAGE}"

# !docker build -t $REMOTE_IMAGE_NAME .
!gcloud builds submit -t $REMOTE_IMAGE_NAME .

Creating temporary tarball archive of 30 file(s) totalling 566.9 MiB before compression.
^C


Command killed by keyboard interrupt



#### If you are debugging, be sure to set `-d` detached flag off and run the commands in console

### Copy/paste if you want to run from console for testing
```bash
docker run --gpus all -p 80:8080 \
            --name=merlin-prediction-cpr \
            -e AIP_HTTP_PORT=8080 \
            -e AIP_HEALTH_ROUTE=/health \
            -e AIP_PREDICT_ROUTE=/predict \
            -e AIP_STORAGE_URI=gs://spotify-jsw-mpd-2023 \
            us-central1-docker.pkg.dev/wortz-project-352116/merlin-spotify-cpr/merlin-prediction-cpr
```

##### No GPU:
```bash
docker run -p 80:8080 \
            --name=merlin-prediction-cpr \
            -e AIP_HTTP_PORT=8080 \
            -e AIP_HEALTH_ROUTE=/health \
            -e AIP_PREDICT_ROUTE=/predict \
            -e AIP_STORAGE_URI=gs://spotify-jsw-mpd-2023 \
            us-central1-docker.pkg.dev/wortz-project-352116/merlin-spotify-cpr/merlin-prediction-cpr
```

#### Test the health route

In [21]:
!docker kill merlin-prediction-cpr
!docker rm merlin-prediction-cpr

^C
Error response from daemon: You cannot remove a running container 0ec8bd5a4f81fcdb74398d428b1f0777c2c4afed8608d38c1ed6fb038824a967. Stop the container before attempting removal or force remove


In [307]:
! curl localhost/health

{}

In [353]:
## Ground truth candidate:
    # 'album_uri_can': 'spotify:album:5l83t3mbVgCrIe1VU9uJZR', 
    # 'artist_name_can': 'Russ', 
    # 'track_name_can': 'We Just Havent Met Yet', 
## TODO - we have to overload with candidate data because of the workflow transform, add overloaded values in the predictor
TEST_INSTANCE = {
  "pl_collaborative_src": "false",
  "album_name_pl": ["Kind Of Blue (Legacy Edition)", "Duke Ellington \u0026 John Coltrane", "The Genius Of Charlie Parker #2: April In Paris", "Good Night, And Good Luck", "Question and Answer"],
  "artist_genres_pl": ["[\u0027cool jazz\u0027, \u0027hard bop\u0027, \u0027jazz\u0027, \u0027jazz fusion\u0027, \u0027jazz trumpet\u0027, \u0027uk contemporary jazz\u0027]", "[\u0027african-american classical\u0027, \u0027big band\u0027, \u0027harlem renaissance\u0027, \u0027jazz\u0027, \u0027jazz piano\u0027, \u0027swing\u0027]", "[\u0027bebop\u0027, \u0027big band\u0027, \u0027cool jazz\u0027, \u0027jazz\u0027, \u0027jazz saxophone\u0027]", "[\u0027contemporary vocal jazz\u0027, \u0027vocal jazz\u0027]", "[\u0027avant-garde jazz\u0027, \u0027contemporary jazz\u0027, \u0027contemporary post-bop\u0027, \u0027jazz\u0027, \u0027jazz fusion\u0027, \u0027jazz guitar\u0027]"],
  "artist_name_pl": ["Miles Davis", "Duke Ellington", "Charlie Parker", "Dianne Reeves", "Pat Metheny"],
  "artist_pop_can": "53.0",
  "num_pl_songs_new": "12.0",
  "pl_name_src": "Jazz Standards",
  "num_pl_albums_new": "11.0",
  "num_pl_artists_new": "7.0",
  "track_name_pl": ["Blue in Green", "In A Sentimental Mood", "April In Paris", "How High The Moon", "All the Things You Are"],
  "track_pop_can": "22.0",
  "artist_pop_can_1": "53.0",
  "artist_followers_can": "313024.0",
  "pl_duration_ms_new": "2968084.0",
  "pid": "476475",
  "track_uri_pl": ["spotify:track:0aWMVrwxPNYkKmFthzmpRi", "spotify:track:0PrGgNDwfJPNXADJYROvBw", "spotify:track:5rPMbUxXRXvWu89k0n6Sxj", "spotify:track:3DRL2sPYVbx87ArfP2TBqD", "spotify:track:7rYSSGZShi5Zgde60MQAMx"]
}

In [354]:
# Test with an ordered list input (no dictionary)

TEST_INSTANCE_LIST = [TEST_INSTANCE[k] for k in TEST_INSTANCE.keys()]
TEST_INSTANCE_LIST

[1,
 'Lit Tunes ',
 'false',
 237506.0,
 ['Russ', 'Jeremih', 'Khalid', 'BeyoncÃ©', 'William Singe'],
 ['spotify:track:4cxMGhkinTocPSVVKWIw0d',
  'spotify:track:1wNEBPo3nsbGCZRryI832I',
  'spotify:track:152lZdxL1OR0ZMW6KquMif',
  'spotify:track:2f4IuijXLxYOeBncS60GUD',
  'spotify:track:4Lj8paMFwyKTGfILLELVxt'],
 ['Losin Control', 'Paradise', 'Location', 'Crazy In Love - Remix', 'Pony'],
 ["There's Really A Wolf",
  'Late Nights: The Album',
  'American Teen',
  'Crazy In Love',
  'Pony']]

In [355]:
import json
import requests

json_instance = json.dumps({"instances": [TEST_INSTANCE]})
print(json_instance)

{"instances": [{"pid": 1, "pl_name_src": "Lit Tunes ", "pl_collaborative_src": "false", "pl_duration_ms_new": 237506.0, "artist_name_pl": ["Russ", "Jeremih", "Khalid", "Beyonc\u00c3\u00a9", "William Singe"], "track_uri_pl": ["spotify:track:4cxMGhkinTocPSVVKWIw0d", "spotify:track:1wNEBPo3nsbGCZRryI832I", "spotify:track:152lZdxL1OR0ZMW6KquMif", "spotify:track:2f4IuijXLxYOeBncS60GUD", "spotify:track:4Lj8paMFwyKTGfILLELVxt"], "track_name_pl": ["Losin Control", "Paradise", "Location", "Crazy In Love - Remix", "Pony"], "album_name_pl": ["There's Really A Wolf", "Late Nights: The Album", "American Teen", "Crazy In Love", "Pony"]}]}


### Test the predict route

In [324]:
%%time
requests.post('http://localhost/predict', data=json_instance).json()

CPU times: user 2.94 ms, sys: 2.24 ms, total: 5.18 ms
Wall time: 427 ms


{'predictions': [[0.0,
   0.0,
   0.0,
   2.197338342666626,
   0.0,
   0.6343151926994324,
   0.0,
   0.0,
   0.8681452870368958,
   0.082596555352211,
   0.06858403980731964,
   0.8690858483314514,
   0.6754541397094727,
   0.04301293566823006,
   0.0,
   0.0,
   0.0,
   0.11446070671081543,
   3.286738872528076,
   0.0,
   0.0,
   2.3397533893585205,
   0.0,
   1.9221959114074707,
   0.0,
   0.0,
   0.4394824206829071,
   0.0,
   0.8329581618309021,
   0.0,
   0.4763728380203247,
   0.0,
   0.0,
   1.2817498445510864,
   0.0,
   0.0,
   2.144164800643921,
   0.0,
   0.715079665184021,
   0.0,
   0.0,
   0.4205787777900696,
   0.0,
   0.9302131533622742,
   1.045517086982727,
   0.0,
   0.4166524410247803,
   0.0,
   1.2028098106384277,
   0.0,
   0.03395405411720276,
   0.4649958908557892,
   2.633600950241089,
   0.8353680372238159,
   0.0,
   0.6172369122505188,
   0.014025628566741943,
   0.04429948329925537,
   0.0,
   0.0,
   2.771533966064453,
   0.0,
   0.0,
   0.0,
   0.0,
 

## Stop the images if they are running

In [325]:
! docker stop $SERVER_IMAGE
! docker rm $SERVER_IMAGE

merlin-prediction-cpr
merlin-prediction-cpr


### Push the container once ready and testing is complete

In [380]:
# ### push the container to registry
!docker push $REMOTE_IMAGE_NAME

Using default tag: latest
The push refers to repository [us-central1-docker.pkg.dev/wortz-project-352116/merlin-spotify-cpr/merlin-prediction-cpr]

[1Bba9615c9: Preparing 
[1B018f1149: Preparing 
[1B0f3c1a79: Preparing 
[1B0a88f4b4: Preparing 
[1B625d9539: Preparing 
[1Bbd45f47a: Preparing 
[1B959e4900: Preparing 
[1Bed4c0b88: Preparing 
[1B435e7569: Preparing 
[1B6c9c8a80: Preparing 
[1B7ee6b1e6: Preparing 
[1B14522996: Preparing 
[1B02fe8fdf: Preparing 
[1Bf1d8a22e: Preparing 
[1B7f811af3: Preparing 
[1Bfa2a6f77: Preparing 
[1B99f75613: Preparing 
[1Bf314ccd3: Preparing 
[1Bbe9e41cc: Preparing 
[1B042e77d8: Preparing 
[1Bb1adf810: Preparing 
[1B5d6e3dc6: Preparing 
[1B5dfe406a: Preparing 
[1B3f5c8bb1: Preparing 
[1B24cd08b1: Preparing 
[1Bf287408a: Preparing 
[1B5fc4d7d6: Preparing 
[1Ba50a1b1f: Preparing 
[1B13e97a8a: Preparing 
[1Bad3d21ac: Preparing 
[1B09a62824: Preparing 
[1B9a9dae8e: Preparing 
[1Bb713171c: Preparing 
[1B1bd92e98: Preparing 
[

### Deploy to Vertex AI

After the serving metadata is set below, the model is properly abstracted for use on Vertex

In [394]:
MODEL_DISPLAY_NAME = "Merlin Spotify Query Tower Model - Workflow Local V2"
from google.cloud import aiplatform

# The serving container receives this URI as AIP_STORAGE_URI.
ARTIFACT_URI = BUCKET

# Register the custom serving container as a Vertex AI Model.
# `--gpus all` is a `docker run` flag, not an argument of the container command, so it is
# no longer passed as serving_container_args; the GPU is attached when the model is deployed
# (accelerator_type / accelerator_count on model.deploy).
model = aiplatform.Model.upload(
        display_name=MODEL_DISPLAY_NAME,
        artifact_uri=ARTIFACT_URI,
        serving_container_image_uri=REMOTE_IMAGE_NAME,
        serving_container_predict_route='/predict',
        serving_container_health_route='/health',
        serving_container_command=["sh", "-c", "uvicorn main:app --host 0.0.0.0 --port $AIP_HTTP_PORT"],
        sync=True,
    )

Creating Model
Create Model backing LRO: projects/679926387543/locations/us-central1/models/3019815282756550656/operations/5879471097606307840
Model created. Resource name: projects/679926387543/locations/us-central1/models/3019815282756550656@1
To use this Model in another session:
model = aiplatform.Model('projects/679926387543/locations/us-central1/models/3019815282756550656@1')


In [395]:
endpoint = model.deploy(machine_type="n1-standard-8",
                        accelerator_type="NVIDIA_TESLA_T4",
                        accelerator_count=1)

Creating Endpoint
Create Endpoint backing LRO: projects/679926387543/locations/us-central1/endpoints/7469831310459011072/operations/7505270563087056896
Endpoint created. Resource name: projects/679926387543/locations/us-central1/endpoints/7469831310459011072
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/679926387543/locations/us-central1/endpoints/7469831310459011072')
Deploying model to Endpoint : projects/679926387543/locations/us-central1/endpoints/7469831310459011072
Deploy Endpoint model backing LRO: projects/679926387543/locations/us-central1/endpoints/7469831310459011072/operations/1230630382253113344
Endpoint model deployed. Resource name: projects/679926387543/locations/us-central1/endpoints/7469831310459011072


In [396]:
# Make a prediction.
# Each instance must be a dict keyed by feature name: the predictor looks fields up by
# key, so the positional TEST_INSTANCE_LIST (values only) cannot be served.

endpoint.predict(instances=[TEST_INSTANCE])

Prediction(predictions=[[0.0, 0.0, 0.0, 2.197338342666626, 0.0, 0.6343151926994324, 0.0, 0.0, 0.8681452870368958, 0.082596555352211, 0.06858403980731964, 0.8690858483314514, 0.6754541397094727, 0.04301293566823006, 0.0, 0.0, 0.0, 0.1144607067108154, 3.286738872528076, 0.0, 0.0, 2.339753389358521, 0.0, 1.922195911407471, 0.0, 0.0, 0.4394824206829071, 0.0, 0.8329581618309021, 0.0, 0.4763728380203247, 0.0, 0.0, 1.281749844551086, 0.0, 0.0, 2.144164800643921, 0.0, 0.715079665184021, 0.0, 0.0, 0.4205787777900696, 0.0, 0.9302131533622742, 1.045517086982727, 0.0, 0.4166524410247803, 0.0, 1.202809810638428, 0.0, 0.03395405411720276, 0.4649958908557892, 2.633600950241089, 0.8353680372238159, 0.0, 0.6172369122505188, 0.01402562856674194, 0.04429948329925537, 0.0, 0.0, 2.771533966064453, 0.0, 0.0, 0.0, 0.0, 1.748955368995667, 0.0, 1.313991546630859, 0.0, 0.0, 0.0, 0.3788366615772247, 0.3188453912734985, 0.0, 0.0, 0.04184942319989204, 1.234071731567383, 0.8302457332611084, 0.4515600502490997, 0.36

## Finished - now go on to the next notebook to create a [matching engine notebook](03-matching-engine.ipynb) and test out the first end to end recommendation

Be sure to use the output of the endpoint logs above to save the endpoint for use in the matching engine notebook

e.g.:

```python
INFO:google.cloud.aiplatform.models:To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/934903580331/locations/us-central1/endpoints/494907775848022016')
```

### Bonus content - Batch Predictions

![](img/batch-predict-output.png)

In [387]:
batch_prediction_job = model.batch_predict(
    job_display_name=f"batch predictions for merlin query tower",
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
    #bigquery as input and output
    instances_format="bigquery",
    predictions_format="bigquery",
    bigquery_source=f'bq://{PROJECT}.spotify_e2e_test.validtion_batch_prediction_test',
    bigquery_destination_prefix=f'bq://{PROJECT}.spotify_e2e_test.batch_validation_results',
)

Creating BatchPredictionJob
BatchPredictionJob created. Resource name: projects/679926387543/locations/us-central1/batchPredictionJobs/2698619761297719296
To use this BatchPredictionJob in another session:
bpj = aiplatform.BatchPredictionJob('projects/679926387543/locations/us-central1/batchPredictionJobs/2698619761297719296')
View Batch Prediction Job:
https://console.cloud.google.com/ai/platform/locations/us-central1/batch-predictions/2698619761297719296?project=679926387543
BatchPredictionJob projects/679926387543/locations/us-central1/batchPredictionJobs/2698619761297719296 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/679926387543/locations/us-central1/batchPredictionJobs/2698619761297719296 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/679926387543/locations/us-central1/batchPredictionJobs/2698619761297719296 current state:
JobState.JOB_STATE_RUNNING
BatchPredictionJob projects/679926387543/locations/us-central1/batchPredictionJobs/

### Timing analysis

In [390]:
%%timeit
endpoint.predict(instances=[TEST_INSTANCE])

514 ms ± 11.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [393]:
%%timeit
endpoint.predict(instances=[TEST_INSTANCE, TEST_INSTANCE])

489 ms ± 11.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [76]:
endpoint.undeploy_all()

Undeploying Endpoint model: projects/934903580331/locations/us-central1/endpoints/4736674102226452480
Undeploy Endpoint model backing LRO: projects/934903580331/locations/us-central1/endpoints/4736674102226452480/operations/6606761755496415232
Endpoint model undeployed. Resource name: projects/934903580331/locations/us-central1/endpoints/4736674102226452480


<google.cloud.aiplatform.models.Endpoint object at 0x7f0d1434ad10> 
resource name: projects/934903580331/locations/us-central1/endpoints/4736674102226452480