This notebook deploys the ONNX model we obtained in the `onnx-conversion.ipynb` notebook to Vertex AI using [Custom Prediction Routines (CPR)](https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/community/prediction/custom_prediction_routines/SDK_Pytorch_Custom_Predict.ipynb).

## Constants

In [1]:
# Replace the bracketed placeholders with your own values before running.

# Google Cloud project and region used for the Vertex AI resources.
PROJECT_ID = "[GCP-PROJECT]"
REGION = "us-central1"

# GCS bucket that will hold the model artifacts.
BUCKET_NAME = "gs://[BUCKET-NAME]"

# URI of the serving container image built later in this notebook.
PREDICTION_IMAGE_URI = "gcr.io/{}/resnetv2".format(PROJECT_ID)

## Copy over the initial artifacts

In [2]:
# Local staging folder for the deployment files; create it if missing and list
# its contents.
LOCAL_FOLDER = "onnx_deployment_files"
!mkdir -p {LOCAL_FOLDER}
!ls -lh {LOCAL_FOLDER}

total 0


In [3]:
# Stage the ImageNet label file and the ONNX model in the local folder.
!cp ilsvrc2012_wordnet_lemmas.txt {LOCAL_FOLDER}
!cp resnetv2101.onnx {LOCAL_FOLDER}

In [4]:
%%writefile requirements.txt

google-cloud-storage>=1.26.0,<2.0.0dev
google-cloud-aiplatform[prediction]>=1.16.0
onnxruntime==1.11.1
numpy==1.22.2
tensorflow>=2.5

Overwriting requirements.txt


## The predictor class

This is the meat of our deployment. We define all the logic needed to handle the request payload, run the ONNX model on it, and postprocess the predictions. 

Refer [here](https://github.com/googleapis/python-aiplatform/tree/custom-prediction-routine/google/cloud/aiplatform/prediction) to learn more about this class and how Vertex AI's Custom Prediction Routines are configured.

In [5]:
%%writefile {LOCAL_FOLDER}/predictor.py

import os
import pickle
from typing import Dict, List

import numpy as np
import onnxruntime as ort
import tensorflow as tf

from google.cloud.aiplatform.prediction.predictor import Predictor
from google.cloud.aiplatform.utils import prediction_utils


# Target height and width (in pixels) passed to tf.image.resize during preprocessing.
IMG_SIZE = 224


class ImgClassificationPredictor(Predictor):
    """Predictor that classifies base64-encoded JPEG images with an ONNX model.

    The request flows through `preprocess` (decode + resize), `predict`
    (ONNX Runtime inference + label lookup) and `postprocess` (wrap the labels
    in the response dictionary).
    """

    def __init__(self):
        # Artifact file names, resolved relative to the working directory after
        # `load` has downloaded them.
        self._onnx_path = "resnetv2101.onnx"
        self._labels_path = "ilsvrc2012_wordnet_lemmas.txt"

    def load(self, artifacts_uri: str):
        """Downloads the model artifacts and builds the session and label list.

        Args:
            artifacts_uri: Location of the artifacts, forwarded to
                `prediction_utils.download_model_artifacts`.
        """
        prediction_utils.download_model_artifacts(artifacts_uri)

        sess_options = ort.SessionOptions()
        # os.cpu_count() may return None when the count cannot be determined;
        # fall back to a single thread instead of assigning None.
        sess_options.intra_op_num_threads = os.cpu_count() or 1
        self._model_session = ort.InferenceSession(
            self._onnx_path, sess_options, providers=["CPUExecutionProvider"]
        )

        # One label per line; the line index is used as the class index.
        with open(self._labels_path, "r") as f:
            self._imagenet_int_to_str = [line.rstrip() for line in f]

    def preprocess_bytes(self, bytes_input) -> tf.Tensor:
        """Turns one web-safe base64 JPEG string into a resized float32 image.

        Args:
            bytes_input: Scalar string tensor holding the base64-encoded JPEG.

        Returns:
            A float32 tensor of shape (IMG_SIZE, IMG_SIZE, 3).
        """
        bytes_input = tf.io.decode_base64(bytes_input)
        decoded = tf.io.decode_jpeg(bytes_input, channels=3)
        decoded = tf.image.convert_image_dtype(decoded, tf.float32)
        resized = tf.image.resize(decoded, size=(IMG_SIZE, IMG_SIZE))
        return resized

    def preprocess(self, prediction_input: Dict) -> np.ndarray:
        """Decodes every entry of `prediction_input["instances"]` into one batch.

        Args:
            prediction_input: Request body; its "instances" key holds the list
                of base64-encoded JPEG strings.

        Returns:
            The preprocessed images stacked into a single NumPy array.
        """
        instances = prediction_input["instances"]
        decoded_images = tf.map_fn(
            self.preprocess_bytes,
            tf.constant(instances),
            dtype=tf.float32,
            back_prop=False,
        )
        return decoded_images.numpy()

    def predict(self, images: np.ndarray) -> List[str]:
        """Runs the ONNX model and maps each image to its top label.

        Args:
            images: Batch of preprocessed images as returned by `preprocess`.

        Returns:
            One label string per image in the batch, in input order.
        """
        # "args_0" is the input name the session is fed under (presumably fixed
        # when the model was converted to ONNX -- confirm against the graph).
        logits = self._model_session.run(None, {"args_0": images})[0]

        # Take the argmax of each row separately. Calling np.argmax on the whole
        # batch returns an index into the flattened array, which yields wrong
        # (or out-of-range) labels as soon as the batch has more than one image.
        return [
            self._imagenet_int_to_str[int(np.argmax(image_logits))]
            for image_logits in logits
        ]

    def postprocess(self, prediction_results: List[str]) -> Dict[str, List[str]]:
        """Wraps the predicted labels under the "predictions" key of the response."""
        return {"predictions": prediction_results}

Writing onnx_deployment_files/predictor.py


## Copy over the new artifacts to our GCS bucket for remote predictions

In [6]:
# Place requirements.txt next to predictor.py in the source folder, then list
# the folder to confirm its contents.
!cp requirements.txt $LOCAL_FOLDER/requirements.txt
!ls -lh $LOCAL_FOLDER

total 171M
-rw-r--r-- 1 jupyter jupyter  22K Sep 20 03:59 ilsvrc2012_wordnet_lemmas.txt
-rw-r--r-- 1 jupyter jupyter 2.1K Sep 20 03:59 predictor.py
-rw-r--r-- 1 jupyter jupyter  134 Sep 20 03:59 requirements.txt
-rw-r--r-- 1 jupyter jupyter 171M Sep 20 03:59 resnetv2101.onnx


In [7]:
# Upload the label file and the ONNX model to the bucket, then list the
# destination folder to confirm the upload.
!gsutil cp {LOCAL_FOLDER}/ilsvrc2012_wordnet_lemmas.txt {BUCKET_NAME}/{LOCAL_FOLDER}/
!gsutil cp {LOCAL_FOLDER}/resnetv2101.onnx {BUCKET_NAME}/{LOCAL_FOLDER}/
!gsutil ls {BUCKET_NAME}/{LOCAL_FOLDER}/

Copying file://onnx_deployment_files/ilsvrc2012_wordnet_lemmas.txt [Content-Type=text/plain]...
/ [1 files][ 21.2 KiB/ 21.2 KiB]                                                
Operation completed over 1 objects/21.2 KiB.                                     
Copying file://onnx_deployment_files/resnetv2101.onnx [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil 

In [8]:
# Remove the local artifacts.
!rm -rf {LOCAL_FOLDER}/resnetv2101.onnx
!rm -rf {LOCAL_FOLDER}/ilsvrc2012_wordnet_lemmas.txt 
!ls -lh {LOCAL_FOLDER}

total 8.0K
-rw-r--r-- 1 jupyter jupyter 2.1K Sep 20 03:59 predictor.py
-rw-r--r-- 1 jupyter jupyter  134 Sep 20 03:59 requirements.txt


## Build the Docker image

In [9]:
import os

from google.cloud.aiplatform.prediction import LocalModel
from onnx_deployment_files.predictor import (
    ImgClassificationPredictor,
)  # The package name in this import must match LOCAL_FOLDER, the source directory holding predictor.py.

# Build the serving container image from the source folder, using the custom
# predictor class and the requirements file stored alongside it.
local_model = LocalModel.build_cpr_model(
    src_dir=LOCAL_FOLDER,
    output_image_uri=PREDICTION_IMAGE_URI,
    predictor=ImgClassificationPredictor,
    requirements_path=os.path.join(LOCAL_FOLDER, "requirements.txt"),
    base_image="python:3.8",
)

In [10]:
# Display the serving container spec (image URI plus predict and health routes).
local_model.get_serving_container_spec()

image_uri: "gcr.io/fast-ai-exploration/resnetv2"
predict_route: "/predict"
health_route: "/health"

## Copy over the model artifacts to a local directory for local predictions

In [11]:
# Download the model and label file from the bucket into a local directory so
# the container can be tested against local artifacts.
LOCAL_MODEL_ARTIFACTS_DIR = "model_artifacts"
!mkdir -p {LOCAL_MODEL_ARTIFACTS_DIR}

!gsutil cp {BUCKET_NAME}/{LOCAL_FOLDER}/resnetv2101.onnx $LOCAL_MODEL_ARTIFACTS_DIR
!gsutil cp {BUCKET_NAME}/{LOCAL_FOLDER}/ilsvrc2012_wordnet_lemmas.txt $LOCAL_MODEL_ARTIFACTS_DIR

!ls {LOCAL_MODEL_ARTIFACTS_DIR}

Copying gs://ccd-ahm-2022/onnx_deployment_files/resnetv2101.onnx...
| [1 files][170.5 MiB/170.5 MiB]                                                
Operation completed over 1 objects/170.5 MiB.                                    
Copying gs://ccd-ahm-2022/onnx_deployment_files/ilsvrc2012_wordnet_lemmas.txt...
/ [1 files][ 21.2 KiB/ 21.2 KiB]                                                
Operation completed over 1 objects/21.2 KiB.                                     
ilsvrc2012_wordnet_lemmas.txt  resnetv2101.onnx


## Health check

In [12]:
# Start the container locally against the downloaded artifacts and probe its
# health route; the endpoint is torn down when the `with` block exits.
with local_model.deploy_to_local_endpoint(
    artifact_uri=LOCAL_MODEL_ARTIFACTS_DIR
) as local_endpoint:
    health_check_response = local_endpoint.run_health_check()

print(health_check_response, health_check_response.content)

<Response [200]> b'{}'


## Test that the Docker image is running OK

In [13]:
# Build the JSON request payload from a local test image.

import base64
import json

with open("test.jpg", "rb") as image_file:
    data = image_file.read()

# Use the URL-safe base64 alphabet: the predictor decodes each instance with
# tf.io.decode_base64, which expects web-safe base64.
b64str = base64.urlsafe_b64encode(data).decode("utf-8")

# Keep the payload both as a string (`s`) and as a file (instances.json) so the
# endpoint can be exercised through either input path.
instances = {"instances": [b64str]}
s = json.dumps(instances)
with open("instances.json", "w") as payload_file:
    payload_file.write(s)

### Test with a request body

In [14]:
# Serve the image locally against the downloaded artifacts and send the payload
# as an in-memory JSON string.
with local_model.deploy_to_local_endpoint(
    artifact_uri=LOCAL_MODEL_ARTIFACTS_DIR
) as local_endpoint:
    predict_response = local_endpoint.predict(
        headers={"Content-Type": "application/json"},
        request=s,
    )
    # Probe the health route while the container is still running.
    health_check_response = local_endpoint.run_health_check()

print(predict_response, predict_response.content)
print(health_check_response, health_check_response.content)

<Response [200]> b'{"predictions": ["yellow_lady\'s_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum"]}'
<Response [200]> b'{}'


### Test with a request file

In [17]:
# Same local test as with the request body, but the payload is read from
# instances.json instead of being passed as a string.
with local_model.deploy_to_local_endpoint(
    artifact_uri=LOCAL_MODEL_ARTIFACTS_DIR
) as local_endpoint:
    predict_response = local_endpoint.predict(
        headers={"Content-Type": "application/json"},
        request_file="instances.json",
    )
    # Probe the health route while the container is still running.
    health_check_response = local_endpoint.run_health_check()

print(predict_response, predict_response.content)
print(health_check_response, health_check_response.content)

<Response [200]> b'{"predictions": ["yellow_lady\'s_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum"]}'
<Response [200]> b'{}'


## Run predictions with the remote artifacts

In [19]:
# Run the container locally again, this time pointing it at the artifacts
# stored in the GCS bucket rather than the local directory.
remote_artifact_uri = f"{BUCKET_NAME}/{LOCAL_FOLDER}"
with local_model.deploy_to_local_endpoint(
    artifact_uri=remote_artifact_uri
) as local_endpoint:
    predict_response = local_endpoint.predict(
        headers={"Content-Type": "application/json"},
        request=s,
    )
    # Probe the health route while the container is still running.
    health_check_response = local_endpoint.run_health_check()

print(predict_response, predict_response.content)
print(health_check_response, health_check_response.content)

<Response [200]> b'{"predictions": ["yellow_lady\'s_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum"]}'
<Response [200]> b'{}'


## Push the Docker image to Google Container Registry (GCR) for deployment

In [20]:
# Push the locally built serving image to the registry named in PREDICTION_IMAGE_URI.
local_model.push_image()

## Deploy the model!

In [21]:
from google.cloud import aiplatform

# Point the Vertex AI SDK at the project and region defined in the constants cell.
aiplatform.init(project=PROJECT_ID, location=REGION)

In [22]:
# Upload the model to Vertex AI (Model Registry)
MODEL_DISPLAY_NAME = "resnetv2101-onnx"

model = aiplatform.Model.upload(
    local_model=local_model,
    display_name=MODEL_DISPLAY_NAME,
    artifact_uri=f"{BUCKET_NAME}/{LOCAL_FOLDER}",
)

Creating Model
Create Model backing LRO: projects/29880397572/locations/us-central1/models/7113184922780565504/operations/2616277266774097920
Model created. Resource name: projects/29880397572/locations/us-central1/models/7113184922780565504@1
To use this Model in another session:
model = aiplatform.Model('projects/29880397572/locations/us-central1/models/7113184922780565504@1')


In [23]:
# Deploy the model to an endpoint
endpoint = model.deploy(
    machine_type="n1-standard-8",
    min_replica_count=1,
    max_replica_count=3,
    autoscaling_target_cpu_utilization=60
)

Creating Endpoint
Create Endpoint backing LRO: projects/29880397572/locations/us-central1/endpoints/5357585910617604096/operations/3160086921779085312
Endpoint created. Resource name: projects/29880397572/locations/us-central1/endpoints/5357585910617604096
To use this Endpoint in another session:
endpoint = aiplatform.Endpoint('projects/29880397572/locations/us-central1/endpoints/5357585910617604096')
Deploying model to Endpoint : projects/29880397572/locations/us-central1/endpoints/5357585910617604096
Deploy Endpoint model backing LRO: projects/29880397572/locations/us-central1/endpoints/5357585910617604096/operations/2891841268973830144
Endpoint model deployed. Resource name: projects/29880397572/locations/us-central1/endpoints/5357585910617604096


## Test the Endpoint

In [24]:
# Send the same base64-encoded test image to the deployed endpoint through the SDK.
results = endpoint.predict(instances=[b64str])
results

Prediction(predictions=["yellow_lady's_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum"], deployed_model_id='1939454948513153024', model_version_id='1', model_resource_name='projects/29880397572/locations/us-central1/models/7113184922780565504', explanations=None)

In [25]:
# Call the deployed endpoint over REST with curl, authenticating with a gcloud
# access token and posting the instances.json payload file.
ENDPOINT_ID = endpoint.name

! curl \
    -H "Authorization: Bearer $(gcloud auth print-access-token)" \
    -H "Content-Type: application/json" \
    -d @instances.json \
    https://{REGION}-aiplatform.googleapis.com/v1/projects/{PROJECT_ID}/locations/{REGION}/endpoints/{ENDPOINT_ID}:predict

{
  "predictions": [
    "yellow_lady's_slipper, yellow_lady-slipper, Cypripedium_calceolus, Cypripedium_parviflorum"
  ],
  "deployedModelId": "1939454948513153024",
  "model": "projects/29880397572/locations/us-central1/models/7113184922780565504",
  "modelDisplayName": "resnetv2101-onnx",
  "modelVersionId": "1"
}


## Clean up

In [26]:
# Undeploy model and delete endpoint
endpoint.delete(force=True)

# Delete the model resource
model.delete()

!gcloud container images delete $PREDICTION_IMAGE_URI --quiet

# !gsutil rm -r $BUCKET_NAME

Undeploying Endpoint model: projects/29880397572/locations/us-central1/endpoints/5357585910617604096
Undeploy Endpoint model backing LRO: projects/29880397572/locations/us-central1/endpoints/5357585910617604096/operations/4624882700581339136
Endpoint model undeployed. Resource name: projects/29880397572/locations/us-central1/endpoints/5357585910617604096
Deleting Endpoint : projects/29880397572/locations/us-central1/endpoints/5357585910617604096
Delete Endpoint  backing LRO: projects/29880397572/locations/us-central1/operations/5177699554841067520
Endpoint deleted. . Resource name: projects/29880397572/locations/us-central1/endpoints/5357585910617604096
Deleting Model : projects/29880397572/locations/us-central1/models/7113184922780565504
Delete Model  backing LRO: projects/29880397572/locations/us-central1/operations/5050472865367851008
Model deleted. . Resource name: projects/29880397572/locations/us-central1/models/7113184922780565504
Digests:
- gcr.io/fast-ai-exploration/resnetv2@s