## Imports

In [1]:
import google.auth # pip install -U google-auth
from google.auth.transport.requests import AuthorizedSession

In [2]:
import tensorflow as tf 
import base64
import json

## Set up variables

In [3]:
# Deployment settings. The values below look like placeholders -- replace them
# with the identifiers of your own project and deployed endpoint before running.
PROJECT_ID = "GCP-PROJECT-ID"  # fills the `projects/...` segment of the prediction URL
REGION = "us-central1"  # used for both the API host name and the `locations/...` segment
ENDPOINT_ID = "ENDPOINT-ID"  # fills the `endpoints/...` segment of the prediction URL

## Obtain an authorized session to query the Endpoint

In [4]:
# Regional API host; built first because it does not depend on the credentials.
service_endpoint = f"https://{REGION}-aiplatform.googleapis.com"

# google.auth.default() yields a two-element tuple; only the credentials are
# needed here, so the second element is discarded.
credentials, _ = google.auth.default()

# Session constructed from the credentials; every request below goes through it.
authed_session = AuthorizedSession(credentials)

In [5]:
# Compose the REST URL of the endpoint's `:predict` method from the settings above.
url = (
    f"{service_endpoint}/v1/projects/{PROJECT_ID}"
    f"/locations/{REGION}/endpoints/{ENDPOINT_ID}:predict"
)
print("Endpoint: ", url)

Endpoint:  https://us-central1-aiplatform.googleapis.com/v1/projects/fast-ai-exploration/locations/us-central1/endpoints/8308780672684654592:predict


## Obtain an image for prediction

In [6]:
# Fetch the sample image; get_file hands back the local path it was saved to.
image_path = tf.keras.utils.get_file(
    fname="image.jpg",
    origin="http://images.cocodataset.org/val2017/000000039769.jpg",
)

# Keep the raw file bytes as-is (no decoding) -- they are base64-encoded
# when the request payloads are built.
with open(image_path, mode="rb") as image_file:
    image = image_file.read()

## Prepare request payloads

In [7]:
# Key under which each instance carries its image.
# NOTE(review): presumably this must match the input name of the deployed
# model's serving signature -- confirm against the model export.
serving_input = "string_input"

# Encode the image once and reuse the resulting string: every instance below
# carries the same image, so re-encoding it per instance is wasted work.
encoded_image = base64.b64encode(image).decode("utf-8")

# Binary data is sent as {"b64": <base64 text>} inside each instance.
single_instance_request_body = {
    "instances": [{serving_input: {"b64": encoded_image}}]
}

# Same image repeated twice; each instance is its own dict object.
two_instances_request_body = {
    "instances": [{serving_input: {"b64": encoded_image}} for _ in range(2)]
}

Here we're preparing two kinds of request payloads:

* One that will contain a single image instance
* Another that will contain two image instances

This is just to show you that it's possible to send multiple instances to a Vertex AI Endpoint for prediction, as long as you stay within the [request size limit](https://cloud.google.com/vertex-ai/docs/predictions/online-predictions-custom-models?&_ga=2.247311684.-1508917096.1630288038#send_an_online_prediction_request).

## Make requests

In [8]:
# Single instance.
response = authed_session.post(
    url,
    data=json.dumps(single_instance_request_body),
)
# Show the response object first, then the raw (bytes) body on its own line.
print(response, response.content, sep="\n")

<Response [200]>
b'{\n  "predictions": [\n    {\n      "confidence": 0.896659553,\n      "label": "Egyptian cat"\n    }\n  ],\n  "deployedModelId": "7779233882515177472",\n  "model": "projects/29880397572/locations/us-central1/models/4024718333008936960",\n  "modelDisplayName": "ViT Base TF2.8 GPU model",\n  "modelVersionId": "1"\n}\n'


In [9]:
# Two instances.
response = authed_session.post(
    url,
    data=json.dumps(two_instances_request_body),
)
# Show the response object first, then the raw (bytes) body on its own line.
print(response, response.content, sep="\n")

<Response [200]>
b'{\n  "predictions": [\n    {\n      "label": "Egyptian cat",\n      "confidence": 0.896659374\n    },\n    {\n      "confidence": 0.896659374,\n      "label": "Egyptian cat"\n    }\n  ],\n  "deployedModelId": "7779233882515177472",\n  "model": "projects/29880397572/locations/us-central1/models/4024718333008936960",\n  "modelDisplayName": "ViT Base TF2.8 GPU model",\n  "modelVersionId": "1"\n}\n'


## (Optional) Serialize the request payloads for later use

In [10]:
# Write each request body to its own JSON file, in the same order as before:
# the single-instance payload first, then the two-instance payload.
for file_name, request_body in (
    ("single-instance.json", single_instance_request_body),
    ("two-instances.json", two_instances_request_body),
):
    with open(file_name, "w") as payload_file:
        json.dump(request_body, payload_file)

## Load a serialized request payload and make a request

In [11]:
# Read the saved single-instance payload back from disk.
# Note: despite its name, `json_string` ends up holding the parsed JSON
# object (json.load decodes the file contents), not a raw string.
with open("single-instance.json", "rb") as payload_file:
    json_string = json.load(payload_file)

In [12]:
# Post the payload that was reloaded from "single-instance.json" (held in
# `json_string`) rather than the in-memory `single_instance_request_body`,
# so this cell actually exercises the serialized file.
response = authed_session.post(url, data=json.dumps(json_string))
print(response)
print(response.content)

<Response [200]>
b'{\n  "predictions": [\n    {\n      "confidence": 0.896659553,\n      "label": "Egyptian cat"\n    }\n  ],\n  "deployedModelId": "7779233882515177472",\n  "model": "projects/29880397572/locations/us-central1/models/4024718333008936960",\n  "modelDisplayName": "ViT Base TF2.8 GPU model",\n  "modelVersionId": "1"\n}\n'
