In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vision - Training formalization

## Overview

[Vision Workshop](https://github.com/mblanc/vision-workshop) is a series of labs on how to build an image classification system on Google Cloud. Throughout the Vision Workshop labs, you will learn how to read image data stored in data lake, perform exploratory data analysis (EDA), train a model, register your model in a model registry, evaluate your model, deploy your model to an endpoint, do real-time inference on your model.

### Objective

This notebook shows how to pull features from Feature Store for training, run exploratory data analysis on features, build a machine learning model locally, experiment with various hyperparameters, evaluate the model and deploy it to a Vertex AI endpoint.

This lab uses the following Google Cloud services and resources:

- [Vertex AI](https://cloud.google.com/vertex-ai/)

Steps performed in this notebook:

- Do some exploratory analysis on the extracted data
- Train the model and track the results using Vertex AI Training
- Deploy the Model using Vertex AI Prediction
- Launch a Batch predictions job

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Load configuration settings from the setup notebook

Set the constants used in this notebook and load the config settings from the `00_environment_setup.ipynb` notebook.

### Load config settings

In [None]:
# Resolve the active gcloud project; the `!` capture returns the command's output lines.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-vision-workshop"
# Fetch the settings file stored under config/ in the workshop bucket.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
print(config.n)
# NOTE(review): exec() runs whatever Python the bucket object contains, so only use a
# bucket you control. Later cells use names such as ID and REGION that are not defined
# in this notebook — presumably they are set here; confirm against the setup notebook.
exec(config.n)

In [None]:
from datetime import datetime, timedelta

# Date stamp (YYYY-MM-DD) used to make the dataset name unique per day.
END_DATE_TRAIN = f"{datetime.now():%Y-%m-%d}"

## Custom Training
# Resource display names, all suffixed with the workshop ID.
DATASET_NAME = f"sample_train-{ID}-{END_DATE_TRAIN}"
TRAIN_JOB_NAME = f"vision_train_frmlz-{ID}"
MODEL_NAME = f"vision_model_frmlz-{ID}"
DEPLOYED_NAME = f"vision_prediction_frmlz-{ID}"

# Prebuilt serving container used when the trained model is registered.
MODEL_SERVING_IMAGE_URI = "europe-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest"

# Location of the custom training image in Artifact Registry.
IMAGE_REPOSITORY = f"vision-{ID}"
IMAGE_NAME = "image-classifier"
IMAGE_TAG = "v1"
IMAGE_URI = f"europe-west4-docker.pkg.dev/{PROJECT_ID}/{IMAGE_REPOSITORY}/{IMAGE_NAME}:{IMAGE_TAG}"

# Machine types for training and for the serving endpoint.
TRAIN_COMPUTE = "n1-standard-4"
DEPLOY_COMPUTE = "n1-standard-4"

## Building a custom Image Classification Model

### Train a custom model
In this section, we will use the tensorflow algorithm. Specifically, we will perform custom training with a custom tensorflow container.

#### Create the training application
Typically, to perform custom training you can use either a pre-built container or build your own container. In this section we will build a container for TensorFlow, and use it to train a model with the Vertex AI Managed Training service.

The first step is to write your training code. Then, write a Dockerfile and build a container image based on it. The following cell, writes our code into `train_tf.py` which is the module for training a Tensorflow model. We will copy this code later into our container to be run through Vertex Training service.

In [None]:
# Create the Docker build-context directory; -p makes this a no-op if it already exists,
# and -m 777 makes the directory world-writable.
!mkdir -p -m 777 build_training

Some notable steps include:

- Command-line arguments:
    - `train-data-dir`, `val-data-dir`: The Cloud Storage locations of the training and validation data. When using Vertex AI custom training with a managed dataset, these default to the environment variables `AIP_TRAINING_DATA_URI` and `AIP_VALIDATION_DATA_URI` (the test split location is available in `AIP_TEST_DATA_URI`). The data is exported from an `ImageDataset` and is in JSONL format.
    - Model directory: The location to save the trained model. When using Vertex AI custom training, it is read from the environment variable `AIP_MODEL_DIR`.
- Data preprocessing (`get_data()`):
    - Compiles the one or more JSONL data files for a dataset, and constructs a `tf.data.Dataset()` generator for data preprocessing and model feeding.
- Model architecture (`get_model()`):
    - Builds the corresponding model architecture.
- Training (`train_model()`):
    - Trains the model
- Model artifact saving
    - Saves the model artifacts where the Cloud Storage location is determined.

In [None]:
%%writefile build_training/train_tf.py

"""
train_tf.py is the module for training a Tensorflow Image classifier pipeline
"""

# Libraries --------------------------------------------------------------------------------------------------------------------------
import argparse
import json
import logging
from pathlib import Path
import numpy as np
import os
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import time
from datetime import datetime, timedelta
from google.cloud import aiplatform as vertex_ai
from pathlib import Path

# Variables --------------------------------------------------------------------------------------------------------------------------
## Read environmental variables
# These are read with os.environ[...], so the script stops with KeyError at start-up if any
# of the three variables is unset. Each gs:// prefix is replaced with /gcs/ so the location
# can be used as a filesystem path (presumably the Cloud Storage mount provided inside
# Vertex AI training containers — confirm). Only MODEL_DIR is used further down in this
# script; TRAINING_DATA_PATH and TEST_DATA_PATH are not referenced again.
TRAINING_DATA_PATH = os.environ["AIP_TRAINING_DATA_URI"].replace("gs://", "/gcs/")
TEST_DATA_PATH = os.environ["AIP_TEST_DATA_URI"].replace("gs://", "/gcs/")
MODEL_DIR = os.environ["AIP_MODEL_DIR"].replace("gs://", "/gcs/")

# Helpers -----------------------------------------------------------------------------------------------------------------------------
def get_args():
    """Parse the trainer's command-line options.

    Returns:
        argparse.Namespace with ``train_data_dir``, ``val_data_dir``, ``image_width``,
        ``image_height``, ``lr``, ``batch_size``, ``epochs`` and ``steps``. The two data
        locations fall back to the ``AIP_TRAINING_DATA_URI`` / ``AIP_VALIDATION_DATA_URI``
        environment variables (``None`` when those are unset).
    """
    # Each entry is (flag, keyword arguments for add_argument), in declaration order.
    option_table = [
        # Data files arguments
        ("--train-data-dir", dict(default=os.getenv('AIP_TRAINING_DATA_URI'), dest="train_data_dir",
                                  type=str, help="train data directory")),
        ("--val-data-dir", dict(default=os.getenv('AIP_VALIDATION_DATA_URI'), dest="val_data_dir",
                                type=str, help="validation data directory")),
        # data preprocessing
        ("--image-width", dict(dest="image_width", default=384, type=int, help="image width")),
        ("--image-height", dict(dest="image_height", default=384, type=int, help="image height")),
        # Hyperparameters
        ("--lr", dict(dest="lr", default=0.01, type=float, help="learning rate.")),
        ("--batch-size", dict(dest="batch_size", default=32, type=int, help="mini-batch size")),
        ("--epochs", dict(default=10, type=int, help="number of training epochs")),
        ("--steps", dict(dest="steps", default=92, type=int, help="Number of steps per epoch.")),
    ]

    cli = argparse.ArgumentParser()
    for flag, options in option_table:
        cli.add_argument(flag, **options)

    return cli.parse_args()


# Parsed once when the script starts; the functions below read this module-level `args`.
args = get_args()

def get_data():
    """Build the training and validation datasets from the exported JSONL files.

    The file patterns come from ``args.train_data_dir`` and ``args.val_data_dir``. Every
    JSONL line must contain an ``imageGcsUri`` and a non-empty ``classificationAnnotations``
    list whose first entry's ``displayName`` is one of the five flower class names.

    Returns:
        Tuple ``(num_classes, train_dataset, val_dataset)``. The training dataset is cached,
        shuffled, repeated and batched with ``args.batch_size``; the validation dataset is
        cached, shuffled, batched with size 1 and not repeated.

    Raises:
        KeyError: if a line lacks the expected keys or carries an unknown label.
    """
    class_names = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]
    class_indices = dict(zip(class_names, range(len(class_names))))
    num_classes = len(class_names)

    def parse_image(filename):
        """Read a JPEG file and resize it to the configured model input size."""
        image = tf.io.read_file(filename)
        image = tf.image.decode_jpeg(image, channels=3)
        # tf.image.resize expects size as (new_height, new_width); this order also matches
        # the model's Input(shape=(image_height, image_width, 3)).
        image = tf.image.resize(image, [args.image_height, args.image_width])
        return image

    def extract(data_dir, batch_size=args.batch_size, repeat=True):
        """Turn every JSONL file matching ``data_dir`` into a batched (image, label) dataset."""
        data = []
        labels = []
        for data_uri in tf.io.gfile.glob(pattern=data_dir):
            with tf.io.gfile.GFile(name=data_uri, mode="r") as gfile:
                for line in gfile.readlines():
                    instance = json.loads(line)
                    data.append(instance["imageGcsUri"])
                    # Only the first annotation is used as the label.
                    classification_annotation = instance["classificationAnnotations"][0]
                    label = classification_annotation["displayName"]
                    labels.append(class_indices[label])

        data_dataset = tf.data.Dataset.from_tensor_slices(data)
        data_dataset = data_dataset.map(
            parse_image, num_parallel_calls=tf.data.experimental.AUTOTUNE
        )

        label_dataset = tf.data.Dataset.from_tensor_slices(labels)

        # Cache the decoded images, then shuffle with a buffer of 32 batches' worth of samples.
        dataset = tf.data.Dataset.zip((data_dataset, label_dataset)).cache().shuffle(batch_size * 32)
        if repeat:
            dataset = dataset.repeat()
        dataset = dataset.batch(batch_size)

        # Add property to retain the class names
        dataset.class_names = class_names

        return dataset

    logging.info('Prepare training data')
    train_dataset = extract(args.train_data_dir)

    logging.info('Prepare validation data')
    val_dataset = extract(args.val_data_dir, batch_size=1, repeat=False)

    return num_classes, train_dataset, val_dataset


def get_cnn_model(num_classes):
    """Build and compile a small two-layer CNN classifier.

    Args:
        num_classes: Number of output logits.

    Returns:
        A compiled ``tf.keras.Sequential`` model that outputs raw logits; the loss is
        configured with ``from_logits=True`` accordingly.
    """
    logging.info("Get model architecture")

    model = tf.keras.Sequential([
        tf.keras.Input(shape=(args.image_height, args.image_width, 3)),
        tf.keras.layers.Rescaling(1./255),  # scale pixel values from [0, 255] to [0, 1]
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(32, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(num_classes)
    ])

    model.compile(
        # Honour the --lr command-line option instead of a hard-coded value.
        optimizer=tf.keras.optimizers.Adam(learning_rate=args.lr),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )
    return model


def get_model(num_classes):
    """Build and compile a transfer-learning classifier on a frozen EfficientNetV2-S backbone.

    Args:
        num_classes: Number of output logits.

    Returns:
        A compiled ``tf.keras.Model`` that applies random augmentation, rescales pixels
        by 1/255, extracts features with the TF Hub layer and outputs raw logits.
    """
    logging.info("Get model architecture")
    data_augmentation = tf.keras.Sequential(
        [
            tf.keras.layers.RandomFlip("horizontal"),
            tf.keras.layers.RandomRotation(0.1),
            tf.keras.layers.RandomTranslation(0, 0.2),
            tf.keras.layers.RandomTranslation(0.2, 0),
            tf.keras.layers.RandomZoom(0.2, 0.2),
        ]
    )

    # base model: created with trainable=False, so its weights stay frozen during training.
    base_model = hub.KerasLayer("https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet1k_s/feature_vector/2", trainable=False)
    # Create new model on top
    inputs = tf.keras.Input(shape=(args.image_height, args.image_width, 3))
    x = data_augmentation(inputs)  # Apply random data augmentation
    x = tf.keras.layers.Rescaling(1./255)(x)
    # The hub layer is a feature-vector model, so its output feeds the classifier head
    # directly and no extra pooling layer is applied.
    x = base_model(x)
    x = tf.keras.layers.Dropout(0.2)(x)  # Regularize with dropout
    outputs = tf.keras.layers.Dense(num_classes)(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        # Honour the --lr command-line option instead of a hard-coded value.
        optimizer=tf.keras.optimizers.Adam(learning_rate=args.lr),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )
    return model


def train_model(model, train_dataset, val_dataset):
    """Fit ``model`` on the training dataset and validate after every epoch.

    Args:
        model: Compiled Keras model.
        train_dataset: Batched, repeating ``tf.data.Dataset`` of (image, label) pairs.
        val_dataset: Batched, finite ``tf.data.Dataset`` used as validation data.

    Returns:
        The ``History`` object returned by ``model.fit``.
    """
    logging.info("Start model training")
    # batch_size is intentionally not passed: the datasets are already batched, and Keras
    # documents that batch_size must not be specified for tf.data.Dataset inputs.
    # steps_per_epoch bounds each epoch because the training dataset repeats indefinitely.
    history = model.fit(
        x=train_dataset,
        epochs=args.epochs,
        steps_per_epoch=args.steps,
        validation_data=val_dataset,
    )
    return history

        
# Script body: build the datasets, create the transfer-learning model and train it.
num_classes, train_dataset, val_dataset = get_data()

model = get_model(num_classes=num_classes)

history = train_model(model, train_dataset, val_dataset)

# Make sure the export directory exists before the SavedModel is written below.
if not Path(MODEL_DIR).exists():
    Path(MODEL_DIR).mkdir(parents=True, exist_ok=True)

# Name given to the model's input tensor in the exported signatures; the explanation
# metadata must reference the same name.
CONCRETE_INPUT = "numpy_inputs"


def _preprocess(bytes_input):
    """Decode one JPEG byte string and resize it to the size the model was trained with."""
    decoded = tf.io.decode_jpeg(bytes_input, channels=3)
    # Use the configured size (height, width) rather than a hard-coded 384x384 so serving
    # stays consistent with --image-height / --image-width.
    resized = tf.image.resize(decoded, size=(args.image_height, args.image_width))
    return resized


@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
def preprocess_fn(bytes_inputs):
    """Convert a batch of JPEG byte strings into a float32 image batch keyed by input name."""
    decoded_images = tf.map_fn(
        _preprocess, bytes_inputs, dtype=tf.float32, back_prop=False
    )
    return {
        CONCRETE_INPUT: decoded_images
    }  # User needs to make sure the key matches model's input


@tf.function(input_signature=[tf.TensorSpec([None], tf.string)])
def serving_fn(bytes_inputs):
    """Serving entry point: preprocess the encoded images, then run the model on them."""
    images = preprocess_fn(bytes_inputs)
    # m_call is defined below; it is only looked up when this function is traced.
    prob = m_call(**images)
    return prob


# Concrete function for the bare model, taking already-decoded images of the trained size.
m_call = tf.function(model.call).get_concrete_function(
    [tf.TensorSpec(shape=[None, args.image_height, args.image_width, 3], dtype=tf.float32, name=CONCRETE_INPUT)]
)

tf.saved_model.save(
    model, MODEL_DIR, signatures={
        "serving_default": serving_fn,
        # Required for XAI
        "xai_preprocess": preprocess_fn,
        "xai_model": m_call
    }
)

#### Image models with serving functions

Previously, your model server took input as a 3-dimensional array. Image models typically take a compressed image and use a serving function fused to the model to decompress the compressed image into a 3-dimensional array, and to do other preprocessing -- such as normalizing the pixel values.

Next, you upload your custom image model as a `Vertex AI Model` resource with a serving function. During upload, you define a serving function to convert data to the format your model expects. If you send encoded data to Vertex AI, your serving function ensures that the data is decoded on the model server before it is passed as input to your model.

##### How does the serving function work

When you send a request to an online prediction server, the request is received by a HTTP server. The HTTP server extracts the prediction request from the HTTP request content body. The extracted prediction request is forwarded to the serving function. For Google pre-built prediction containers, the request content is passed to the serving function as a `tf.string`.

The serving function consists of two parts:

- `preprocessing function`:
  - Converts the input (`tf.string`) to the input shape and data type of the underlying model (dynamic graph).
  - Performs the same preprocessing of the data that was done during training the underlying model -- e.g., normalizing, scaling, etc.
- `post-processing function`:
  - Converts the model output to the format expected by the receiving application -- e.g., compresses the output.
  - Packages the output for the receiving application -- e.g., add headings, make JSON object, etc.

Both the preprocessing and post-processing functions are converted to static graphs which are fused to the model. The output from the underlying model is passed to the post-processing function. The post-processing function passes the converted/packaged output back to the HTTP server. The HTTP server returns the output as the HTTP response content.

One thing to keep in mind when building serving functions for TF.Keras models is that they run as static graphs. That means you cannot use TF operations that require a dynamic graph. If you do, you will get an error during the compile of the serving function which will indicate that you are using an EagerTensor, which is not supported.

#### Define a custom image for tensorflow model training

Here we will build a custom container. A custom container is a Docker image that you create to run your training application. By running your machine learning (ML) training job in a custom container, you can use ML frameworks, non-ML dependencies, libraries, and binaries that are not otherwise supported on Vertex AI. In other words, we package training code on our local machine into a Docker container image, push the container image to Artifact Registry, and create a CustomJob.

For the ML framework we will use tensorflow.

In [None]:
# Create image repo
# (a Docker-format Artifact Registry repository in europe-west4, the location used in IMAGE_URI).
# NOTE(review): presumably this reports an error on re-run once the repository exists — confirm;
# the listing below still shows the current state.
!gcloud artifacts repositories create $IMAGE_REPOSITORY \
    --repository-format=docker \
    --location=europe-west4 \
    --description="Vision Workshop Docker Image repository"

# List repositories under the project
!gcloud artifacts repositories list

In [None]:
# Register gcloud as the Docker credential helper for the europe-west4 Artifact Registry host
# so the push below can authenticate; -q skips the confirmation prompt.
!gcloud auth configure-docker europe-west4-docker.pkg.dev -q

In [None]:
%%writefile build_training/Dockerfile
# Specifies base image and tag
FROM tensorflow/tensorflow:2.8.3-gpu
WORKDIR /root

# Installs additional packages
# NOTE(review): the packages below are unpinned and installed with --upgrade, so rebuilding
# the image later may pull different versions than the ones this notebook was tested with.
RUN pip install --upgrade pip
RUN pip install gcsfs numpy pandas scikit-learn google-cloud-aiplatform tensorflow_datasets tensorflow_hub --upgrade

# Copies the trainer code to the docker image.
COPY ./train_tf.py /root/train_tf.py

# Sets up the entry point to invoke the trainer.
# The relative script path resolves against WORKDIR (/root), where the file was copied.
ENTRYPOINT ["python3", "train_tf.py"]

In [None]:
# Build and push docker file
# The build context is ./build_training/, which holds the Dockerfile and train_tf.py written above.
!docker build -t $IMAGE_URI ./build_training/
!docker push $IMAGE_URI

#### Submit the script to run on Vertex AI
In this section, we create a training pipeline. It will create custom training jobs, load our dataset and upload the model to Vertex AI after the training job is successfully completed. Learn more about creating of custom jobs [here](https://cloud.google.com/vertex-ai/docs/training/create-custom-job).

In [None]:
from google.cloud import aiplatform as vertex_ai


# Set SDK-wide defaults (project, region, staging bucket, experiment name).
# NOTE(review): REGION is not defined in this notebook — presumably it comes from the
# exec'd config; confirm.
vertex_ai.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_NAME, experiment="train")


# Look up the managed image dataset by display name; [0] raises IndexError if none matches.
ds = vertex_ai.ImageDataset.list(filter="display_name=flowers")[0]
ds

In [None]:
# Training job that runs the custom image built above and registers the resulting model
# with the prebuilt TensorFlow serving container.
job = vertex_ai.CustomContainerTrainingJob(
    display_name=TRAIN_JOB_NAME,
    container_uri=IMAGE_URI,
    model_serving_container_image_uri=MODEL_SERVING_IMAGE_URI,
)

# Hyperparameters forwarded to train_tf.py as command-line flags.
parameters = {"LR": 0.003}

CMDARGS = ["--lr=" + str(parameters["LR"]), "--epochs=5"]

# Blocks until training finishes and returns the uploaded Model resource.
model = job.run(
    dataset=ds,
    annotation_schema_uri=vertex_ai.schema.dataset.annotation.image.classification,
    model_display_name=MODEL_NAME,
    args=CMDARGS,
    replica_count=1,
    # Use the notebook-level constant rather than repeating the machine type literal.
    machine_type=TRAIN_COMPUTE,
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
)

In [None]:
# parameters = {"LR": 0.003}

# CMDARGS = [ "--lr=" + str(parameters["LR"]), "--epochs=5"]

# job = vertex_ai.CustomTrainingJob(
#     display_name=TRAIN_JOB_NAME,
#     script_path="build_training/train_tf.py",
#     container_uri="europe-docker.pkg.dev/vertex-ai/training/tf-gpu.2-8:latest",
#     requirements=["gcsfs", "tensorflow_datasets", "tensorflow_hub"],
#     model_serving_container_image_uri=MODEL_SERVING_IMAGE_URI,
# )


# model = job.run(
#     dataset=ds,
#     annotation_schema_uri=vertex_ai.schema.dataset.annotation.image.classification,
#     model_display_name=MODEL_NAME,
#     args=CMDARGS,
#     replica_count=1,
#     machine_type=TRAIN_COMPUTE,
#     accelerator_type="NVIDIA_TESLA_T4",
#     accelerator_count=1)

#### Explanation Specification

To get explanations when doing a prediction, you must enable the explanation capability and set corresponding settings when you upload your custom model to a Vertex AI `Model` resource. These settings are referred to as the explanation metadata, which consists of:

- `parameters`: This is the specification for the explainability algorithm to use for explanations on your model. You can choose between:
  - Shapley - *Note*, not recommended for image data -- can be very long running
  - XRAI
  - Integrated Gradients
- `metadata`: This is the specification for how the algorithm is applied on your custom model.

#### Explanation Parameters

Let's first dive deeper into the settings for the explainability algorithm.

#### Shapley

Assigns credit for the outcome to each feature, and considers different permutations of the features. This method provides a sampling approximation of exact Shapley values.

Use Cases:
  - Classification and regression on tabular data.

Parameters:

- `path_count`: This is the number of paths over the features that will be processed by the algorithm. An exact computation of the Shapley values requires M! paths, where M is the number of features. For 28x28 grayscale images, M would be 784 (28*28); for the 384x384 RGB images used in this notebook, M would be 442,368 (384*384*3).

For any non-trivial number of features, this is too computationally expensive. You can reduce the number of paths over the features to M * `path_count`.

#### Integrated Gradients

A gradients-based method to efficiently compute feature attributions with the same axiomatic properties as the Shapley value.

Use Cases:
  - Classification and regression on tabular data.
  - Classification on image data.

Parameters:

- `step_count`: This is the number of steps to approximate the remaining sum. The more steps, the more accurate the integral approximation. The general rule of thumb is 50 steps, but as you increase so does the compute time.

#### XRAI

Based on the integrated gradients method, XRAI assesses overlapping regions of the image to create a saliency map, which highlights relevant regions of the image rather than pixels.

Use Cases:

  - Classification on image data.

Parameters:

- `step_count`: This is the number of steps to approximate the remaining sum. The more steps, the more accurate the integral approximation. The general rule of thumb is 50 steps, but as you increase so does the compute time.

In the next code cell, set the variable `XAI` to the explainability algorithm you will use on your custom model.

In [None]:
XAI = "xrai"  # [ shapley, ig, xrai ]

# Attribution settings for each supported explainability algorithm.
_XAI_SETTINGS = {
    "shapley": {"sampled_shapley_attribution": {"path_count": 10}},
    "ig": {"integrated_gradients_attribution": {"step_count": 10}},
    "xrai": {"xrai_attribution": {"step_count": 10}},
}

# Fail with a clear message instead of leaving PARAMETERS undefined (or stale from an
# earlier run) when XAI is misspelled.
if XAI not in _XAI_SETTINGS:
    raise ValueError(f"Unknown XAI method {XAI!r}; expected one of {sorted(_XAI_SETTINGS)}")

PARAMETERS = _XAI_SETTINGS[XAI]

# NOTE: this rebinds `parameters`, which earlier held the training hyperparameter dict.
parameters = vertex_ai.explain.ExplanationParameters(PARAMETERS)

##### Explanation Metadata

Let's first dive deeper into the explanation metadata, which consists of:

- `outputs`: A scalar value in the output to attribute -- what to explain. For example, in a probability output \[0.1, 0.2, 0.7\] for classification, one wants an explanation for 0.7. Consider the following formulae, where the output is `y` and that is what we want to explain.

    y = f(x)

Consider the following formulae, where the outputs are `y` and `z`. Since we can only do attribution for one scalar value, we have to pick whether we want to explain the output `y` or `z`. Assume in this example the model is object detection and y and z are the bounding box and the object classification. You would want to pick which of the two outputs to explain.

    y, z = f(x)

The dictionary format for `outputs` is:

    { "outputs": { "[your_display_name]":
                   "output_tensor_name": [layer]
                 }
    }

<blockquote>
 -  [your_display_name]: A human readable name you assign to the output to explain. A common example is "probability".<br/>
 -  "output_tensor_name": The key/value field to identify the output layer to explain. <br/>
 -  [layer]: The output layer to explain. In a single task model, like a tabular regressor, it is the last (topmost) layer in the model.
</blockquote>

- `inputs`: The features for attribution -- how they contributed to the output. Consider the following formulae, where `a` and `b` are the features. We have to pick which features to explain how the contributed. Assume that this model is deployed for A/B testing, where `a` are the data_items for the prediction and `b` identifies whether the model instance is A or B. You would want to pick `a` (or some subset of) for the features, and not `b` since it does not contribute to the prediction.

    y = f(a,b)

The minimum dictionary format for `inputs` is:

    { "inputs": { "[your_display_name]":
                  "input_tensor_name": [layer]
                 }
    }

<blockquote>
 -  [your_display_name]: A human readable name you assign to the input to explain. A common example is "features".<br/>
 -  "input_tensor_name": The key/value field to identify the input layer for the feature attribution. <br/>
 -  [layer]: The input layer for feature attribution. In a single input tensor model, it is the first (bottom-most) layer in the model.
</blockquote>

Since the inputs to the model are images, you can specify the following additional field as a reporting/visualization aid:

<blockquote>
 - "modality": "image": Indicates the field values are image data.
</blockquote>

In [None]:
# Feature to attribute: the decoded image tensor. The tensor name must match the input name
# used in the exported "xai_model" signature of the training script.
image_input_spec = {"input_tensor_name": "numpy_inputs", "modality": "image"}
# Output to explain: the model's logits tensor.
class_output_spec = {"output_tensor_name": "output_0"}

input_metadata = vertex_ai.explain.ExplanationMetadata.InputMetadata(image_input_spec)
output_metadata = vertex_ai.explain.ExplanationMetadata.OutputMetadata(class_output_spec)

# The keys "image" and "class" are the display names under which attributions are reported.
metadata = vertex_ai.explain.ExplanationMetadata(
    inputs={"image": input_metadata},
    outputs={"class": output_metadata},
)

### Upload the model

Next, upload your model to a `Model` resource using `Model.upload()` method, with the following parameters:

- `display_name`: The human readable name for the `Model` resource.
- `artifact`: The Cloud Storage location of the trained model artifacts.
- `serving_container_image_uri`: The serving container image.
- `sync`: Whether to execute the upload asynchronously or synchronously.
- `explanation_parameters`: Parameters to configure explaining for `Model`'s predictions.
- `explanation_metadata`: Metadata describing the `Model`'s input and output for explanation.

If the `upload()` method is run asynchronously, you can subsequently block until completion with the `wait()` method.

In [None]:
# Register a second Model resource that reuses the trained model's artifacts (model.uri)
# and attaches the explanation parameters and metadata defined above.
# NOTE: `model` is rebound here from the training job's model to the newly uploaded one.
model = vertex_ai.Model.upload(
    display_name=MODEL_NAME + "_explanations",
    artifact_uri=model.uri,
    serving_container_image_uri=MODEL_SERVING_IMAGE_URI,
    explanation_parameters=parameters,
    explanation_metadata=metadata,
    sync=False,
)

# sync=False returns before the upload completes, so block here until it has finished.
model.wait()

In [None]:
# model = vertex_ai.Model.upload(
#     display_name=MODEL_NAME + "_gpu",
#     artifact_uri=model.uri,
#     serving_container_image_uri="europe-docker.pkg.dev/vertex-ai/prediction/tf2-gpu.2-8:latest",
#     explanation_parameters=parameters,
#     explanation_metadata=metadata,
#     sync=True,
# )

#### Deploy the model
Before you use your model to make predictions, you need to deploy it to an Endpoint. You can do this by calling the deploy function on the Model resource. This will do two things:

- create an Endpoint resource
- deploy the Model resource to the Endpoint resource

In [None]:
# Serving machine type (same value as the constant defined near the top of the notebook).
DEPLOY_COMPUTE="n1-standard-4"
# Send 100% of traffic to the model being deployed
# (presumably key "0" denotes the model in this deploy call — confirm in the SDK docs).
TRAFFIC_SPLIT = {"0": 100}

# A fixed single replica: autoscaling is effectively disabled.
MIN_NODES = 1
MAX_NODES = 1


# No endpoint is passed, so deploy() is expected to create one; sync=True blocks until done.
endpoint = model.deploy(
    deployed_model_display_name=DEPLOYED_NAME,
    traffic_split=TRAFFIC_SPLIT,
    machine_type=DEPLOY_COMPUTE,
    # accelerator_type="NVIDIA_TESLA_T4",
    # accelerator_count=1,
    min_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    explanation_parameters=parameters,
    explanation_metadata=metadata,
    sync=True
)

#### Test the deployed model (Make an online prediction request)
Send an online prediction request to your deployed model. To make sure your deployed model is working, test it out by sending a request to the endpoint.

Let's first get a test data.

In [None]:
import numpy as np
from PIL import Image
import os
import base64
import tensorflow as tf

In [None]:
# Re-attach to the endpoint by display name (useful after a kernel restart).
# NOTE(review): assumes deploy() named the endpoint "<model display name>_endpoint" — confirm.
# [0] raises IndexError when no endpoint matches.
endpoint = vertex_ai.Endpoint.list(filter=f"display_name={MODEL_NAME}_explanations_endpoint")[0]

In [None]:
# Re-attach to the uploaded explanations model by display name; [0] raises IndexError if none matches.
model = vertex_ai.Model.list(filter=f"display_name={MODEL_NAME}_explanations")[0]

In [None]:
# Display the endpoint resource to confirm which one the following cells will call.
endpoint

In [None]:
from io import BytesIO

# Sample image taken from this project's copy of the flowers dataset instead of a hard-coded
# personal bucket. Despite the variable name, the file is a daisy.
# NOTE(review): assumes the dataset lives under gs://{BUCKET_NAME}/flowers/, as the batch
# prediction cells further down do — confirm.
sunflower_url = f"gs://{BUCKET_NAME}/flowers/daisy/100080576_f52e8ee070_n.jpg"

# tf.keras.utils.get_file downloads over HTTP(S) and cannot fetch gs:// URIs, so copy the
# object to a local file with tf.io.gfile, which understands Cloud Storage paths.
sunflower_path = os.path.basename(sunflower_url)
tf.io.gfile.copy(sunflower_url, sunflower_path, overwrite=True)

# Resize to the 384x384 input used for training, then re-encode as JPEG in memory.
img = Image.open(sunflower_path).resize((384, 384))
buffered = BytesIO()
img.save(buffered, format="JPEG")

# The serving signature takes raw JPEG bytes under the key "bytes_inputs"; binary payloads
# are sent as base64 wrapped in {"b64": ...}.
bytes_input = base64.b64encode(buffered.getvalue()).decode("utf-8")
instances = [{'bytes_inputs': {'b64': bytes_input}}]
img

In [None]:
# Label order must match the class order used by the training script.
class_names = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']

# The endpoint returns logits; softmax converts them to probabilities.
predictions = endpoint.predict(instances=instances)
score = tf.nn.softmax(predictions[0])

best_class = class_names[np.argmax(score)]
confidence_pct = 100 * np.max(score)
message = "This image most likely belongs to {} with a {:.2f} percent confidence."
print(message.format(best_class, confidence_pct))

In [None]:
%%time
# Measure the wall-clock time of one online prediction round-trip.
endpoint.predict(instances=instances)

In [None]:
import io

# These were used below without being imported in any earlier cell of this notebook.
import matplotlib.image as mpimg
import matplotlib.pyplot as plt

# Request feature attributions for the same instances and show each returned overlay image.
for explanation in endpoint.explain(instances=instances).explanations:
    attribution = explanation.attributions[0]
    attributions = dict(attribution.feature_attributions)
    # Index of the explained output, mapped back to its class label.
    label_index = attribution.output_index[0]
    class_name = class_names[label_index]
    # "image" is the input display name configured in the explanation metadata.
    b64str = attributions["image"]["b64_jpeg"]
    image = base64.b64decode(b64str)
    image = io.BytesIO(image)
    img = mpimg.imread(image, format="JPG")

    plt.imshow(img, interpolation="nearest")
    plt.title(f"Attribution for class: {class_name}")
    plt.show()

### Batch predictions

Send a batch prediction request to your deployed model.

Batch prediction provides the ability to do offline batch processing of large amounts of prediction requests. Resources are only provisioned during the batch process and then deprovisioned when the batch request is completed. The results are stored in Cloud Storage, in contrast to online prediction where the results are returned as a HTTP response packet.

The input format for your batch job is dependent on the format supported by your model server. Foremost, the web server in your model server must support a JSONL format, which the web server will convert to a format supported either directly by the model input interface or a serving function interface. For batch prediction, this JSONL format is referred to as the `pivot` format.

### Input format for batch prediction jobs

The batch server accepts the following input formats for custom image models:

- JSONL
- File-List

### Output format for batch prediction jobs

The batch server accepts the following output formats for custom image models:

- JSONL

### Pivot format

The batch server converts the input format to the `pivot` (JSONL) format as follows:

**JSONL**

Each input line (request) should contain one and only one valid json value.

    {"values": [1, 2, 3, 4], "key": 1}
    {"values": [5, 6, 7, 8], "key": 2}

The batch server generates the pivot data with the same format. The generated pivot data is then wrapped into a payload request:

    {"instances": [
      {"values": [1, 2, 3, 4], "key": 1},
      {"values": [5, 6, 7, 8], "key": 2}
    ]}

**FileList**

The FileList format contains a list of files. Each line in a “FileList” file specifies a single file path, specified as a Cloud Storage location.

    gs://my-bucket/file1.txt
    gs://my-bucket/file2.txt

The batch server reads the files as binaries. The binary objects are serialized as ASCII strings.

    {"instances": [
     {"b64": "b64EncodedASCIIString"},
     {"b64": "b64EncodedASCIIString"}
    ]}

Let's create such a `FileList` input file by sampling ten images per label from our training dataset

In [None]:
from google.cloud import storage

client = storage.Client()

# Take the first ten objects listed under each class folder of the flowers dataset.
# The prefixes are partial folder names ("flowers/rose" matches "flowers/roses/...").
daisies = list(client.list_blobs(BUCKET_NAME, prefix='flowers/daisy'))[:10]
# Fixed prefix: the class folder is "tulips", so the former 'flowers/tullip' matched nothing.
tullips = list(client.list_blobs(BUCKET_NAME, prefix='flowers/tulip'))[:10]
roses = list(client.list_blobs(BUCKET_NAME, prefix='flowers/rose'))[:10]
sunflowers = list(client.list_blobs(BUCKET_NAME, prefix='flowers/sunflower'))[:10]
dandelions = list(client.list_blobs(BUCKET_NAME, prefix='flowers/dandelion'))[:10]

images = daisies + tullips + roses + sunflowers + dandelions

In [None]:
# Convert the sampled blobs to gs:// URIs. Entries that are already strings are kept as-is,
# so re-running this cell after `images` has been converted does not fail.
images = [
    image if isinstance(image, str) else f"gs://{image.bucket.name}/{image.name}"
    for image in images
]
images

In [None]:
import base64
import json
from io import BytesIO

# File-list input for batch prediction: one gs:// image URI per line.
gcs_input_uri = f"gs://{BUCKET_NAME}/flowers_batch.txt"

file_list_body = "".join(uri + "\n" for uri in images)
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    f.write(file_list_body)

In [None]:
import base64
import json
from io import BytesIO

# JSONL input for batch prediction: one {"bytes_inputs": {"b64": ...}} instance per line,
# in the same format as the online prediction request.
# NOTE: this rebinds gcs_input_uri, replacing the file-list path set in the previous cell.
gcs_input_uri = f"gs://{BUCKET_NAME}/test.jsonl"
with tf.io.gfile.GFile(gcs_input_uri, "w") as f:
    for image in images:
        # Named image_bytes (not `bytes`) so the builtin is not shadowed for the rest of
        # the kernel session.
        image_bytes = tf.io.read_file(image).numpy()
        # Resize to the 384x384 input used for training and re-encode as JPEG.
        img = Image.open(BytesIO(image_bytes)).resize((384, 384))
        buffered = BytesIO()
        img.save(buffered, format="JPEG")
        b64str = base64.b64encode(buffered.getvalue()).decode("utf-8")
        data = {"bytes_inputs": {"b64": b64str}}
        f.write(json.dumps(data) + "\n")

#### Send the prediction request

To make a batch prediction request, call the model object's `batch_predict` method with the following parameters: 
- `instances_format`: The format of the batch prediction request file: "jsonl", "csv", "bigquery", "tf-record", "tf-record-gzip" or "file-list"
- `predictions_format`: The format of the batch prediction response file: "jsonl", "csv" or "bigquery"
- `job_display_name`: The human readable name for the prediction job.
- `gcs_source`: A list of one or more Cloud Storage paths to your batch prediction requests.
- `gcs_destination_prefix`: The Cloud Storage path that the service will write the predictions to.
- `model_parameters`: Additional filtering parameters for serving prediction results.
- `machine_type`: The type of machine to use for batch prediction.
- `accelerator_type`: The hardware accelerator type.
- `accelerator_count`: The number of accelerators to attach to a worker replica.
- `starting_replica_count`: The number of compute instances to initially provision.
- `max_replica_count`: The maximum number of compute instances to scale to. In this tutorial, only one instance is provisioned.

### Compute instance scaling

You can specify a single instance (or node) to process your batch prediction request. This tutorial uses a single node, so the variables `MIN_NODES` and `MAX_NODES` are both set to `1`.

If you want to use multiple nodes to process your batch prediction request, set `MAX_NODES` to the maximum number of nodes you want to use. Vertex AI autoscales the number of nodes used to serve your predictions, up to the maximum number you set. Refer to the [pricing page](https://cloud.google.com/vertex-ai/pricing#prediction-prices) to understand the costs of autoscaling with multiple nodes.


In [None]:
# Replica bounds for the batch prediction job; both are 1, so a single node is used.
MIN_NODES = 1
MAX_NODES = 1

# Gather the request settings first so they can be inspected before launching the job.
batch_predict_args = dict(
    job_display_name=f"{DEPLOYED_NAME}_batch",
    gcs_source=gcs_input_uri,
    gcs_destination_prefix=f"gs://{BUCKET_NAME}",
    instances_format="jsonl",
    model_parameters=None,
    machine_type=DEPLOY_COMPUTE,
    starting_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    generate_explanation=True,
    sync=True,
)

batch_predict_job = model.batch_predict(**batch_predict_args)

print(batch_predict_job)

#### Get the predictions

Next, get the results from the completed batch prediction job.

The results are written to the Cloud Storage output bucket you specified in the batch prediction request. Call the `iter_outputs()` method to list each Cloud Storage file generated with the results. Each file contains one or more prediction results in JSON format, with the following fields:

- `instance`: The prediction request.
- `prediction`: The prediction response.

In [None]:
# Re-attach to the most recent batch prediction job launched by this notebook
# (useful after a kernel restart). Filtering on the display name and sorting by
# creation time avoids picking up an unrelated job from the same project.
matching_jobs = vertex_ai.BatchPredictionJob.list(
    filter=f'display_name="{DEPLOYED_NAME}_batch"',
    order_by="create_time desc",
)
if not matching_jobs:
    raise RuntimeError(
        f"No batch prediction job named '{DEPLOYED_NAME}_batch' was found; "
        "run the batch prediction cell first."
    )
batch_predict_job = matching_jobs[0]

In [None]:
import tensorflow as tf
import pandas as pd
# Disable column-width truncation so long cell values are shown in full.
pd.options.display.max_colwidth = None

In [None]:
# Show the Cloud Storage directory recorded in the job's output info.
batch_predict_job.output_info.gcs_output_directory

In [None]:
# Keep only the output files whose base name starts with "explanation.results"
# and record each one as a gs:// URI.
bp_iter_outputs = batch_predict_job.iter_outputs()
filenames = []
for blob in bp_iter_outputs:
    basename = blob.name.split("/")[-1]
    if basename.startswith("explanation.results"):
        filenames.append(f"gs://{blob.bucket.name}/{blob.name}")

In [None]:
# Display the gs:// URIs of the result files selected in the previous cell.
filenames

In [None]:
# Load every result file (JSON Lines) and stack them into a single DataFrame.
if not filenames:
    # pd.concat([]) would fail with a generic "No objects to concatenate" error.
    raise ValueError(
        "No batch prediction result files were found; check that the batch "
        "prediction job succeeded and wrote its explanation results."
    )
dataframes = [pd.read_json(filename, lines=True) for filename in filenames]
# ignore_index gives each row a unique label instead of restarting at 0 for every file.
batch_predictions = pd.concat(dataframes, ignore_index=True)
batch_predictions.columns

In [None]:
def _explanation_to_img_tag(explanation):
    """Return an inline HTML <img> tag for the base64 JPEG of the first attribution's `image` feature."""
    b64_jpeg = explanation['attributions'][0]['featureAttributions']['image']['b64_jpeg']
    return f'<img src="data:image/jpeg;base64,{b64_jpeg}">'


# Replace each raw explanation payload with an HTML image tag.
batch_predictions["explanation"] = batch_predictions["explanation"].apply(_explanation_to_img_tag)

Let's interpret the results

In [None]:
# Disable column-width truncation so long cell values are shown in full.
pd.options.display.max_colwidth = None

In [None]:
def _instance_to_img_tag(instance):
    """Return an inline HTML <img> tag for the base64 image stored under `bytes_inputs`."""
    b64_image = instance['bytes_inputs']['b64']
    return f'<img src="data:image/jpeg;base64,{b64_image}">'


# Replace each request payload with an HTML image tag.
batch_predictions["instance"] = batch_predictions["instance"].apply(_instance_to_img_tag)

In [None]:
class_names = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


def _format_top_class(raw_scores):
    """Apply softmax to the raw scores and return "<class name> <score>%" for the highest-scoring class."""
    score = tf.nn.softmax(raw_scores)
    return "{} {:.2f}%".format(class_names[np.argmax(score)], 100 * np.max(score))


# Replace each raw prediction with a readable label and its score as a percentage.
batch_predictions["prediction"] = batch_predictions["prediction"].apply(_format_top_class)

In [None]:
# `HTML` and `display` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import HTML, display

# escape=False keeps the <img> tags in the cells as real HTML so the images render in the table.
display(HTML(batch_predictions[['instance', 'explanation', 'prediction']].to_html(escape=False)))