In [None]:
# Copyright 2022 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vision - Training formalization

## Overview

[Vision Workshop](https://github.com/mblanc/vision-workshop) is a series of labs on how to build an image classification system on Google Cloud. Throughout the Vision Workshop labs, you will learn how to read image data stored in a data lake, perform exploratory data analysis (EDA), train a model, register your model in a model registry, evaluate your model, deploy your model to an endpoint, and do real-time inference on your model.

### Objective

This notebook shows how to pull features from Feature Store for training, run exploratory data analysis on features, build a machine learning model locally, experiment with various hyperparameters, evaluate the model and deploy it to a Vertex AI endpoint.

This lab uses the following Google Cloud services and resources:

- [Vertex AI](https://cloud.google.com/vertex-ai/)

Steps performed in this notebook:

- Do some exploratory analysis on the extracted data
- Train the model and track the results using Vertex AI Training
- Deploy the Model using Vertex AI Prediction
- Launch a Batch predictions job

### Costs

This tutorial uses billable components of Google Cloud:

* Vertex AI
* Cloud Storage

Learn about [Vertex AI
pricing](https://cloud.google.com/vertex-ai/pricing) and use the [Pricing
Calculator](https://cloud.google.com/products/calculator/)
to generate a cost estimate based on your projected usage.

### Load configuration settings from the setup notebook

Set the constants used in this notebook and load the config settings from the `00_environment_setup.ipynb` notebook.

In [None]:
# Resolve the active gcloud project; the `!` capture returns the command output as a list of lines.
GCP_PROJECTS = !gcloud config get-value project
PROJECT_ID = GCP_PROJECTS[0]
BUCKET_NAME = f"{PROJECT_ID}-vision-workshop"
# Read the environment file stored under config/ in the workshop bucket.
config = !gsutil cat gs://{BUCKET_NAME}/config/notebook_env.py
print(config.n)
# NOTE(review): exec() runs whatever the bucket object contains inside this kernel, so only
# use a bucket you control. Later cells use names such as ID and REGION that are not defined
# in this notebook -- presumably they come from this file; confirm.
exec(config.n)

In [None]:
from datetime import datetime, timedelta

# Today's date as YYYY-MM-DD; used to stamp the dataset name.
END_DATE_TRAIN = f"{datetime.today():%Y-%m-%d}"

## Custom Training
# Resource names carry the ID suffix (ID is not defined in this cell; presumably it is
# loaded by the config cell above -- confirm).
DATASET_NAME = f"sample_train-{ID}-{END_DATE_TRAIN}"
TRAIN_JOB_NAME = f"vision_train_frmlz-{ID}"
MODEL_NAME = f"vision_model_frmlz-{ID}"
DEPLOYED_NAME = f"vision_prediction_frmlz-{ID}"
MODEL_SERVING_IMAGE_URI = "europe-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-8:latest"

# Artifact Registry coordinates of the training image built later in this notebook.
IMAGE_REPOSITORY = f"vision-{ID}"
IMAGE_NAME = "image-classifier-pytorch"
IMAGE_TAG = "v1"
IMAGE_URI = f"europe-west4-docker.pkg.dev/{PROJECT_ID}/{IMAGE_REPOSITORY}/{IMAGE_NAME}:{IMAGE_TAG}"

# Machine types.
TRAIN_COMPUTE = "e2-standard-4"
DEPLOY_COMPUTE = "n1-standard-4"

## Building a custom Image Classification Model

### Train a custom model
In this section, we will use PyTorch. Specifically, we will perform custom training with a custom PyTorch container.

#### Create the training application
Typically, to perform custom training you can use either a pre-built container or build your own container. In this section we will build a container for PyTorch, and use it to train a model with the Vertex AI Managed Training service.

The first step is to write your training code. Then, write a Dockerfile and build a container image based on it. The following cell writes our code into `train_pytorch.py`, which is the module for training a PyTorch model. We will copy this code later into our container to be run through the Vertex AI Training service.

In [None]:
!mkdir -p -m 777 build_training

Some notable steps include:

- Command-line arguments:
    - `train-data-dir`, `val-data-dir`, `test-data-dir`: The Cloud Storage locations of the train, validation and test data. When using Vertex AI custom training, these locations will be specified in the corresponding environment variables: `AIP_TRAINING_DATA_URI`, `AIP_VALIDATION_DATA_URI`, and `AIP_TEST_DATA_URI`. The data is exported from an `ImageDataSet` and will be in a JSONL format.
    - `model-dir`: The location to save the trained model. When using Vertex AI custom training, the location will be specified in the environment variable `AIP_MODEL_DIR`.
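- Data preprocessing:
    - In this PyTorch version the `get_data()` helper is commented out. The script instead loads the images under `train-data-dir` with `torchvision.datasets.ImageFolder`, applies the resize/crop/flip/normalize transforms, and randomly splits them 80/20 into training and validation sets.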
- Model architecture (`get_model()`):
    - Builds the corresponding model architecture.
- Training (`train_model()`):
    - Trains the model
- Model artifact saving
    - Saves the model artifacts where the Cloud Storage location is determined.

In [None]:
%%writefile build_training/train_pytorch.py

"""
train_pytorch.py is the module for training a PyTorch image classifier pipeline
"""

# Libraries --------------------------------------------------------------------------------------------------------------------------
import argparse
import json
import logging
import numpy as np
import os
import PIL
import PIL.Image
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import copy

import timm
import time
from datetime import datetime, timedelta
from pathlib import Path
import shutil

# Variables --------------------------------------------------------------------------------------------------------------------------
## Read environmental variables
# Each value is read with os.environ[...], so a missing variable raises KeyError at import time.
# The "gs://" prefix is rewritten to "/gcs/" so the locations can be used as file-system paths
# (presumably the Cloud Storage FUSE mount available inside Vertex AI training -- confirm).
TRAINING_DATA_PATH = os.environ["AIP_TRAINING_DATA_URI"].replace("gs://", "/gcs/")
TEST_DATA_PATH = os.environ["AIP_TEST_DATA_URI"].replace("gs://", "/gcs/")
# Directory the trained weights are written to at the end of the script.
MODEL_DIR = os.environ["AIP_MODEL_DIR"].replace("gs://", "/gcs/")

# Helpers -----------------------------------------------------------------------------------------------------------------------------
def get_args():
    """Parse the trainer's command-line flags.

    The data directories default to the ``AIP_TRAINING_DATA_URI`` and
    ``AIP_VALIDATION_DATA_URI`` environment variables (``None`` when unset).

    Returns:
        argparse.Namespace with the attributes ``train_data_dir``, ``val_data_dir``,
        ``image_width``, ``image_height``, ``lr``, ``batch_size``, ``epochs`` and ``steps``.
    """
    parser = argparse.ArgumentParser()

    # Data files arguments
    parser.add_argument(
        "--train-data-dir", dest="train_data_dir", type=str,
        default=os.getenv('AIP_TRAINING_DATA_URI'), help="train data directory",
    )
    parser.add_argument(
        "--val-data-dir", dest="val_data_dir", type=str,
        default=os.getenv('AIP_VALIDATION_DATA_URI'), help="validation data directory",
    )

    # data preprocessing
    parser.add_argument("--image-width", dest="image_width", default=384, type=int, help="image width")
    parser.add_argument("--image-height", dest="image_height", default=384, type=int, help="image height")

    # Optimisation hyperparameters
    parser.add_argument(
        "--lr", dest="lr", default=0.01, type=float, help="learning rate",
    )
    parser.add_argument(
        "--batch-size", dest="batch_size", default=32, type=int, help="mini-batch size"
    )
    parser.add_argument(
        "--epochs", default=10, type=int, help="number of training epochs"
    )
    parser.add_argument(
        "--steps", dest="steps", default=92, type=int, help="Number of steps per epoch.",
    )

    return parser.parse_args()


# Parse the CLI flags once at import time; the script body below reads `args`.
args = get_args()


# Use the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# def get_data():
#     class_names = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]
#     class_indices = dict(zip(class_names, range(len(class_names))))
#     num_classes = len(class_names)

#     def parse_image(filename):
#         image = tf.io.read_file(filename)
#         image = tf.image.decode_jpeg(image, channels=3)
#         image = tf.image.resize(image, [args.image_width, args.image_height])
#         return image

#     def extract(data_dir, batch_size=args.batch_size, repeat=True):
#         data = []
#         labels = []
#         for data_uri in tf.io.gfile.glob(pattern=data_dir):
#             with tf.io.gfile.GFile(name=data_uri, mode="r") as gfile:
#                 for line in gfile.readlines():
#                     instance = json.loads(line)
#                     data.append(instance["imageGcsUri"])
#                     classification_annotation = instance["classificationAnnotations"][0]
#                     label = classification_annotation["displayName"]
#                     labels.append(class_indices[label])

#         data_dataset = tf.data.Dataset.from_tensor_slices(data)
#         data_dataset = data_dataset.map(
#             parse_image, num_parallel_calls=tf.data.experimental.AUTOTUNE
#         )

#         label_dataset = tf.data.Dataset.from_tensor_slices(labels)

#         dataset = tf.data.Dataset.zip((data_dataset, label_dataset)).cache().shuffle(batch_size * 32)
#         if repeat:
#             dataset = dataset.repeat()
#         dataset = dataset.batch(batch_size)

#         # Add property to retain the class names
#         dataset.class_names = class_names

#         return dataset
    
#     logging.info('Prepare training data')
#     train_dataset = extract(args.train_data_dir)

#     logging.info('Prepare validation data')
#     val_dataset = extract(args.val_data_dir, batch_size=1, repeat=False)

#     return num_classes, train_dataset, val_dataset


def get_model(num_classes):
    """Build a pretrained timm ``regnetx_040`` whose classifier is the only trainable part.

    Args:
        num_classes: number of output classes passed to ``timm.create_model``.

    Returns:
        The model, with ``requires_grad`` disabled on every parameter except those under
        a ``head``, ``fc`` or ``classifier`` attribute (whichever of them the model has).
    """
    logging.info("Get model architecture")

    # model_ft = models.efficientnet_v2_s(weights=torchvision.models.EfficientNet_V2_S_Weights.DEFAULT)

    network = timm.create_model('regnetx_040', num_classes=num_classes, pretrained=True)

    # Freeze everything first ...
    for weight in network.parameters():
        weight.requires_grad = False

    # ... then re-enable gradients on each classifier attribute that is present.
    for attr_name in ('head', 'fc', 'classifier'):
        if not hasattr(network, attr_name):
            continue
        for weight in getattr(network, attr_name).parameters():
            weight.requires_grad = True

    return network


def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and keep the weights with the best validation accuracy.

    Every epoch runs a ``'train'`` phase (with gradient updates) followed by a ``'val'``
    phase (evaluation only). Progress is printed to stdout.

    Args:
        model: the ``torch.nn.Module`` to train; inputs are moved to the module-level
            ``device`` before the forward pass.
        dataloaders: mapping with ``'train'`` and ``'val'`` data loaders yielding
            ``(inputs, labels)`` batches.
        criterion: loss callable applied as ``criterion(outputs, labels)``.
        optimizer: optimizer stepped once per training batch.
        num_epochs: number of epochs to run.
        is_inception: accepted for interface compatibility; not used by this function.

    Returns:
        ``(model, val_acc_history)`` where ``model`` has the best-validation weights loaded
        and ``val_acc_history`` holds one validation accuracy per epoch.
    """
    since = time.time()

    val_acc_history = []

    # Snapshot of the best weights seen so far (initially the starting weights).
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is scaled by batch size so the epoch value is a
                # per-sample average even when the last batch is smaller.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    # Four decimals, consistent with the per-epoch accuracy lines above.
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

        
# num_classes, train_dataset, val_dataset = get_data()


# NOTE: `model_name` only feeds the `is_inception` flag below; the network that is actually
# trained is whatever get_model() returns.
model_name = "efficientnet_v2_s"
num_classes = 5
# Take the run configuration from the CLI flags so that --batch-size, --epochs and --lr
# passed to the job are honoured instead of being silently ignored.
batch_size = args.batch_size
num_epochs = args.epochs
feature_extract = True
input_size=224

data_transforms = transforms.Compose([
    transforms.Resize((input_size, input_size)),
    transforms.RandomResizedCrop(input_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

ds = torchvision.datasets.ImageFolder(
    args.train_data_dir.replace("gs://", "/gcs/"), 
    data_transforms
)

# 80/20 train/validation split. The validation size is derived from the training size so
# the two lengths always add up to len(ds), which random_split requires.
n_train = round(len(ds) * 0.8)
n_val = len(ds) - n_train
# NOTE(review): both subsets share `data_transforms`, so validation images also go through
# the random crop/flip augmentations.
train_ds, val_ds = torch.utils.data.random_split(ds, [n_train, n_val])
image_datasets = {'train': train_ds, 'val': val_ds}
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True, num_workers=4) for x in ['train', 'val']}

model = get_model(num_classes=num_classes)

model = model.to(device)

# get_model() freezes the backbone, so only the parameters that still require gradients
# (the classifier) are handed to the optimizer.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer_ft = optim.Adam(trainable_params, lr=args.lr)

# Setup the loss fxn
criterion = nn.CrossEntropyLoss()

# Train and evaluate
model, hist = train_model(model, dataloaders_dict, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=(model_name=="inception"))


def makedirs(model_dir):
    """Make ``model_dir`` an empty directory.

    A directory already present at that path is removed together with its contents and
    then recreated; missing parent directories are created as needed.
    """
    # isdir() is already False for a path that does not exist.
    if os.path.isdir(model_dir):
        shutil.rmtree(model_dir)
    os.makedirs(model_dir)

# Start from an empty output directory (any existing MODEL_DIR is wiped by makedirs).
makedirs(MODEL_DIR)
model_path = os.path.join(MODEL_DIR, "model.pth")

# Only the weights (state_dict) are saved; loading them requires rebuilding the same architecture.
torch.save(model.state_dict(), model_path)

#### Image models with serving functions

Previously, your model server took input as a 3-dimensional array. Image models typically take a compressed image and use a serving function fused to the model to decompress the compressed image into a 3-dimensional array, and to perform other preprocessing -- such as normalizing the pixel values.

Next, you upload your custom image model as a `Vertex AI Model` resource with a serving function. During upload, you define a serving function to convert data to the format your model expects. If you send encoded data to Vertex AI, your serving function ensures that the data is decoded on the model server before it is passed as input to your model.

##### How does the serving function work

When you send a request to an online prediction server, the request is received by a HTTP server. The HTTP server extracts the prediction request from the HTTP request content body. The extracted prediction request is forwarded to the serving function. For Google pre-built prediction containers, the request content is passed to the serving function as a `tf.string`.

The serving function consists of two parts:

- `preprocessing function`:
  - Converts the input (`tf.string`) to the input shape and data type of the underlying model (dynamic graph).
  - Performs the same preprocessing of the data that was done during training the underlying model -- e.g., normalizing, scaling, etc.
- `post-processing function`:
  - Converts the model output to the format expected by the receiving application -- e.g., compresses the output.
  - Packages the output for the receiving application -- e.g., add headings, make JSON object, etc.

Both the preprocessing and post-processing functions are converted to static graphs which are fused to the model. The output from the underlying model is passed to the post-processing function. The post-processing function passes the converted/packaged output back to the HTTP server. The HTTP server returns the output as the HTTP response content.

One consideration you need to consider when building serving functions for TF.Keras models is that they run as static graphs. That means, you cannot use TF graph operations that require a dynamic graph. If you do, you will get an error during the compile of the serving function which will indicate that you are using an EagerTensor which is not supported.

#### Define a custom image for PyTorch model training

Here we will build a custom container. A custom container is a Docker image that you create to run your training application. By running your machine learning (ML) training job in a custom container, you can use ML frameworks, non-ML dependencies, libraries, and binaries that are not otherwise supported on Vertex AI. In other words, we package training code on our local machine into a Docker container image, push the container image to Artifact Registry, and create a CustomJob.

For the ML framework we will use PyTorch.

In [None]:
# Create image repo
# The location is fixed to europe-west4, matching the registry host used in IMAGE_URI.
# NOTE(review): re-running this cell once the repository exists will presumably make gcloud
# report an "already exists" error -- confirm it is safe to ignore.
!gcloud artifacts repositories create $IMAGE_REPOSITORY \
    --repository-format=docker \
    --location=europe-west4 \
    --description="Vision Workshop Docker Image repository"

# List repositories under the project
!gcloud artifacts repositories list

In [None]:
!gcloud auth configure-docker europe-west4-docker.pkg.dev -q

In [None]:
%%writefile build_training/Dockerfile
# Specifies base image and tag
# (PyTorch 1.11.0 runtime image with CUDA 11.3 / cuDNN 8, per the tag).
FROM pytorch/pytorch:1.11.0-cuda11.3-cudnn8-runtime
WORKDIR /root

# Installs additional packages
# timm provides the model created in train_pytorch.py.
# NOTE(review): timm is not pinned, so a rebuild may pick up a different release -- consider pinning.
RUN pip install timm

# Copies the trainer code to the docker image.
COPY ./train_pytorch.py /root/train_pytorch.py

# Sets up the entry point to invoke the trainer.
# Arguments passed to the container are appended to this command.
ENTRYPOINT ["python3", "train_pytorch.py"]

In [None]:
# Build and push docker file
!docker build -t $IMAGE_URI ./build_training/
!docker push $IMAGE_URI

#### Submit the script to run on Vertex AI
In this section, we create a training pipeline. It will create custom training jobs, load our dataset and upload the model to Vertex AI after the training job is successfully completed. Learn more about creating of custom jobs [here](https://cloud.google.com/vertex-ai/docs/training/create-custom-job).

In [None]:
from google.cloud import aiplatform as vertex_ai

# Configure the SDK defaults (project, region, staging bucket, experiment) for later calls.
vertex_ai.init(
    project=PROJECT_ID,
    location=REGION,
    staging_bucket=BUCKET_NAME,
    experiment="train",
)

# Pick the first image dataset whose display name is "flowers";
# indexing raises IndexError when no such dataset exists.
flowers_datasets = vertex_ai.ImageDataset.list(filter="display_name=flowers")
ds = flowers_datasets[0]
ds

In [None]:
parameters = {"LR": 0.003}

# Flags handed to the container's entry point (train_pytorch.py).
CMDARGS = [
    f"""--train-data-dir=gs://{BUCKET_NAME}/flowers""",
    f"--lr={parameters['LR']}",
    "--epochs=10",
]

# Training job backed by the custom image built and pushed above.
job = vertex_ai.CustomContainerTrainingJob(display_name=TRAIN_JOB_NAME, container_uri=IMAGE_URI)

# Run on a single n1-standard-4 replica with one T4 GPU. No model is registered by this
# call (model_display_name stays commented out), so the return value is discarded.
_ = job.run(
    dataset=ds,
    annotation_schema_uri=vertex_ai.schema.dataset.annotation.image.classification,
    # model_display_name=MODEL_NAME,
    args=CMDARGS,
    replica_count=1,
    machine_type="n1-standard-4",
    accelerator_type="NVIDIA_TESLA_T4",
    accelerator_count=1,
)

In [None]:
# parameters = {"LR": 0.003}

# CMDARGS = [ f"""--train-data-dir=gs://{BUCKET_NAME}/flowers""",
#     "--lr=" + str(parameters["LR"]),
#     "--epochs=10"
# ]

# job = vertex_ai.CustomTrainingJob(
#     display_name=TRAIN_JOB_NAME,
#     script_path="build_training/train_tf.py",
#     container_uri="europe-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-11:latest",
#     requirements=["gcsfs", "timm"],
#     # model_serving_container_image_uri=MODEL_SERVING_IMAGE_URI,
# )


# model = job.run(
#     dataset=ds,
#     annotation_schema_uri=vertex_ai.schema.dataset.annotation.image.classification,
#     # model_display_name=MODEL_NAME,
#     args=CMDARGS,
#     replica_count=1,
#     machine_type="n1-standard-4",
#     accelerator_type="NVIDIA_TESLA_T4",
#     accelerator_count=1)

#### Deploy the model
Before you use your model to make predictions, you need to deploy it to an Endpoint. You can do this by calling the deploy function on the Model resource. This will do two things:

- create an Endpoint resource
- deploy the Model resource to the Endpoint resource

In [None]:
job.gca_resource.training_task_inputs['baseOutputDirectory']['outputUriPrefix']

In [None]:
GCS_TRAINED_MODEL_URI = f"{job.gca_resource.training_task_inputs['baseOutputDirectory']['outputUriPrefix']}/model"

In [None]:
!gsutil ls $GCS_TRAINED_MODEL_URI

In [None]:
# -p keeps the cell re-runnable: plain mkdir fails once the directory exists.
!mkdir -p trained_model
# Copy the training job's model artifacts next to the notebook.
!gsutil -m cp -r $GCS_TRAINED_MODEL_URI/* ./trained_model

In [None]:
PREDICTOR_DIRECTORY = "./predictor"

# -p keeps the cell re-runnable: plain mkdir fails once the directory exists.
!mkdir -p $PREDICTOR_DIRECTORY
# Stage the trained weights in the Docker build context of the serving image.
!cp ./trained_model/* $PREDICTOR_DIRECTORY/

In [None]:
%%writefile $PREDICTOR_DIRECTORY/custom_handler.py

import os
import json
import logging

import torch
from ts.torch_handler.base_handler import BaseHandler
from torchvision import models, transforms
import timm
from PIL import Image
import io

logger = logging.getLogger(__name__)


class TransformersClassifierHandler(BaseHandler):
    """TorchServe handler that classifies images with the fine-tuned timm ``regnetx_040``.

    Every request row carries the encoded image bytes under ``data`` (or ``body``). The
    response contains one ``{"prediction": <label>, "percentage": <confidence>}`` dict per
    row. The class name is kept unchanged so existing references keep working.
    """

    # Deterministic preprocessing: same target size and normalisation constants as the
    # training script, without the random augmentations.
    _image_transforms = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    def __init__(self):
        super(TransformersClassifierHandler, self).__init__()
        self.initialized = False

    def initialize(self, ctx):
        """Load the model weights and the optional index-to-label mapping.

        Args:
            ctx: TorchServe context; ``ctx.manifest`` names the serialized weights file and
                ``ctx.system_properties`` provides ``model_dir`` and ``gpu_id``.

        Raises:
            RuntimeError: if the serialized weights file is not present in ``model_dir``.
        """
        self.manifest = ctx.manifest

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")
        gpu_id = properties.get("gpu_id")
        # Only address a specific GPU when one was assigned; otherwise "cuda:None" would
        # be an invalid device string.
        if torch.cuda.is_available() and gpu_id is not None:
            self.device = torch.device("cuda:" + str(gpu_id))
        else:
            self.device = torch.device("cpu")

        # Locate the serialized weights
        serialized_file = self.manifest["model"]["serializedFile"]
        model_pt_path = os.path.join(model_dir, serialized_file)
        if not os.path.isfile(model_pt_path):
            raise RuntimeError("Missing the serialized model file: {0}".format(model_pt_path))

        # Rebuild the architecture used for training, then load the saved state_dict.
        self.model = timm.create_model('regnetx_040', num_classes=5)
        self.model.load_state_dict(torch.load(model_pt_path, map_location=self.device))

        self.model.to(self.device)
        self.model.eval()
        logger.debug('Model loaded successfully from {0}'.format(model_dir))

        # Read the mapping file, index to object name
        mapping_file_path = os.path.join(model_dir, "index_to_name.json")

        if os.path.isfile(mapping_file_path):
            with open(mapping_file_path) as f:
                self.mapping = json.load(f)
        else:
            # Without a mapping the class index itself is returned as the label.
            logger.warning('Missing the index_to_name.json file. Inference output will default.')
            self.mapping = {}

        self.initialized = True

    def preprocess(self, data):
        """Decode every request row into a normalised image tensor.

        Args:
            data: list of request rows; each row holds the encoded image bytes under
                ``data`` or, failing that, ``body``.

        Returns:
            A tensor with one transformed image per request row, stacked along dim 0.
        """
        images = []
        for row in data:
            bytes_input = row.get("data")
            if bytes_input is None:
                bytes_input = row.get("body")

            # Force three channels: Normalize() is configured for RGB, while decoded
            # images may be grayscale, palette-based or carry an alpha channel.
            img = Image.open(io.BytesIO(bytes_input)).convert("RGB")
            images.append(self._image_transforms(img))

        return torch.stack(images)

    def inference(self, inputs):
        """Classify a batch of preprocessed images.

        Args:
            inputs: tensor produced by ``preprocess``.

        Returns:
            One ``{"prediction", "percentage"}`` dict per input image, in input order.
        """
        # No gradients are needed at serving time.
        with torch.no_grad():
            predictions = self.model(inputs.to(self.device))
        logger.debug("Inference on batch of shape %s", tuple(inputs.shape))

        indices = predictions.argmax(dim=1).tolist()
        percentages = torch.nn.functional.softmax(predictions, dim=1) * 100

        results = []
        for row, index in enumerate(indices):
            # Fall back to the raw class index when the mapping has no entry for it.
            prediction = self.mapping.get(str(index), str(index))
            percentage = percentages[row, index].item()
            logger.info("Model predicted: '%s', percentage %s", prediction, percentage)
            results.append({"prediction" : prediction, "percentage" : percentage})
        return results

    def postprocess(self, inference_output):
        """Return the inference results unchanged (already one entry per request row)."""
        return inference_output

In [None]:
%%writefile $PREDICTOR_DIRECTORY/index_to_name.json

{
    "0": "daisy", 
    "1": "dandelion", 
    "2": "roses", 
    "3": "sunflowers", 
    "4": "tulips"
}

In [None]:
%%writefile $PREDICTOR_DIRECTORY/config.properties
# Use TorchServe's JSON request/response envelope
# (presumably so the {"instances": [...]} bodies sent in this notebook are accepted -- confirm).
service_envelope=json
# Inference API on port 7080: the port published by `docker run` and given to Model.upload.
inference_address=http://0.0.0.0:7080
# Management API on port 7081.
management_address=http://0.0.0.0:7081

In [None]:
%%writefile $PREDICTOR_DIRECTORY/Dockerfile

# NOTE(review): both the base image tag (latest-cpu) and timm are unpinned, so rebuilds may
# pick up different versions than the ones used for training -- consider pinning.
FROM pytorch/torchserve:latest-cpu

# timm is needed by custom_handler.py to rebuild the model architecture.
RUN pip3 install timm

# copy model artifacts, custom handler and other dependencies
COPY . /home/model-server/
WORKDIR /home/model-server/

USER model-server

# Ports configured in config.properties (7080 inference, 7081 management).
EXPOSE 7080
EXPOSE 7081

# create model archive file packaging model artifacts and dependencies
RUN torch-model-archiver -f \
  --model-name=model \
  --version=1.0 \
  --serialized-file=/home/model-server/model.pth \
  --handler=/home/model-server/custom_handler.py \
  --extra-files "/home/model-server/index_to_name.json" \
  --export-path=/home/model-server/model-store

# run Torchserve HTTP serve to respond to prediction requests
CMD ["torchserve", \
     "--start", \
     "--ts-config=/home/model-server/config.properties", \
     "--models", \
     "model=model.mar", \
     "--model-store", \
     "/home/model-server/model-store"]

In [None]:
CUSTOM_PREDICTOR_IMAGE_URI = f"europe-west4-docker.pkg.dev/{PROJECT_ID}/{IMAGE_REPOSITORY}/{IMAGE_NAME}-predictor:{IMAGE_TAG}"
!docker build --tag={CUSTOM_PREDICTOR_IMAGE_URI} ./predictor

In [None]:
# run docker container to start local TorchServe deployment
!docker run -t -d --rm -p 7080:7080 --name=local_bert_classifier $CUSTOM_PREDICTOR_IMAGE_URI
# delay to allow the model to be loaded in torchserve (takes a few seconds)
!sleep 20

In [None]:
CUSTOM_PREDICTOR_IMAGE_URI

In [None]:
import base64
import urllib.request
from io import BytesIO

import PIL
from torchvision import transforms


# Download the sample picture next to the notebook.
sunflower_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg"
sunflower_path = "592px-Red_sunflower.jpg"
urllib.request.urlretrieve(sunflower_url, sunflower_path)

# Resize to 224x224 and re-encode as an in-memory JPEG.
img = PIL.Image.open(sunflower_path).resize((224, 224))
buffered = BytesIO()
img.save(buffered, format="JPEG")

# Base64 text form of the JPEG, used as the "b64" value of the request instance.
jpeg_bytes = buffered.getvalue()
bytes_input = base64.b64encode(jpeg_bytes).decode("utf-8")

In [None]:
%%bash  -s "$bytes_input"

cat > ./predictor/instances.json <<END
{
   "instances": [
     {
       "data": {
         "b64": "$1"
       }
     }
   ]
}
END

In [None]:
%%bash

curl -s -X POST \
  -H "Content-Type: application/json; charset=utf-8" \
  -d @./predictor/instances.json \
  http://localhost:7080/predictions/model/

In [None]:
!docker stop local_bert_classifier

In [None]:
# ARCHIVED_MODEL_PATH = "./archived_model"

# !mkdir $ARCHIVED_MODEL_PATH

In [None]:
# import os
# IS_GOOGLE_CLOUD_NOTEBOOK = os.path.exists("/opt/deeplearning/metadata/env_version")

# # Google Cloud Notebook requires to add a path to find the installed torch-model-archiver
# if IS_GOOGLE_CLOUD_NOTEBOOK:
#     os.environ["PATH"] = f'{os.environ.get("PATH")}:~/.local/bin'
     

In [None]:
# !pip install torch-model-archiver

In [None]:
# !torch-model-archiver -f \
#   --model-name=model \
#   --version=1.0 \
#   --serialized-file=trained_model/model.pth \
#   --handler=$PREDICTOR_DIRECTORY/custom_handler.py \
#   --extra-files "$PREDICTOR_DIRECTORY/index_to_name.json" \
#   --export-path=$ARCHIVED_MODEL_PATH

In [None]:
# ARCHIVED_MODEL_GCS_URI = f"gs://{BUCKET_NAME}/archived-pytorch-model"

In [None]:
# !gsutil cp -r $ARCHIVED_MODEL_PATH $ARCHIVED_MODEL_GCS_URI

In [None]:
# !gsutil ls -al $ARCHIVED_MODEL_GCS_URI

In [None]:
VERSION = 1
model_display_name = f"pytorch-v{VERSION}"
model_description = "PyTorch based image classifier with the pre-built PyTorch image"

In [None]:
# serving_container_image_uri = "europe-docker.pkg.dev/vertex-ai/prediction/pytorch-cpu.1-11:latest"

In [None]:
!docker push $CUSTOM_PREDICTOR_IMAGE_URI

In [None]:
# Serving settings for the custom TorchServe image: port 7080 is the inference port from
# config.properties, and "model" in the predict route is the archive's --model-name.
upload_settings = {
    "display_name": model_display_name,
    "serving_container_image_uri": CUSTOM_PREDICTOR_IMAGE_URI,
    "serving_container_ports": [7080],
    "serving_container_predict_route": "/predictions/model",
    "serving_container_health_route": "/ping",
}
model = vertex_ai.Model.upload(**upload_settings)

In [None]:
# Re-bind `model` to the model uploaded above through its own resource name, instead of a
# hard-coded resource ID that only exists in the original author's project.
model = vertex_ai.Model(model_name=model.resource_name)

In [None]:
model.display_name

In [None]:
# XAI = "ig"  # [ shapley, ig, xrai ]

# if XAI == "shapley":
#     PARAMETERS = {"sampled_shapley_attribution": {"path_count": 10}}
# elif XAI == "ig":
#     PARAMETERS = {"integrated_gradients_attribution": {"step_count": 50}}
# elif XAI == "xrai":
#     PARAMETERS = {"xrai_attribution": {"step_count": 50}}

# parameters = vertex_ai.explain.ExplanationParameters(PARAMETERS)

In [None]:
# input_metadata = vertex_ai.explain.ExplanationMetadata.InputMetadata({"input_tensor_name": "numpy_inputs", "modality": "image"})
# output_metadata = vertex_ai.explain.ExplanationMetadata.OutputMetadata({"output_tensor_name": "output_0"})

# metadata = vertex_ai.explain.ExplanationMetadata(
#     inputs={"image": input_metadata}, outputs={"class": output_metadata}
# )

In [None]:
import torch

tensor = torch.tensor([3,4,5,6])

In [None]:
tensor.tolist()

In [None]:
DEPLOYED_NAME

In [None]:
DEPLOY_COMPUTE = "n1-standard-4"
TRAFFIC_SPLIT = {"0": 100}

MIN_NODES = 1
MAX_NODES = 1

# CPU-only deployment with a fixed replica count. No endpoint is passed, so the endpoint
# used is the one returned by deploy().
deploy_settings = {
    "deployed_model_display_name": DEPLOYED_NAME,
    "traffic_split": TRAFFIC_SPLIT,
    "machine_type": DEPLOY_COMPUTE,
    "accelerator_count": 0,
    "min_replica_count": MIN_NODES,
    "max_replica_count": MAX_NODES,
    # explanation_parameters=parameters,
    # explanation_metadata=metadata,
    "sync": True,
}
endpoint = model.deploy(**deploy_settings)

In [None]:
# NOTE(review): this cell repeats the image preparation done before the local TorchServe
# test; it rebuilds `bytes_input` for the request sent to the deployed endpoint.
import urllib.request
import PIL
from torchvision import transforms
import base64


# Download the sample picture next to the notebook.
sunflower_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg"
sunflower_path = "592px-Red_sunflower.jpg"
urllib.request.urlretrieve(sunflower_url, sunflower_path)

# Resize to 224x224 and re-encode as an in-memory JPEG.
img = PIL.Image.open(sunflower_path).resize((224, 224))
from io import BytesIO

buffered = BytesIO()
img.save(buffered, format="JPEG")

# Base64 text form of the JPEG bytes.
bytes_input = base64.b64encode(buffered.getvalue()).decode("utf-8")

In [None]:
instances = [{'data': {'b64': bytes_input}}]

In [None]:
endpoint.predict(instances=instances)

In [None]:
# import io

# for explanation in endpoint.explain(instances=instances).explanations:
#     attributions = dict(explanation.attributions[0].feature_attributions)
#     label_index = explanation.attributions[0].output_index[0]
#     class_name = class_names[label_index]
#     b64str = attributions["image"]["b64_jpeg"]
#     image = base64.b64decode(b64str)
#     image = io.BytesIO(image)
#     img = mpimg.imread(image, format="JPG")

#     plt.imshow(img, interpolation="nearest")
#     plt.show()

#### Test the deployed model (Make an online prediction request)
Send an online prediction request to your deployed model. To make sure your deployed model is working, test it out by sending a request to the endpoint.

Let's first get a test data.

In [None]:
import numpy as np
from PIL import Image
import os
import base64
import tensorflow as tf

In [None]:
# NOTE(review): this replaces the `endpoint` returned by model.deploy() with a lookup by the
# display name "{MODEL_NAME}_endpoint", whereas the model in this notebook is uploaded under
# `model_display_name` -- confirm the filter. Indexing raises IndexError when nothing matches.
endpoint = vertex_ai.Endpoint.list(filter=f"display_name={MODEL_NAME}_endpoint")[0]

In [None]:
# NOTE(review): this looks the model up by MODEL_NAME, whereas the upload cell in this
# notebook uses `model_display_name` -- confirm the filter. Indexing raises IndexError when
# nothing matches.
model = vertex_ai.Model.list(filter=f"display_name={MODEL_NAME}")[0]

In [None]:
endpoint

In [None]:
from io import BytesIO

# Fetch the sample picture through the Keras download helper.
sunflower_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/592px-Red_sunflower.jpg"
sunflower_path = tf.keras.utils.get_file('Red_sunflower', origin=sunflower_url)

# Resize to 384x384 and re-encode as an in-memory JPEG.
img = Image.open(sunflower_path).resize((384, 384))
buffered = BytesIO()
img.save(buffered, format="JPEG")

# One request instance carrying the base64-encoded JPEG under data.b64.
jpeg_bytes = buffered.getvalue()
bytes_input = base64.b64encode(jpeg_bytes).decode("utf-8")
instances = [{'data': {'b64': bytes_input}}]
img

In [None]:
# Labels, looked up by the position of the highest score.
class_names = ["daisy", "dandelion", "roses", "sunflowers", "tulips"]

predictions = endpoint.predict(instances=instances)
# NOTE(review): `predictions[0]` indexes the object returned by `predict`, not
# necessarily the first instance's scores; confirm against the SDK return type.
score = tf.nn.softmax(predictions[0])

top_index = np.argmax(score)
top_confidence = 100 * np.max(score)
message = "This image most likely belongs to {} with a {:.2f} percent confidence."
print(message.format(class_names[top_index], top_confidence))

### Batch predictions

Send a batch prediction request to your deployed model.

Batch prediction provides the ability to do offline batch processing of large amounts of prediction requests. Resources are only provisioned during the batch process and then deprovisioned when the batch request is completed. The results are stored in Cloud Storage, in contrast to online prediction, where the results are returned in an HTTP response.

The input format for your batch job depends on the formats supported by your model server. Foremost, the web server in your model server must support a JSONL format, which the web server converts to a format supported either directly by the model input interface or by a serving function interface. For batch prediction, this JSONL format is referred to as the `pivot` format.

### Input format for batch prediction jobs

The batch server accepts the following input formats for custom image models:

- JSONL
- File-List

### Output format for batch prediction jobs

The batch server supports the following output formats for custom image models:

- JSONL

### Pivot format

The batch server converts the input format to the `pivot` (JSONL) format as follows:

**JSONL**

Each input line (request) should contain one and only one valid json value.

    {"values": [1, 2, 3, 4], "key": 1}
    {"values": [5, 6, 7, 8], "key": 2}

The batch server generates the pivot data with the same format. The generated pivot data is then wrapped into a payload request:

    {"instances": [
      {"values": [1, 2, 3, 4], "key": 1},
      {"values": [5, 6, 7, 8], "key": 2}
    ]}

**FileList**

The FileList format contains a list of files. Each line in a “FileList” file specifies a single file path, specified as a Cloud Storage location.

    gs://my-bucket/file1.txt
    gs://my-bucket/file2.txt

The batch server reads the files as binaries. The binary objects are serialized as base64-encoded ASCII strings.

    {"instances": [
     {"b64": "b64EncodedASCIIString"},
     {"b64": "b64EncodedASCIIString"}
    ]}

Let's create such a `FileList` input file by sampling ten images per label from our training dataset.

In [None]:
from google.cloud import storage

# Number of images sampled per flower class for the batch prediction input.
SAMPLES_PER_LABEL = 10

client = storage.Client()


def _sample_blobs(prefix, limit=SAMPLES_PER_LABEL):
    """Return up to `limit` blobs from BUCKET_NAME whose names start with `prefix`.

    `max_results` stops the listing after `limit` objects, instead of fetching
    every object under the prefix and slicing the result afterwards.
    """
    return list(client.list_blobs(BUCKET_NAME, prefix=prefix, max_results=limit))


daisies = _sample_blobs("flowers/daisy")
tullips = _sample_blobs("flowers/tulips")
roses = _sample_blobs("flowers/roses")
sunflowers = _sample_blobs("flowers/sunflowers")
dandelions = _sample_blobs("flowers/dandelion")

# Same concatenation order as before, so the resulting file list is unchanged.
images = daisies + tullips + roses + sunflowers + dandelions

In [None]:
# Preview the first ten dandelion blobs. `max_results` stops the listing after
# ten objects instead of fetching the whole prefix and slicing it.
list(client.list_blobs(BUCKET_NAME, prefix='flowers/dandelion', max_results=10))

In [None]:
# Build the gs:// URIs from the per-class blob samples rather than from
# `images` itself. `images` is rebound from blobs to strings here, so deriving
# it from its own previous value would fail when this cell is run a second time.
images = [
    f"gs://{blob.bucket.name}/{blob.name}"
    for blob in daisies + tullips + roses + sunflowers + dandelions
]
images

In [None]:
import base64
import json
from io import BytesIO

# Cloud Storage location of the file-list input: one image URI per line.
gcs_input_uri = f"gs://{BUCKET_NAME}/flowers_batch.txt"

with tf.io.gfile.GFile(gcs_input_uri, "w") as manifest:
    for uri in images:
        manifest.write(f"{uri}\n")

#### Send the prediction request

To make a batch prediction request, call the model object's `batch_predict` method with the following parameters: 
- `instances_format`: The format of the batch prediction request file: "jsonl", "csv", "bigquery", "tf-record", "tf-record-gzip" or "file-list"
- `predictions_format`: The format of the batch prediction response file: "jsonl", "csv" or "bigquery".
- `job_display_name`: The human readable name for the prediction job.
- `gcs_source`: A list of one or more Cloud Storage paths to your batch prediction requests.
- `gcs_destination_prefix`: The Cloud Storage path that the service will write the predictions to.
- `model_parameters`: Additional filtering parameters for serving prediction results.
- `machine_type`: The type of machine to use for batch prediction.
- `accelerator_type`: The hardware accelerator type.
- `accelerator_count`: The number of accelerators to attach to a worker replica.
- `starting_replica_count`: The number of compute instances to initially provision.
- `max_replica_count`: The maximum number of compute instances to scale to. In this tutorial, only one instance is provisioned.

### Compute instance scaling

You can specify a single instance (or node) to process your batch prediction request. This tutorial uses a single node, so the variables `MIN_NODES` and `MAX_NODES` are both set to `1`.

If you want to use multiple nodes to process your batch prediction request, set `MAX_NODES` to the maximum number of nodes you want to use. Vertex AI autoscales the number of nodes used to serve your predictions, up to the maximum number you set. Refer to the [pricing page](https://cloud.google.com/vertex-ai/pricing#prediction-prices) to understand the costs of autoscaling with multiple nodes.


In [None]:
# Single-node job: start with one replica and never scale beyond one.
MIN_NODES = 1
MAX_NODES = 1

# Collect the job settings first so the call itself stays short.
batch_job_options = dict(
    job_display_name=f"{DEPLOYED_NAME}_batch",
    gcs_source=gcs_input_uri,
    gcs_destination_prefix=f"gs://{BUCKET_NAME}",
    instances_format="file-list",
    model_parameters=None,
    machine_type=DEPLOY_COMPUTE,
    starting_replica_count=MIN_NODES,
    max_replica_count=MAX_NODES,
    generate_explanation=False,
    sync=True,
)
batch_predict_job = model.batch_predict(**batch_job_options)

print(batch_predict_job)

#### Get the predictions

Next, get the results from the completed batch prediction job.

The results are written to the Cloud Storage output bucket you specified in the batch prediction request. You call the method iter_outputs() to get a list of each Cloud Storage file generated with the results. Each file contains one or more prediction requests in a JSON format:

- `instance`: The prediction request.
- `prediction`: The prediction response.

In [None]:
import tensorflow as tf
import pandas as pd
# NOTE(review): 0 is presumably meant to disable column-width truncation so the
# full prediction values are shown; pandas documents `None` for "unlimited" — confirm.
pd.set_option("display.max_colwidth", 0)

In [None]:
# Display the Cloud Storage directory recorded in the job's output info.
batch_predict_job.output_info.gcs_output_directory

In [None]:
# Keep only the output blobs whose file name starts with "prediction.results"
# and turn each into a gs:// URI.
bp_iter_outputs = batch_predict_job.iter_outputs()
filenames = []
for blob in bp_iter_outputs:
    basename = blob.name.rsplit("/", 1)[-1]
    if basename.startswith("prediction.results"):
        filenames.append(f"gs://{blob.bucket.name}/{blob.name}")

In [None]:
# Display the result file URIs collected above.
filenames

In [None]:
# Read every results file as JSON Lines (one record per line), then stack the
# frames into a single table.
dataframes = []
for filename in filenames:
    dataframes.append(pd.read_json(filename, lines=True))

batch_predictions = pd.concat(dataframes)
batch_predictions

Let's interpret the results

In [None]:
class_names = ['daisy', 'dandelion', 'roses', 'sunflowers', 'tulips']


def _summarize_prediction(raw_prediction):
    """Format one prediction as "<class name> <confidence>%".

    Applies softmax to the raw prediction values and reports the highest-scoring
    class. Values that are already strings are returned unchanged, so re-running
    this cell on an already-formatted column is a no-op instead of an error.
    """
    if isinstance(raw_prediction, str):
        return raw_prediction
    score = tf.nn.softmax(raw_prediction)
    return "{} {:.2f}%".format(class_names[np.argmax(score)], 100 * np.max(score))


batch_predictions["prediction"] = batch_predictions["prediction"].apply(_summarize_prediction)

In [None]:
# Display the table with the formatted prediction column.
batch_predictions