In [None]:
# Copyright 2021 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Vertex Pipelines: Vertex AI Hyperparameter Tuning Job

## Overview
This notebook shows how to use the `HyperparameterTuningJobRunOp` to run a hyperparameter tuning job in Vertex AI for a TensorFlow model. While this lab uses TensorFlow for the model code, you could easily replace it with another framework. This sample notebook is based on the [Vertex AI:Hyperparameter Tuning Codelab](https://codelabs.developers.google.com/vertex_hyperparameter_tuning).

To learn more about Vertex AI Hyperparameter Tuning Job see [Vertex AI Hyperparameter Tuning Job](https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning). 

For the `HyperparameterTuningJobRunOp` interface, please see the [source code here](https://github.com/kubeflow/pipelines/blob/master/components/google-cloud/google_cloud_pipeline_components/experimental/hyperparameter_tuning_job).

### Install additional packages

In [None]:
!pip3 install  -U google-cloud-pipeline-components -q
!pip3 install  -U google-cloud-aiplatform -q
!pip3 install  -U kfp -q

In [None]:
# Restart the kernel after the pip installs; do_shutdown(True) requests a
# restart rather than a plain shutdown.
import IPython

IPython.Application.instance().kernel.do_shutdown(True)

### Create directory structure

In [None]:
!mkdir horses_or_humans
!mkdir horses_or_humans/trainer

## Containerize training application code

The training application code (inner script) will be put in a Docker container and it will be pushed to the Google Container Registry. After that, the hyperparameter tuning job will be submitted to Vertex by using the `HyperparameterTuningJobRunOp` in a Kubeflow Pipeline. Using this approach, you can tune hyperparameters for a model built with any framework.

First, the files below will be created under the `horses_or_humans` directory:
+ Dockerfile
+ trainer/
    + task.py

### Set your Project ID and Region

In [None]:
# Google Cloud project ID; replace the placeholder. It is used in the container
# image URI and passed to the API client and the pipeline components.
PROJECT_ID = "[your-project-id]" #@param {type:"string"}
# Region passed to the API client and the pipeline components.
REGION = "us-central1"

### Create a Dockerfile

In [None]:
%%file horses_or_humans/Dockerfile

# Base image with TensorFlow 2.5 and GPU support from Deep Learning Containers.
FROM gcr.io/deeplearning-platform-release/tf2-gpu.2-5

# Work from the filesystem root, where the trainer package is copied below.
WORKDIR /

# Installs hypertune library
# (imported by trainer/task.py to report the tuning metric).
RUN pip install cloudml-hypertune

# Copies the trainer code to the docker image.
COPY trainer /trainer

# Sets up the entry point to invoke the trainer.
# The module is run with -m, so it is resolved as the `trainer.task` module.
ENTRYPOINT ["python", "-m", "trainer.task"]

The Dockerfile uses the [Deep Learning Container TensorFlow Enterprise 2.5 GPU Docker image](https://cloud.google.com/ai-platform/deep-learning-containers/docs/choosing-container#choose_a_container_image_type?utm_campaign=CDR_sar_aiml_ucaiplabs_011321&utm_source=external&utm_medium=web). The Deep Learning Containers on Google Cloud come with many common ML and data science frameworks pre-installed. After downloading that image, this Dockerfile sets up the entrypoint for the training code.

### Add model training code

In [None]:
%%file horses_or_humans/trainer/task.py

import tensorflow as tf
import tensorflow_datasets as tfds
import argparse
import hypertune

# Number of training epochs per trial; also reported as the `global_step` of
# the tuning metric in main().
NUM_EPOCHS = 10


def get_args():
  '''Parses the command-line flags, one per tunable hyperparameter.

  Every hyperparameter to be tuned needs a flag here. All flags are required.

  Returns:
    The parsed argparse namespace with `learning_rate` (float), `momentum`
    (float) and `num_neurons` (int).
  '''

  # (flag, type, help text) for each hyperparameter.
  hyperparameter_flags = (
      ('--learning_rate', float, 'learning rate'),
      ('--momentum', float, 'SGD momentum value'),
      ('--num_neurons', int, 'number of units in last hidden layer'),
  )

  parser = argparse.ArgumentParser()
  for flag, flag_type, description in hyperparameter_flags:
    parser.add_argument(flag, required=True, type=flag_type, help=description)
  return parser.parse_args()


def preprocess_data(image, label):
  '''Resizes an image to 150x150 and divides its pixel values by 255.

  Args:
    image: image tensor to resize and scale.
    label: label for the image; returned unchanged.

  Returns:
    A (scaled_image, label) tuple, where scaled_image is float32.
  '''

  resized = tf.image.resize(image, (150, 150))
  scaled = tf.cast(resized, tf.float32) / 255.
  return scaled, label


def create_dataset():
  '''Builds the batched training and validation sets for Horses Or Humans.

  Returns:
    A (train_data, validation_data) tuple of preprocessed datasets, each
    batched in groups of 64.
  '''

  # with_info=True also returns dataset metadata, which is not used here.
  splits, _ = tfds.load(
      name='horses_or_humans', as_supervised=True, with_info=True)

  # Training split: preprocess, shuffle with a 1000-element buffer, then batch.
  train_data = splits['train'].map(preprocess_data).shuffle(1000).batch(64)

  # Validation uses the 'test' split: preprocess and batch, no shuffling.
  validation_data = splits['test'].map(preprocess_data).batch(64)

  return train_data, validation_data


def create_model(num_neurons, learning_rate, momentum):
  '''Defines and compiles the binary image classifier.

  Args:
    num_neurons: number of units in the last hidden (dense) layer.
    learning_rate: learning rate for the SGD optimizer.
    momentum: momentum for the SGD optimizer.

  Returns:
    A compiled tf.keras.Model taking 150x150x3 inputs and producing a single
    sigmoid output.
  '''

  layers = tf.keras.layers

  inputs = tf.keras.Input(shape=(150, 150, 3))

  # Three conv + max-pool stages with 16, 32 and 64 filters.
  x = inputs
  for filters in (16, 32, 64):
    x = layers.Conv2D(filters, (3, 3), activation='relu')(x)
    x = layers.MaxPooling2D((2, 2))(x)

  # Classification head: flatten, tunable hidden layer, sigmoid output.
  x = layers.Flatten()(x)
  x = layers.Dense(num_neurons, activation='relu')(x)
  outputs = layers.Dense(1, activation='sigmoid')(x)

  model = tf.keras.Model(inputs, outputs)
  sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=momentum)
  model.compile(loss='binary_crossentropy', optimizer=sgd, metrics=['accuracy'])
  return model


def main():
  '''Trains one trial and reports its final validation accuracy.'''
  args = get_args()
  train_data, validation_data = create_dataset()
  model = create_model(
      num_neurons=args.num_neurons,
      learning_rate=args.learning_rate,
      momentum=args.momentum)
  history = model.fit(
      train_data, epochs=NUM_EPOCHS, validation_data=validation_data)

  # Metric to optimize: validation accuracy recorded for the last epoch.
  final_val_accuracy = history.history['val_accuracy'][-1]

  # The tag must match the metric id used when the tuning job is configured.
  reporter = hypertune.HyperTune()
  reporter.report_hyperparameter_tuning_metric(
      hyperparameter_metric_tag='accuracy',
      metric_value=final_val_accuracy,
      global_step=NUM_EPOCHS)


# Run training only when the module is executed as a script (the container
# entry point runs it with `python -m trainer.task`), not when it is imported.
if __name__ == "__main__":
    main()

The Python file `task.py` is an inner script that contains the model training code. There are a few components that are specific to using the hyperparameter tuning service.

1. The script imports the `hypertune` library. Note that the Dockerfile included instructions to pip install this library.


2. The function `get_args()` defines a command-line argument for each hyperparameter you want to tune. In this example, the hyperparameters that will be tuned are the learning rate, the momentum value in the optimizer, and the number of neurons in the last hidden layer of the model. While these are the only hyperparameters targeted here, you are free to modify others. The value passed in those arguments is then used to set the corresponding hyperparameter in the code.

3. At the end of the `main()` function, the `hypertune` library is used to define the metric you want to optimize. In TensorFlow, the keras `model.fit` method returns a `History` object. The `History.history` attribute is a record of training loss values and metrics values at successive epochs. If you pass validation data to `model.fit` the `History.history` attribute will include validation loss and metrics values as well. For example, if you trained a model for three epochs with validation data and provided `accuracy` as a metric, the `History.history` attribute would look similar to the following dictionary.
```
{
 "accuracy": [
   0.7795261740684509,
   0.9471358060836792,
   0.9870933294296265
 ],
 "loss": [
   0.6340447664260864,
   0.16712145507335663,
   0.04546636343002319
 ],
 "val_accuracy": [
   0.3795261740684509,
   0.4471358060836792,
   0.4870933294296265
 ],
 "val_loss": [
   2.044623374938965,
   4.100203514099121,
   3.0728273391723633
 ]
}
```
If you want the hyperparameter tuning service to discover the values that maximize the model's validation accuracy, you can define the metric as the last entry (index `NUM_EPOCHS - 1`) of the `val_accuracy` list. Then, pass this metric to an instance of `HyperTune`. You can pick whatever string you like for the `hyperparameter_metric_tag`, but you’ll need to use the same string again later when you kick off the hyperparameter tuning job.

### Build and push the container to the Google Container Registry

In [None]:
IMAGE_URI=f"gcr.io/{PROJECT_ID}/horse-human:hypertune"
%cd horses_or_humans
!docker build ./ -t {IMAGE_URI}
!docker push {IMAGE_URI}

## Launch Hyperparameter Tuning Job

This section covers launching the hyperparameter tuning job. The component expects the exact JSON representation of the protos involved, as documented in the [REST API](https://cloud.google.com/vertex-ai/docs/reference/rest/v1/projects.locations.hyperparameterTuningJobs/create) for Vertex AI hyperparameter tuning jobs. The example here uses utility functions to convert the metric and parameter specs accepted by [HyperparameterTuningJob](https://github.com/googleapis/python-aiplatform/blob/main/google/cloud/aiplatform/jobs.py) in the [Vertex AI SDK](https://github.com/googleapis/python-aiplatform) into that exact JSON representation, for compatibility.

### Import libraries

In [None]:
from google.cloud.aiplatform import hyperparameter_tuning as hpt
from google_cloud_pipeline_components.experimental import hyperparameter_tuning_job
from google_cloud_pipeline_components.experimental.custom_job import CustomTrainingJobOp
from kfp.v2 import dsl
from kfp.v2 import compiler
from kfp.v2.google.client import AIPlatformClient

### Instantiate an API client object

In [None]:
# Client for the configured project and region; used further down to submit the
# compiled pipeline job spec.
api_client = AIPlatformClient(project_id=PROJECT_ID, region=REGION)

### Define specs for Hyperparameter Tuning

In [None]:
# Worker pool definition: a single replica of the training container on an
# n1-standard-4 machine with one NVIDIA Tesla T4 accelerator.
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": "n1-standard-4",
            "accelerator_type": "NVIDIA_TESLA_T4",
            "accelerator_count": 1,
        },
        "replica_count": 1,
        "container_spec": {"image_uri": IMAGE_URI},
    }
]

# Metrics to optimize, serialized from a dictionary. Each key is the metric_id
# reported by the training job and each value is that metric's optimization
# goal.
metric_spec = hyperparameter_tuning_job.serialize_metrics({"accuracy": "maximize"})

# Parameters to optimize, serialized from a dictionary. Each key is the
# parameter_id, which is passed to the training job as a command-line keyword
# argument, and each value is the specification of that parameter.
parameter_spec = hyperparameter_tuning_job.serialize_parameters(
    {
        "learning_rate": hpt.DoubleParameterSpec(min=0.001, max=1, scale="log"),
        "momentum": hpt.DoubleParameterSpec(min=0, max=1, scale="linear"),
        "num_neurons": hpt.DiscreteParameterSpec(values=[64, 128, 512], scale=None),
    }
)

### Define the pipeline

In [None]:
# Cloud Storage location; replace the placeholder. It is used both as the
# pipeline root and as the tuning job's base output directory.
PIPELINE_ROOT = 'gs://[your-base-output-directory]'  #@param {type:"string"}
    
# Pipeline: run the hyperparameter tuning job, select the best trial and, when
# its metric clears the threshold, launch a custom training job that reuses the
# worker pool spec with the best hyperparameters applied.
@dsl.pipeline(pipeline_root=PIPELINE_ROOT, name='hp-tune-pipeline')
def hp_tune_pipeline():

    # Run up to 15 trials, 3 at a time.
    hp_tuning_task = hyperparameter_tuning_job.HyperparameterTuningJobRunOp(
        display_name='hp-job',
        project=PROJECT_ID,
        location=REGION,
        worker_pool_specs=worker_pool_specs,
        study_spec_metrics=metric_spec,
        study_spec_parameters=parameter_spec,
        max_trial_count=15,
        parallel_trial_count=3,
        base_output_directory=PIPELINE_ROOT
    )

    # Fetch the trials of the finished tuning job.
    trials_task = hyperparameter_tuning_job.GetTrialsOp(
      gcp_resources=hp_tuning_task.outputs['gcp_resources'], region=REGION)

    # Pick the best trial according to the metric spec.
    best_trial_task = hyperparameter_tuning_job.GetBestTrialOp(
      trials=trials_task.output, study_spec_metrics=metric_spec)

    # Check the best trial's metric against the 0.7 threshold.
    is_accuracy_beyond_threshold_task = hyperparameter_tuning_job.IsMetricBeyondThresholdOp(
      trial=best_trial_task.output, study_spec_metrics=metric_spec, threshold=0.7)

    # The check's output is compared as the string "true".
    with dsl.Condition(
        is_accuracy_beyond_threshold_task.output == "true",
        name="deploy_decision",
    ):
        best_hyperparameters_task = hyperparameter_tuning_job.GetHyperparametersOp(
          trial=best_trial_task.output)

        # Construct new worker_pool_specs based on best hyperparameters
        worker_pool_specs_task = hyperparameter_tuning_job.GetWorkerPoolSpecsOp(
          best_hyperparameters=best_hyperparameters_task.output,
          worker_pool_specs=worker_pool_specs
        )

        # Train new model based on new worker_pool_specs.
        # CustomTrainingJobOp is the name this notebook imports from
        # `experimental.custom_job`; it is not looked up on the
        # `hyperparameter_tuning_job` module.
        training_task = CustomTrainingJobOp(
          project=PROJECT_ID,
          location=REGION,
          display_name='training-job',
          worker_pool_specs=worker_pool_specs_task.output
        )

### Compile and run the pipeline

In [None]:
# Compile the pipeline function into a JSON job spec in the working directory.
compiler.Compiler().compile(
    pipeline_func=hp_tune_pipeline,
    package_path="hp_tune_pipeline_job.json",
)

# Submit the compiled job spec through the API client created earlier.
response = api_client.create_run_from_job_spec(
    job_spec_path="hp_tune_pipeline_job.json",
    # pipeline_root=PIPELINE_ROOT  # this argument is necessary if you did not specify PIPELINE_ROOT as part of the pipeline definition.
)