## Dataset
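The dataset used for this tutorial is the Iris dataset, which is also published in TensorFlow Datasets. The training script in this notebook reads it as CSV files from the public Cloud Storage location `gs://cloud-samples-data/ai-platform/iris`.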

In [None]:
import os

# Google Cloud Notebook
if os.path.exists("/opt/deeplearning/metadata/env_version"):
    USER_FLAG = "--user"
else:
    USER_FLAG = ""

!pip3 install --upgrade google-cloud-aiplatform $USER_FLAG

In [None]:
# Install or upgrade the Cloud Storage client library, reusing the pip flag
# stored in USER_FLAG.
!pip3 install -U google-cloud-storage $USER_FLAG

## Set up your Google Cloud project

In [None]:
# Fill in your Google Cloud project ID before running the rest of the notebook.
PROJECT_ID = ""  # @param {type:"string"}
REGION = "us-central1"
# Cloud Storage bucket URI derived from the project ID.
BUCKET_NAME = "gs://" + PROJECT_ID + "-bucket"

## Set up variables and initialize Vertex SDK for Python

In [None]:
import google.cloud.aiplatform as aip
from google.cloud.aiplatform import hyperparameter_tuning as hpt

In [None]:
# Initialize the Vertex SDK. `location` is passed explicitly so that changing
# REGION actually changes where jobs are created.
aip.init(project=PROJECT_ID, location=REGION, staging_bucket=BUCKET_NAME)

## Set machine type

In [None]:
import os
import sys

# Training machine: the family comes from IS_TESTING_TRAIN_MACHINE when that
# variable is set to a non-empty value, otherwise "n1-standard".
MACHINE_TYPE = os.getenv("IS_TESTING_TRAIN_MACHINE") or "n1-standard"
VCPU = "4"
TRAIN_COMPUTE = f"{MACHINE_TYPE}-{VCPU}"
print("Train machine type", TRAIN_COMPUTE)

# Deployment machine: same rule, driven by IS_TESTING_DEPLOY_MACHINE.
MACHINE_TYPE = os.getenv("IS_TESTING_DEPLOY_MACHINE") or "n1-standard"
VCPU = "4"
DEPLOY_COMPUTE = f"{MACHINE_TYPE}-{VCPU}"
print("Deploy machine type", DEPLOY_COMPUTE)

## Create Dockerfile

In [None]:
%%writefile Dockerfile

FROM python:3.6.9-buster
WORKDIR /root

# Python dependencies used by the trainer (xgboost, pandas, hypertune).
RUN pip install xgboost pandas
RUN pip install cloudml-hypertune
RUN pip install tensorflow_datasets==1.3.0

# Installs google cloud sdk, this is mostly for using gsutil to export model.
# The last step removes the backup directory that gcloud creates.
RUN wget -nv \
    https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz && \
    mkdir /root/tools && \
    tar xvzf google-cloud-sdk.tar.gz -C /root/tools && \
    rm google-cloud-sdk.tar.gz && \
    /root/tools/google-cloud-sdk/install.sh --usage-reporting=false \
        --path-update=false --bash-completion=false \
        --disable-installation-options && \
    rm -rf /root/.config/* && \
    ln -s /root/.config /config && \
    rm -rf /root/tools/google-cloud-sdk/.install/.backup

# Path configuration
ENV PATH="$PATH:/root/tools/google-cloud-sdk/bin"
# Make sure gsutil will use the default service account
RUN echo '[GoogleCompute]\nservice_account = default' > /etc/boto.cfg

# Copy the trainer module into the image.
RUN mkdir /root/trainer
COPY trainer/task.py /root/trainer/task.py

# Run the trainer as a module; WORKDIR is /root, so `trainer.task` refers to
# /root/trainer/task.py.
ENTRYPOINT ["python","-m","trainer.task"]

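## Write Trainer

The `%%writefile` cell below writes to `trainer/task.py`. Create the `trainer/` directory first (for example with `mkdir -p trainer`) if it does not already exist, because the Dockerfile copies `trainer/task.py` from the build context.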

In [None]:
%%writefile trainer/task.py
# Single Instance Training for Iris

import datetime
import os
import subprocess
import sys
import hypertune
import pandas as pd
import xgboost as xgb

import argparse

def get_args():
    """Parse the trainer's command-line arguments.

    Returns:
        argparse.Namespace with:
          * ``model_dir`` (str): output location for the model; defaults to
            the ``AIP_MODEL_DIR`` environment variable.
          * ``max_depth`` (int): max depth of each XGBoost tree, default 3.
          * ``learning_rate`` (float): XGBoost learning rate, default 0.1.
          * ``num_boost_round`` (int): boosting iterations, default 10.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_dir', dest='model_dir',
        default=os.getenv('AIP_MODEL_DIR'), type=str, help='Model dir.')
    # Explicit types so values supplied on the command line are parsed as
    # numbers instead of being left as strings.
    parser.add_argument(
        '--max_depth',
        help='Max depth of XGB tree',
        type=int,
        default=3)
    parser.add_argument(
        '--learning_rate',
        help='Learning rate of XGB model',
        type=float,
        default=0.1)
    parser.add_argument(
        '--num_boost_round',
        help='Number of boosting iterations.',
        type=int,
        default=10)
    return parser.parse_args()
    
def _gsutil_cp(source, destination):
    """Copy ``source`` to ``destination`` using ``gsutil cp``.

    Raises:
        subprocess.CalledProcessError: if gsutil exits with a non-zero status.
    """
    # gsutil outputs everything to stderr so we need to divert it to stdout.
    subprocess.check_call(['gsutil', 'cp', source, destination],
                          stderr=sys.stdout)


def main():
    """Train an XGBoost Iris classifier, upload it, and report its mlogloss.

    Downloads the Iris CSV files with gsutil, trains a multiclass booster
    using the parsed command-line hyperparameters, copies the saved model to
    ``args.model_dir``, and reports the ``mlogloss`` metric through hypertune.
    """
    args = get_args()

    # Download data
    iris_data_filename = 'iris_data.csv'
    iris_target_filename = 'iris_target.csv'
    data_dir = 'gs://cloud-samples-data/ai-platform/iris'
    for filename in (iris_data_filename, iris_target_filename):
        _gsutil_cp(os.path.join(data_dir, filename), filename)

    # Load data into pandas, then use `.values` to get NumPy arrays
    iris_data = pd.read_csv(iris_data_filename).values
    iris_target = pd.read_csv(iris_target_filename).values

    # Convert one-column 2D array into 1D array for use with XGBoost
    iris_target = iris_target.reshape((iris_target.size,))

    # Load data into DMatrix object
    dtrain = xgb.DMatrix(iris_data, label=iris_target)

    param = {
        'max_depth': args.max_depth,  # the maximum depth of each tree
        'learning_rate': args.learning_rate,
        'eval_metric': 'mlogloss',
        'objective': 'multi:softprob',  # multiclass objective
        'num_class': 3}  # the number of classes that exist in this dataset

    # Train XGBoost model
    bst = xgb.train(param, dtrain, num_boost_round=args.num_boost_round)

    # Export the classifier to a file
    model_filename = 'model.bst'
    bst.save_model(model_filename)

    # Upload the saved model file once (the original copied it twice).
    gcs_model_path = os.path.join(args.model_dir, model_filename)
    _gsutil_cp(model_filename, gcs_model_path)

    # Start hp tuning metrics
    # Using dtrain for sample purposes.
    # The metric value is the text after the first ':' in the eval output.
    mlogloss = float(bst.eval(dtrain).split(':')[1])
    # Report the mlogloss as hyperparameter tuning objective metric.
    hpt = hypertune.HyperTune()
    hpt.report_hyperparameter_tuning_metric(
        hyperparameter_metric_tag='mlogloss',
        metric_value=mlogloss,
        global_step=1)

if __name__ == '__main__':
    main()

## Build Image

In [None]:
IMAGE_URI=f"gcr.io/{PROJECT_ID}/iris:hypertune"
!docker build -f Dockerfile -t $IMAGE_URI ./

In [None]:
# Run the built image locally with AIP_MODEL_DIR set to "/"; the trainer uses
# that variable as the default for --model_dir.
!docker run -e AIP_MODEL_DIR="/" $IMAGE_URI 

In [None]:
# Push the image to the registry named in IMAGE_URI.
!docker push $IMAGE_URI

## Deploy HPT Job

In [None]:
from datetime import datetime

# Timestamp suffix used in the job display names.
TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# One replica running the custom trainer image.
worker_pool_specs = [
    {
        "machine_spec": {
            # Use the training machine type computed in the "Set machine
            # type" section instead of repeating the literal.
            "machine_type": TRAIN_COMPUTE,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": IMAGE_URI,
        },
    }
]

# Objective: minimize the `mlogloss` metric reported by the trainer.
metric_spec = {"mlogloss": "minimize"}

# Search space; the keys match the trainer's command-line flags.
parameter_spec = {
    "learning_rate": hpt.DoubleParameterSpec(min=0.01, max=0.9, scale="log"),
    "max_depth": hpt.IntegerParameterSpec(min=3, max=10, scale="linear"),
    "num_boost_round": hpt.IntegerParameterSpec(min=1, max=30, scale="linear"),
}

In [None]:
# Custom job definition that each tuning trial will run.
job = aip.CustomJob(
    display_name=f"iris_{TIMESTAMP}",
    worker_pool_specs=worker_pool_specs,
    staging_bucket=BUCKET_NAME,
)

In [None]:
# Tuning job over `parameter_spec`: at most 15 trials, 5 running in parallel.
hp_job = aip.HyperparameterTuningJob(
    display_name=f"iris_{TIMESTAMP}",
    custom_job=job,
    metric_spec=metric_spec,
    parameter_spec=parameter_spec,
    max_trial_count=15,
    parallel_trial_count=5,
)
hp_job.run()

## Train a model with conditional parameters
### Create and run custom training job

In [None]:
from datetime import datetime

TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

# Each trial runs the custom container built in the "Build Image" section.
# A worker pool takes either a `container_spec` or a `python_package_spec`,
# not both, so only the container is configured. The original also referenced
# an undefined TRAIN_IMAGE and a package URI with a stray "$".
worker_pool_specs = [
    {
        "machine_spec": {
            "machine_type": TRAIN_COMPUTE,
        },
        "replica_count": 1,
        "container_spec": {
            "image_uri": IMAGE_URI,
        },
    }
]

print(worker_pool_specs)

# Objective: minimize the `mlogloss` metric reported by the trainer.
metric = {
    "metric_id": "mlogloss",
    "goal": aip.gapic.StudySpec.MetricSpec.GoalType.MINIMIZE,
}

# `learning_rate` is only tuned when the parent `max_depth` is 4, 8 or 16.
conditional_parameter_learning_rate = {
    "parameter_spec": {
        "parameter_id": "learning_rate",
        "double_value_spec": {"min_value": 1e-07, "max_value": 1},
        "scale_type": aip.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
    },
    "parent_discrete_values": {"values": [4, 8, 16]},
}

# `num_boost_round` is only tuned when the parent `max_depth` is 32 or 64.
conditional_parameter_num_boost_round = {
    "parameter_spec": {
        "parameter_id": "num_boost_round",
        "discrete_value_spec": {"values": [4, 8, 16, 32, 64, 128]},
        "scale_type": aip.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
    },
    "parent_discrete_values": {"values": [32, 64]},
}

# Parent parameter that the two conditional parameters hang off.
parameter = {
    "parameter_id": "max_depth",
    "discrete_value_spec": {"values": [4, 8, 16, 32, 64, 128]},
    "scale_type": aip.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
    "conditional_parameter_specs": [
        conditional_parameter_learning_rate,
        conditional_parameter_num_boost_round,
    ],
}

hyperparameter_tuning_job = {
    "display_name": "hpt",
    "max_trial_count": 4,
    "parallel_trial_count": 2,
    "study_spec": {
        "metrics": [metric],
        "parameters": [parameter],
        "algorithm": aip.gapic.StudySpec.Algorithm.RANDOM_SEARCH,
    },
    "trial_job_spec": {
        "worker_pool_specs": worker_pool_specs,
        # The trainer defaults --model_dir to AIP_MODEL_DIR; setting a base
        # output directory is what makes the service provide that variable.
        "base_output_directory": {
            "output_uri_prefix": f"{BUCKET_NAME}/iris_hpt_{TIMESTAMP}",
        },
    },
}

# Use REGION for both the API endpoint and the parent resource so they
# cannot drift apart.
client_options = {"api_endpoint": f"{REGION}-aiplatform.googleapis.com"}
client = aip.gapic.JobServiceClient(client_options=client_options)

response = client.create_hyperparameter_tuning_job(
    parent=f"projects/{PROJECT_ID}/locations/{REGION}",
    hyperparameter_tuning_job=hyperparameter_tuning_job,
)
print("response:", response)

## Other Resources
* https://github.com/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/migration/UJ9%20Vertex%20SDK%20Custom%20XGBoost%20with%20pre-built%20training%20container.ipynb
* https://github.com/GoogleCloudPlatform/cloudml-samples/tree/wenzhel-sklearn/xgboost/iris
* https://sararobinson.dev/2019/09/12/hyperparameter-tuning-xgboost.html
* https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning#aiplatform_create_hyperparameter_tuning_job_python_package_sample-python