In [1]:
from google.cloud import aiplatform

from google.cloud.aiplatform import hyperparameter_tuning as hpt

import google.auth

# Vertex AI hyperparameter optimization

You can tune hyperparameters for your models on the Vertex AI platform on GCP using our Docker containers, or your own, that include a version of the CODA library. Make sure the Vertex AI API is enabled on your GCP account and that you only request machines for which you have quota.

References:

- https://cloud.google.com/vertex-ai/docs/training/hyperparameter-tuning-overview
- https://cloud.google.com/vertex-ai/docs/training/using-hyperparameter-tuning#aiplatform_create_hyperparameter_tuning_job_python_package_sample-python_vertex_ai_sdk
- https://colab.research.google.com/github/GoogleCloudPlatform/vertex-ai-samples/blob/main/notebooks/official/migration/sdk-hyperparameter-tuning.ipynb
 

## Login and initialize your session
Make sure you fill in your username in the next cell. It is used to build the name of the Cloud Storage staging bucket.

In [2]:
# GCP username; it is interpolated into the staging-bucket name when the
# Vertex AI SDK is initialized. Replace the placeholder before running.
YOUR_USERNAME = "replace-with-your-gcp-username"

In [3]:
# Look up the environment's default credentials together with the project
# that google.auth associates with them; both are handed to aiplatform.init.
(credentials, project) = google.auth.default()

In [4]:
# Initialize the Vertex AI SDK for this session.
aiplatform.init(
    # Google Cloud project ID or number (environment default is used if not set).
    project=project,
    # Custom google.auth.credentials.Credentials
    # (environment default credentials are used if not set).
    credentials=credentials,
    # Vertex AI region to use (defaults to us-central1).
    location='us-central1',
    # Google Cloud Storage bucket used to stage artifacts;
    # it must be in the same region as `location`.
    staging_bucket=f'gs://{YOUR_USERNAME}-aip-staging-us-central1',
    # Experiment used to track logged metrics and parameters, and its description.
    experiment='full-hpo',
    experiment_description='Full hyperparameter search.',
)

## Set your worker specs

In [5]:
# Worker pool used for every trial: one replica with the hardware and
# training container described below.
worker_pool_specs = [{
    ##########################################################
    ## Again, make sure you adjust these to fit your quotas ##
    ##########################################################
    "machine_spec": {
        "machine_type": "n1-standard-8",
        "accelerator_type": "NVIDIA_TESLA_V100",
        "accelerator_count": 1
    },
    "replica_count": 1,
    "disk_spec": {
        "boot_disk_type": "pd-ssd",
        "boot_disk_size_gb": 300
    },
    ##########################################################
    ## These machines pull a container when initialized.    ##
    ## You need to set constant args for the training       ##
    ## script as part of the container spec. Change these   ##
    ## to adjust the training process.                      ##
    ##########################################################
    "container_spec": {
        # Image pinned by digest so every trial runs the same container build.
        "image_uri": "gcr.io/sabeti-encode/boda/train@sha256:8b90fcbe67a3a31b5aca9a983848b5449243dfd9bd8d310f2c84e3f35dd9e960",
        "args": [
            "--data_module=MPRA_DataModule",
            "--datafile_path=gs://syrgoth/data/MPRA_ALL_v3.txt",
            "--synth_seed=890207",
            "--synth_val_pct=0.0",
            "--synth_test_pct=99.98",
            "--num_workers=8",
            "--n_outputs=3",
            "--optimizer=Adam",
            "--scheduler=CosineAnnealingWarmRestarts",
            "--scheduler_interval=step",
            "--checkpoint_monitor=entropy_spearman",
            "--stopping_mode=max",
            "--stopping_patience=30",
            "--gpus=1",
            "--min_epochs=60",
            "--max_epochs=200",
            "--precision=16",
            "--default_root_dir=/tmp/output/artifacts",
            # Artifacts go to the user's own staging bucket (the same bucket
            # name passed as `staging_bucket` to aiplatform.init) rather than
            # a hard-coded personal bucket.
            f"--artifact_path=gs://{YOUR_USERNAME}-aip-staging-us-central1/aip_hpo_20231230",
        ]
    }
}]

## Set optimization objective
You choose both the metric to optimize and the goal for it (`maximize` or `minimize`).

In [6]:
# Objective for the search: metric name mapped to its optimization goal.
# NOTE(review): presumably the training container must report a metric with
# exactly this name — confirm against the training script.
metric_spec = {
    "prediction_mean_spearman": "maximize",
}

## Define hyperparameters to optimize
Reference: https://cloud.google.com/vertex-ai/docs/reference/rest/v1/StudySpec

In [7]:
# Search space for the tuning job. Top-level entries are always searched;
# entries inside `conditional_parameter_spec` are tied to specific values of
# their parent parameter through `parent_values`.
parameter_spec = {
    # --- Model architecture -------------------------------------------------
    "model_module": hpt.CategoricalParameterSpec(
        values=["BassetBranched", "BassetVL"],
        conditional_parameter_spec={
            # All of these are conditioned on the 'BassetBranched' choice.
            "n_branched_layers": hpt.IntegerParameterSpec(
                min=1, max=5, scale="linear", parent_values=["BassetBranched"]
            ),
            "branched_activation": hpt.CategoricalParameterSpec(
                values=["ReLU", "ELU", "ReLU6"], parent_values=["BassetBranched"]
            ),
            "branched_channels": hpt.IntegerParameterSpec(
                min=16, max=1024, scale="linear", parent_values=["BassetBranched"]
            ),
            "branched_dropout_p": hpt.DoubleParameterSpec(
                min=0.05, max=0.75, scale="linear", parent_values=["BassetBranched"]
            ),
        },
    ),
    # --- Training graph -----------------------------------------------------
    "graph_module": hpt.CategoricalParameterSpec(
        values=["CNNBasicTraining", "CNNTransferLearning"],
        conditional_parameter_spec={
            # Conditioned on 'CNNTransferLearning'.
            "parent_weights": hpt.CategoricalParameterSpec(
                values=["gs://syrgoth/my-model.epoch_5-step_19885.pkl"],
                parent_values=["CNNTransferLearning"],
            ),
            "frozen_epochs": hpt.IntegerParameterSpec(
                min=0, max=60, scale="linear", parent_values=["CNNTransferLearning"]
            ),
            # Conditioned on 'CNNBasicTraining'.
            "linear_activation": hpt.CategoricalParameterSpec(
                values=["ReLU", "ELU", "ReLU6"], parent_values=["CNNBasicTraining"]
            ),
            "linear_channels": hpt.IntegerParameterSpec(
                min=16, max=4096, scale="log", parent_values=["CNNBasicTraining"]
            ),
            "conv1_channels": hpt.IntegerParameterSpec(
                min=16, max=2048, scale="log", parent_values=["CNNBasicTraining"]
            ),
            "conv2_channels": hpt.IntegerParameterSpec(
                min=16, max=2048, scale="log", parent_values=["CNNBasicTraining"]
            ),
            "conv3_channels": hpt.IntegerParameterSpec(
                min=16, max=2048, scale="log", parent_values=["CNNBasicTraining"]
            ),
            "conv1_kernel_size": hpt.IntegerParameterSpec(
                min=5, max=25, scale="linear", parent_values=["CNNBasicTraining"]
            ),
            "conv2_kernel_size": hpt.IntegerParameterSpec(
                min=5, max=25, scale="linear", parent_values=["CNNBasicTraining"]
            ),
            "conv3_kernel_size": hpt.IntegerParameterSpec(
                min=5, max=25, scale="linear", parent_values=["CNNBasicTraining"]
            ),
            # Single-valued discrete specs: effectively constants that are
            # only supplied on the 'CNNBasicTraining' branch.
            "padded_seq_len": hpt.DiscreteParameterSpec(
                values=[216], scale="linear", parent_values=["CNNBasicTraining"]
            ),
            "input_len": hpt.DiscreteParameterSpec(
                values=[216], scale="linear", parent_values=["CNNBasicTraining"]
            ),
        },
    ),
    # --- Data handling ------------------------------------------------------
    "batch_size": hpt.IntegerParameterSpec(min=128, max=3072, scale="log"),
    # Booleans are passed as the strings 'True' / 'False'.
    "use_reverse_complements": hpt.CategoricalParameterSpec(values=["True", "False"]),
    "duplication_cutoff": hpt.DoubleParameterSpec(min=0.5, max=5.0, scale="linear"),
    # --- Linear head and loss -----------------------------------------------
    "n_linear_layers": hpt.IntegerParameterSpec(min=1, max=5, scale="linear"),
    "linear_dropout_p": hpt.DoubleParameterSpec(min=0.05, max=0.75, scale="linear"),
    "loss_criterion": hpt.CategoricalParameterSpec(values=["L1KLmixed", "MSEKLmixed"]),
    "beta": hpt.DoubleParameterSpec(min=0.2, max=5.0, scale="log"),
    # --- Optimizer and scheduler --------------------------------------------
    # NOTE(review): presumably these feed the Adam optimizer and the
    # CosineAnnealingWarmRestarts scheduler selected in the container args —
    # confirm against the training script.
    "lr": hpt.DoubleParameterSpec(min=0.0001, max=0.01, scale="log"),
    "weight_decay": hpt.DoubleParameterSpec(min=0.00001, max=0.001, scale="log"),
    "beta1": hpt.DoubleParameterSpec(min=0.8, max=0.9999, scale="reverse_log"),
    "beta2": hpt.DoubleParameterSpec(min=0.8, max=0.9999, scale="reverse_log"),
    "amsgrad": hpt.CategoricalParameterSpec(values=["False", "True"]),
    "T_0": hpt.IntegerParameterSpec(min=2048, max=65536, scale="log"),
}

## Deploy HPO job

In [8]:
# Custom job built from the worker pool specs; it is passed to the
# hyperparameter tuning job as `custom_job`.
custom_job = aiplatform.CustomJob(
    worker_pool_specs=worker_pool_specs,
    display_name='custom_job',
)

In [None]:
# Assemble the tuning job from the custom job, the objective, and the search
# space: 512 trials in total, with 12 running in parallel.
hpt_job = aiplatform.HyperparameterTuningJob(
    display_name='hpo-full-02',
    custom_job=custom_job,
    metric_spec=metric_spec,
    parameter_spec=parameter_spec,
    max_trial_count=512,
    parallel_trial_count=12,
    labels={'hpo_run_id': '2'},
)

# Submit the job; the SDK logs the job state while it runs.
hpt_job.run()

# Print the resource name, which can be passed to
# aiplatform.HyperparameterTuningJob.get(...) to reuse the job in another session.
print(hpt_job.resource_name)


Creating HyperparameterTuningJob
HyperparameterTuningJob created. Resource name: projects/482032041325/locations/us-central1/hyperparameterTuningJobs/659844072021688320
To use this HyperparameterTuningJob in another session:
hpt_job = aiplatform.HyperparameterTuningJob.get('projects/482032041325/locations/us-central1/hyperparameterTuningJobs/659844072021688320')
View HyperparameterTuningJob:
https://console.cloud.google.com/ai/platform/locations/us-central1/training/659844072021688320?project=482032041325
HyperparameterTuningJob projects/482032041325/locations/us-central1/hyperparameterTuningJobs/659844072021688320 current state:
JobState.JOB_STATE_PENDING
HyperparameterTuningJob projects/482032041325/locations/us-central1/hyperparameterTuningJobs/659844072021688320 current state:
JobState.JOB_STATE_RUNNING
HyperparameterTuningJob projects/482032041325/locations/us-central1/hyperparameterTuningJobs/659844072021688320 current state:
JobState.JOB_STATE_RUNNING
HyperparameterTuningJob pro