# Hyperparameter tuning with Cloud ML Engine

**Learning Objectives:**
  * Improve the accuracy of a model by hyperparameter tuning

In [1]:
import os
# Lab-specific settings -- replace the first three values with your own before running.
PROJECT = 'qwiklabs-gcp-9164a767555c7a33'        # your GCP project ID
BUCKET = 'qwiklabs-gcp-9164a767555c7a33-bucket'  # your Cloud Storage bucket name
REGION = 'us-central1'                           # your bucket's region, e.g. us-central1
# TensorFlow version; read back as $TFVERSION when the training job is submitted.
os.environ.update(TFVERSION='1.8')

In [2]:
# Mirror the Python settings into the process environment so that the
# %%bash cells below can read them as $PROJECT, $BUCKET and $REGION.
os.environ.update({
    'PROJECT': PROJECT,
    'BUCKET': BUCKET,
    'REGION': REGION,
})

In [3]:
%%bash
# Point the gcloud CLI at the lab project and region exported above.
gcloud config set core/project ${PROJECT}
gcloud config set compute/region ${REGION}

Updated property [core/project].
Updated property [compute/region].


## Create command-line program

In order to submit to Cloud ML Engine, we need to create a distributed training program. Let's convert our housing example to fit that paradigm, using the Estimators API.

In [4]:
%%bash
# Recreate the trainer package from scratch; the empty __init__.py is what
# makes the directory importable as the Python package `trainer`.
PACKAGE_DIR=trainer
rm -rf "${PACKAGE_DIR}"
mkdir "${PACKAGE_DIR}"
touch "${PACKAGE_DIR}/__init__.py"

In [14]:
%%writefile trainer/house.py
import os
import math
import json
import shutil
import argparse
import numpy as np
import pandas as pd
import tensorflow as tf

def _numeric_columns(*colnames):
  """Return {name: numeric feature column} for each of the given column names."""
  return {name: tf.feature_column.numeric_column(name) for name in colnames}


def _bucketized_column(colname, start, stop, num_boundaries):
  """Bucketize `colname` at `num_boundaries` evenly spaced boundaries from `start` to `stop`."""
  return tf.feature_column.bucketized_column(
      tf.feature_column.numeric_column(colname),
      np.linspace(start, stop, num_boundaries).tolist())


def _build_feature_columns(features_set):
  """Map a `features_set` id to a dict of {input column name: feature column}.

  1 -> num_bedrooms only
  2 -> num_bedrooms, num_rooms, housing_median_age
  3 -> persons_per_house, median_income, bucketized longitude/latitude
  anything else -> all five numeric columns plus bucketized longitude/latitude
  """
  if features_set == 1:
    return _numeric_columns('num_bedrooms')
  if features_set == 2:
    return _numeric_columns('num_bedrooms', 'num_rooms', 'housing_median_age')

  if features_set == 3:
    featcols = _numeric_columns('persons_per_house', 'median_income')
  else:
    featcols = _numeric_columns('housing_median_age', 'median_income', 'num_rooms',
                                'num_bedrooms', 'persons_per_house')
  # Bucketize lat, lon so it's not so high-res; California is mostly N-S, so more lats than lons
  featcols['longitude'] = _bucketized_column('longitude', -124.3, -114.3, 5)
  featcols['latitude'] = _bucketized_column('latitude', 32.5, 42, 10)
  return featcols


def train(output_dir, batch_size, learning_rate, features_set, split_seed=42):
  """Train and evaluate a linear regressor on the California housing data.

  Args:
    output_dir: model directory for the estimator. It is removed with
      shutil.rmtree (errors ignored) before training starts.
    batch_size: number of examples per training batch.
    learning_rate: learning rate of the FTRL optimizer. It also determines the
      number of training steps: (examples // batch_size) / learning_rate.
    features_set: which input features to use; see _build_feature_columns.
    split_seed: seed for the random 80/20 train/eval split. The fixed default
      keeps the split identical from run to run, so the rmse values reported by
      different hyperparameter trials are computed on the same rows. Pass None
      to get a different random split on every run.
  """
  tf.logging.set_verbosity(tf.logging.INFO)

  # Read dataset and replace the per-block totals with per-household averages
  df = pd.read_csv("https://storage.googleapis.com/ml_universities/california_housing_train.csv", sep=",")
  df['num_rooms'] = df['total_rooms'] / df['households']
  df['num_bedrooms'] = df['total_bedrooms'] / df['households']
  df['persons_per_house'] = df['population'] / df['households']
  df = df.drop(['total_rooms', 'total_bedrooms', 'population', 'households'], axis = 1)

  # Split into train and eval with a private generator, leaving numpy's global
  # random state untouched.
  rng = np.random.RandomState(split_seed)
  msk = rng.rand(len(df)) < 0.8
  traindf = df[msk]
  evaldf = df[~msk]

  # The label is divided by SCALE for training and multiplied back for the metric
  SCALE = 100000

  featcols = _build_feature_columns(features_set)
  # Materialize the dict views as lists: pandas and the estimator expect real
  # sequences, and under Python 3 keys()/values() are view objects.
  feature_names = list(featcols.keys())
  feature_columns = list(featcols.values())

  # Train and eval input functions
  train_input_fn = tf.estimator.inputs.pandas_input_fn(x = traindf[feature_names],
                                                       y = traindf["median_house_value"] / SCALE,  # note the scaling
                                                       num_epochs = None,
                                                       batch_size = batch_size, # note the batch size
                                                       shuffle = True)

  # Evaluate on the whole eval set in a single batch
  eval_input_fn = tf.estimator.inputs.pandas_input_fn(x = evaldf[feature_names],
                                                      y = evaldf["median_house_value"] / SCALE,  # note the scaling
                                                      num_epochs = 1,
                                                      batch_size = len(evaldf),
                                                      shuffle = False)

  # Compute appropriate number of steps: if learning_rate=0.01, hundred epochs.
  # max_steps has to be a positive whole number, so truncate and keep at least 1.
  steps_per_epoch = len(traindf) // batch_size
  num_steps = max(1, int(steps_per_epoch / learning_rate))

  # Run the training
  shutil.rmtree(output_dir, ignore_errors=True) # start fresh each time

  # Create custom optimizer
  myopt = tf.train.FtrlOptimizer(learning_rate = learning_rate) # note the learning rate

  # Create rest of the estimator as usual
  estimator = tf.estimator.LinearRegressor(model_dir = output_dir,
                                           feature_columns = feature_columns,
                                           optimizer = myopt)

  # Add rmse evaluation metric, reported in the label's original units
  def rmse(labels, predictions):
    pred_values = tf.cast(predictions['predictions'], tf.float64)
    return {'rmse': tf.metrics.root_mean_squared_error(labels*SCALE, pred_values*SCALE)}
  estimator = tf.contrib.estimator.add_metrics(estimator, rmse)

  train_spec = tf.estimator.TrainSpec(input_fn = train_input_fn,
                                      max_steps = num_steps)
  eval_spec = tf.estimator.EvalSpec(input_fn = eval_input_fn,
                                    steps = None)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
    
if __name__ == '__main__' and "get_ipython" not in dir():
  # Command-line entry point: parse the flags and hand them to train().
  # The "get_ipython" check skips this block when the code is executed in a
  # namespace that defines get_ipython (i.e. an interactive IPython session).
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--learning_rate',
      help = 'Learning rate for the FTRL optimizer.',
      type = float,
      default = 0.01
  )
  parser.add_argument(
      '--batch_size',
      help = 'Number of examples per training batch.',
      type = int,
      default = 30
  )
  parser.add_argument(
      '--job-dir',
      help = 'GCS location to write checkpoints and export models.',
      required = True
  )
  parser.add_argument(
      '--features_set',
      help = 'Input features to train on: 1, 2 or 3 select a subset; any other value uses all features.',
      type = int,
      default = 1
  )
  args = parser.parse_args()
  # argparse exposes --job-dir as args.job_dir
  print("Writing checkpoints to {}".format(args.job_dir))
  train(args.job_dir, args.batch_size, args.learning_rate, args.features_set)

Overwriting trainer/house.py


In [16]:
%%bash
# Try the trainer package locally with one fixed set of hyperparameters
# before submitting it to the cloud.
LOCAL_JOB_DIR=house_trained
rm -rf "${LOCAL_JOB_DIR}"
gcloud ml-engine local train \
    --package-path=$(pwd)/trainer \
    --module-name=trainer.house \
    --job-dir="${LOCAL_JOB_DIR}" \
    -- \
    --batch_size=30 \
    --learning_rate=0.02 \
    --features_set=2

Writing checkpoints to house_trained


  from ._conv import register_converters as _register_converters
INFO:tensorflow:TF_CONFIG environment variable: {u'environment': u'cloud', u'cluster': {}, u'job': {u'args': [u'--batch_size=30', u'--learning_rate=0.02', u'--features_set=2', u'--job-dir', u'house_trained'], u'job_name': u'trainer.house'}, u'task': {}}
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fbab6c61cd0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': 'house_trained', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Using co

## Create hyperparam.yaml

In [17]:
%%writefile hyperparam.yaml
# Hyperparameter tuning configuration, passed to the training job via --config.
trainingInput:
  hyperparameters:
    # Minimize the metric tagged 'rmse' -- the key of the metric dict that
    # trainer/house.py adds to the estimator.
    goal: MINIMIZE
    maxTrials: 5
    maxParallelTrials: 1
    hyperparameterMetricTag: rmse
    # Each parameterName matches a command-line flag of trainer/house.py
    # (--batch_size, --learning_rate, --features_set).
    params:
    - parameterName: batch_size
      type: INTEGER
      minValue: 8
      maxValue: 64
      scaleType: UNIT_LINEAR_SCALE
    - parameterName: learning_rate
      type: DOUBLE
      minValue: 0.01
      maxValue: 0.1
      scaleType: UNIT_LOG_SCALE
    # features_set selects which input columns the trainer uses: 1-3 pick the
    # hand-written subsets; 4 falls through to the trainer's "all features" branch.
    - parameterName: features_set
      type: INTEGER
      minValue: 1
      maxValue: 4
      scaleType: UNIT_LINEAR_SCALE

Writing hyperparam.yaml


In [18]:
%%bash
# Output location for the tuning job; the bucket name comes from $BUCKET,
# which an earlier cell exported from the notebook's settings.
OUTDIR="gs://${BUCKET}/house_trained"
# Clear output from a previous run. gsutil reports an error when there is
# nothing to remove; that is harmless and the submission below still runs.
gsutil rm -rf "$OUTDIR"
# Job names must be unique, so append a UTC timestamp.
gcloud ml-engine jobs submit training "house_$(date -u +%y%m%d_%H%M%S)" \
   --config=hyperparam.yaml \
   --module-name=trainer.house \
   --package-path="$(pwd)/trainer" \
   --job-dir="$OUTDIR" \
   --runtime-version="$TFVERSION"

jobId: house_180730_032006
state: QUEUED


CommandException: 1 files/objects could not be removed.
Job [house_180730_032006] submitted successfully.
Your job is still active. You may view the status of your job with the command

  $ gcloud ml-engine jobs describe house_180730_032006

or continue streaming the logs with the command

  $ gcloud ml-engine jobs stream-logs house_180730_032006


In [None]:
# Check on the tuning job; the job id is the one printed as "jobId" when the job was submitted.
!gcloud ml-engine jobs describe house_180730_032006 # CHANGE jobId appropriately

## Challenge exercise
Add a few engineered features to the housing model, and use hyperparameter tuning to choose which set of features the model uses.

<p>
Copyright 2018 Google Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License