In [None]:
import tensorflow as tf
from tensorflow import data

# Show the installed TensorFlow version. The cells below rely on APIs such as
# tf.contrib, tf.parse_example and Dataset.make_one_shot_iterator (TF 1.x APIs).
print(tf.__version__)

### 1- Define input function with process features

In [None]:
# Read the metadata stored under the transform artefacts directory.
transformed_metadata = metadata_io.read_metadata(
    os.path.join(local_models_dir,TRANSFORM_ARTEFACTS_DIR,"transformed_metadata"))

# Feature spec derived from the transformed schema; it is passed to
# tf.parse_example below to decode the transformed records.
transformed_feature_spec = transformed_metadata.schema.as_feature_spec()

print(transformed_feature_spec)

In [None]:
def parse_tf_example(example_proto):
    """Parse serialized tf.Example protos into a ``(features, target)`` pair.

    Args:
        example_proto: serialized example(s), handed to ``tf.parse_example``
            together with the module-level ``transformed_feature_spec``.

    Returns:
        A tuple ``(features, target)``: ``features`` is the parsed dictionary
        with the ``KEY_COLUMN`` and ``TARGET_FEATURE_NAME`` entries removed, and
        ``target`` is the removed ``TARGET_FEATURE_NAME`` entry.
    """
    features = tf.parse_example(serialized=example_proto, features=transformed_feature_spec)

    # The key column is discarded; the target is split off from the features.
    del features[KEY_COLUMN]
    label = features.pop(TARGET_FEATURE_NAME)

    return features, label

In [None]:
# Applied in both training and serving.
# Ideally this logic lives in preprocess_tft, so that records are not re-transformed on every pass during training.

def process_features(features):
    """Hook for extra feature processing shared by the input and serving functions.

    Currently a pass-through.

    Args:
        features: the features object to process.

    Returns:
        The very same ``features`` object, unchanged.
    """
    return features

In [None]:
def tfrecords_input_fn(files_name_pattern, mode=tf.estimator.ModeKeys.EVAL,
                 num_epochs=1,
                 batch_size=500):
    """Estimator input function reading batches from TFRecord files.

    Args:
        files_name_pattern: file pattern given to ``Dataset.list_files``.
        mode: a ``tf.estimator.ModeKeys`` value; records are shuffled only
            when it equals ``TRAIN``.
        num_epochs: repeat count passed to ``Dataset.repeat``.
        batch_size: number of records per batch; also sizes the shuffle buffer.

    Returns:
        A ``(features, target)`` tuple taken from a one-shot iterator over
        the dataset.
    """
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    matched_files = data.Dataset.list_files(files_name_pattern)
    records = data.TFRecordDataset(filenames=matched_files)

    # Shuffle individual records (before batching) in training mode only.
    if is_training:
        records = records.shuffle(buffer_size=2 * batch_size + 1)

    # Parse whole batches at once, then apply the shared feature processing.
    batches = (
        records
        .batch(batch_size)
        .map(parse_tf_example)
        .map(lambda features, target: (process_features(features), target))
        .repeat(num_epochs)
    )

    features, target = batches.make_one_shot_iterator().get_next()
    return features, target

### 2- Create Feature Columns with Extensions

In [None]:
def get_deep_and_wide_columns():
    """Build the feature columns for the wide (linear) and deep (DNN) model parts.

    Categorical columns read their vocabulary from files located in the
    ``transform_fn/assets`` folder of the transform artefacts directory, one
    file per name in ``CATEGORICAL_FEATURE_NAMES`` (the file is named after
    the feature).

    Returns:
        A tuple ``(wide_columns, deep_columns)`` of two lists of feature columns.
    """

    assets_dir = os.path.join(local_models_dir, TRANSFORM_ARTEFACTS_DIR, 'transform_fn/assets')

    categorical_feature_columns = {
        feature_name: tf.feature_column.categorical_column_with_vocabulary_file(
            feature_name, vocabulary_file=os.path.join(assets_dir, feature_name))
        for feature_name in CATEGORICAL_FEATURE_NAMES}

    is_multiple = tf.feature_column.categorical_column_with_identity('is_multiple', num_buckets=2)
    gestation_weeks_scaled = tf.feature_column.numeric_column('gestation_weeks_scaled')
    mother_age_log = tf.feature_column.numeric_column('mother_age_log')
    # Defined but not included in either returned list below.
    mother_age_normalized = tf.feature_column.numeric_column('mother_age_normalized')

    # extended feature columns
    cigarette_use_X_alcohol_use = tf.feature_column.crossed_column(
      [categorical_feature_columns['cigarette_use'], categorical_feature_columns['alcohol_use']], 9)

    #mother_age_bucketized = tf.feature_column.bucketized_column(mother_age, boundaries=[18, 22, 28, 32, 36, 40, 42, 45, 50])
    # The bucket index is read directly from the 'mother_age_bucketized' feature.
    mother_age_bucketized = tf.feature_column.categorical_column_with_identity('mother_age_bucketized', num_buckets=5)

    mother_race_X_mother_age_bucketized = tf.feature_column.crossed_column(
        [mother_age_bucketized, categorical_feature_columns['mother_race']], 120)

    mother_race_X_mother_age_bucketized_embedded = tf.feature_column.embedding_column(
        mother_race_X_mother_age_bucketized, 5)

    # wide and deep columns
    # dict.values() is a view object on Python 3 and cannot be concatenated to a
    # list with "+", so materialise it as a list first (no-op copy on Python 2).
    wide_columns = list(categorical_feature_columns.values()) + [
        is_multiple,
        cigarette_use_X_alcohol_use,
        mother_age_bucketized,
        mother_race_X_mother_age_bucketized,
    ]
    deep_columns = [mother_age_log, gestation_weeks_scaled, mother_race_X_mother_age_bucketized_embedded]

    return wide_columns, deep_columns

# w,d = get_deep_and_wide_columns()
# print w

### 3- Create a Wide & Deep (DNNLinearCombined) Regression Estimator

In [None]:
def metric_fn(labels, predictions):
    """Extra evaluation metrics attached to the estimator.

    Args:
        labels: the label tensor.
        predictions: dictionary of model outputs; only its ``'predictions'``
            entry is used.

    Returns:
        A dictionary with the keys ``'rmse'`` and ``'mae'`` holding the results
        of ``tf.metrics.root_mean_squared_error`` and
        ``tf.metrics.mean_absolute_error`` respectively.
    """
    predicted = predictions['predictions']

    return {
        'rmse': tf.metrics.root_mean_squared_error(labels=labels, predictions=predicted),
        'mae': tf.metrics.mean_absolute_error(labels=labels, predictions=predicted),
    }


def create_DNNLinearCombinedRegressor(run_config, hparams):
    """Create the DNNLinearCombinedRegressor with the extra metrics attached.

    Args:
        run_config: passed as the estimator's ``config``.
        hparams: object providing ``learning_rate`` (for the Adam optimizer of
            the DNN part) and ``hidden_units`` (the DNN layer sizes).

    Returns:
        The estimator returned by ``tf.contrib.estimator.add_metrics`` after
        wrapping the regressor with ``metric_fn``.
    """
    linear_columns, dnn_columns = get_deep_and_wide_columns()

    base_estimator = tf.estimator.DNNLinearCombinedRegressor(
        linear_feature_columns=linear_columns,
        dnn_feature_columns=dnn_columns,
        dnn_optimizer=tf.train.AdamOptimizer(learning_rate=hparams.learning_rate),
        dnn_hidden_units=hparams.hidden_units,
        config=run_config,
    )

    return tf.contrib.estimator.add_metrics(base_estimator, metric_fn)

### 4- Setup Local Experiment

##### a) RunConfig and Hyper-params

In [None]:
# Hyper-parameters
hparams  = tf.contrib.training.HParams(
    num_epochs=10,          # repeat count for the training input function
    batch_size=500,         # batch size for the training input function
    hidden_units=[32, 16],  # layer sizes of the DNN part of the estimator
    max_steps=100,          # TrainSpec.max_steps
    learning_rate=0.1,      # learning rate of the DNN Adam optimizer
    evaluate_after_sec=10   # EvalSpec.throttle_secs
)

# RunConfig
# Directory handed to the estimator as model_dir; the run cell deletes it
# before training and the export folders are read from beneath it.
model_dir = os.path.join(local_models_dir,"dnn_estimator")

run_config = tf.estimator.RunConfig(
    tf_random_seed=19830610,  # fixed random seed
    model_dir=model_dir
)

##### b) Serving Function

In [None]:
def generate_serving_input_fn():
    """Return the serving input receiver function used when exporting the model.

    The returned function builds placeholders for the raw features (without
    the target), applies the saved transform and ``process_features`` to them,
    and wraps the result in a ``ServingInputReceiver`` whose receiver tensors
    are the raw features.
    """

    def _serving_fn():
        # Placeholders for the raw data; the target is not a serving input.
        raw_placeholders = create_raw_metadata().schema.as_batched_placeholders()
        del raw_placeholders[TARGET_FEATURE_NAME]

        raw_receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(raw_placeholders)
        raw_features, _unused_receiver_tensors, _ = raw_receiver_fn()

        # Apply the saved transform_fn to the raw features.
        transform_fn_dir = os.path.join(
            local_models_dir, TRANSFORM_ARTEFACTS_DIR, transform_fn_io.TRANSFORM_FN_DIR)
        _, transformed = saved_transform_io.partially_apply_saved_transform(
            transform_fn_dir, raw_features)

        # Same post-processing hook that the training input function applies.
        model_inputs = process_features(transformed)

        return tf.estimator.export.ServingInputReceiver(model_inputs, raw_features)

    return _serving_fn

##### c) TrainSpec and EvalSpec

In [None]:
# File patterns of the transformed training and evaluation TFRecord files.
train_data_files = os.path.join(local_data_dir,TRANSFORMED_DATA_DIR)+"/train-*.tfrecords"
eval_data_files = os.path.join(local_data_dir,TRANSFORMED_DATA_DIR)+"/eval-*.tfrecords"

# TrainSpec
# The input function runs in TRAIN mode (shuffled) with the epochs and batch
# size taken from hparams; training stops after hparams.max_steps steps.
train_spec = tf.estimator.TrainSpec(
  input_fn = lambda: tfrecords_input_fn(
    train_data_files,
    mode=tf.estimator.ModeKeys.TRAIN,
    num_epochs= hparams.num_epochs,
    batch_size = hparams.batch_size
  ),
  max_steps=hparams.max_steps,
)

# EvalSpec
# The evaluation input function uses the defaults of tfrecords_input_fn
# (EVAL mode, one epoch, batch size 500).
eval_spec = tf.estimator.EvalSpec(
  input_fn =lambda: tfrecords_input_fn(eval_data_files),
  exporters=[tf.estimator.LatestExporter(
      name="estimate",  # the name of the folder in which the model will be exported to under export
      serving_input_receiver_fn=generate_serving_input_fn(),
      exports_to_keep=1,
      as_text=True)],
  steps = None,
  throttle_secs = hparams.evaluate_after_sec # evaluate after each 10 training seconds!
)

### >> TensorBoard - Start

In [None]:
# Start TensorBoard on the model directory; the final list() call is the
# cell's last expression, so its result is displayed as the cell output.
from google.datalab.ml import TensorBoard
TensorBoard().start(model_dir)
TensorBoard().list()

### 5- Run train_and_evaluate

In [None]:
import shutil
from datetime import datetime

# Start from a clean model directory so training does not resume from
# existing checkpoints.
# remove the following line of code to resume training
shutil.rmtree(model_dir, ignore_errors=True)

# Build the estimator only after the model directory has been cleared.
dnn_estimator = create_DNNLinearCombinedRegressor(run_config, hparams)

tf.logging.set_verbosity(tf.logging.INFO)

time_start = datetime.utcnow() 
print("")
print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
print(".......................................") 

# run train and evaluate experiment
tf.estimator.train_and_evaluate(
  dnn_estimator,
  train_spec,
  eval_spec
)


time_end = datetime.utcnow() 
print(".......................................")
print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
print("")
# Wall-clock duration of the whole train-and-evaluate run.
time_elapsed = time_end - time_start
print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))
    


In [None]:
%%bash

# List the contents of the "estimate" export folder.
# NOTE(review): the path is hard-coded; presumably it is the same location as
# model_dir + "/export/estimate" used by the Python cells -- confirm.
ls models/babyweight/dnn_estimator/export/estimate

### >> TensorBoard - Stop

In [None]:
#to stop TensorBoard
# NOTE(review): 23002 is a hard-coded identifier, presumably the process id of
# a TensorBoard instance from an earlier session. Replace it with the value
# reported for the instance to stop (e.g. from TensorBoard().list()) -- confirm.
TensorBoard().stop(23002)
print('stopped TensorBoard')
TensorBoard().list()

### 6- Use SavedModel for Predictions

In [None]:
# Folder into which the "estimate" exporter writes its SavedModels.
saved_model_base_dir=os.path.join(model_dir,'export/estimate')

# Each export lives in a sub-directory named by its numeric timestamp.
# os.listdir() returns entries in arbitrary order, so pick the highest
# timestamp explicitly and ignore any non-numeric entries (e.g. temp folders).
SAVED_MODEL_DIR=os.path.join(
    saved_model_base_dir,
    max((name for name in os.listdir(saved_model_base_dir) if name.isdigit()), key=int))

def estimate_local(instance):
    """Predict a single instance with the SavedModel found at ``SAVED_MODEL_DIR``.

    Args:
        instance: dictionary mapping feature names to single values.

    Returns:
        The element ``[0][0]`` of the ``'predictions'`` output of the
        ``"predict"`` signature.
    """
    predictor = tf.contrib.predictor.from_saved_model(
        export_dir=SAVED_MODEL_DIR,
        signature_def_key="predict"
    )

    # Wrap every value in a one-element list, i.e. a batch of size one.
    batched_instance = {name: [value] for name, value in instance.items()}

    return predictor(batched_instance)['predictions'][0][0]

# A single raw (untransformed) example; the exported model applies the saved
# transform itself, so raw feature values are supplied here.
instance = {
        'is_male': 'True',
        'mother_age': 26.0,
        'mother_race': 'Asian Indian',
        'plurality': 1.0,
        'gestation_weeks': 39,
        'mother_married': 'True',
        'cigarette_use': 'False',
        'alcohol_use': 'False'
}

# Run the instance through the locally exported SavedModel.
prediction = estimate_local(instance)
print(prediction)

## 5. Evaluate the model using TFMA

In [None]:
import tensorflow_model_analysis as tfma

### 5.1 Evaluate input function

In [None]:
def generate_eval_receiver_fn(transform_artefacts_dir):
    """Return the eval input receiver function for the TFMA evaluation export.

    Args:
        transform_artefacts_dir: directory containing the
            ``transformed_metadata`` folder.

    Returns:
        A zero-argument function that creates a placeholder for serialized
        examples, parses it with the transformed feature spec and returns a
        ``tfma.export.EvalInputReceiver``. All parsed features are passed as
        ``features`` and the ``TARGET_FEATURE_NAME`` entry as ``labels``.
    """
    feature_spec = metadata_io.read_metadata(
        transform_artefacts_dir+"/transformed_metadata").schema.as_feature_spec()

    def _eval_receiver_fn():
        examples = tf.placeholder(
            dtype=tf.string, shape=[None], name='input_example_placeholder')

        parsed = tf.parse_example(examples, feature_spec)

        return tfma.export.EvalInputReceiver(
            features=parsed,
            receiver_tensors={'examples': examples},
            labels=parsed[TARGET_FEATURE_NAME])

    return _eval_receiver_fn

### 5.2 Export Evaluation Saved Model

In [None]:
# Base folder for the evaluation SavedModel, next to the serving export.
eval_model_dir = model_dir +"/export/evaluate"

transform_artefacts_dir = os.path.join(local_models_dir,TRANSFORM_ARTEFACTS_DIR)

# Export the trained estimator (dnn_estimator, created in the training run
# cell) with the eval input receiver so that TFMA can load it.
tfma.export.export_eval_savedmodel(
        estimator=dnn_estimator,
        export_dir_base=eval_model_dir,
        eval_input_receiver_fn=generate_eval_receiver_fn(transform_artefacts_dir)
)

### 5.3 Produce Evaluation Results using the Saved Model

In [None]:
# The empty SingleSliceSpec comes first; one slice is then added for every
# string-typed feature other than the key and the target.
slice_spec = [tfma.SingleSliceSpec()]
for feature_name, feature_spec in transformed_feature_spec.items():
    if feature_name not in (KEY_COLUMN, TARGET_FEATURE_NAME) and feature_spec.dtype == tf.string:
        slice_spec.append(tfma.SingleSliceSpec(columns=[feature_name]))

# print() with a single argument behaves identically on Python 2 and 3 and
# matches the print() calls used in the rest of the notebook.
print(slice_spec)
print("")

saved_model_base_dir=os.path.join(model_dir,'export/evaluate')
# Every run of the export cell adds a new timestamp-named sub-directory, and
# os.listdir() returns entries in arbitrary order: select the highest
# timestamp explicitly, ignoring non-numeric entries (e.g. temp folders).
model_location=os.path.join(
    saved_model_base_dir,
    max((name for name in os.listdir(saved_model_base_dir) if name.isdigit()), key=int))
data_location = os.path.join(local_data_dir, TRANSFORMED_DATA_DIR)+"/eval-*.tfrecords"

# Only log errors while the analysis runs.
tf.logging.set_verbosity(tf.logging.ERROR)

eval_results = tfma.run_model_analysis(
    model_location=model_location , 
    data_location=data_location, 
    file_format='tfrecords', 
    slice_spec=slice_spec, 
#     output_path=None
)

print("Evaluation results are ready!")

### 5.4 Visualise the Results

In [None]:
# print() call form: valid on both Python 2 and Python 3, and consistent with
# the print() calls used elsewhere in the notebook.
print(eval_results.slicing_metrics)

In [None]:
# Render the evaluation results sliced by the 'mother_race' column.
tfma.view.render_slicing_metrics(
        eval_results, 
    slicing_column='mother_race'
)

## 6. Train the Model on Cloud ML Engine

In [None]:
%%bash

# Submit a training job to Cloud ML Engine using the BASIC scale tier.
echo "Submitting a Cloud ML Engine job..."

# --- Job environment ---------------------------------------------------------
REGION="europe-west1"
TIER="BASIC" # BASIC | BASIC_GPU | STANDARD_1 | PREMIUM_1
BUCKET="ksalama-gcs-cloudml"

MODEL_NAME="babyweight_estimator"

# --- Trainer package and data locations --------------------------------------
PACKAGE_PATH="packages/babyweight-tf1.4/trainer"
TRAIN_FILES="gs://${BUCKET}/data/babyweight/train-data.csv"
VALID_FILES="gs://${BUCKET}/data/babyweight/eval-data.csv"
MODEL_DIR="gs://${BUCKET}/models/babyweight/${MODEL_NAME}"

# Remove the model directory if you do not want to resume training, or if the
# model structure has changed:
#gsutil -m rm -r ${MODEL_DIR}

# The job name is made unique by appending the submission timestamp.
CURRENT_DATE=$(date +%Y%m%d_%H%M%S)
JOB_NAME="train_${MODEL_NAME}_${TIER}_${CURRENT_DATE}"

# Flags before the bare "--" configure gcloud; flags after it are forwarded to
# trainer.task.
gcloud ml-engine jobs submit training ${JOB_NAME} \
        --job-dir=${MODEL_DIR} \
        --runtime-version=1.4 \
        --region=${REGION} \
        --scale-tier=${TIER} \
        --module-name=trainer.task \
        --package-path=${PACKAGE_PATH} \
        -- \
        --train-files=${TRAIN_FILES} \
        --num-epochs=100 \
        --train-batch-size=500 \
        --eval-files=${VALID_FILES} \
        --eval-batch-size=500 \
        --learning-rate=0.01 \
        --hidden-units="64,0,0" \
        --layer-sizes-scale-factor=0.5 \
        --num-layers=3 \
        --job-dir=${MODEL_DIR}

### Train the Model on Cloud ML Engine + GPUs

In [None]:
%%bash

# Submit a training job to Cloud ML Engine using the BASIC_GPU scale tier.
echo "Submitting a Cloud ML Engine job..."

# --- Job environment ---------------------------------------------------------
REGION="europe-west1"
TIER="BASIC_GPU" # BASIC | BASIC_GPU | STANDARD_1 | PREMIUM_1
BUCKET="ksalama-gcs-cloudml"

MODEL_NAME="babyweight_estimator"

# --- Trainer package and data locations --------------------------------------
# The file patterns contain wildcards; they are stored as literal strings here.
PACKAGE_PATH="packages/babyweight-tf1.4/trainer"
TRAIN_FILES="gs://${BUCKET}/data/babyweight/train-*.csv"
VALID_FILES="gs://${BUCKET}/data/babyweight/eval-*.csv"
# The tier is part of the model directory, keeping this run's output separate.
MODEL_DIR="gs://${BUCKET}/models/babyweight/${MODEL_NAME}_${TIER}"

# The job name is made unique by appending the submission timestamp.
CURRENT_DATE=$(date +%Y%m%d_%H%M%S)
JOB_NAME="train_${MODEL_NAME}_${TIER}_${CURRENT_DATE}"

# Flags before the bare "--" configure gcloud; flags after it are forwarded to
# trainer.task.
gcloud ml-engine jobs submit training ${JOB_NAME} \
        --job-dir=${MODEL_DIR} \
        --runtime-version=1.4 \
        --region=${REGION} \
        --scale-tier=${TIER} \
        --module-name=trainer.task \
        --package-path=${PACKAGE_PATH} \
        -- \
        --train-files=${TRAIN_FILES} \
        --num-epochs=10 \
        --train-batch-size=1000 \
        --eval-files=${VALID_FILES} \
        --eval-batch-size=1000 \
        --learning-rate=0.01 \
        --hidden-units="64,0,0" \
        --layer-sizes-scale-factor=0.5 \
        --num-layers=3 \
        --job-dir=${MODEL_DIR}

### Train the Model on Cloud ML Engine + Custom GPUs Cluster

In [None]:
%%bash

# Submit a training job to Cloud ML Engine with a cluster described by a
# config file (--config) instead of a --scale-tier flag.
echo "Submitting a Cloud ML Engine job..."

# --- Job environment ---------------------------------------------------------
REGION="europe-west1"
# TIER is only used to build MODEL_DIR and JOB_NAME in this cell.
TIER="CUSTOM" # BASIC | BASIC_GPU | STANDARD_1 | PREMIUM_1
BUCKET="ksalama-gcs-cloudml"

MODEL_NAME="babyweight_estimator"

# --- Trainer package and data locations --------------------------------------
PACKAGE_PATH="packages/babyweight-tf1.4/trainer"
TRAIN_FILES="gs://${BUCKET}/data/babyweight/big_data/train-*.csv"
VALID_FILES="gs://${BUCKET}/data/babyweight/big_data/eval-*.csv"
MODEL_DIR="gs://${BUCKET}/models/babyweight/${MODEL_NAME}_${TIER}"

# The job name is made unique by appending the submission timestamp.
CURRENT_DATE=$(date +%Y%m%d_%H%M%S)
JOB_NAME="train_${MODEL_NAME}_${TIER}_${CURRENT_DATE}"

# NOTE(review): --config points under "ml-packages/" while PACKAGE_PATH points
# under "packages/" -- confirm both paths exist as written.
# Flags before the bare "--" configure gcloud; flags after it are forwarded to
# trainer.task.
gcloud ml-engine jobs submit training ${JOB_NAME} \
        --job-dir=${MODEL_DIR} \
        --runtime-version=1.4 \
        --region=${REGION} \
        --module-name=trainer.task \
        --package-path=${PACKAGE_PATH} \
        --config=ml-packages/babyweight-tf1.4/custom.yaml \
        -- \
        --train-files=${TRAIN_FILES} \
        --num-epochs=100 \
        --train-batch-size=1000 \
        --eval-files=${VALID_FILES} \
        --eval-batch-size=1000 \
        --learning-rate=0.001 \
        --hidden-units="64,0,0" \
        --layer-sizes-scale-factor=0.5 \
        --num-layers=3 \
        --job-dir=${MODEL_DIR}

### Hyper-parameters Tuning on Cloud ML Engine

In [None]:
%%bash

# Submit a hyper-parameter tuning job to Cloud ML Engine; the tuning setup is
# read from the file given to --config.
echo "Submitting a Cloud ML Engine job..."

REGION=europe-west1
BUCKET=ksalama-gcs-cloudml

MODEL_NAME="babyweight_estimator"

PACKAGE_PATH=packages/babyweight-tf1.4/trainer
TRAIN_FILES=gs://${BUCKET}/data/babyweight/big_data/train-*.csv
VALID_FILES=gs://${BUCKET}/data/babyweight/big_data/eval-*.csv
MODEL_DIR=gs://${BUCKET}/models/babyweight/${MODEL_NAME}_tune

# The job name is made unique by appending the submission timestamp.
# TIER is not defined in this cell (every %%bash cell runs in its own shell),
# so it is left out of the name rather than expanding to an empty segment.
CURRENT_DATE=$(date +%Y%m%d_%H%M%S)
JOB_NAME=tune_${MODEL_NAME}_${CURRENT_DATE}

# NOTE(review): --config points under "ml-packages/" while PACKAGE_PATH points
# under "packages/" -- confirm both paths exist as written.
# Flags before the bare "--" configure gcloud; flags after it are forwarded to
# trainer.task.
gcloud ml-engine jobs submit training ${JOB_NAME} \
        --job-dir=${MODEL_DIR} \
        --runtime-version=1.4 \
        --region=${REGION} \
        --module-name=trainer.task \
        --package-path=${PACKAGE_PATH} \
        --config=ml-packages/babyweight-tf1.4/hyperparams.yaml \
        -- \
        --train-files=${TRAIN_FILES} \
        --num-epochs=100 \
        --train-batch-size=1000 \
        --eval-files=${VALID_FILES} \
        --eval-batch-size=1000 \
        --job-dir=${MODEL_DIR}

## 7. Deploy the Model on Cloud ML Engine

In [None]:
%%bash

# Locate the exported model binaries in Cloud Storage. The commands that
# delete, create and invoke the deployed model are kept commented out below;
# uncomment the ones you need.
REGION=europe-west1
BUCKET=ksalama-gcs-cloudml

MODEL_NAME="babyweight_estimator"
MODEL_VERSION="v1"

# Keep only the last line of the listing of the "estimate" export folder.
MODEL_BINARIES=$(gsutil ls gs://${BUCKET}/models/babyweight/${MODEL_NAME}/export/estimate | tail -1)

# Show the contents of the selected export.
gsutil ls ${MODEL_BINARIES}

# #delete model version
# gcloud ml-engine versions delete ${MODEL_VERSION} --model=${MODEL_NAME}

# #delete model
# gcloud ml-engine models delete ${MODEL_NAME}

# #deploy model to GCP
# gcloud ml-engine models create ${MODEL_NAME} --regions=${REGION}

# #deploy model version
# gcloud ml-engine versions create ${MODEL_VERSION} --model=${MODEL_NAME} --origin=${MODEL_BINARIES} --runtime-version=1.4

# echo  ${MODEL_NAME} ${MODEL_VERSION} 
# #invoke deployed model to make prediction given new data instances
# gcloud ml-engine predict --model=${MODEL_NAME} --version=${MODEL_VERSION} --json-instances=data/babyweight/new-data.json

## 8. Consume the Deployed Model as API

In [None]:
from googleapiclient import discovery
from oauth2client.client import GoogleCredentials

def estimate(project, model_name, version, instances):
    """Request online predictions from a deployed model version.

    Args:
        project: project id used in the model resource name.
        model_name: name of the deployed model.
        version: name of the model version.
        instances: list of instances, sent as the ``'instances'`` field of the
            request body.

    Returns:
        A list with one value per returned prediction: the ``"scores"`` entry
        of each prediction, rounded to two decimals.

    Raises:
        RuntimeError: if the service response carries an ``"error"`` entry
            instead of predictions.
    """

    credentials = GoogleCredentials.get_application_default()
    api = discovery.build('ml', 'v1', credentials=credentials,
                discoveryServiceUrl='https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json')

    request_data = {'instances': instances}

    model_url = 'projects/{}/models/{}/versions/{}'.format(project, model_name, version)
    response = api.projects().predict(body=request_data, name=model_url).execute()

    # A failed prediction request is reported through an "error" entry; surface
    # its message instead of failing later with KeyError: 'predictions'.
    if 'error' in response:
        raise RuntimeError(response['error'])

    # NOTE(review): the output key read here is "scores", whereas the local
    # predictor in this notebook reads "predictions" -- confirm which key the
    # deployed model's serving signature actually returns.
    return [round(item["scores"], 2) for item in response["predictions"]]

In [None]:
# Identifiers of the deployed model version to call.
PROJECT='ksalama-gcp-playground'
MODEL_NAME='babyweight_estimator'
VERSION='v1'

# Raw (untransformed) instances to score; each has the same keys as the
# instance used for the local prediction.
instances = [
      {
        'is_male': 'True',
        'mother_age': 26.0,
        'mother_race': 'Asian Indian',
        'plurality': 1.0,
        'gestation_weeks': 39,
        'mother_married': 'True',
        'cigarette_use': 'False',
        'alcohol_use': 'False'
      },
      {
        'is_male': 'False',
        'mother_age': 29.0,
        'mother_race': 'Asian Indian',
        'plurality': 1.0,
        'gestation_weeks': 38,
        'mother_married': 'True',
        'cigarette_use': 'False',
        'alcohol_use': 'False'
      },
      {
        'is_male': 'True',
        'mother_age': 26.0,
        'mother_race': 'White',
        'plurality': 1.0,
        'gestation_weeks': 39,
        'mother_married': 'True',
        'cigarette_use': 'False',
        'alcohol_use': 'False'
      },
      {
        'is_male': 'True',
        'mother_age': 26.0,
        'mother_race': 'White',
        'plurality': 2.0,
        'gestation_weeks': 37,
        'mother_married': 'True',
        'cigarette_use': 'False',
        'alcohol_use': 'True'
      }
  ]

# Call the deployed model through the prediction API.
estimates = estimate(instances=instances
                     ,project=PROJECT
                     ,model_name=MODEL_NAME
                     ,version=VERSION)

print(estimates)

# The End :-)

# The End :-)

In [None]:
%%bash 

# List the installed Python packages and their versions.
pip list