[View in Colaboratory](https://colab.research.google.com/github/moonbury/ipython-notebook/blob/master/serving_embed.ipynb)

In [0]:
# change these to try this notebook out
BUCKET = 'gcp-learn-209814.appspot.com'
PROJECT = 'gcp-learn-209814'
REGION = 'ASIA-NORTHEAST1'

In [0]:
import os
os.environ['BUCKET'] = BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION

In [14]:
! gcloud config set project $PROJECT
! gcloud config set compute/region $REGION

Updated property [core/project].
Updated property [compute/region].


In [15]:
!pip3 install datalab

Collecting datalab
[?25l  Downloading https://files.pythonhosted.org/packages/34/20/3b4963045b1f5ebb79253a2b22225530faecf563f293a19acbbbbc097ddf/datalab-1.1.4.tar.gz (1.3MB)
[K    100% |████████████████████████████████| 1.3MB 5.9MB/s 
[?25hCollecting configparser>=3.5.0 (from datalab)
  Downloading https://files.pythonhosted.org/packages/7c/69/c2ce7e91c89dc073eb1aa74c0621c3eefbffe8216b3f9af9d3885265c01c/configparser-3.5.0.tar.gz
Collecting mock>=2.0.0 (from datalab)
[?25l  Downloading https://files.pythonhosted.org/packages/e6/35/f187bdf23be87092bd0f1200d43d23076cee4d0dec109f195173fd3ebc79/mock-2.0.0-py2.py3-none-any.whl (56kB)
[K    100% |████████████████████████████████| 61kB 11.5MB/s 
Collecting google-cloud>=0.30.0 (from datalab)
  Downloading https://files.pythonhosted.org/packages/ba/b1/7c54d1950e7808df06642274e677dbcedba57f75307adf2e5ad8d39e5e0e/google_cloud-0.34.0-py2.py3-none-any.whl
Collecting pandas-profiling>=1.0.0a2 (from datalab)
  Downloading https://files.pythonhos

In [0]:
from google.colab import auth
auth.authenticate_user()

In [0]:
query = """
#standardsql
WITH bicycle_rentals AS (
  SELECT
    COUNT(starttime) as num_trips,
    EXTRACT(DATE from starttime) as trip_date,
    MAX(EXTRACT(DAYOFWEEK from starttime)) as day_of_week,
    start_station_id
  FROM `bigquery-public-data.new_york.citibike_trips`
  GROUP BY trip_date, start_station_id
),

rainy_days AS
(
SELECT
  date,
  (MAX(prcp) > 5) AS rainy
FROM (
  SELECT
    wx.date AS date,
    IF (wx.element = 'PRCP', wx.value/10, NULL) AS prcp
  FROM
    `bigquery-public-data.ghcn_d.ghcnd_2016` AS wx
  WHERE
    wx.id = 'USW00094728'
)
GROUP BY
  date
)

SELECT
  num_trips,
  day_of_week,
  start_station_id,
  rainy
FROM bicycle_rentals AS bk
JOIN rainy_days AS wx
ON wx.date = bk.trip_date
"""
import google.datalab.bigquery as bq
df = bq.Query(query).execute().result().to_dataframe()

In [19]:
# shuffle the dataframe to make it easier to split into train/eval later
df = df.sample(frac=1.0)
df.head()

Unnamed: 0,num_trips,day_of_week,start_station_id,rainy
74907,13,5,3082,False
35134,3,2,3058,True
8916,33,1,367,False
89727,13,5,2022,False
28807,13,2,406,False


In [21]:
df.dtypes

num_trips           int64
day_of_week         int64
start_station_id    int64
rainy                bool
dtype: object

In [22]:
import numpy as np
df = df.astype({'num_trips': np.float32, 'day_of_week': np.int32, 'start_station_id': np.int32, 'rainy': str})
df.dtypes

num_trips           float32
day_of_week           int32
start_station_id      int32
rainy                object
dtype: object

In [0]:
df['num_trips'] = df['num_trips'] / 1000.0

In [24]:
num_train = (int) (len(df) * 0.8)
train_df = df.iloc[:num_train]
eval_df  = df.iloc[num_train:]
print("Split into {} training examples and {} evaluation examples".format(len(train_df), len(eval_df)))

Split into 104148 training examples and 26037 evaluation examples


In [26]:
train_df.head()

Unnamed: 0,num_trips,day_of_week,start_station_id,rainy
74907,0.013,5,3082,False
35134,0.003,2,3058,True
8916,0.033,1,367,False
89727,0.013,5,2022,False
28807,0.013,2,406,False


In [27]:
import tensorflow as tf
import pandas as pd

def make_input_fn(indf, num_epochs):
  return tf.estimator.inputs.pandas_input_fn(
    indf,
    indf['num_trips'],
    num_epochs=num_epochs,
    shuffle=True)

def serving_input_fn():
    feature_placeholders = {
      'day_of_week': tf.placeholder(tf.int32, [None]),
      'start_station_id': tf.placeholder(tf.int32, [None]),
      'rainy': tf.placeholder(tf.string, [None])
    }
    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)
  
def train_and_evaluate(output_dir, nsteps):
  station_embed = tf.feature_column.embedding_column(
      tf.feature_column.categorical_column_with_hash_bucket('start_station_id', 5000, tf.int32), 2)
  feature_cols = [
    tf.feature_column.categorical_column_with_identity('day_of_week', num_buckets = 8),
    station_embed,
    tf.feature_column.categorical_column_with_vocabulary_list('rainy', ['false', 'true'])
  ]
  estimator = tf.estimator.LinearRegressor(
                       model_dir = output_dir,
                       feature_columns = feature_cols)
  train_spec=tf.estimator.TrainSpec(
                       input_fn = make_input_fn(train_df, None),
                       max_steps = nsteps)
  exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
  eval_spec=tf.estimator.EvalSpec(
                       input_fn = make_input_fn(eval_df, 1),
                       steps = None,
                       start_delay_secs = 1, # start evaluating after N seconds
                       throttle_secs = 10,  # evaluate every N seconds
                       exporters = exporter)
  tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)
  
import shutil
OUTDIR='./model_trained'
shutil.rmtree(OUTDIR, ignore_errors=True)
train_and_evaluate(OUTDIR, 10)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model_trained', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2c7091f390>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
INFO:tensorflow:Calling mod

In [30]:
%%writefile test.json
{"day_of_week": 3, "start_station_id": 384, "rainy": "false"}
{"day_of_week": 4, "start_station_id": 384, "rainy": "true"}

Writing test.json


In [32]:
!ls

adc.json  datalab  model_trained  sample_data  test.json


In [33]:
%%bash
EXPORTDIR=./model_trained/export/exporter/
MODELDIR=$(ls $EXPORTDIR | tail -1)
gcloud ml-engine local predict --model-dir=${EXPORTDIR}/${MODELDIR} --json-instances=./test.json

ERROR: (gcloud.ml-engine.local.predict) RuntimeError: Bad magic number in .pyc file

