In [6]:
# Google Cloud project ID. None of the visible gsutil/gcloud cells pass this
# variable explicitly; they rely on the active gcloud configuration.
PROJECT_ID =  "dotrzechrazy"

In [7]:
# Cloud Storage bucket that receives the exported SavedModel, and the region
# passed to `gsutil mb` when the bucket is created.
BUCKET_NAME = "bike_data_2"
REGION = "us-central1"

In [8]:
# Create the bucket in REGION. When the bucket already exists gsutil reports a
# 409 ServiceException (see the captured output); later cells still write to it.
!gsutil mb -l $REGION gs://$BUCKET_NAME

Creating gs://bike_data_2/...
ServiceException: 409 Bucket bike_data_2 already exists.


In [3]:
# List the buckets gsutil can see, as a sanity check that BUCKET_NAME exists.
!gsutil ls

gs://bike_data_2/
gs://dotrzech/
gs://spamdetector/


In [10]:
import tensorflow as tf 
import pandas as pd
import numpy as np 
import json
import time

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from tabulate import tabulate

# Print the installed TensorFlow version. It should be 1.15.0 to match the
# `--runtime-version 1.15` used when the model version is deployed; any other
# value means the training and serving runtimes differ.
print(tf.__version__)

2.1.0


In [None]:
# Copy the sample bike-trip CSV from the explanations_sample_data bucket into
# the working directory; the data-loading cell reads it as 'bike-data.csv'.
!gsutil cp 'gs://explanations_sample_data/bike-data.csv' ./

In [5]:
# Upload the local file 1.ipynb to the gs://dotrzech/ bucket. No visible cell
# reads it back, so this is independent of the modelling pipeline.
!gsutil cp 1.ipynb  'gs://dotrzech/'

Copying file://1.ipynb [Content-Type=application/octet-stream]...
/ [1 files][  2.7 KiB/  2.7 KiB]                                                
Operation completed over 1 objects/2.7 KiB.                                      


In [12]:
# Load the raw trip data (expects bike-data.csv in the working directory)
data = pd.read_csv('bike-data.csv')

# Shuffle the rows; the fixed seed keeps the later train/test split reproducible
data = data.sample(frac=1, random_state=2)

# Drop rows whose weather readings are missing. The file marks these with
# placeholder numbers (999.9 / 9999.9) rather than real NaNs, so an explicit
# comparison is needed.
has_wind_speed = data['wdsp'] != 999.9
has_dew_point = data['dewp'] != 9999.9
data = data[has_wind_speed & has_dew_point]

# Rename some columns for readability
data = data.rename(columns={
    'day_of_week': 'weekday',
    'max': 'max_temp',
    'dewp': 'dew_point'
})

# Drop columns we won't use to train this model
data = data.drop(columns=['start_station_name', 'end_station_name', 'bike_id', 'snow_ice_pellets'])

# Convert trip duration from seconds to minutes so it's easier to understand.
# Vectorised division gives the same float result as a per-row lambda without
# a Python-level call for every row.
data['duration'] = data['duration'] / 60

In [13]:
# Preview the cleaned frame; `duration` is in minutes at this point.
data.head()

Unnamed: 0,start_hr,weekday,euclidean,temp,dew_point,wdsp,max_temp,fog,prcp,rain_drizzle,duration
1559391,21,4,582.72488,54.7,46.2,0.0,56.3,0,0.0,0,22.0
1881386,16,2,2358.922742,57.1,48.2,0.0,64.9,0,0.0,0,12.0
703461,23,1,766.066253,69.2,51.6,0.0,80.4,0,0.0,0,12.0
1688076,9,5,2271.877229,52.5,45.4,0.0,63.3,0,0.02,0,13.0
2079007,14,7,2476.718116,50.3,43.5,0.0,56.1,0,0.01,0,28.0


In [14]:
# Separate the regression target from the feature columns
labels = data.loc[:, 'duration']
data = data.drop('duration', axis=1)

In [15]:
# Hold out the last 20% of the (already shuffled) rows for testing
train_size = int(len(data) * .8)
test_size = len(data) - train_size
print("Train size: %d" % train_size)
print("Test size: %d" % test_size)

# Positional split: the first train_size rows train, the remainder test
train_data, test_data = data.iloc[:train_size], data.iloc[train_size:]
train_labels, test_labels = labels.iloc[:train_size], labels.iloc[train_size:]
train_data.head()

Train size: 757363
Test size: 189341


Unnamed: 0,start_hr,weekday,euclidean,temp,dew_point,wdsp,max_temp,fog,prcp,rain_drizzle
1559391,21,4,582.72488,54.7,46.2,0.0,56.3,0,0.0,0
1881386,16,2,2358.922742,57.1,48.2,0.0,64.9,0,0.0,0
703461,23,1,766.066253,69.2,51.6,0.0,80.4,0,0.0,0
1688076,9,5,2271.877229,52.5,45.4,0.0,63.3,0,0.02,0
2079007,14,7,2476.718116,50.3,43.5,0.0,56.1,0,0.01,0


In [16]:
# Build our model
model = tf.keras.Sequential(name="bike_predict")
model.add(tf.keras.layers.Dense(64, input_dim=len(train_data.iloc[0]), activation='relu'))
model.add(tf.keras.layers.Dense(32, activation='relu'))
model.add(tf.keras.layers.Dense(1))

In [17]:
# Compile the model and see a summary
optimizer = tf.keras.optimizers.Adam(0.001)
model.compile(loss='mean_squared_logarithmic_error', optimizer=optimizer)
model.summary()

Model: "bike_predict"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 64)                704       
_________________________________________________________________
dense_1 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 33        
Total params: 2,817
Trainable params: 2,817
Non-trainable params: 0
_________________________________________________________________


In [18]:
batch_size = 190
epochs = 3

# Build one dataset of (features, label) pairs so each row stays attached to
# its label by construction, rather than batching two independent streams and
# zipping them afterwards. Batches and their order are unchanged.
# .repeat() makes the dataset infinite, so fit() must be given steps_per_epoch.
train_dataset = (
    tf.data.Dataset
    .from_tensor_slices((train_data.values, train_labels.values))
    .batch(batch_size)
    .repeat()
)



In [19]:
# train_dataset repeats indefinitely, so steps_per_epoch bounds each epoch to
# the number of full batches in the training set.
model.fit(train_dataset, steps_per_epoch=train_size // batch_size, epochs=epochs)

Train for 3986 steps
Epoch 1/3
Epoch 2/3
Epoch 3/3


<tensorflow.python.keras.callbacks.History at 0x7fa05c1ed630>

In [20]:
# Evaluate on the held-out rows. The model was compiled with a loss and no
# extra metrics, so `results` is the mean squared logarithmic error.
results = model.evaluate(test_data, test_labels)
print(results)

0.36099003442602756


In [21]:
# Predict durations for the first five held-out rows (one row of output per input row).
predict = model.predict(test_data[:5])

In [22]:
# Show each prediction next to the true duration for the same test row
for row_idx in range(len(predict)):
    predicted_minutes = round(predict[row_idx][0])
    actual_minutes = test_labels.iloc[row_idx]
    print('Predicted duration: {}'.format(predicted_minutes))
    print('Actual duration: {} \n'.format(actual_minutes))

Predicted duration: 19.0
Actual duration: 22.0 

Predicted duration: 28.0
Actual duration: 31.0 

Predicted duration: 27.0
Actual duration: 23.0 

Predicted duration: 18.0
Actual duration: 14.0 

Predicted duration: 13.0
Actual duration: 9.0 



In [23]:
# Wrap the trained Keras model in a tf.estimator Estimator so it can be
# exported with export_saved_model; its checkpoints are written under ./export.
keras_estimator = tf.keras.estimator.model_to_estimator(keras_model=model, model_dir='export')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using config: {'_eval_distribute': None, '_is_chief': True, '_protocol': None, '_keep_checkpoint_every_n_hours': 10000, '_session_creation_timeout_secs': 7200, '_train_distribute': None, '_num_ps_replicas': 0, '_keep_checkpoint_max': 5, '_task_id': 0, '_num_worker_replicas': 1, '_model_dir': 'export', '_cluster_spec': ClusterSpec({}), '_master': '', '_save_summary_steps': 100, '_device_fn': None, '_global_id_in_cluster': 0, '_save_checkpoints_steps': None, '_log_step_count_steps': 100, '_evaluation_master': '', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_save_checkpoints_secs': 600, '_experimental_max_worker_delay_secs': None, '_service': None, '_experimental_distribute': None, '_tf_random_seed': None, '_task_type': 'worker'}


In [24]:
# Serving input receiver that feeds raw tensors straight into the model.
# The key 'dense_input' becomes the input name of the exported serving
# signature and is the key prediction requests must use.
serving_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(
    {'dense_input': model.input}
)

In [25]:
# Export a SavedModel under gs://<bucket>/explanations. The estimator returns
# the timestamped export directory as bytes, so decode it for use in shell cells.
export_base_dir = 'gs://{}/explanations'.format(BUCKET_NAME)
exported_dir_bytes = keras_estimator.export_saved_model(
    export_base_dir,
    serving_input_receiver_fn=serving_fn
)
export_path = exported_dir_bytes.decode('utf-8')
print(export_path)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Restoring parameters from export/keras/keras_model.ckpt
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: gs://bike_data_2/explanations/temp-b'1582664383'/saved_model.pb
gs://bike_data_2/explanations/1582664383


In [26]:
# Inspect the exported SavedModel's signatures to see the input/output tensor
# names the serving graph actually exposes.
!saved_model_cli show --dir $export_path --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['dense_input'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 10)
        name: dense_input:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['dense_2'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: dense_2/BiasAdd:0
  Method name is: tensorflow/serving/predict


In [27]:
# Print the names of our tensors
print('Model input tensor: ', model.input.name)
print('Model output tensor: ', model.output.name)

Model input tensor:  dense_input:0
Model output tensor:  dense_2/Identity:0


In [28]:
# Metadata describing the model's input/output tensors for the explanation
# service; it is written to explanation_metadata.json and uploaded next to the
# SavedModel.
# NOTE(review): model.output.name printed above is 'dense_2/Identity:0', while
# the exported SavedModel signature lists the output as 'dense_2/BiasAdd:0'.
# Confirm which name exists in the served graph before relying on explanations.
explanation_metadata = {
    "inputs": {
      "data": {
        "input_tensor_name": model.input.name,
        # Baseline = per-feature median of the training data
        "input_baselines": [train_data.median().values.tolist()],
        "encoding": "bag_of_features", 
        # Feature names, in the same order as the model's input columns
        "index_feature_mapping": train_data.columns.tolist()
      }
    },
    "outputs": {
      "duration": {
        "output_tensor_name": model.output.name
      }
    },
  "framework": "tensorflow"
  }

In [29]:
# Serialise the explanation metadata to a local JSON file for upload
with open('explanation_metadata.json', 'w') as metadata_file:
    json.dump(explanation_metadata, metadata_file)

In [30]:
# Upload the metadata file into the export directory, which is later passed as
# the version's --origin.
!gsutil cp explanation_metadata.json $export_path

Copying file://explanation_metadata.json [Content-Type=application/json]...
/ [1 files][  416.0 B/  416.0 B]                                                
Operation completed over 1 objects/416.0 B.                                      


In [35]:
# Name of the AI Platform model resource that versions are created under.
MODEL = 'bike2'

In [36]:
!gcloud ai-platform models create $MODEL --enable-logging --regions=us-central1

Created ml engine model [projects/dotrzechrazy/models/bike2].


In [62]:
# Name of the model version to create under MODEL.
VERSION = 'v3'

In [63]:
# Create the version with gcloud
explain_method = 'integrated-gradients'
!gcloud beta ai-platform versions create $VERSION \
--model $MODEL \
--origin $export_path \
--runtime-version 1.15 \
--framework TENSORFLOW \
--python-version 3.7 \
--machine-type n1-standard-4 \
--num-integral-steps 25

Creating version (this might take a few minutes)......done.                    


In [81]:
# Format data for prediction to our model: one test row, keyed by the serving
# signature's input name ('dense_input').
prediction_json = {'dense_input': test_data.iloc[2].values.tolist()}
# Open with 'w', not 'a': appending on a re-run would concatenate a second JSON
# object onto the same line and make the instances file unparseable.
with open('bike-data4.txt', 'w') as outfile:
    json.dump(prediction_json, outfile)

In [83]:
# Show the request file to confirm it holds exactly one JSON instance.
!cat bike-data4.txt

{"dense_input": [18.0, 2.0, 3919.0952753459173, 53.2, 41.1, 0.0, 61.7, 0.0, 0.05, 0.0]}

In [84]:
# Reuse MODEL / VERSION rather than repeating their literal values, so the
# prediction request always targets the version that was just deployed.
MODEL_NAME = MODEL
INPUT_DATA_FILE = "bike-data4.txt"
VERSION_NAME = VERSION

In [85]:
resp_obj = !gcloud ml-engine predict --model $MODEL_NAME \
  --version $VERSION_NAME \
  --json-instances $INPUT_DATA_FILE

In [86]:
# Display the captured gcloud output (a list of output lines).
resp_obj

 '[[-859.313965]]']

In [87]:
resp_obj2 = !gcloud beta ai-platform predict --model $MODEL --version $VERSION --json-instances='bike-data4.txt'

In [88]:
# Display the captured output of the beta predict call for comparison with resp_obj.
resp_obj2

['[[-859.313965]]']