## Tutorial for using TF.Data, Keras, Estimator, and Google storage

In [1]:
import numpy as np
import matplotlib.pyplot as plt

Make sure you have TF >= 1.11

If you have TF < 1.11, upgrade it first, e.g. with ```pip install --upgrade tf-nightly``` (use ```tf-nightly-gpu``` instead for the GPU build).

In [2]:
from tensorflow import keras
import tensorflow as tf
# Show the installed TensorFlow version so it can be compared with the >= 1.11 requirement stated above.
print(tf.__version__)

1.11.0


### 0. Data & Model preparation

Load MNIST dataset

In [3]:
# Fetch the MNIST train / test splits through the Keras dataset helper.
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()
# Shapes are printed in the order: x_train, x_test, y_train, y_test.
print(*(arr.shape for arr in (x_train, x_test, y_train, y_test)))

(60000, 28, 28) (10000, 28, 28) (60000,) (10000,)


X and Y are usually prepared in float32 & int32

In [4]:
# Images become float32 and labels become int32 (both splits).
x_train, x_test = (images.astype(np.float32) for images in (x_train, x_test))
y_train, y_test = (labels.astype(np.int32) for labels in (y_train, y_test))

In [5]:
# Problem dimensions derived from the loaded arrays.
nClass = 10                                    # number of label classes
nData_train, nData_test = len(x_train), len(x_test)
nDimIn = x_train.shape[1] * x_train.shape[2]   # pixels per image once flattened
nDimOut = nClass                               # one output unit per class
print(nData_train, nData_test, nDimIn, nDimOut)

60000 10000 784 10


My Keras Model

In [6]:
def myModel():
    """Build and compile the Keras classifier used throughout this notebook.

    The network takes a flat vector of length ``nDimIn``, applies two 40-unit
    ReLU dense layers ('fc1', 'fc2') and ends with an ``nDimOut``-unit softmax
    layer ('dataOut'). It is compiled with Adam (learning rate 0.001),
    categorical cross-entropy loss and an accuracy metric.

    Returns:
        The compiled ``keras.Model``.
    """
    inputs = keras.Input(shape=(nDimIn,), name='X')

    # Stack the two identical hidden layers.
    hidden = inputs
    for layer_name in ('fc1', 'fc2'):
        hidden = keras.layers.Dense(40, activation='relu', name=layer_name)(hidden)

    probs = keras.layers.Dense(nDimOut, activation='softmax', name='dataOut')(hidden)

    net = keras.Model(inputs=inputs, outputs=probs, name='Y')
    net.compile(
        optimizer=tf.train.AdamOptimizer(0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
# Training hyper-parameters.
batchSize = 64
nEpoch = 5
# Number of whole mini-batches in one pass over the training set.
nSteps = nData_train // batchSize

Number of steps (mini-batches) per epoch for training and for testing

In [7]:
# Whole mini-batches per epoch; a trailing partial batch is not counted.
nStep_train = nData_train // batchSize
nStep_test = nData_test // batchSize

In [8]:
# Build the model once only to print its layer summary.
model = myModel()
model.summary()
# Throw the temporary model away and reset the Keras backend session before the real runs.
del model
keras.backend.clear_session()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
X (InputLayer)               (None, 784)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 40)                31400     
_________________________________________________________________
fc2 (Dense)                  (None, 40)                1640      
_________________________________________________________________
dataOut (Dense)              (None, 10)                410       
Total params: 33,450
Trainable params: 33,450
Non-trainable params: 0
_________________________________________________________________


### 1. Using Keras with tf.data from numpy data on the memory

Data preprocessing

In [9]:
def myMapFunc_npy(xx, yy):
    """Per-example transform for the in-memory (NumPy-backed) dataset.

    Args:
        xx: one image tensor; it is flattened to 1-D here.
        yy: the integer class label of that image.

    Returns:
        ``(pixels, label)``: the flattened pixels divided by 255 and the
        label one-hot encoded with depth ``nClass``.
    """
    scale = tf.constant(255, dtype=tf.float32, shape=(nDimIn,))
    flat_pixels = tf.reshape(xx, [-1])
    return tf.div(flat_pixels, scale), tf.one_hot(yy, nClass)

Prepare train and test datasets using tf.data

shuffle 10000 instances -> data transformation (e.g. normalization) -> minibatches

In [10]:
# Training pipeline: shuffle inside a 10000-element buffer, apply the
# per-example transform, group into mini-batches, then repeat endlessly.
data_train = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .map(myMapFunc_npy)
    .batch(batchSize)
    .repeat()
)
# Test pipeline: the same stages applied to the held-out split.
data_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .shuffle(10000)
    .map(myMapFunc_npy)
    .batch(batchSize)
    .repeat()
)

In [11]:
# Fresh, untrained model for this section.
model = myModel()

In [12]:
# Both datasets end with .repeat(), so the length of an epoch is given
# explicitly through the step counts.
model.fit(
    data_train,
    epochs=nEpoch,
    steps_per_epoch=nStep_train,
    validation_data=data_test,
    validation_steps=nStep_test,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f96457a7d68>

In [13]:
# Drop this section's datasets and model, then reset the Keras backend session so the next section starts clean.
del data_train, data_test, model
keras.backend.clear_session()

### 2. Using Keras with tf.data from TFRecord files

#### 2-1. Write a TFRecord file

Write binary files

In [14]:
def _int64_feature(value):
    """Wrap a single integer in a ``tf.train.Feature`` holding an ``Int64List``."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)
def _float_feature(array):
    """Wrap a sequence of floats in a ``tf.train.Feature`` holding a ``FloatList``.

    Unlike the int64 / bytes helpers, ``array`` is passed through as the
    whole value list rather than being wrapped in a one-element list.
    """
    float_list = tf.train.FloatList(value=array)
    return tf.train.Feature(float_list=float_list)
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` holding a ``BytesList``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def myWriteTFRecord(filename, xx, yy):
    """Serialize paired features and labels into one TFRecord file.

    Every record is a ``tf.train.Example`` with two features:
    ``'X'`` (float list built from one element of ``xx``) and
    ``'Y'`` (single int64 built from the matching element of ``yy``).

    Args:
        filename: path of the TFRecord file to create.
        xx: sized collection of per-example float sequences.
        yy: sized collection of per-example integer labels; must have the
            same length as ``xx``.

    Raises:
        ValueError: if ``xx`` and ``yy`` differ in length. The check runs
            before the output file is opened, so nothing is written.
    """
    if len(xx) != len(yy):
        raise ValueError(
            'xx and yy must have the same length, got %d and %d'
            % (len(xx), len(yy)))

    # The context manager closes the writer even when building or writing a
    # record raises, so the file handle is never leaked.
    with tf.python_io.TFRecordWriter(filename) as writer:
        for features_x, label_y in zip(xx, yy):
            myFeat = tf.train.Features(feature={
                'X': _float_feature(features_x),
                'Y': _int64_feature(label_y)})
            example = tf.train.Example(features=myFeat)
            writer.write(example.SerializeToString())

The data must be 2-dimensional, with shape (N, D): one flattened row of D values per example

In [15]:
# Flatten every image into one row: (N, rows, cols) -> (N, rows*cols).
x_train_vec = x_train.reshape(nData_train, -1)
x_test_vec = x_test.reshape(nData_test, -1)

In [16]:
# Local output paths (relative to the working directory) for the two splits.
file_train, file_test = 'mnist_train.tfrecords', 'mnist_test.tfrecords'

In [17]:
# Serialize each split (flattened images + labels) into its own TFRecord file.
myWriteTFRecord(filename=file_train, xx=x_train_vec, yy=y_train)
myWriteTFRecord(filename=file_test, xx=x_test_vec, yy=y_test)

#### 2-2. Training from the TFRecord files

Data preprocessing

In [18]:
def myMapFunc_onehot(example):
    """Decode one serialized ``tf.train.Example`` into ``(pixels, one-hot label)``.

    Args:
        example: a serialized record with a float feature ``'X'`` of length
            ``nDimIn`` and an int64 feature ``'Y'`` of length 1.

    Returns:
        ``(pixels, label)``: ``'X'`` divided by 255, and ``'Y'`` one-hot
        encoded (depth ``nClass``, float32) and flattened to 1-D.
    """
    schema = {
        'X': tf.FixedLenFeature(nDimIn, tf.float32),
        'Y': tf.FixedLenFeature(1, tf.int64),
    }
    parsed = tf.parse_single_example(example, schema)

    scale = tf.constant(255, dtype=tf.float32, shape=(nDimIn,))
    pixels = tf.div(parsed['X'], scale)

    # 'Y' is parsed as a length-1 vector, so one_hot adds a leading axis of
    # size 1; the reshape flattens it away.
    label = tf.one_hot(parsed['Y'], nClass, dtype=tf.float32)
    return pixels, tf.reshape(label, [-1])

Prepare train and test datasets using tf.data

In [19]:
# Training pipeline read from the TFRecord file: shuffle with a buffer as
# large as the split, decode each record, batch, and repeat endlessly.
data_train = (
    tf.data.TFRecordDataset(file_train)
    .shuffle(nData_train)
    .map(myMapFunc_onehot)
    .batch(batchSize)
    .repeat()
)
# Test pipeline: the same stages over the test file.
data_test = (
    tf.data.TFRecordDataset(file_test)
    .shuffle(nData_test)
    .map(myMapFunc_onehot)
    .batch(batchSize)
    .repeat()
)

In [20]:
# Fresh, untrained model for the TFRecord-based run.
model = myModel()

In [21]:
# The datasets repeat forever, so the step counts define where an epoch ends.
model.fit(
    data_train,
    epochs=nEpoch,
    steps_per_epoch=nStep_train,
    validation_data=data_test,
    validation_steps=nStep_test,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f9634ed9cf8>

In [22]:
# Drop this section's datasets and model, then reset the Keras backend session before the estimator sections.
del data_train, data_test, model
keras.backend.clear_session()

### 3. Using a pre-made estimator with tf.data from TFRecord files

Data preprocessing

In [23]:
def myMapFunc_scalar(example):
    """Decode one serialized ``tf.train.Example`` for the pre-made estimator.

    Args:
        example: a serialized record with a float feature ``'X'`` of length
            ``nDimIn`` and an int64 feature ``'Y'`` of length 1.

    Returns:
        ``(features, label)``: ``features`` is the dict ``{'X': pixels}``
        with ``'X'`` divided by 255; ``label`` is the parsed ``'Y'`` value
        left as an integer class id (no one-hot encoding).
    """
    schema = {
        'X': tf.FixedLenFeature(nDimIn, tf.float32),
        'Y': tf.FixedLenFeature(1, tf.int64),
    }
    parsed = tf.parse_single_example(example, schema)

    scale = tf.constant(255, dtype=tf.float32, shape=(nDimIn,))
    pixels = tf.div(parsed['X'], scale)
    return {'X': pixels}, parsed['Y']

A TF Estimator requires an input function that builds and returns the dataset

In [24]:
def myInputFunc_scalar(filename, numData):
    """Input function for the pre-made estimator (integer labels).

    Args:
        filename: path of the TFRecord file to read.
        numData: shuffle buffer size (the callers pass the number of
            examples in the file).

    Returns:
        A ``tf.data`` dataset that is shuffled, decoded with
        ``myMapFunc_scalar``, batched by ``batchSize`` and repeated
        indefinitely.
    """
    records = tf.data.TFRecordDataset(filename)
    shuffled = records.shuffle(buffer_size=numData)
    batches = shuffled.map(myMapFunc_scalar).batch(batchSize)
    return batches.repeat()

Create an estimator

In [25]:
# One dense numeric input named 'X'. Its length comes from nDimIn, the same
# constant the TFRecord parser uses, so the feature column and the parsed
# feature cannot drift apart.
feature_columns = [tf.feature_column.numeric_column('X', shape=[nDimIn])]

# Pre-made DNN classifier with two 40-unit hidden layers, Adam at learning
# rate 0.001, and one output class per label (nClass).
estimator = tf.estimator.DNNClassifier(
    feature_columns=feature_columns,
    hidden_units=[40, 40],
    optimizer=tf.train.AdamOptimizer(0.001),
    n_classes=nClass,
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_eval_distribute': None, '_log_step_count_steps': 100, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f9634bc95c0>, '_model_dir': '/tmp/tmpcj7___ya', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_train_distribute': None, '_protocol': None, '_evaluation_master': '', '_save_checkpoints_steps': None, '_service': None, '_num_worker_replicas': 1, '_save_summary_steps': 100, '_is_chief': True, '_save_checkpoints_secs': 600, '_master': '', '_tf_random_seed': None, '_task_id': 0, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_experimental_distribute': None, '_global_id_in_cluster': 0}


Training

In [26]:
# `steps` is the total number of mini-batches: nEpoch passes of nStep_train batches.
estimator.train(
    input_fn=lambda: myInputFunc_scalar(file_train, nData_train),
    steps=nStep_train * nEpoch,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmpcj7___ya/model.ckpt.
INFO:tensorflow:loss = 149.6716, step = 0
INFO:tensorflow:global_step/sec: 149.576
INFO:tensorflow:loss = 30.462986, step = 100 (0.670 sec)
INFO:tensorflow:global_step/sec: 160.669
INFO:tensorflow:loss = 15.723204, step = 200 (0.622 sec)
INFO:tensorflow:global_step/sec: 162.861
INFO:tensorflow:loss = 25.859535, step = 300 (0.614 sec)
INFO:tensorflow:global_step/sec: 158.054
INFO:tensorflow:loss = 28.355984, step = 400 (0.633 sec)
INFO:tensorflow:global_step/sec: 162.849
INFO:tensorflow:loss = 14.388325, step = 500 (0.615 sec)
INFO:tensorflow:global_step/sec: 162.55
INFO:tensorflow:loss = 10.755209, step = 600 (0.615 sec)
INFO:tensorflow:global_step/sec: 163.504
INFO:tensorflow:lo

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f9634bc9518>

Test

In [27]:
# Evaluate on nStep_test mini-batches read from the test TFRecord file.
eval_result_1 = estimator.evaluate(
    input_fn=lambda: myInputFunc_scalar(file_test, nData_test),
    steps=nStep_test,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-10-02-20:32:36
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpcj7___ya/model.ckpt-4685
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [15/156]
INFO:tensorflow:Evaluation [30/156]
INFO:tensorflow:Evaluation [45/156]
INFO:tensorflow:Evaluation [60/156]
INFO:tensorflow:Evaluation [75/156]
INFO:tensorflow:Evaluation [90/156]
INFO:tensorflow:Evaluation [105/156]
INFO:tensorflow:Evaluation [120/156]
INFO:tensorflow:Evaluation [135/156]
INFO:tensorflow:Evaluation [150/156]
INFO:tensorflow:Evaluation [156/156]
INFO:tensorflow:Finished evaluation at 2018-10-02-20:32:37
INFO:tensorflow:Saving dict for global step 4685: accuracy = 0.9672476, average_loss = 0.10643749, global_step = 4685, loss = 6.8119993
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4685: /tmp/t

In [28]:
# Show the metrics dict returned by estimator.evaluate for the pre-made estimator.
print(eval_result_1)

{'global_step': 4685, 'loss': 6.8119993, 'accuracy': 0.9672476, 'average_loss': 0.10643749}


### 4. Using an estimator from a Keras model with tf.data from TFRecord files

Data preprocessing

In [29]:
def myMapFunc_onehot(example):
    """Decode one serialized ``tf.train.Example`` into ``(pixels, one-hot label)``.

    NOTE(review): this re-declares the function of the same name from
    Section 2-2 with the same parsing logic; it is repeated here so that this
    section can be read on its own.

    Args:
        example: a serialized record with a float feature ``'X'`` of length
            ``nDimIn`` and an int64 feature ``'Y'`` of length 1.

    Returns:
        ``(pixels, label)``: ``'X'`` divided by 255, and ``'Y'`` one-hot
        encoded (depth ``nClass``, float32) and flattened to 1-D.
    """
    record_spec = {
        'X': tf.FixedLenFeature(nDimIn, tf.float32),
        'Y': tf.FixedLenFeature(1, tf.int64),
    }
    fields = tf.parse_single_example(example, record_spec)

    divisor = tf.constant(255, dtype=tf.float32, shape=(nDimIn,))
    scaled = tf.div(fields['X'], divisor)

    # The length-1 label yields a one-hot tensor with a leading axis of size 1;
    # flatten it to a plain vector.
    onehot = tf.one_hot(fields['Y'], nClass, dtype=tf.float32)
    return scaled, tf.reshape(onehot, [-1])

Input function for the estimator

In [30]:
def myInputFunc_onehot(filename, numData):
    """Input function for the Keras-derived estimator (one-hot labels).

    Args:
        filename: path of the TFRecord file to read.
        numData: shuffle buffer size (the callers pass the number of
            examples in the file).

    Returns:
        A ``tf.data`` dataset that is shuffled, decoded with
        ``myMapFunc_onehot``, batched by ``batchSize`` and repeated
        indefinitely.
    """
    records = tf.data.TFRecordDataset(filename)
    shuffled = records.shuffle(buffer_size=numData)
    batches = shuffled.map(myMapFunc_onehot).batch(batchSize)
    return batches.repeat()

Convert a Keras model to an estimator

In [31]:
# Compile a fresh Keras model and wrap it as an estimator.
model = myModel()
estimator = keras.estimator.model_to_estimator(keras_model=model)

INFO:tensorflow:Using the Keras model provided.
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_num_ps_replicas': 0, '_eval_distribute': None, '_log_step_count_steps': 100, '_device_fn': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f961864a400>, '_model_dir': '/tmp/tmpzezltl9h', '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_train_distribute': None, '_protocol': None, '_evaluation_master': '', '_save_checkpoints_steps': None, '_service': None, '_num_worker_replicas': 1, '_save_summary_steps': 100, '_is_chief': True, '_save_checkpoints_secs': 600, '_master': '', '_tf_random_seed': None, '_task_id': 0, '_keep_checkpoint_every_n_hours': 10000, '_task_type': 'worker', '_experimental_distribute': None, '_global_id_in_cluster': 0}


Training

In [32]:
# `steps` is the total number of mini-batches: nEpoch passes of nStep_train batches.
estimator.train(
    input_fn=lambda: myInputFunc_onehot(file_train, nData_train),
    steps=nStep_train * nEpoch,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='/tmp/tmpzezltl9h/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: ('/tmp/tmpzezltl9h/keras/keras_model.ckpt',)
INFO:tensorflow:Warm-starting variable: fc2/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dataOut/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dataOut/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc2/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc1/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc1/kernel; prev_var_name: Unchanged
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_o

<tensorflow.python.estimator.estimator.Estimator at 0x7f96186739e8>

Test

In [33]:
# Evaluate on nStep_test mini-batches read from the local test TFRecord file.
eval_result_2 = estimator.evaluate(
    input_fn=lambda: myInputFunc_onehot(file_test, nData_test),
    steps=nStep_test,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-10-02-20:33:05
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpzezltl9h/model.ckpt-4685
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [15/156]
INFO:tensorflow:Evaluation [30/156]
INFO:tensorflow:Evaluation [45/156]
INFO:tensorflow:Evaluation [60/156]
INFO:tensorflow:Evaluation [75/156]
INFO:tensorflow:Evaluation [90/156]
INFO:tensorflow:Evaluation [105/156]
INFO:tensorflow:Evaluation [120/156]
INFO:tensorflow:Evaluation [135/156]
INFO:tensorflow:Evaluation [150/156]
INFO:tensorflow:Evaluation [156/156]
INFO:tensorflow:Finished evaluation at 2018-10-02-20:33:06
INFO:tensorflow:Saving dict for global step 4685: accuracy = 0.96684694, global_step = 4685, loss = 0.11055692
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 4685: /tmp/tmpzezltl9h/model.ckpt-468

In [34]:
# Show the metrics dict returned by estimator.evaluate for the Keras-derived estimator.
print(eval_result_2)

{'global_step': 4685, 'accuracy': 0.96684694, 'loss': 0.11055692}


### 5. Accessing data on google cloud (google storage)

- Before starting the tutorial, install google cloud storage by ```pip install google-cloud-storage```

- [Important] With the default settings, a VM cannot write or upload files to Google Cloud Storage. If you see a permission error (related to 403 POST), go to the VM instance details and change "Cloud API access scopes" (at the bottom) from "default" to "full access".

In [35]:
from google.cloud import storage
# Replace the placeholder with the name of a bucket you can write to.
bucketname = 'your-bucket-name'
# The client is created without explicit credentials, so it relies on the environment's configuration.
client = storage.Client()
bucket = client.get_bucket(bucketname)

Upload the TFRecord files to 'gs://<bucketname>/<target_folder>' (in the recorded run this was 'gs://data-push-wearableband/temp/')

In [36]:
# Object-name prefix ("folder") inside the bucket; keep the trailing slash because it is concatenated directly with the file names.
target_folder = 'sub-folder-name/'

In [37]:
# Destination object = folder prefix + local file name; source = the local file.
blob_train = storage.Blob(name=target_folder + file_train, bucket=bucket)
blob_train.upload_from_filename(filename=file_train)

blob_test = storage.Blob(name=target_folder + file_test, bucket=bucket)
blob_test.upload_from_filename(filename=file_test)

Get the list of files in the bucket

In [38]:
# Collect the names of every object stored under the folder prefix.
blobs = bucket.list_blobs(prefix=target_folder)
filelist = [blob.name for blob in blobs]
print(filelist)
# Note: the folder entry itself ('temp/' in the recorded output) is included in the file list

['temp/', 'temp/mnist_test.tfrecords', 'temp/mnist_train.tfrecords']


Train a model using the data on the cloud

In [39]:
# Build gs://<bucket>/<folder><file> URIs for the uploaded TFRecord files.
fullpath_train = 'gs://{}/{}{}'.format(bucketname, target_folder, file_train)
fullpath_test = 'gs://{}/{}{}'.format(bucketname, target_folder, file_test)
print(fullpath_train)
print(fullpath_test)

gs://data-push-wearableband/temp/mnist_train.tfrecords
gs://data-push-wearableband/temp/mnist_test.tfrecords


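Reuse the estimator converted from the Keras model in Section 4 (no new conversion is needed; training continues from its latest checkpoint)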

Training

In [40]:
# Train from the TFRecord file stored in the bucket: pass the gs:// path
# (fullpath_train), not the local copy, so this step really reads its data
# from Google Cloud Storage like the evaluation step below.
# `estimator` is the object created in Section 4, so training continues from
# its latest checkpoint rather than starting from scratch.
estimator.train(
    input_fn=lambda: myInputFunc_onehot(fullpath_train, nData_train),
    steps=nStep_train * nEpoch,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Warm-starting with WarmStartSettings: WarmStartSettings(ckpt_to_initialize_from='/tmp/tmpzezltl9h/keras/keras_model.ckpt', vars_to_warm_start='.*', var_name_to_vocab_info={}, var_name_to_prev_var_name={})
INFO:tensorflow:Warm-starting from: ('/tmp/tmpzezltl9h/keras/keras_model.ckpt',)
INFO:tensorflow:Warm-starting variable: fc2/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dataOut/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: dataOut/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc2/kernel; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc1/bias; prev_var_name: Unchanged
INFO:tensorflow:Warm-starting variable: fc1/kernel; prev_var_name: Unchanged
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpzezltl9h/model.ckpt-4685
INFO:

<tensorflow.python.estimator.estimator.Estimator at 0x7f96186739e8>

Test

In [41]:
# Evaluate on nStep_test mini-batches read from the test file in the bucket (gs:// path).
eval_result_3 = estimator.evaluate(
    input_fn=lambda: myInputFunc_onehot(fullpath_test, nData_test),
    steps=nStep_test,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-10-02-20:33:38
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpzezltl9h/model.ckpt-9370
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Evaluation [15/156]
INFO:tensorflow:Evaluation [30/156]
INFO:tensorflow:Evaluation [45/156]
INFO:tensorflow:Evaluation [60/156]
INFO:tensorflow:Evaluation [75/156]
INFO:tensorflow:Evaluation [90/156]
INFO:tensorflow:Evaluation [105/156]
INFO:tensorflow:Evaluation [120/156]
INFO:tensorflow:Evaluation [135/156]
INFO:tensorflow:Evaluation [150/156]
INFO:tensorflow:Evaluation [156/156]
INFO:tensorflow:Finished evaluation at 2018-10-02-20:33:40
INFO:tensorflow:Saving dict for global step 9370: accuracy = 0.96935093, global_step = 9370, loss = 0.10938837
INFO:tensorflow:Saving 'checkpoint_path' summary for global step 9370: /tmp/tmpzezltl9h/model.ckpt-937

In [42]:
# Show the metrics dict returned by estimator.evaluate for the run that read the test data from the bucket.
print(eval_result_3)

{'global_step': 9370, 'accuracy': 0.96935093, 'loss': 0.10938837}
