In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf

## In this notebook,I will learn the high level of tensorflow API invoke :
    1.using predefine Model
    2.create dataset out of memory
    3.distribution train
    4.monitor training processing
    5.deploy your model

## 1. PreDefine Model

In [2]:
class MyModel():
    def __init__(self,filename='outputs/tf_week2/model1'):
        self.columns=['pickuplon','pickuplat','dropofflon','dropofflat','passengers']
        features=[tf.feature_column.numeric_column(f) for f in self.columns]
        self.model=tf.estimator.LinearRegressor(feature_columns=features,model_dir=filename)
        tf.logging.set_verbosity(tf.logging.INFO)
    def make_train_input_fn(self,epochs):
        df_train=pd.read_csv('../AI_database/trips/taxi-train.csv')
        return tf.estimator.inputs.pandas_input_fn(x=df_train,
                                        y=df_train['fare_amount'],
                                        batch_size=128,
                                        num_epochs=epochs,
                                        shuffle=True,
                                        queue_capacity=1000)
    def make_valid_input_fn(self):
        df_dev=pd.read_csv('../AI_database/trips/taxi-valid.csv')
        return tf.estimator.inputs.pandas_input_fn(x=df_dev,
                                              y=df_dev["fare_amount"],
                                              batch_size=128,
                                              num_epochs=1,
                                              shuffle=True,
                                              queue_capacity=1000)
    def make_predict_input_fn(self):
        df_test=pd.read_csv('../AI_database/trips/taxi-test.csv')
        return tf.estimator.inputs.pandas_input_fn(x=df_test,y=None,
                                              batch_size=128,num_epochs=1,
                                              shuffle=False)
    def fit(self,epochs=10):
        self.model.train(self.make_train_input_fn(epochs))
    def evaluate(self):
        result=self.model.evaluate(self.make_valid_input_fn())
        print('Eval rms is {}'.format(np.sqrt(result['average_loss'])))
    def predict(self):
        return self.model.predict(self.make_predict_input_fn())

In [3]:
model=MyModel('outputs/tf_week2/model1')
print('Train  Step:')
model.fit(1)
print('Evaluatuin Step:')
model.evaluate()

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from outputs/tf_week2/model1/model.ckpt-1146
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1147 into outputs/tf_week2/model1/model.ckpt.
INFO:tensorflow:loss = 2979.67, step = 1147
INFO:tensorflow:global_step/sec: 541.478
INFO:tensorflow:loss = 6085.03, step = 1247 (0.186 sec)
INFO:tensorflow:global_step/sec: 711.187
INFO:tensorflow:loss = 1897.96, step = 1347 (0.141 sec)
INFO:tensorflow:global_step/sec: 723.77
INFO:tensorflow:loss = 15.65, step = 1447 (0.138 sec)
INFO:tensorflow:global_step/sec: 706.059
INFO:tensorflow:loss = 12761.9, step = 1547 (0.142 sec)
INFO:tensorflow:global_step/sec: 697.564
INFO:tensorflow:loss = 19814.8, step = 1647 (0.143 sec)
INFO:tensorflow:Saving checkpoints for 1719 into outputs/tf_week2/mode

Train  Step:
Evaluatuin Step:

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-10-09:25:57
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from outputs/tf_week2/model1/model.ckpt-1719
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-10-09:25:57
INFO:tensorflow:Saving dict for global step 1719: average_loss = 107.672, global_step = 1719, loss = 13764.9



Eval rms is 10.376500129699707


In [5]:
#take a look at train,valid,test input function
print(model.make_train_input_fn(1)())
print('--------------------------------------')
print(model.make_valid_input_fn()())
print('--------------------------------------')
print(model.make_predict_input_fn()())

({'fare_amount': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:1' shape=(?,) dtype=float64>, 'passengers': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:6' shape=(?,) dtype=float64>, 'dropofflat': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:5' shape=(?,) dtype=float64>, 'key': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:7' shape=(?,) dtype=int64>, 'dropofflon': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:4' shape=(?,) dtype=float64>, 'pickuplat': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:3' shape=(?,) dtype=float64>, 'pickuplon': <tf.Tensor 'random_shuffle_queue_DequeueUpTo:2' shape=(?,) dtype=float64>}, <tf.Tensor 'random_shuffle_queue_DequeueUpTo:8' shape=(?,) dtype=float64>)
--------------------------------------
({'fare_amount': <tf.Tensor 'random_shuffle_queue_DequeueUpTo_1:1' shape=(?,) dtype=float64>, 'passengers': <tf.Tensor 'random_shuffle_queue_DequeueUpTo_1:6' shape=(?,) dtype=float64>, 'dropofflat': <tf.Tensor 'random_shuffle_queue_DequeueUpTo_1:5' shape=(?,) dtype=floa

## 2. training on large dataset,using tf.data.* instead of  tf.estimator.input.pandas_input_fn 

In [None]:
#input a raw line,return features and labels
def decoder(row):
    cols=tf.decode_csv(row,[[0.0],[0.0],[0.0],[0.0],[0.0],[0.0],[0.0]])
    features={'pickuplon':cols[1],'pockuplat':cols[2]}
    label=cols[0]
    return features,label

### first let's see how dataset tensor work

In [None]:
! head ../AI_database/trips/taxi-train.csv -n11

In [None]:
#the workflow look like follow:
'''
    filename->TextDataset->shuffle->batch->repeat->map->iterator->next
    all about are node or tensor in TF
    
    the output should have the format for model.train()
    {featurename:tensor},label tensor
'''
dataset=tf.data.TextLineDataset('../AI_database/trips/taxi-train.csv')
dataset=dataset.skip(1)

# dataset=dataset.map(decoder)
dataset=dataset.batch(3)
dataset.shuffle(1000)
mynext=dataset.make_one_shot_iterator().get_next()
with tf.Session() as sess:
    for i in range(5):
        print(sess.run(mynext))

In [20]:
class MyModel2(MyModel):
    def decoder_line(self,row):
        DEFAULTS = [[0.0], [-74.0], [40.0], [-74.0], [40.7], [1.0], ['nokey']]
        cols=tf.decode_csv(row,DEFAULTS)
        features={self.columns[i]:cols[i+1] for i in range(0,len(self.columns))}
        labels=cols[0]
        return features,labels
    def _getDataset(self,filename,epochs):
        def ret_func():
            dataset=tf.data.Dataset.list_files(filename)
            dataset=dataset.flat_map(tf.data.TextLineDataset)
            dataset=dataset.skip(1)
            dataset=dataset.shuffle(1000)
            dataset=dataset.batch(128)
            dataset=dataset.repeat(epochs)
            dataset=dataset.map(lambda r:self.decoder_line(r))   
            features,labels=dataset.make_one_shot_iterator().get_next()
            return features,labels
        return ret_func
    
    def make_train_input_fn(self,epochs):
        print('======================Model 2 using large dataset======================')
        return self._getDataset('../AI_database/trips/taxi-train.csv',epochs)
    def make_valid_input_fn(self):
        print('======================Model 2 using large dataset======================')
        return self._getDataset('../AI_database/trips/taxi-valid.csv',1)
    def make_predict_input_fn(self):
        print('======================Model 2 using large dataset======================')
        return self._getDataset('../AI_database/trips/taxi-test.csv',1)

In [21]:
model=MyModel2('outputs/tf_week2/model2')
print('Train step:')
model.fit(5)
print('Evaluation step:')
model.evaluate()

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_tf_random_seed': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0bd33306a0>, '_save_checkpoints_steps': None, '_session_config': None, '_global_id_in_cluster': 0, '_keep_checkpoint_every_n_hours': 10000, '_keep_checkpoint_max': 5, '_master': '', '_task_id': 0, '_num_worker_replicas': 1, '_task_type': 'worker', '_save_checkpoints_secs': 600, '_model_dir': 'outputs/tf_week2/model2', '_log_step_count_steps': 100, '_num_ps_replicas': 0, '_save_summary_steps': 100, '_evaluation_master': '', '_is_chief': True, '_service': None}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from outputs/tf_week2/model2/model.ckpt-17766
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1

Train step:
Evaluation step:

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-09-10-09:37:18
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from outputs/tf_week2/model2/model.ckpt-20631
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-09-10-09:37:18
INFO:tensorflow:Saving dict for global step 20631: average_loss = 96.0239, global_step = 20631, loss = 12275.9



Eval rms is 9.799177169799805


In [22]:
gen=model.predict()
for i in range(5):
    print(next(gen))

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from outputs/tf_week2/model2/model.ckpt-20631
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.


{'predictions': array([ 12.75143337], dtype=float32)}
{'predictions': array([ 12.75020123], dtype=float32)}
{'predictions': array([ 12.75318718], dtype=float32)}
{'predictions': array([ 12.75337791], dtype=float32)}
{'predictions': array([ 12.74979973], dtype=float32)}
