In [57]:
#Estimators + Dataset(new input pipeline) Example - Categorize Iris flowers

In [65]:

import tensorflow as tf
import os
import sys
if sys.version_info < (3, 0, 0):
    from urllib import urlopen
else:
    from urllib.request import urlopen

# Check that a recent enough TensorFlow is installed.
# The (major, minor) components are compared numerically: comparing the raw
# version strings is lexicographic, so "1.10.0" would sort before "1.3" and
# wrongly fail the check.
tf_version = tf.__version__
print("TensorFlow version: {}".format(tf_version))
tf_major_minor = tuple(int(part) for part in tf_version.split(".")[:2])
assert tf_major_minor >= (1, 3), "TensorFlow r1.3 or later is needed"

# Base directory used by this example
PATH = "/Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/"

# Local locations of the training and test CSV files (under PATH/dataset)
PATH_DATASET = os.sep.join([PATH, "dataset"])
FILE_TRAIN = os.sep.join([PATH_DATASET, "iris_training.csv"])
FILE_TEST = os.sep.join([PATH_DATASET, "iris_test.csv"])

# Remote locations the CSV files are downloaded from
URL_TRAIN = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST = "http://download.tensorflow.org/data/iris_test.csv"

TensorFlow version: 1.3.0


In [66]:
def downloadDataset(url, file):
    """Download `url` to the local path `file` unless that file already exists.

    The PATH_DATASET directory is created first when it is missing.

    Args:
        url: Address of the remote file to fetch.
        file: Local path the downloaded bytes are written to.
    """
    if not os.path.exists(PATH_DATASET):
        os.makedirs(PATH_DATASET)
    if not os.path.exists(file):
        response = urlopen(url)
        try:
            data = response.read()
        finally:
            # Release the connection explicitly. try/finally is used instead
            # of `with` so this also works with the Python 2 urlopen imported
            # at the top of the notebook.
            response.close()
        # The `with` block closes the file on exit; no explicit close() needed.
        with open(file, "wb") as f:
            f.write(data)


# Fetch both CSV files (skipped when they are already on disk)
downloadDataset(URL_TRAIN, FILE_TRAIN)
downloadDataset(URL_TEST, FILE_TEST)

tf.logging.set_verbosity(tf.logging.INFO)

In [67]:
# Names of the feature columns in the training & test CSV files,
# in the order they are zipped with the parsed CSV fields
feature_names = [
    'SepalLength', 'SepalWidth',
    'PetalLength', 'PetalWidth',
]

In [68]:
#New Input Pipeline API

def input_pipeline(file_path, perform_shuffle=False, repeat_count=1, batch_size=32):
    """Build the ops that read a CSV file and produce batches of (features, labels).

    Args:
        file_path: Path of the CSV file to read; its first line is skipped
            as a header row.
        perform_shuffle: When True, shuffle the examples through a
            256-element buffer.
        repeat_count: Number of times the dataset is repeated.
        batch_size: Number of examples per batch. Defaults to 32, the value
            that was previously hard-coded.

    Returns:
        A `(batch_features, batch_labels)` tuple obtained from a one-shot
        iterator over the dataset; `batch_features` is a dict keyed by the
        entries of `feature_names`.
    """
    def decode_csv(line):
        # Record defaults: four float columns followed by one integer column
        columns = tf.decode_csv(line, [[0.], [0.], [0.], [0.], [0]])
        label = columns[-1:]  # Last column is the label (kept as a 1-element list)
        features = columns[:-1]  # Everything before it is a feature
        return dict(zip(feature_names, features)), label

    dataset = (tf.contrib.data.TextLineDataset(file_path)  # Read the text file line by line
               .skip(1)  # Skip header row
               .map(decode_csv))  # Parse each line into (features, label)
    if perform_shuffle:
        # Randomizes input using a window of 256 elements (read into memory)
        dataset = dataset.shuffle(buffer_size=256)
    dataset = dataset.repeat(repeat_count)  # Repeat the dataset this many times
    dataset = dataset.batch(batch_size)  # Group examples into batches

    # NOTE(review): the original author recorded that keeping the iterator in
    # a variable and calling get_next() on it separately raised an error --
    # unverified; the chained call below is what is known to work here.
    batch_features, batch_labels = dataset.make_one_shot_iterator().get_next()
    return batch_features, batch_labels

In [69]:
# Build the pipeline ops; evaluating them yields a batch of 32 shuffled elements
next_batch = input_pipeline(FILE_TRAIN, perform_shuffle=True)

# Smoke-test the pipeline by pulling a single batch in a throwaway session
with tf.Session() as sess:
    first_batch = sess.run(next_batch)
print(first_batch)

({'PetalLength': array([ 1.20000005,  6.9000001 ,  1.29999995,  4.        ,  5.5999999 ,
        5.5999999 ,  1.5       ,  3.9000001 ,  6.30000019,  6.5999999 ,
        5.19999981,  1.60000002,  4.5999999 ,  1.60000002,  3.5999999 ,
        5.5999999 ,  1.5       ,  5.80000019,  4.5999999 ,  4.4000001 ,
        4.19999981,  1.39999998,  6.69999981,  1.29999995,  1.        ,
        1.60000002,  1.39999998,  5.5       ,  1.29999995,  4.4000001 ,
        5.5999999 ,  1.5       ], dtype=float32), 'PetalWidth': array([ 0.2       ,  2.29999995,  0.2       ,  1.29999995,  1.39999998,
        2.4000001 ,  0.1       ,  1.10000002,  1.79999995,  2.0999999 ,
        2.29999995,  0.2       ,  1.5       ,  0.2       ,  1.29999995,
        2.20000005,  0.40000001,  2.20000005,  1.29999995,  1.29999995,
        1.29999995,  0.2       ,  2.20000005,  0.2       ,  0.2       ,
        0.40000001,  0.2       ,  1.79999995,  0.40000001,  1.39999998,
        2.4000001 ,  0.2       ], dtype=float32), 'Sepa

In [70]:
# Describe the model inputs: every feature is numeric, so each name in
# feature_names gets its own numeric_column
# https://www.tensorflow.org/versions/master/api_docs/python/tf/feature_column
feature_col = list(map(tf.feature_column.numeric_column, feature_names))

In [71]:
# Create a deep neural network classifier using the pre-made
# DNNClassifier estimator
# https://www.tensorflow.org/versions/master/api_docs/python/tf/estimator/DNNClassifier
estimator = tf.estimator.DNNClassifier(
    feature_columns=feature_col,  # feed input features to the model
    hidden_units=[10, 10],  # 2 hidden layers, each with 10 neurons
    n_classes=3,
    # Directory where checkpoints, model, TensorBoard graph etc. are stored.
    # Derived from PATH instead of repeating the absolute path; the resulting
    # string is the same as the previously hard-coded one.
    model_dir=os.path.join(PATH, "ModelStats"))


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats', '_tf_random_seed': 1, '_save_summary_steps': 100, '_save_checkpoints_secs': 600, '_save_checkpoints_steps': None, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100}


In [72]:
# Train the model using input_pipeline.
# The training examples come from FILE_TRAIN, shuffled and repeated 30 times
# (30 epochs); `steps` caps training at 10000 steps.
# Estimators require an input_fn with no arguments, so the call to
# input_pipeline (file path, shuffle setting, repeat count) is wrapped in a
# lambda that takes none.
estimator.train(
    input_fn=lambda: input_pipeline(
        FILE_TRAIN, perform_shuffle=True, repeat_count=30),
    steps=10000)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats/model.ckpt.
INFO:tensorflow:loss = 52.255, step = 1
INFO:tensorflow:global_step/sec: 420.069
INFO:tensorflow:loss = 6.00833, step = 101 (0.239 sec)
INFO:tensorflow:Saving checkpoints for 113 into /Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats/model.ckpt.
INFO:tensorflow:Loss for final step: 3.37632.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1a1fbcacf8>

In [73]:
# Evaluate the trained model on the examples in FILE_TEST.
# The returned dict holds the evaluation metrics (e.g. loss & average_loss).
evaluate_result = estimator.evaluate(
    input_fn=lambda: input_pipeline(
        FILE_TEST, perform_shuffle=False, repeat_count=4))
print("Evaluation results")
for metric_name, metric_value in evaluate_result.items():
    print("   {}, was: {}".format(metric_name, metric_value))

INFO:tensorflow:Starting evaluation at 2017-10-23-21:05:55
INFO:tensorflow:Restoring parameters from /Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats/model.ckpt-113
INFO:tensorflow:Finished evaluation at 2017-10-23-21:05:56
INFO:tensorflow:Saving dict for global step 113: accuracy = 0.966667, average_loss = 0.155541, global_step = 113, loss = 4.66622
Evaluation results
   accuracy, was: 0.9666666388511658
   average_loss, was: 0.15554076433181763
   loss, was: 4.666223049163818
   global_step, was: 113


In [74]:
# Predict the class of every iris flower listed in the test CSV file
predict_results = estimator.predict(
    input_fn=lambda: input_pipeline(
        FILE_TEST, perform_shuffle=False, repeat_count=1))
print("Predictions on test data")
for prediction in predict_results:
    # class_ids is 0, 1 or 2 for Setosa, Versicolor, Virginica respectively
    print(prediction["class_ids"][0])


Predictions on test data
INFO:tensorflow:Restoring parameters from /Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats/model.ckpt-113
1
2
0
1
1
1
0
2
1
2
2
0
2
1
1
0
1
0
0
2
0
1
2
2
1
1
0
1
2
1


In [75]:
# In-memory examples to run predictions on:
# the first 3 examples taken from FILE_TEST
prediction_input = [
    [5.9, 3.0, 4.2, 1.5],  # -> 1, Iris Versicolor
    [6.9, 3.1, 5.4, 2.1],  # -> 2, Iris Virginica
    [5.1, 3.3, 1.7, 0.5],  # -> 0, Iris Setosa
]

In [76]:
def test_input_pipeline():
    """Input function that serves the in-memory `prediction_input` examples.

    Returns:
        A `(features, None)` tuple: `features` is a dict keyed by the entries
        of `feature_names`; the label slot is None because prediction has no
        labels.
    """
    def to_feature_dict(row):
        # Split one example into 4 pieces, one per feature name
        pieces = tf.split(row, 4)
        return dict(zip(feature_names, pieces))

    examples = tf.contrib.data.Dataset.from_tensor_slices(prediction_input)
    feature_dicts = examples.map(to_feature_dict)
    next_feature_batch = feature_dicts.make_one_shot_iterator().get_next()
    return next_feature_batch, None

In [77]:

# Predict all our prediction_input, fed through test_input_pipeline.
# The result is consumed by iterating over it in the next cell.
predict_results = estimator.predict(input_fn=test_input_pipeline)

In [78]:
# Print results
# Species name for each predicted class index; any index not listed here is
# reported as Iris Virginica, matching the original if/elif/else chain.
species_by_class = {0: "Iris Setosa", 1: "Iris Versicolor"}

print("Predictions on memory")
for idx, prediction in enumerate(predict_results):
    # Named class_id rather than `type`, which would shadow the builtin
    # type() for every cell executed afterwards.
    class_id = int(prediction["class_ids"][0])  # Predicted class (index)
    species = species_by_class.get(class_id, "Iris Virginica")
    print("{} - {}".format(prediction_input[idx], species))


Predictions on memory
INFO:tensorflow:Restoring parameters from /Users/shishir/Documents/botconnect Playground/TF1.3_input_pipeline/ModelStats/model.ckpt-113
I think: [5.9, 3.0, 4.2, 1.5], is Iris Versicolor
I think: [6.9, 3.1, 5.4, 2.1], is Iris Virginica
I think: [5.1, 3.3, 1.7, 0.5], is Iris Sentosa
