In [11]:
"""Tensorflow tutorial for predicting on the IRIS dataset
Also contains general methods which will be useful for:

* Downloading training and test sets given their URLs.
* Building input functions that feed the data to an estimator."""


import pandas as pd
import tensorflow as tf

# Locations of the Iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column names applied to both CSV files; 'Species' is the label column that
# load_data() pops off the feature frame.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']

# Human-readable class names, looked up by predicted class id.
# The first entry was previously misspelled as 'Sentosa'.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


"""The following two methods contain functionality for loading and reading
data into the training and test sets."""

def download_if_not_in_cache(train_url, test_url, train_file_name, test_file_name):
    """Fetch the training and test files via ``tf.keras.utils.get_file``.

    ``get_file`` is called once per data set with the file name to store the
    download under and the URL to fetch it from; per its documentation it
    reuses a previously cached copy instead of downloading again.

    Args:
        train_url: URL of the training set.
        test_url: URL of the test set.
        train_file_name: File name to store the training set under.
        test_file_name: File name to store the test set under.

    Returns:
        A ``(train_file_path, test_file_path)`` tuple of local file paths.
    """
    fetch = tf.keras.utils.get_file

    # The training file is fetched first, then the test file.
    return fetch(train_file_name, train_url), fetch(test_file_name, test_url)


def load_data(train_url, test_url):
    """Download the two CSV files and split each into features and labels.

    The local file names are taken from the last path segment of each URL.
    Each file is read with ``CSV_COLUMN_NAMES`` as column names (the first
    line of the file is treated as a header and replaced), and the
    'Species' column is removed from the frame and returned as the labels.

    Args:
        train_url: URL of the training CSV.
        test_url: URL of the test CSV.

    Returns:
        ``((train_x, train_y), (test_x, test_y))`` where each ``*_x`` is the
        feature DataFrame and each ``*_y`` the 'Species' Series.
    """
    train_name, test_name = (url.split('/')[-1] for url in (train_url, test_url))
    train_path, test_path = download_if_not_in_cache(
        train_url, test_url, train_name, test_name)

    def _features_and_labels(csv_path):
        # pop() removes the label column in place, leaving only the features.
        frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
        labels = frame.pop('Species')
        return frame, labels

    train_pair = _features_and_labels(train_path)
    test_pair = _features_and_labels(test_path)
    return (train_pair, test_pair)


def train_input_function(train_x, train_y, batch_size):
    """Build the training dataset handed to an estimator's ``train`` call.

    Args:
        train_x: Features; converted with ``dict()`` into a mapping from
            column name to column values (a DataFrame in this notebook).
        train_y: Labels (a pandas Series in this notebook).
        batch_size: Number of examples per batch.

    Returns:
        A dataset of ``(features, label)`` slices that is shuffled with a
        buffer of 1000, repeated indefinitely and then batched.
    """
    features_and_labels = (dict(train_x), train_y)
    slices = tf.data.Dataset.from_tensor_slices(features_and_labels)

    # Order matters: shuffle, then repeat forever, then batch.
    shuffled = slices.shuffle(1000)
    endless = shuffled.repeat()
    return endless.batch(batch_size)


def test_input_function(test_x, test_y, batch_size):
    """Similar function as above but for the test data set"""
    features = dict(test_x)
    
    # This case may turn up when we are trying to make predictions
    # on single points using a trained classifier
    if test_y is None:
        inputs = features
    else:
        inputs = (features, test_y)
    
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    
    #Batch size should not be none here.
    dataset = dataset.batch(batch_size)
    
    return dataset


# Load the Iris data once; the resulting frames are reused by the cells below.
(train_x, train_y), (test_x, test_y) = load_data(TRAIN_URL, TEST_URL)

# Show the structure (shapes and dtypes) of the batched training dataset.
print(train_input_function(train_x, train_y, batch_size=100))

<BatchDataset shapes: ({SepalLength: (?,), SepalWidth: (?,), PetalLength: (?,), PetalWidth: (?,)}, (?,)), types: ({SepalLength: tf.float64, SepalWidth: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>


In [12]:
"""This part of the code deals with using a pre-existing Tensorflow estimator
for the purpose of learning from the training set."""


def train_DNN_classifier(train_x, train_y, batch_size, num_steps):
    """Train a pre-made ``tf.estimator.DNNClassifier`` on the given data.

    Args:
        train_x: Training features; every key becomes one numeric feature column.
        train_y: Training labels.
        batch_size: Batch size passed to ``train_input_function``.
        num_steps: Number of training steps to run.

    Returns:
        The trained classifier.
    """
    # One numeric feature column per key of the feature set.
    numeric_columns = [
        tf.feature_column.numeric_column(key=name) for name in train_x.keys()
    ]

    # Two hidden layers of 10 units each; n_classes is given explicitly
    # because the estimator's default is 2.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=numeric_columns,
        hidden_units=[10, 10],
        n_classes=3)

    def _training_input():
        # The dataset is built lazily, when the estimator calls input_fn.
        return train_input_function(train_x, train_y, batch_size)

    classifier.train(input_fn=_training_input, steps=num_steps)
    return classifier

# Train with batches of 100 examples for 5000 steps, then show where the
# estimator keeps its checkpoints.
trained_classifier = train_DNN_classifier(
    train_x, train_y, batch_size=100, num_steps=5000)
print(trained_classifier.model_dir)



INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\USER\\AppData\\Local\\Temp\\tmp809azwzm', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000000C5009B0>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\USER\AppData\Local\Temp\tmp809azwzm\model.ckpt.
INFO:tensorflow:loss = 164.686, step = 1
INFO:tensorflow:global_step/sec: 537.602
INFO:tensorflow:loss = 24.9764, step = 101 (0.188 sec)
INFO:tensorflow:global_step/sec: 689.617
INFO:tensorflow:loss = 14.5913, step = 201 (0.145 sec)
INF

In [13]:
"""This part of the code tests the trained DNN classifier on the test set."""

# Evaluate the pre-made classifier on the whole test set.
# batch_size stays a module-level name because the prediction cell reuses it.
batch_size = 100
eval_result = trained_classifier.evaluate(
    input_fn=lambda: test_input_function(test_x, test_y, batch_size)
)

print(eval_result)
print('Accuracy: ', eval_result['accuracy'] * 100, '%')

INFO:tensorflow:Starting evaluation at 2018-02-10-12:26:02
INFO:tensorflow:Restoring parameters from C:\Users\USER\AppData\Local\Temp\tmp809azwzm\model.ckpt-5000
INFO:tensorflow:Finished evaluation at 2018-02-10-12:26:03
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.966667, average_loss = 0.0578378, global_step = 5000, loss = 1.73514
{'accuracy': 0.96666664, 'average_loss': 0.057837836, 'loss': 1.7351351, 'global_step': 5000}
Accuracy:  96.6666638851 %


In [14]:
# Predict the species of three hand-written flowers and compare each
# prediction with the species we expect.
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

# No labels are supplied, so the input function yields features only.
predictions = trained_classifier.predict(
    input_fn=lambda: test_input_function(
        predict_x, test_y=None, batch_size=batch_size))

template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'
for pred_dict, expec in zip(predictions, expected):
    # 'class_ids' holds one entry per example; use it to index the
    # probability vector and the SPECIES names.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]
    print(template.format(SPECIES[class_id], 100 * probability, expec))
    

INFO:tensorflow:Restoring parameters from C:\Users\USER\AppData\Local\Temp\tmp809azwzm\model.ckpt-5000

Prediction is "Sentosa" (99.8%), expected "Setosa"

Prediction is "Versicolor" (100.0%), expected "Versicolor"

Prediction is "Virginica" (99.6%), expected "Virginica"


In [15]:
# This piece of code deals with a DNNClassifier with checkpoints
def train_DNN_classifier_with_checkpoints(
        train_x, train_y, batch_size, num_steps,
        model_dir=r'C:\My personal folder\Computer Science Docs\Postgrad(MSc)\Project\Tensorflow tutorial\Iris\model_checkpoints'):
    """Train a DNN classifier on the iris dataset with a fixed checkpoint directory.

    Because the checkpoints are written to ``model_dir``, calling this
    function again with the same directory restores the latest checkpoint
    and continues training from there rather than starting from scratch.

    Args:
        train_x: Training features; every key becomes one numeric feature column.
        train_y: Training labels.
        batch_size: Batch size passed to ``train_input_function``.
        num_steps: Number of additional training steps to run.
        model_dir: Directory the estimator saves its checkpoints to. The
            default is the directory the notebook originally hard-coded; it
            is written as a raw string so the backslashes are not parsed as
            (invalid) escape sequences.

    Returns:
        The trained classifier.
    """
    # Step 1: Build the features - one numeric column per feature key.
    feature_columns = [
        tf.feature_column.numeric_column(key=key) for key in train_x.keys()
    ]

    # Step 2: Build the DNN model
    # * Two hidden layers of 10 units each.
    # * n_classes is given explicitly because the default value is 2.
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        hidden_units=[10, 10],
        model_dir=model_dir,
        n_classes=3)

    # Step 3: Train the DNN on the training set provided as parameters
    dnn_classifier.train(
        input_fn=lambda: train_input_function(train_x, train_y, batch_size),
        steps=num_steps)

    return dnn_classifier


# Train (or continue training) the checkpointed classifier for 10000 steps.
dnn_classifier = train_DNN_classifier_with_checkpoints(
    train_x, train_y, batch_size=100, num_steps=10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\My personal folder\\Computer Science Docs\\Postgrad(MSc)\\Project\\Tensorflow tutorial\\Iris\\model_checkpoints', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000000E8F72E8>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Restoring parameters from C:\My personal folder\Computer Science Docs\Postgrad(MSc)\Project\Tensorflow tutorial\Iris\model_checkpoints\model.ckpt-10000
INFO:tensorflow:Saving checkpoints for 10001 into C:\My personal folder\Computer Science Docs\Postgrad(MSc

INFO:tensorflow:loss = 2.29234, step = 17301 (0.152 sec)
INFO:tensorflow:global_step/sec: 653.558
INFO:tensorflow:loss = 5.78773, step = 17401 (0.154 sec)
INFO:tensorflow:global_step/sec: 671.102
INFO:tensorflow:loss = 1.72691, step = 17501 (0.149 sec)
INFO:tensorflow:global_step/sec: 666.629
INFO:tensorflow:loss = 4.18541, step = 17601 (0.149 sec)
INFO:tensorflow:global_step/sec: 671.103
INFO:tensorflow:loss = 2.7016, step = 17701 (0.149 sec)
INFO:tensorflow:global_step/sec: 704.184
INFO:tensorflow:loss = 1.69407, step = 17801 (0.142 sec)
INFO:tensorflow:global_step/sec: 675.638
INFO:tensorflow:loss = 4.12515, step = 17901 (0.149 sec)
INFO:tensorflow:global_step/sec: 666.628
INFO:tensorflow:loss = 1.35704, step = 18001 (0.149 sec)
INFO:tensorflow:global_step/sec: 675.638
INFO:tensorflow:loss = 2.91923, step = 18101 (0.149 sec)
INFO:tensorflow:global_step/sec: 684.892
INFO:tensorflow:loss = 1.73848, step = 18201 (0.146 sec)
INFO:tensorflow:global_step/sec: 640.989
INFO:tensorflow:loss 

In [32]:
"""In the following section of code we create a custom estimator for the IRIS data."""

def get_feature_columns(train_x):
    """Get feature columns from the training examples train_x.

    Args:
        train_x: Mapping-like feature set (anything with ``keys()``).

    Returns:
        A list with one ``tf.feature_column.numeric_column`` per key, in key order.
    """
    # The module is ``tf.feature_column`` (singular); the previous
    # ``tf.feature_columns`` raised AttributeError as soon as this was called.
    return [tf.feature_column.numeric_column(key=key) for key in train_x.keys()]


def train_input_function(features, labels, batch_size):
    """Training input function to be passed to the model.

    Note: this redefines (and therefore shadows) the function of the same
    name defined earlier in the notebook; both build the same pipeline.

    Args:
        features: Feature set; converted with ``dict()`` into a mapping from
            feature name to values.
        labels: Labels matching ``features``.
        batch_size: Number of examples per batch.

    Returns:
        A dataset of ``(features, label)`` slices, shuffled with a buffer of
        1000, repeated indefinitely and batched.
    """
    slices = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    shuffled = slices.shuffle(1000)
    endless = shuffled.repeat()
    return endless.batch(batch_size)


def my_model_function(features, labels, mode, params):
    """Model function to be used by the custom estimator.

    Builds a fully connected network: an input layer made from
    ``params['feature_columns']``, one ReLU dense layer per entry of
    ``params['hidden_units']`` and a linear output layer with
    ``params['n_classes']`` units. No dropout is applied.

    The function then branches on ``mode`` (PREDICT, EVAL or TRAIN).

    Args:
        features: Feature mapping fed to the input layer.
        labels: Class labels; not used in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.
        params: Dict with the keys 'feature_columns', 'hidden_units' and
            'n_classes'.

    Returns:
        A ``tf.estimator.EstimatorSpec`` carrying:
        * PREDICT: predictions ('class_ids', 'probabilities', 'logits');
        * EVAL: the loss and an 'accuracy' metric;
        * TRAIN: the loss and the Adagrad training op.
    """
    # Step 1: Define the model
    # Defining input layer
    net = tf.feature_column.input_layer(features, params['feature_columns'])

    # Defining the hidden layers
    for units in params['hidden_units']:
        net = tf.layers.dense(net, units=units, activation=tf.nn.relu)

    # Defining the output layer (raw, un-normalised class scores)
    logits = tf.layers.dense(net, units=params['n_classes'], activation=None)

    # Code for PREDICT
    predicted_classes = tf.argmax(logits, 1)
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'class_ids': predicted_classes[:, tf.newaxis],
            'probabilities': tf.nn.softmax(logits),
            # Expose the logits tensor itself; this entry used to be the
            # string literal 'logits'.
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Code for EVAL
    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=predicted_classes, name='acc_op')
    metrics = {'accuracy': accuracy}

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=metrics)

    # Code for TRAIN
    assert mode == tf.estimator.ModeKeys.TRAIN

    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)


def train_custom_classifier(train_x, train_y, batch_size, num_steps):
    """Train a custom estimator, built around ``my_model_function``, on the iris dataset.

    Args:
        train_x: Training features; every key becomes one numeric feature column.
        train_y: Training labels.
        batch_size: Batch size passed to ``train_input_function``.
        num_steps: Number of training steps to run.

    Returns:
        The trained ``tf.estimator.Estimator``.
    """
    # One numeric feature column per key of the feature set.
    numeric_columns = [
        tf.feature_column.numeric_column(key=name) for name in train_x.keys()
    ]

    # Everything the model function needs is handed over through `params`.
    model_params = {
        'feature_columns': numeric_columns,
        'hidden_units': [10, 10],
        'n_classes': 3
    }
    estimator = tf.estimator.Estimator(
        model_fn=my_model_function,
        params=model_params)

    def _training_input():
        # The dataset is built lazily, when the estimator calls input_fn.
        return train_input_function(train_x, train_y, batch_size)

    estimator.train(input_fn=_training_input, steps=num_steps)
    return estimator

# Train the custom estimator with batches of 100 examples for 10000 steps.
custom_classifier = train_custom_classifier(
    train_x, train_y, batch_size=100, num_steps=10000)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\USER\\AppData\\Local\\Temp\\tmputhqe7fi', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000000CB14BE0>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\USER\AppData\Local\Temp\tmputhqe7fi\model.ckpt.
INFO:tensorflow:loss = 1.40347, step = 1
INFO:tensorflow:global_step/sec: 833.286
INFO:tensorflow:loss = 0.171105, step = 101 (0.120 sec)
INFO:tensorflow:global_step/sec: 1098.84
INFO:tensorflow:loss = 0.122395, step = 201 (0.091 sec)
I

INFO:tensorflow:global_step/sec: 1030.87
INFO:tensorflow:loss = 0.0369499, step = 7501 (0.097 sec)
INFO:tensorflow:global_step/sec: 1075.21
INFO:tensorflow:loss = 0.0356887, step = 7601 (0.092 sec)
INFO:tensorflow:global_step/sec: 1030.87
INFO:tensorflow:loss = 0.0611081, step = 7701 (0.097 sec)
INFO:tensorflow:global_step/sec: 1086.89
INFO:tensorflow:loss = 0.0358283, step = 7801 (0.092 sec)
INFO:tensorflow:global_step/sec: 1075.21
INFO:tensorflow:loss = 0.0273768, step = 7901 (0.093 sec)
INFO:tensorflow:global_step/sec: 1075.21
INFO:tensorflow:loss = 0.0592074, step = 8001 (0.093 sec)
INFO:tensorflow:global_step/sec: 1030.87
INFO:tensorflow:loss = 0.0415394, step = 8101 (0.097 sec)
INFO:tensorflow:global_step/sec: 1086.9
INFO:tensorflow:loss = 0.0326484, step = 8201 (0.092 sec)
INFO:tensorflow:global_step/sec: 1098.84
INFO:tensorflow:loss = 0.0364052, step = 8301 (0.091 sec)
INFO:tensorflow:global_step/sec: 1086.89
INFO:tensorflow:loss = 0.0339567, step = 8401 (0.092 sec)
INFO:tensor

In [None]:
# Evaluate the custom estimator trained in the previous cell on the test set.
# This cell used to re-evaluate `trained_classifier` (the pre-made
# DNNClassifier), which only repeated the earlier evaluation cell.
batch_size = 100
eval_result = custom_classifier.evaluate(
    input_fn=lambda: test_input_function(test_x, test_y, batch_size))

print(eval_result)
print('Accuracy: ', eval_result['accuracy'] * 100, '%')