In [17]:
"""Tensorflow tutorial for predicting on the IRIS dataset
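Also contains general helper functions which are useful for:

* Downloading training and test sets given their URLs.
* Loading the downloaded CSV files into feature and label sets.
* Building tf.data input functions for training and evaluation.
"""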


import pandas as pd
import tensorflow as tf

# Locations of the Iris training and test CSV files.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

# Column names applied to the CSV files when they are read.
# 'Species' is the label column; the remaining four are the features.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']

# Human-readable class names, looked up by the predicted class id.
# The first entry was previously misspelled as 'Sentosa'.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


"""The following two methods contain functionality for loading and reading
data into the training and test sets."""

def download_if_not_in_cache(train_url, test_url, train_file_name, test_file_name):
    """Fetch the training and test files and report where they are stored.

    Each (file name, URL) pair is handed to ``tf.keras.utils.get_file``,
    training file first, then test file. That helper is relied on to reuse
    a copy that is already in its cache instead of downloading it again.

    Args:
        train_url: URL of the training set.
        test_url: URL of the test set.
        train_file_name: Name under which the training file is stored.
        test_file_name: Name under which the test file is stored.

    Returns:
        A ``(train_file_path, test_file_path)`` tuple with the paths
        returned by ``get_file``.
    """
    fetch = tf.keras.utils.get_file

    requests = (
        (train_file_name, train_url),
        (test_file_name, test_url),
    )
    train_file_path, test_file_path = [fetch(name, url) for name, url in requests]

    return train_file_path, test_file_path


def load_data(train_url, test_url):
    """Download the two CSV files and split each into features and labels.

    The file name used for each download is the last path segment of its
    URL. Both files are read with ``CSV_COLUMN_NAMES`` as column names and
    their first row treated as a header to be replaced. The 'Species'
    column is removed from each frame and returned separately as labels.

    Args:
        train_url: URL of the training CSV.
        test_url: URL of the test CSV.

    Returns:
        ``((train_x, train_y), (test_x, test_y))`` where each ``*_x`` is the
        DataFrame without the 'Species' column and each ``*_y`` is that
        column as a Series.
    """
    train_name = train_url.split('/')[-1]
    test_name = test_url.split('/')[-1]
    train_path, test_path = download_if_not_in_cache(
        train_url, test_url, train_name, test_name)

    splits = []
    for csv_path in (train_path, test_path):
        frame = pd.read_csv(csv_path, names=CSV_COLUMN_NAMES, header=0)
        # pop() removes the label column from the frame in place.
        labels = frame.pop('Species')
        splits.append((frame, labels))

    return tuple(splits)


def train_input_function(train_x, train_y, batch_size):
    """Build the shuffled, repeating, batched dataset used for training.

    The result is meant to be returned from the ``input_fn`` given to an
    estimator's ``train`` method (for example ``DNNClassifier.train``).

    Args:
        train_x: Features; converted with ``dict()`` into a mapping from
            column name to column values.
        train_y: Labels aligned with ``train_x``.
        batch_size: Number of examples per batch.

    Returns:
        A ``tf.data.Dataset`` of ``(features, label)`` batches.
    """
    feature_columns = dict(train_x)
    examples = tf.data.Dataset.from_tensor_slices((feature_columns, train_y))

    # Shuffle with a buffer of 1000, repeat indefinitely, then batch.
    shuffled = examples.shuffle(1000)
    endless = shuffled.repeat()
    return endless.batch(batch_size)


def test_input_function(test_x, test_y, batch_size):
    """Similar function as above but for the test data set"""
    features = dict(test_x)
    
    # This case may turn up when we are trying to make predictions
    # on single points using a trained classifier
    if test_y is None:
        inputs = features
    else:
        inputs = (features, test_y)
    
    dataset = tf.data.Dataset.from_tensor_slices(inputs)
    
    #Batch size should not be none here.
    dataset = dataset.batch(batch_size)
    
    return dataset


# Load both splits once; the cells below reuse these four names.
(train_x, train_y), (test_x, test_y) = load_data(TRAIN_URL, TEST_URL)

# Printing the dataset shows the shapes and types of its elements.
print(train_input_function(train_x, train_y, 100))

<BatchDataset shapes: ({SepalLength: (?,), SepalWidth: (?,), PetalLength: (?,), PetalWidth: (?,)}, (?,)), types: ({SepalLength: tf.float64, SepalWidth: tf.float64, PetalLength: tf.float64, PetalWidth: tf.float64}, tf.int64)>


In [18]:
"""This part of the code deals with using a pre-existing Tensorflow estimator
for the purpose of learning from the training set."""


def train_DNN_classifier(train_x, train_y, batch_size, num_steps):
    """Build a ``tf.estimator.DNNClassifier`` and train it on the given data.

    Args:
        train_x: Training features; one numeric feature column is created
            per key of this object.
        train_y: Training labels aligned with ``train_x``.
        batch_size: Batch size passed to ``train_input_function``.
        num_steps: Number of training steps to run.

    Returns:
        The trained ``DNNClassifier``.
    """
    # One numeric feature column per input feature.
    numeric_columns = [
        tf.feature_column.numeric_column(key=name) for name in train_x.keys()
    ]

    # Two hidden layers of 10 units each; n_classes is set explicitly
    # because the estimator's default is 2.
    dnn_classifier = tf.estimator.DNNClassifier(
        feature_columns=numeric_columns,
        hidden_units=[10, 10],
        n_classes=3,
    )

    def training_input():
        # The estimator calls this with no arguments to obtain the dataset.
        return train_input_function(train_x, train_y, batch_size)

    dnn_classifier.train(input_fn=training_input, steps=num_steps)

    return dnn_classifier

# Train with batches of 100 examples for 5000 steps.
trained_classifier = train_DNN_classifier(
    train_x, train_y, batch_size=100, num_steps=5000)



INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'C:\\Users\\USER\\AppData\\Local\\Temp\\tmp91o93vb5', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000000000C72C198>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into C:\Users\USER\AppData\Local\Temp\tmp91o93vb5\model.ckpt.
INFO:tensorflow:loss = 152.044, step = 1
INFO:tensorflow:global_step/sec: 746.227
INFO:tensorflow:loss = 24.6768, step = 101 (0.135 sec)
INFO:tensorflow:global_step/sec: 917.377
INFO:tensorflow:loss = 15.4043, step = 201 (0.109 sec)
INF

In [19]:
"""This part of the code tests the trained DNN classifier on the test set."""

# Evaluate the trained classifier on the whole test set.
# batch_size is also read by the prediction cell further down.
batch_size = 100
eval_result = trained_classifier.evaluate(
    input_fn=lambda: test_input_function(test_x, test_y, batch_size),
)

print(eval_result)

# Accuracy is reported as a fraction; scale it to a percentage for display.
accuracy_percent = eval_result['accuracy'] * 100
print('Accuracy: ', accuracy_percent, '%')

INFO:tensorflow:Starting evaluation at 2018-02-09-17:41:16
INFO:tensorflow:Restoring parameters from C:\Users\USER\AppData\Local\Temp\tmp91o93vb5\model.ckpt-5000
INFO:tensorflow:Finished evaluation at 2018-02-09-17:41:16
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.966667, average_loss = 0.0749384, global_step = 5000, loss = 2.24815
{'accuracy': 0.96666664, 'average_loss': 0.074938416, 'loss': 2.2481525, 'global_step': 5000}
Accuracy:  96.6666638851 %


In [21]:
# Predict the species of three hand-written samples and compare each
# prediction with the species we expect.
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

# No labels are passed, so the input function yields features only.
# batch_size is the value set in the evaluation cell.
predictions = trained_classifier.predict(
    input_fn=lambda: test_input_function(
        predict_x, test_y=None, batch_size=batch_size))

# The message format does not change between iterations.
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

for pred_dict, expec in zip(predictions, expected):
    # 'class_ids' holds the predicted class index; 'probabilities' is
    # indexed by class id.
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    message = template.format(SPECIES[class_id], 100 * probability, expec)
    print(message)
    

INFO:tensorflow:Restoring parameters from C:\Users\USER\AppData\Local\Temp\tmp91o93vb5\model.ckpt-5000

Prediction is "Sentosa" (100.0%), expected "Setosa"

Prediction is "Versicolor" (100.0%), expected "Versicolor"

Prediction is "Virginica" (99.5%), expected "Virginica"
