In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

In [2]:
import os
import collections
import itertools

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [5]:
from six.moves import urllib

In [6]:
# Report the versions of the libraries this notebook runs against.
for library in (np, pd, tf):
    print(library.__version__)

1.13.3
0.20.1
1.4.1


In [7]:
# Local file names the two CSV files are stored under after download.
DOWNLOADED_FILENAME_TRAIN = "iris_training.csv"
DOWNLOADED_FILENAME_TEST = "iris_test.csv"

# Remote locations of the Iris training and test CSV files.
# The Iris dataset is also available here: https://archive.ics.uci.edu/ml/datasets/iris
URL_TRAIN_PATH = "http://download.tensorflow.org/data/iris_training.csv"
URL_TEST_PATH = "http://download.tensorflow.org/data/iris_test.csv"

def _download_if_missing(url, filename):
    """Fetch `url` into `filename` unless `filename` already exists, then report it.

    The download is written to `filename + ".part"` and renamed only after
    it completes, so an interrupted transfer cannot leave a truncated
    `filename` behind that a later run would mistake for a finished download.
    """
    if not os.path.exists(filename):
        partial_filename = filename + ".part"
        try:
            urllib.request.urlretrieve(url, partial_filename)
            os.rename(partial_filename, filename)
        finally:
            # Only present here if the download or the rename failed.
            if os.path.exists(partial_filename):
                os.remove(partial_filename)

    print('Found and verified file from this path: ', url)
    print('Downloaded file: ', filename)


def download_data():
    """Download the Iris training and test CSV files if not already present.

    Each file is fetched only when no file with its local name exists in the
    current working directory. The source URL and local file name are printed
    for both files whether or not a download took place.
    """
    for url, filename in (
            (URL_TRAIN_PATH, DOWNLOADED_FILENAME_TRAIN),
            (URL_TEST_PATH, DOWNLOADED_FILENAME_TEST)):
        _download_if_missing(url, filename)

In [8]:
# Fetch the training and test CSV files (skipped for files that already exist locally).
download_data()

Found and verified file from this path:  http://download.tensorflow.org/data/iris_training.csv
Downloaded file:  iris_training.csv
Found and verified file from this path:  http://download.tensorflow.org/data/iris_test.csv
Downloaded file:  iris_test.csv


In [9]:
# Names given, by position, to the four float columns of each CSV row
# (parse_csv zips them with the decoded columns in this order).
FEATURE_NAMES = ['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm']

### Labels for the type of Iris flower
 
* 0 -- Iris Setosa
* 1 -- Iris Versicolour 
* 2 -- Iris Virginica

In [10]:
def parse_csv(line):
    """Decode one CSV text line into a (features, labels) pair.

    The line is decoded as four float columns followed by one integer column.

    Args:
        line: the CSV text line to decode.

    Returns:
        features: dict pairing each name in FEATURE_NAMES with the decoded
            column at the same position.
        labels: one-element list holding the last decoded column
            (the class label: 0, 1 or 2).
    """
    record_defaults = [[0.], [0.], [0.], [0.], [0]]
    columns = tf.decode_csv(line, record_defaults)

    # All columns except the last are features; the last one is the label.
    feature_values = columns[:-1]
    labels = columns[-1:]

    return dict(zip(FEATURE_NAMES, feature_values)), labels

In [11]:
def get_features_labels(filename, shuffle=False, repeat_count=1,
                        batch_size=32, shuffle_buffer_size=256):
    """Build batched feature and label tensors that read from a CSV file.

    The first line of the file is skipped and every remaining line is decoded
    with parse_csv.

    Args:
        filename: path of the CSV file to read.
        shuffle: when True, shuffle the parsed rows before repeating/batching.
        repeat_count: passed to `Dataset.repeat` (number of passes over the file).
        batch_size: number of rows per batch (default 32, the value that was
            previously hard-coded).
        shuffle_buffer_size: buffer size used when `shuffle` is True (default
            256, the value that was previously hard-coded).

    Returns:
        (batch_features, batch_labels) as produced by the `get_next()` op of
        a one-shot iterator over the dataset.
    """
    dataset = tf.data.TextLineDataset(filename).skip(1).map(parse_csv)

    if shuffle:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)

    dataset = dataset.repeat(repeat_count)
    dataset = dataset.batch(batch_size)

    iterator = dataset.make_one_shot_iterator()

    batch_features, batch_labels = iterator.get_next()

    return batch_features, batch_labels

In [12]:
# Build the feature/label batch tensors for the training file (defaults: no shuffle, one pass).
batch_features, batch_labels = get_features_labels(DOWNLOADED_FILENAME_TRAIN)

In [13]:
# Display the features: a dict keyed by feature name.
batch_features

{'PetalLengthCm': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=float32>,
 'PetalWidthCm': <tf.Tensor 'IteratorGetNext:1' shape=(?,) dtype=float32>,
 'SepalLengthCm': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=float32>,
 'SepalWidthCm': <tf.Tensor 'IteratorGetNext:3' shape=(?,) dtype=float32>}

In [14]:
# Display the label tensor.
batch_labels

<tf.Tensor 'IteratorGetNext:4' shape=(?, 1) dtype=int32>

In [15]:
# One numeric feature column per entry in FEATURE_NAMES.
feature_columns = [
    tf.feature_column.numeric_column(key=name) for name in FEATURE_NAMES
]

In [25]:
# DNN classifier over the numeric feature columns: hidden layers of
# 16, 12 and 16 units, and three output classes.
classifier_model = tf.estimator.DNNClassifier(
    n_classes=3,
    hidden_units=[16, 12, 16],
    feature_columns=feature_columns)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x116e5b8d0>, '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '/var/folders/yd/1rlyjfk975d3bb98d7_nyt740000gn/T/tmpEH7W9I', '_save_summary_steps': 100}


In [26]:
def train_input_fn():
    """Shuffled batches from the training file, repeated 20 times."""
    return get_features_labels(
        DOWNLOADED_FILENAME_TRAIN, shuffle=True, repeat_count=20)


classifier_model.train(input_fn=train_input_fn)


INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/yd/1rlyjfk975d3bb98d7_nyt740000gn/T/tmpEH7W9I/model.ckpt.
INFO:tensorflow:loss = 44.2092, step = 1
INFO:tensorflow:Saving checkpoints for 75 into /var/folders/yd/1rlyjfk975d3bb98d7_nyt740000gn/T/tmpEH7W9I/model.ckpt.
INFO:tensorflow:Loss for final step: 5.32145.


<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x116f2f110>

In [27]:
def eval_input_fn():
    """Unshuffled batches from the test file, repeated 4 times."""
    return get_features_labels(
        DOWNLOADED_FILENAME_TEST, shuffle=False, repeat_count=4)


results = classifier_model.evaluate(input_fn=eval_input_fn)


INFO:tensorflow:Starting evaluation at 2017-12-21-04:48:35
INFO:tensorflow:Restoring parameters from /var/folders/yd/1rlyjfk975d3bb98d7_nyt740000gn/T/tmpEH7W9I/model.ckpt-75
INFO:tensorflow:Finished evaluation at 2017-12-21-04:48:36
INFO:tensorflow:Saving dict for global step 75: accuracy = 0.966667, average_loss = 0.150384, global_step = 75, loss = 4.51153


In [28]:
# Print each evaluation metric on its own line, ordered by metric name.
for metric_name, metric_value in sorted(results.items()):
    print("%s: %s" % (metric_name, metric_value))

accuracy: 0.966667
average_loss: 0.150384
global_step: 75
loss: 4.51153


In [29]:
def predict_input_fn():
    """Unshuffled single pass over the test file."""
    return get_features_labels(DOWNLOADED_FILENAME_TEST, shuffle=False)


predict_results = classifier_model.predict(input_fn=predict_input_fn)

In [30]:
# Take the first entry of "class_ids" from every prediction that
# predict_results yields. The enumerate() index was never used, so the
# predictions are iterated directly.
predictions = [prediction["class_ids"][0] for prediction in predict_results]

INFO:tensorflow:Restoring parameters from /var/folders/yd/1rlyjfk975d3bb98d7_nyt740000gn/T/tmpEH7W9I/model.ckpt-75


In [31]:
# Load the test file with explicit column names; its first line is skipped
# rather than being used as a header.
test_column_names = FEATURE_NAMES + ['Labels']
df = pd.read_csv(DOWNLOADED_FILENAME_TEST, skiprows=1, names=test_column_names)

In [32]:
# Store the predictions as a new column alongside the 'Labels' column.
df['PredictedLabels'] = predictions

In [33]:
# Display the test rows together with their 'Labels' and 'PredictedLabels' columns.
df

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Labels,PredictedLabels
0,5.9,3.0,4.2,1.5,1,1
1,6.9,3.1,5.4,2.1,2,2
2,5.1,3.3,1.7,0.5,0,0
3,6.0,3.4,4.5,1.6,1,1
4,5.5,2.5,4.0,1.3,1,1
5,6.2,2.9,4.3,1.3,1,1
6,5.5,4.2,1.4,0.2,0,0
7,6.3,2.8,5.1,1.5,2,2
8,5.6,3.0,4.1,1.3,1,1
9,6.7,2.5,5.8,1.8,2,2
