In [None]:
import os
import sys 
import urllib.request

import tensorflow as tf

In [None]:
import pandas as pd

# Remote locations of the iris CSV files: TRAIN_URL is fed to model.train and
# TEST_URL to model.evaluate further down in this notebook.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

In [None]:
TEMP_DIR = '/tmp/tensorflow_tutorials'

def download_and_cache(url, fname=None, dest=TEMP_DIR):
    """Download `url` into `dest` unless a cached copy is already there.

    Args:
        url: Location of the file to fetch.
        fname: Name to store the file under inside `dest`. Defaults to the
            last '/'-separated component of `url`.
        dest: Directory holding the cached files; created when missing.

    Returns:
        Path of the cached file inside `dest`.

    Raises:
        ValueError: If the file name is empty (e.g. `url` ends with '/' and
            no `fname` was given).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(dest, exist_ok=True)
    if fname is None:
        fname = url.split('/')[-1]
        print("Using fname:", fname)
    if not fname:
        # An empty name would make fpath point at `dest` itself, which always
        # "exists" and would be returned as if it were the cached file.
        raise ValueError(
            'Cannot derive a file name from {!r}; pass fname explicitly.'.format(url))
    fpath = os.path.join(dest, fname)
    if not os.path.exists(fpath):
        def _progress(count, block_size, total_size):
            if total_size > 0:
                # The last block is usually only partly filled, so clamp to 100%.
                percentage = min(
                    100.0, float(count * block_size) / float(total_size) * 100.0)
                sys.stdout.write(
                    '\r>> Downloading {} {:1.1f}%'.format(fname, percentage))
            else:
                # Size is unknown (-1) or zero: a percentage cannot be computed.
                sys.stdout.write(
                    '\r>> Downloading {} {} bytes'.format(fname, count * block_size))
            sys.stdout.flush()

        # Download next to the final location and rename on success, so an
        # interrupted transfer never leaves a truncated file that later calls
        # would mistake for a valid cache entry.
        partial_path = fpath + '.part'
        try:
            urllib.request.urlretrieve(url, partial_path, _progress)
            os.replace(partial_path, fpath)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)
        print()
        statinfo = os.stat(fpath)
        print('Successfully downloaded', fname, statinfo.st_size, 'bytes.')
    return fpath


In [None]:
# Column order used when parsing a CSV line; 'Species' is the label column.
CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
                    'PetalLength', 'PetalWidth', 'Species']

# Every column except the trailing label column is a model input.
FEAT_COLUMNS = CSV_COLUMN_NAMES[:-1]

# Per-column record defaults for tf.decode_csv: a float default for each
# feature column and an int default for the label column.
CSV_TYPES = [[0.0] for _ in FEAT_COLUMNS] + [[0]]

# Display names looked up by predicted class index.
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

def _parse_line(line):
    """Split one CSV line into a feature dict and its label.

    Args:
        line: A CSV record, decoded with `CSV_TYPES` as record defaults.

    Returns:
        A `(features, label)` pair: `features` maps each remaining name in
        `CSV_COLUMN_NAMES` to its decoded column, and `label` is the column
        stored under 'Species'.
    """
    columns = tf.decode_csv(line, record_defaults=CSV_TYPES)
    named_columns = {name: column
                     for name, column in zip(CSV_COLUMN_NAMES, columns)}
    # The label travels separately from the model inputs.
    species = named_columns.pop('Species')
    return named_columns, species


def csv_input_fn(csv_path, batch_size, shuffle_repeat=True):
    """Build a batched dataset of `(features, label)` pairs from a CSV file.

    Args:
        csv_path: Path of the CSV file to read; its first line is skipped.
        batch_size: Number of parsed records per batch.
        shuffle_repeat: When true, shuffle with a buffer of 1000 and repeat
            indefinitely before batching; when false, only batch.

    Returns:
        The resulting dataset.
    """
    parsed = tf.data.TextLineDataset(csv_path).skip(1).map(_parse_line)

    # Evaluation and prediction need neither shuffling nor repetition.
    if not shuffle_repeat:
        return parsed.batch(batch_size)

    return parsed.shuffle(1000).repeat().batch(batch_size)

def input_fn(url, shuffle_repeat=True, batch_size=100):
    """Fetch the CSV at `url` (using the local cache) and build its dataset.

    Args:
        url: Location of the CSV file, handed to `download_and_cache`.
        shuffle_repeat: Forwarded to `csv_input_fn`.
        batch_size: Forwarded to `csv_input_fn`.

    Returns:
        The dataset produced by `csv_input_fn` for the cached file.
    """
    local_csv = download_and_cache(url)
    return csv_input_fn(local_csv,
                        shuffle_repeat=shuffle_repeat,
                        batch_size=batch_size)

def infer_input_fn(features, batch_size=100):
    """Build a batched, label-free dataset from in-memory features.

    Args:
        features: Anything `dict()` accepts, mapping feature name to values.
        batch_size: Number of examples per batch.

    Returns:
        A dataset sliced from the feature mapping and batched.
    """
    feature_map = dict(features)
    return tf.data.Dataset.from_tensor_slices(feature_map).batch(batch_size)


In [None]:
def neural_net(features, feature_column,
               hidden_units, output_units):
    """Stack dense layers on top of the feature-column input layer.

    Args:
        features: Features passed to `tf.feature_column.input_layer`.
        feature_column: Feature column(s) describing how to read `features`.
        hidden_units: Iterable with the width of each ReLU hidden layer,
            applied in order.
        output_units: Width of the final layer, which has no activation.

    Returns:
        The output of the final dense layer (logits).
    """
    activations = tf.feature_column.input_layer(features, feature_column)
    for width in hidden_units:
        activations = tf.layers.dense(activations, units=width,
                                      activation=tf.nn.relu)

    # No activation on the last layer: callers apply softmax / the loss.
    return tf.layers.dense(activations, units=output_units, activation=None)

In [None]:
def model_function(features, labels, mode, params):
    """Model function handed to `tf.estimator.Estimator`.

    Args:
        features: Input features, forwarded to `neural_net`.
        labels: Class labels; not used in PREDICT mode.
        mode: A `tf.estimator.ModeKeys` value (PREDICT and EVAL are handled
            explicitly; anything else falls through to the training branch).
        params: Mapping with the keys 'feature_columns', 'hidden_units' and
            'output_layer_class_num'.

    Returns:
        A `tf.estimator.EstimatorSpec` carrying predictions (PREDICT), loss
        and accuracy metric (EVAL), or loss and train op (otherwise).
    """
    mode_keys = tf.estimator.ModeKeys

    logits = neural_net(features,
                        params['feature_columns'],
                        params['hidden_units'],
                        params['output_layer_class_num'])

    pred_classes = tf.argmax(logits, axis=1)
    pred_probs = tf.nn.softmax(logits)

    if mode == mode_keys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode,
            predictions={'logits': logits,
                         'probs': pred_probs,
                         'class': pred_classes})

    loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

    accuracy = tf.metrics.accuracy(labels=labels,
                                   predictions=pred_classes,
                                   name='accuracy_op')

    # tf.metrics.accuracy returns two values; the second one is what gets
    # written to the summary. For the reasoning see
    # https://stackoverflow.com/questions/46409626/how-to-properly-use-tf-metrics-accuracy
    tf.summary.scalar('accuracy', accuracy[1])

    if mode == mode_keys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})

    # Remaining case: training.
    optimizer = tf.train.AdagradOptimizer(learning_rate=0.1)
    train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())

    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

In [None]:
# One numeric feature column per input feature name.
feature_columns = [tf.feature_column.numeric_column(key=name)
                   for name in FEAT_COLUMNS]

# Settings that model_function reads from its `params` argument.
model_params = {
    'feature_columns': feature_columns,
    'hidden_units': [10, 10],
    'output_layer_class_num': 3
}

model = tf.estimator.Estimator(model_function,
                               params=model_params,
                               model_dir=TEMP_DIR)

In [None]:
def train_input_fn():
    """Training input pipeline: input_fn on TRAIN_URL with its defaults."""
    return input_fn(TRAIN_URL)

model.train(input_fn=train_input_fn, steps=2000)


In [None]:
# You can run tensorboard and see the progress:
# !tensorboard --logdir=/tmp/tensorflow_tutorials

In [None]:
def eval_input_fn():
    """Evaluation input pipeline: the test CSV, without shuffle/repeat."""
    return input_fn(TEST_URL, shuffle_repeat=False)

eval_results = model.evaluate(input_fn=eval_input_fn)
print(eval_results)

In [None]:
# Species expected for the three hand-written samples in X, in order.
expected = ['Setosa', 'Versicolor', 'Virginica']

# Three samples, one value per feature name for each of them.
X = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
    }

# model.predict yields its results lazily and can only be consumed once.
# Materialise them here so that cells reading infer_results can be re-run
# without silently getting an exhausted iterator. The input dataset is
# finite (no repeat), so this terminates.
infer_results = list(model.predict(input_fn=lambda: infer_input_fn(X)))

In [None]:
# The message format does not change between iterations.
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

for prediction, expected_species in zip(infer_results, expected):
    class_id = prediction['class']
    # Probability assigned to the predicted class, shown as a percentage.
    confidence = 100 * prediction['probs'][class_id]

    print(template.format(SPECIES[class_id], confidence, expected_species))