In [8]:
import pandas as pd
import tensorflow as tf

# Remote locations of the Iris training and test CSV files. maybe_download()
# fetches each one and uses the last path component as the local file name.
TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

def maybe_download():
    """Fetch the training and test CSV files through ``tf.keras.utils.get_file``.

    Each file is requested under the basename of its URL (the text after the
    final ``/``).

    Returns:
        A ``(train_path, test_path)`` tuple holding whatever ``get_file``
        returned for ``TRAIN_URL`` and ``TEST_URL`` respectively.
    """
    # NOTE(review): get_file is expected to reuse an already-downloaded copy,
    # which is what the "maybe" in the name refers to -- confirm in Keras docs.
    local_paths = [
        tf.keras.utils.get_file(url.rsplit('/', 1)[-1], url)
        for url in (TRAIN_URL, TEST_URL)
    ]
    return tuple(local_paths)

In [10]:
# Column labels applied to the Iris CSV files, in file order. The last entry
# ('Species') is the default label column that load_data() pops off.
CSV_COLUMN_NAMES = [
    'SepalLength',
    'SepalWidth',
    'PetalLength',
    'PetalWidth',
    'Species',
]

# Display names for the classes, indexed by the integer class id.
SPECIES = [
    'Setosa',
    'Versicolor',
    'Virginica',
]


In [16]:
def load_data(y_name='Species'):
    """Read the train and test CSV files and split off the label column.

    Args:
        y_name: Name of the column to use as the label. It is removed from
            the returned feature frames.

    Returns:
        ``(train_x, train_y), (test_x, test_y)``, where each ``*_x`` is the
        DataFrame without the label column and each ``*_y`` is that column.
    """
    train_path, test_path = maybe_download()

    # header=0 discards the first row of each file; CSV_COLUMN_NAMES supplies
    # the column labels instead.
    train_x = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
    train_y = train_x.pop(y_name)  # pop() removes the column in place

    test_x = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
    test_y = test_x.pop(y_name)

    return (train_x, train_y), (test_x, test_y)
    
    

In [21]:
# Bind the train/test feature frames and label columns to notebook-level names.
(train_x, train_y), (test_x, test_y) = load_data()

In [24]:
# dict() over the feature frame gives a mapping keyed by column name; this
# shows the 'PetalLength' entry of that mapping.
dict(train_x)['PetalLength']

0      5.6
1      3.3
2      4.5
3      1.5
4      1.7
5      1.3
6      1.5
7      5.1
8      4.4
9      1.5
10     3.9
11     4.9
12     1.2
13     1.7
14     6.7
15     4.7
16     5.9
17     6.6
18     5.3
19     1.5
20     5.7
21     5.6
22     1.3
23     5.6
24     5.8
25     1.5
26     4.0
27     5.1
28     4.5
29     5.0
      ... 
90     5.2
91     4.7
92     1.4
93     1.5
94     5.8
95     1.4
96     1.4
97     6.7
98     4.8
99     1.6
100    1.4
101    3.3
102    1.3
103    4.1
104    1.6
105    1.4
106    1.5
107    1.4
108    3.6
109    1.6
110    4.9
111    4.1
112    1.6
113    6.0
114    1.6
115    4.4
116    4.2
117    1.4
118    1.4
119    3.7
Name: PetalLength, Length: 120, dtype: float64

In [25]:
def train_input_fn(features, labels, batch_size):
    """Build the input pipeline used for training.

    Args:
        features: Feature columns in any form ``dict()`` accepts.
        labels: Labels paired with the features.
        batch_size: Number of examples per batch.

    Returns:
        The dataset built from ``(dict(features), labels)`` after shuffling
        with a buffer of 1000, repeating, and batching.
    """
    feature_map = dict(features)
    examples = tf.data.Dataset.from_tensor_slices((feature_map, labels))

    # Apply the stages one at a time: shuffle -> repeat -> batch.
    examples = examples.shuffle(1000)
    examples = examples.repeat()
    return examples.batch(batch_size)


def eval_input_fn(features, labels, batch_size):
    """Build the input pipeline used for evaluation or prediction.

    Args:
        features: Feature columns in any form ``dict()`` accepts.
        labels: Labels paired with the features, or ``None`` when only
            features are available.
        batch_size: Number of examples per batch; must not be ``None``.

    Returns:
        The batched dataset built from the features alone when ``labels`` is
        ``None``, otherwise from the ``(features, labels)`` pair.

    Raises:
        AssertionError: If ``batch_size`` is ``None``.
    """
    feature_map = dict(features)

    # Without labels the dataset is built from the features alone.
    inputs = feature_map if labels is None else (feature_map, labels)
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    assert batch_size is not None, "batch_size must not be None"
    return dataset.batch(batch_size)


In [29]:
# Load the splits in this cell as well, so training starts from fresh frames.
(train_x, train_y), (test_x, test_y) = load_data()

# Training hyperparameters.
train_steps = 5000
batch_size = 100

# One numeric feature column per column of the training frame.
my_feature_columns = [
    tf.feature_column.numeric_column(key=key) for key in train_x.keys()
]

# DNN classifier with hidden_units=[10, 10] and three output classes.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    hidden_units=[10, 10],
    n_classes=3,
)

# The lambda defers building the dataset until the estimator calls it.
# train() is the final expression, so the cell displays its return value.
classifier.train(
    input_fn=lambda: train_input_fn(train_x, train_y, batch_size),
    steps=train_steps,
)
        

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/31/vyp0jfld0jd1gb8kjcdpn7r00000gn/T/tmp2a0yy4gb', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x182006b6a0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/31

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x182055c860>

In [31]:
# Evaluate the trained classifier on the test split. The lambda defers
# building the dataset until evaluate() calls it; the result is consumed
# later as a mapping that contains an 'accuracy' entry.
eval_result = classifier.evaluate(
    input_fn=lambda:eval_input_fn(test_x, test_y, batch_size))


INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-12-12:49:24
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/31/vyp0jfld0jd1gb8kjcdpn7r00000gn/T/tmp2a0yy4gb/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-12-12:49:24
INFO:tensorflow:Saving dict for global step 5000: accuracy = 0.96666664, average_loss = 0.072312765, global_step = 5000, loss = 2.169383


In [32]:
# eval_result supplies the 'accuracy' entry consumed by the format string.
print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

# Three unlabeled samples (each feature list holds one value per sample) and
# the species each sample is expected to be classified as.
expected = ['Setosa', 'Versicolor', 'Virginica']
predict_x = {
    'SepalLength': [5.1, 5.9, 6.9],
    'SepalWidth': [3.3, 3.0, 3.1],
    'PetalLength': [1.7, 4.2, 5.4],
    'PetalWidth': [0.5, 1.5, 2.1],
}

# labels=None makes eval_input_fn build the dataset from the features alone.
predictions = classifier.predict(
    input_fn=lambda: eval_input_fn(predict_x, labels=None, batch_size=batch_size))

# The message layout is the same for every sample, so it is defined once.
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

for pred_dict, expec in zip(predictions, expected):
    class_id = pred_dict['class_ids'][0]
    probability = pred_dict['probabilities'][class_id]

    print(template.format(SPECIES[class_id], 100 * probability, expec))
    # Prints one message per sample, for example:
    #   Prediction is "Setosa" (99.9%), expected "Setosa"
    # The reported percentages can differ from one training run to the next.


Test set accuracy: 0.967

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /var/folders/31/vyp0jfld0jd1gb8kjcdpn7r00000gn/T/tmp2a0yy4gb/model.ckpt-5000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.

Prediction is "Setosa" (99.9%), expected "Setosa"

Prediction is "Versicolor" (100.0%), expected "Versicolor"

Prediction is "Virginica" (99.8%), expected "Virginica"
