78 changes: 12 additions & 66 deletions samples/core/get_started/custom_estimator.py
@@ -17,67 +17,15 @@
from __future__ import print_function

import argparse
import pandas as pd
import tensorflow as tf

import iris_data

parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


def load_data(y_name='Species'):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
train_x, train_y = train, train.pop(y_name)

test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
test_x, test_y = test, test.pop(y_name)

return (train_x, train_y), (test_x, test_y)



def train_input_fn(features, labels, batch_size):
"""An input function for training"""
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(1000).repeat().batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


def eval_input_fn(features, labels=None, batch_size=None):
"""An input function for evaluation or prediction"""
if labels is None:
# No labels, use only features.
inputs = features
else:
inputs = (features, labels)

# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices(inputs)

# Batch the examples
assert batch_size is not None, "batch_size must not be None"
dataset = dataset.batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


def my_model(features, labels, mode, params):
"""DNN with three hidden layers, and dropout of 0.1 probability."""
# Create three fully connected layers each layer having a dropout
@@ -99,12 +47,8 @@ def my_model(features, labels, mode, params):
}
return tf.estimator.EstimatorSpec(mode, predictions=predictions)
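# In TRAIN and EVAL modes, execution continues below to compute the loss and
# the evaluation metrics.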

# Convert the labels to a one-hot tensor of shape (length of features, 3)
# and with an on-value of 1 for each one-hot vector of length 3.
onehot_labels = tf.one_hot(labels, 3, 1, 0)
# Compute loss.
loss = tf.losses.softmax_cross_entropy(
onehot_labels=onehot_labels, logits=logits)
loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
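# Note: sparse_softmax_cross_entropy accepts integer class IDs directly, so
# the explicit tf.one_hot conversion used previously is no longer needed.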

# Compute evaluation metrics.
accuracy = tf.metrics.accuracy(labels=labels,
@@ -129,9 +73,7 @@ def main(argv):
args = parser.parse_args(argv[1:])

# Fetch the data
(train_x, train_y), (test_x, test_y) = load_data()
train_x = dict(train_x)
test_x = dict(test_x)
(train_x, train_y), (test_x, test_y) = iris_data.load_data()

# Feature columns describe how to use the input.
my_feature_columns = []
@@ -151,12 +93,12 @@

# Train the Model.
classifier.train(
input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
input_fn=lambda:iris_data.train_input_fn(train_x, train_y, args.batch_size),
steps=args.train_steps)

# Evaluate the model.
eval_result = classifier.evaluate(
input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
input_fn=lambda:iris_data.eval_input_fn(test_x, test_y, args.batch_size))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

@@ -170,14 +112,18 @@ def main(argv):
}

predictions = classifier.predict(
input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
input_fn=lambda:iris_data.eval_input_fn(predict_x,
labels=None,
batch_size=args.batch_size))

for pred_dict, expec in zip(predictions, expected):
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

class_id = pred_dict['class_ids'][0]
probability = pred_dict['probabilities'][class_id]
print(template.format(SPECIES[class_id], 100 * probability, expec))

print(template.format(iris_data.SPECIES[class_id],
100 * probability, expec))


if __name__ == '__main__':
3 changes: 2 additions & 1 deletion samples/core/get_started/estimator_test.py
@@ -23,6 +23,7 @@

from six.moves import StringIO

import iris_data
import custom_estimator
import premade_estimator

@@ -35,7 +36,7 @@
def four_lines_data():
text = StringIO(FOUR_LINES)

df = pd.read_csv(text, names=premade_estimator.CSV_COLUMN_NAMES)
df = pd.read_csv(text, names=iris_data.CSV_COLUMN_NAMES)

xy = (df, df.pop("Species"))
return xy, xy
93 changes: 93 additions & 0 deletions samples/core/get_started/iris_data.py
@@ -0,0 +1,93 @@
import pandas as pd
import tensorflow as tf

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']

def maybe_download():
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)

return train_path, test_path

def load_data(y_name='Species'):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
train_path, test_path = maybe_download()

train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
train_x, train_y = train, train.pop(y_name)

test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
test_x, test_y = test, test.pop(y_name)

return (train_x, train_y), (test_x, test_y)


def train_input_fn(features, labels, batch_size):
"""An input function for training"""
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(1000).repeat().batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


def eval_input_fn(features, labels, batch_size):
"""An input function for evaluation or prediction"""
features = dict(features)
if labels is None:
# No labels, use only features.
inputs = features
else:
inputs = (features, labels)

# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices(inputs)

# Batch the examples
assert batch_size is not None, "batch_size must not be None"
dataset = dataset.batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


# The remainder of this file contains a simple example of a CSV parser,
# implemented using the `Dataset` class.

# `tf.decode_csv` sets the types of the outputs to match the examples given in
# the `record_defaults` argument.
CSV_TYPES = [[0.0], [0.0], [0.0], [0.0], [0]]

def _parse_line(line):
# Decode the line into its fields
fields = tf.decode_csv(line, record_defaults=CSV_TYPES)

# Pack the result into a dictionary
features = dict(zip(CSV_COLUMN_NAMES, fields))

# Separate the label from the features
label = features.pop('Species')

return features, label


def csv_input_fn(csv_path, batch_size):
# Create a dataset containing the text lines.
dataset = tf.data.TextLineDataset(csv_path).skip(1)

# Parse each line.
dataset = dataset.map(_parse_line)

# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(1000).repeat().batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()
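For context, here is a minimal usage sketch of this module (not part of the diff): it assumes TensorFlow 1.x and the iris_data.py shown above, and the DNNClassifier configuration is illustrative only.

import tensorflow as tf

import iris_data

# Download the CSVs and declare one numeric feature column per feature name.
train_path, test_path = iris_data.maybe_download()
feature_columns = [tf.feature_column.numeric_column(name)
                   for name in iris_data.CSV_COLUMN_NAMES[:-1]]

# Train a canned DNN classifier straight from the CSV file via csv_input_fn.
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns, hidden_units=[10, 10], n_classes=3)
classifier.train(
    input_fn=lambda: iris_data.csv_input_fn(train_path, batch_size=100),
    steps=1000)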
74 changes: 14 additions & 60 deletions samples/core/get_started/premade_estimator.py
@@ -17,73 +17,21 @@
from __future__ import print_function

import argparse
import pandas as pd
import tensorflow as tf

import iris_data


parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=100, type=int, help='batch size')
parser.add_argument('--train_steps', default=1000, type=int,
help='number of training steps')

TRAIN_URL = "http://download.tensorflow.org/data/iris_training.csv"
TEST_URL = "http://download.tensorflow.org/data/iris_test.csv"

CSV_COLUMN_NAMES = ['SepalLength', 'SepalWidth',
'PetalLength', 'PetalWidth', 'Species']
SPECIES = ['Setosa', 'Versicolor', 'Virginica']


def load_data(y_name='Species'):
"""Returns the iris dataset as (train_x, train_y), (test_x, test_y)."""
train_path = tf.keras.utils.get_file(TRAIN_URL.split('/')[-1], TRAIN_URL)
train = pd.read_csv(train_path, names=CSV_COLUMN_NAMES, header=0)
train_x, train_y = train, train.pop(y_name)

test_path = tf.keras.utils.get_file(TEST_URL.split('/')[-1], TEST_URL)
test = pd.read_csv(test_path, names=CSV_COLUMN_NAMES, header=0)
test_x, test_y = test, test.pop(y_name)

return (train_x, train_y), (test_x, test_y)


def train_input_fn(features, labels, batch_size):
"""An input function for training"""
# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices((features, labels))

# Shuffle, repeat, and batch the examples.
dataset = dataset.shuffle(1000).repeat().batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


def eval_input_fn(features, labels=None, batch_size=None):
"""An input function for evaluation or prediction"""
if labels is None:
# No labels, use only features.
inputs = features
else:
inputs = (features, labels)

# Convert the inputs to a Dataset.
dataset = tf.data.Dataset.from_tensor_slices(inputs)

# Batch the examples
assert batch_size is not None, "batch_size must not be None"
dataset = dataset.batch(batch_size)

# Return the read end of the pipeline.
return dataset.make_one_shot_iterator().get_next()


def main(argv):
args = parser.parse_args(argv[1:])

# Fetch the data
(train_x, train_y), (test_x, test_y) = load_data()
train_x = dict(train_x)
test_x = dict(test_x)
(train_x, train_y), (test_x, test_y) = iris_data.load_data()

# Feature columns describe how to use the input.
my_feature_columns = []
@@ -100,12 +48,14 @@ def main(argv):

# Train the Model.
classifier.train(
input_fn=lambda:train_input_fn(train_x, train_y, args.batch_size),
input_fn=lambda:iris_data.train_input_fn(train_x, train_y,
args.batch_size),
steps=args.train_steps)

# Evaluate the model.
eval_result = classifier.evaluate(
input_fn=lambda:eval_input_fn(test_x, test_y, args.batch_size))
input_fn=lambda:iris_data.eval_input_fn(test_x, test_y,
args.batch_size))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))

@@ -119,14 +69,18 @@ def main(argv):
}

predictions = classifier.predict(
input_fn=lambda:eval_input_fn(predict_x, batch_size=args.batch_size))
input_fn=lambda:iris_data.eval_input_fn(predict_x,
labels=None,
batch_size=args.batch_size))

for pred_dict, expec in zip(predictions, expected):
template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"')

class_id = pred_dict['class_ids'][0]
probability = pred_dict['probabilities'][class_id]
print(template.format(SPECIES[class_id], 100 * probability, expec))

print(template.format(iris_data.SPECIES[class_id],
100 * probability, expec))


if __name__ == '__main__':
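As a usage note (the flags come from the argparse setup shown above), either sample can be run directly, for example:

python premade_estimator.py --batch_size=100 --train_steps=1000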