In [None]:
import tensorflow as tf
import numpy as np

In [None]:
def feature_columns(vocab_size, embedding_dim):
    """
    Build the feature columns consumed by the estimator.

    A single categorical identity column keyed "text" is created with
    ``vocab_size`` buckets and wrapped into an embedding column.

    :param vocab_size: number of buckets of the identity column.
    :param embedding_dim: dimension of the embedding column.
    :return: list holding the single embedding feature column.
    """
    token_ids = tf.feature_column.categorical_column_with_identity(
        key="text", num_buckets=vocab_size)
    text_embedding = tf.feature_column.embedding_column(
        categorical_column=token_ids, dimension=embedding_dim)
    return [text_embedding]

def text_line_dataset(dataset_path, vocab_dictionary_path, vocab_size, batch_size=64, repeat_count=50, shuffle=True,
                      shuffle_buffer_size=None):
    """
    Efficient and streaming way of reading records line by line from the given dataset path.

    Every line is converted to a (features, label) pair, optionally shuffled, then
    repeated and batched.

    :param dataset_path: path of the text file to read, one record per line.
    :param vocab_dictionary_path: path of the vocabulary file used to map tokens to indices.
    :param vocab_size: size of the vocabulary; tokens missing from the vocabulary file
                       are mapped to ``vocab_size - 1``.
    :param batch_size: number of records per batch.
    :param repeat_count: number of times the dataset is repeated.
    :param shuffle: whether the records are shuffled before repeating and batching.
    :param shuffle_buffer_size: size of the shuffle buffer. Defaults to ``batch_size`` (the
                                historical behaviour), which only mixes records locally; pass a
                                value close to the dataset size for a more thorough shuffle.
    :return: the resulting ``tf.data`` dataset of batched (features, label) pairs.
    """
    dataset = tf.data.TextLineDataset(dataset_path)

    index_table_from_file = tf.contrib.lookup.index_table_from_file(vocab_dictionary_path,
                                                                    default_value=vocab_size - 1,
                                                                    key_column_index=0)

    # convert every line to features with batching and repetition
    dataset = dataset.map(
        lambda line: to_feature_label_for_train_dev(line, index_table_from_file))
    if shuffle:
        if shuffle_buffer_size is None:
            shuffle_buffer_size = batch_size
        dataset = dataset.shuffle(shuffle_buffer_size)
    return dataset.repeat(repeat_count).batch(batch_size)

def to_feature_label_for_train_dev(input, index_table_from_file):
    """
    Parse one "|"-separated line into a (feature, label) pair for training / evaluation.

    :param input: string tensor holding one line of the dataset.
    :param index_table_from_file: lookup table handed over to ``to_feature``.
    :return: tuple (feature built by ``to_feature`` from the split line, last field of the line).
    """
    fields = tf.string_split([input], "|").values
    # the label stays a string: it must be one of the values of label_vocabulary
    return to_feature(fields, index_table_from_file), fields[-1]


def to_feature(input, lookup_table: tf.contrib.lookup.LookupInterface, export=False):
    """
    returns map of {"text" -> sparse tensor where tokens are converted into indices using lookup}

    The first element of ``input`` is split on single spaces and every resulting token is
    looked up in ``lookup_table``.

    :param input: indexable whose first element is the text to tokenize. The callers in this
                  notebook pass a single scalar line when ``export`` is False and an already
                  batched 1-D string tensor when ``export`` is True.
    :param lookup_table: table used to convert tokens into vocabulary indices.
    :param export: True when ``input[0]`` is already a batch of strings (serving path), so it
                   must not be wrapped into an extra list before splitting.
    :return: dict with the single key "text".
    """
    # string_split is given a list in the training path (single line) and the
    # already batched tensor in the serving path.
    source = input[0] if export else [input[0]]

    # string_split already yields a SparseTensor, so it is passed to the lookup as is
    # instead of being rebuilt field by field.
    tokens = tf.string_split(source, delimiter=" ")

    return {"text": lookup_table.lookup(tokens)}  # generate indexes for vocabularies


def serving_input_receiver_fn_decorator(vocab_dictionary_path, vocab_size):
    """
    Build the serving input receiver function needed to export the estimator.

    :param vocab_dictionary_path: path of the vocabulary file used for the token lookup.
    :param vocab_size: vocabulary size; ``vocab_size - 1`` is the lookup default value.
    :return: zero-argument function returning a ``ServingInputReceiver``.
    """

    def serving_input_receiver_fn():
        """
        serialization function to convert models to tf saved model format
        :return: ServingInputReceiver linking the string placeholder to the model features.
        """
        raw_rows = tf.placeholder(dtype=tf.string, shape=[None], name='input_csv_tensor')

        # one feature in toy example, defaulting to the empty string
        column_defaults = [['']]
        parsed_columns = tf.decode_csv(raw_rows, column_defaults, "\t", use_quote_delim=False)

        token_table = tf.contrib.lookup.index_table_from_file(vocab_dictionary_path,
                                                              default_value=vocab_size - 1,
                                                              key_column_index=0)

        features = to_feature(parsed_columns, token_table, export=True)
        return tf.estimator.export.ServingInputReceiver(features, {'input': raw_rows})

    return serving_input_receiver_fn

In [None]:
# --- paths and hyper-parameters of the toy example ---
vocab_dictionary_path = "dict_example.txt"
train_path = "train_example.txt"
dev_path = "dev_example.txt"
embedding_dim = 2

# one id per line of the vocabulary file, +1 for unknown
with open(vocab_dictionary_path, "r") as f:
    vocab_size = sum(1 for _ in f) + 1

# --- estimator: three-way classifier on top of the embedded "text" feature ---
classifier = tf.estimator.DNNClassifier(
    feature_columns=feature_columns(vocab_size, embedding_dim),
    model_dir="test_model_dnn",
    hidden_units=[64],
    n_classes=3,
    label_vocabulary=["positive", "negative", "neutral"],
)

# --- training: small shuffled batches, stopped after at most 1000 steps ---
train_spec = tf.estimator.TrainSpec(
    input_fn=lambda: text_line_dataset(dataset_path=train_path,
                                       vocab_dictionary_path=vocab_dictionary_path,
                                       vocab_size=vocab_size,
                                       batch_size=4,
                                       repeat_count=10),
    max_steps=1000)

# --- evaluation: one unshuffled pass over the dev set ---
eval_spec = tf.estimator.EvalSpec(
    input_fn=lambda: text_line_dataset(dataset_path=dev_path,
                                       vocab_dictionary_path=vocab_dictionary_path,
                                       vocab_size=vocab_size,
                                       batch_size=2048,
                                       repeat_count=1,
                                       shuffle=False),
    steps=None)


In [None]:
# train the classifier and evaluate it with the specs defined above
tf.estimator.train_and_evaluate(estimator=classifier,
                                train_spec=train_spec,
                                eval_spec=eval_spec)

In [None]:
# predictions on the dev set, read once and without shuffling
estimator_model_predictions = classifier.predict(
    input_fn=lambda: text_line_dataset(dataset_path=dev_path,
                                       vocab_dictionary_path=vocab_dictionary_path,
                                       vocab_size=vocab_size,
                                       batch_size=2048,
                                       repeat_count=1,
                                       shuffle=False))

In [None]:
# show every prediction produced by the estimator, one per line
for prediction in estimator_model_predictions:
    print(prediction)

In [None]:
class Inference:
    """
    Inference object which initializes a tensorflow saved model and provides predict API.

    The object owns a ``tf.Session``. Call ``close()`` or use the object as a context
    manager (``with Inference(path) as model:``) to release it.
    """

    def __init__(self, saved_model_dir):
        """
        Load the saved model and remember the tensor names of its serving signature.

        :param saved_model_dir: either a directory that directly contains the saved model
            file, or a base directory holding one sub-directory per exported version; in the
            latter case the newest version is loaded.
        """
        # Resolve the directory before creating the session so nothing is left
        # open when the listing fails.
        export_dir = self._resolve_export_dir(saved_model_dir)

        self.sess = tf.Session(graph=tf.Graph())
        try:
            metagraph = tf.saved_model.loader.load(self.sess, [tf.saved_model.tag_constants.SERVING], export_dir)
        except Exception:
            # do not leak the session when the model cannot be loaded
            self.sess.close()
            raise

        signature = metagraph.signature_def['serving_default']
        outputs = dict(signature.outputs)
        self.output_score_field_name = outputs['scores'].name
        self.output_classes_field_name = outputs['classes'].name
        self.input_field_name = dict(signature.inputs)['inputs'].name

    @staticmethod
    def _select_version(candidates):
        """Return the newest version name among ``candidates``, or None when it is empty."""
        numeric = [name for name in candidates if name.isdigit()]
        if numeric:
            # purely numeric names are compared as integers so the highest one wins
            return max(numeric, key=int)
        return max(candidates) if candidates else None

    @staticmethod
    def _resolve_export_dir(saved_model_dir):
        """Return the directory to load: ``saved_model_dir`` itself or its newest version folder."""
        entries = [entry.rstrip('/') for entry in tf.gfile.ListDirectory(saved_model_dir)]
        if 'saved_model.pb' in entries or 'saved_model.pbtxt' in entries:
            # already a concrete export: do not descend into assets/ or variables/
            return saved_model_dir

        versions = [entry for entry in entries
                    if entry != 'variables' and tf.gfile.IsDirectory(saved_model_dir + '/' + entry)]
        chosen = Inference._select_version(versions)
        if chosen is None:
            return saved_model_dir
        print('Using Saved Folder version: %s' % (chosen))
        return saved_model_dir + '/' + chosen

    def predict(self, input):
        """
        Run the model on a single input string.

        :param input: value fed to the model, wrapped into a one-element batch.
        :return: result of ``sess.run`` for the [scores, classes] output tensors.
        """
        return self.sess.run([self.output_score_field_name, self.output_classes_field_name],
                             feed_dict={
                                 self.input_field_name: [input]})

    def close(self):
        """Release the underlying session."""
        self.sess.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()


In [None]:
# export the trained classifier together with its serving input function
classifier.export_saved_model(
    export_dir_base="savedmodel_dnn",
    serving_input_receiver_fn=serving_input_receiver_fn_decorator(
        vocab_dictionary_path=vocab_dictionary_path,
        vocab_size=vocab_size),
)

In [None]:
# Score every dev example with the exported model.
saved_model = Inference("savedmodel_dnn")
with open(dev_path) as f:
    for line in f:
        # Strip only the line terminator: slicing off the last character would
        # truncate the label of a final line that has no trailing newline.
        line = line.rstrip("\n")
        if not line:
            continue  # blank lines carry no example
        fields = line.split("|")
        # same layout as the training parser: first field is the text, last one the label
        text, label = fields[0], fields[-1]
        prediction = saved_model.predict(text)
        print(prediction)