In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np

import json

# Report the library versions this notebook was executed with, one per line.
for version_label, library in (
    ('Tensorflow Version: ', tf),
    ('Pandas Version: ', pd),
    ('Numpy Version: ', np),
):
    print(version_label, library.__version__)

Tensorflow Version:  1.8.0
Pandas Version:  0.22.0
Numpy Version:  1.14.2


In [2]:
# Directory the input arrays and the dataset config are read from, and the
# directory prefix used for the Estimator's model_dir.
DATA_PATH = './data/'
MODEL_PATH = './model/'

# File names, resolved relative to DATA_PATH when loaded below.
# NOTE(review): "morph"/"common" presumably denote two different encodings of
# the same queries -- confirm against the preprocessing step.
MORPH_QUERY_DATA_FILE = 'morph_query.npy'
COMMON_QUERY_DATA_FILE = 'common_query.npy'
INTENT_DATA_FILE = 'intent.npy'
# JSON file holding the intent map, pad/unk symbols and both vocabularies.
DATA_SETTING_CONFIG = 'large_intent_data_map.json'

## 1. Read Dataset

In [3]:
# Passed as `test_size` to train_test_split: the fraction held out for testing.
TEST_SPLIT = 0.1
# Passed as `random_state` to train_test_split so the split is repeatable.
RNG_SEED = 13371447

In [4]:
# Hand the path straight to np.load so NumPy opens and closes the file itself;
# wrapping the call around a bare open(...) left three file handles unclosed.
morph_data = np.load(DATA_PATH + MORPH_QUERY_DATA_FILE)
common_data = np.load(DATA_PATH + COMMON_QUERY_DATA_FILE)
labels = np.load(DATA_PATH + INTENT_DATA_FILE)

In [5]:
from sklearn.model_selection import train_test_split

# Pair the two query encodings along a new axis 1 so a single split call keeps
# them aligned row-for-row with each other and with the labels.
X = np.stack([morph_data, common_data], axis=1)
y = labels
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=TEST_SPLIT,
    random_state=RNG_SEED,
)

# Undo the pairing: index 0 on axis 1 is the morph encoding, index 1 the common one.
morph_train, common_train = X_train[:, 0], X_train[:, 1]
morph_test, common_test = X_test[:, 0], X_test[:, 1]

## 2. Read Dataset Configs

In [6]:
# Read the dataset config inside a context manager so the handle is closed,
# and decode explicitly as UTF-8: the intent names are Korean, so relying on
# the platform default encoding can fail or garble them.
with open(DATA_PATH + DATA_SETTING_CONFIG, encoding='utf-8') as config_file:
    json_data = config_file.read()
data_configs = json.loads(json_data)
print('keys: ', list(data_configs.keys()))
print('intents: ', data_configs['intent'])

keys:  ['intent', 'pad_sym', 'unk_sym', 'common_vocab', 'morph_vocab']
intents:  {'뽀로로': 0, '감정/감각': 1, '요청/제안': 2, '인사': 3, '일상': 4, '칭찬': 5, '기타': 6, '불만': 7}


## 3. Define input function for tf.Estimator

In [7]:
def parse(x, y):
    """Turn one (input, label) pair into the (features, label) pair the model expects.

    Args:
        x: The model input for one example; stored under the 'query' key.
        y: The label for the example; returned unchanged.

    Returns:
        A 2-tuple of a single-entry feature dict {'query': x} and y.
    """
    features = dict(query=x)
    return features, y

def train_input_fn():
    """Input function for training: one shuffled pass over the training split.

    Builds a dataset from `morph_train` / `y_train`, shuffles it, wraps each
    example with `parse`, and batches it in groups of 32.

    Returns:
        The `(features, labels)` tensors produced by a one-shot iterator,
        where `features` is a dict with the single key 'query'.
    """
    # Size the shuffle buffer to the whole training set. A fixed buffer of 1000
    # only mixes examples within a sliding window, so batches stay correlated
    # with the stored order whenever the split holds more than 1000 rows.
    shuffle_buffer_size = len(morph_train)

    dataset = tf.data.Dataset.from_tensor_slices((morph_train, y_train))
    dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.map(parse)
    dataset = dataset.batch(32)
    # A single epoch per call to Estimator.train.
    dataset = dataset.repeat(1)
    iterator = dataset.make_one_shot_iterator()

    return iterator.get_next()

def test_input_fn():
    """Input function for evaluation and prediction over the held-out split.

    Reads `morph_test` / `y_test` in their stored order (no shuffling), wraps
    each example with `parse`, and batches in groups of 32.

    Returns:
        The `(features, labels)` tensors produced by a one-shot iterator.
    """
    held_out = (
        tf.data.Dataset.from_tensor_slices((morph_test, y_test))
        .map(parse)
        .batch(32)
    )
    return held_out.make_one_shot_iterator().get_next()

## 4. Build Default Classification Model

In [8]:
# Used as the max-pooling window in the model and as the row width when the
# serving input is reshaped.
# NOTE(review): presumably the fixed length every query is padded to -- confirm
# against the preprocessing step.
LIMIT_SEQ_LEN = 15

# Model hyper-parameters; the next cell copies them into the lowercase names
# that the model function reads.
WORD_EMBEDDING_DIM = 100
CONV_CHANNEL_DIM = 150
CONV_WINDOW_SIZE = 3
DNN_FEATURE_SIZE = 100

In [9]:
# Embedding table shape: one row per entry in the morph vocabulary.
vocabulary_size = len(data_configs['morph_vocab'])
embedding_size = WORD_EMBEDDING_DIM

# Number of filters and kernel width of the 1-D convolution.
conv_channel_dim = CONV_CHANNEL_DIM
conv_window_size = CONV_WINDOW_SIZE

# Width of the hidden dense layer and of the output layer (one unit per intent).
dense_dim = DNN_FEATURE_SIZE
num_classes = len(data_configs['intent'])

In [10]:
def basic_cnn_model_fn(features, labels, mode):
    """Estimator model function: embedding -> conv1d -> max-pool -> dense -> logits.

    Args:
        features: Dict with a 'query' entry holding the ids looked up in the
            embedding table.
            NOTE(review): presumably shaped (batch, LIMIT_SEQ_LEN) -- the
            squeeze on axis 1 below only works if pooling leaves length 1.
        labels: Class indices; one-hot encoded here for the loss. Not read in
            PREDICT mode.
        mode: A `tf.estimator.ModeKeys` value selecting which spec to build.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode. Any mode other
        than PREDICT or EVAL is handled as training.
    """
    embedding_table = tf.get_variable('word_embeddings',
                                      [vocabulary_size, embedding_size])
    embedded_query = tf.nn.embedding_lookup(embedding_table, features['query'])

    conv_features = tf.layers.conv1d(
        embedded_query,
        conv_channel_dim,
        conv_window_size,
        activation=tf.nn.relu,
        padding='same')
    # Pool window of LIMIT_SEQ_LEN with stride 1: the time axis is reduced by
    # taking the maximum over LIMIT_SEQ_LEN consecutive positions.
    pooled_features = tf.layers.max_pooling1d(conv_features, LIMIT_SEQ_LEN, 1)
    hidden_features = tf.layers.dense(pooled_features, dense_dim,
                                      activation=tf.nn.relu)
    logits = tf.layers.dense(hidden_features, num_classes)
    # Drop the pooled axis so logits have one row of class scores per example.
    logits = tf.squeeze(logits, axis=1)

    class_probabilities = tf.nn.softmax(logits)

    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'class': tf.argmax(class_probabilities, 1)}
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions=predictions,
            export_outputs={
                'prediction': tf.estimator.export.PredictOutput(predictions)
            })

    # Both remaining modes need the labels in one-hot form for the loss.
    one_hot_labels = tf.one_hot(labels, num_classes)

    if mode == tf.estimator.ModeKeys.EVAL:
        accuracy = tf.metrics.accuracy(labels, tf.argmax(class_probabilities, 1))
        loss = tf.losses.softmax_cross_entropy(one_hot_labels, logits)
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops={'accuracy': accuracy})

    # Training: minimise cross-entropy with Adam, advancing the global step.
    global_step = tf.train.get_global_step()
    loss = tf.losses.softmax_cross_entropy(one_hot_labels, logits)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(loss, global_step)

    return tf.estimator.EstimatorSpec(
        mode=mode,
        train_op=train_op,
        loss=loss)

In [11]:
# Checkpoints are written to (and, if already present, restored from) this
# model directory, as the training log below shows.
est = tf.estimator.Estimator(
    model_fn=basic_cnn_model_fn,
    model_dir=MODEL_PATH + 'basic_cnn_0',
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model/basic_cnn_0', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2294b8d860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## 5. Train and eval Model

In [18]:
# One call consumes train_input_fn once, i.e. a single pass over the training split.
est.train(input_fn=train_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/basic_cnn_0/model.ckpt-486
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 487 into ./model/basic_cnn_0/model.ckpt.
INFO:tensorflow:loss = 0.6312933, step = 486
INFO:tensorflow:global_step/sec: 439.458
INFO:tensorflow:loss = 0.16019377, step = 586 (0.228 sec)
INFO:tensorflow:Saving checkpoints for 648 into ./model/basic_cnn_0/model.ckpt.
INFO:tensorflow:Loss for final step: 0.1686485.


<tensorflow.python.estimator.estimator.Estimator at 0x7f2294b8d710>

In [19]:
# Reports the loss and the 'accuracy' metric defined in the model function.
est.evaluate(input_fn=test_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2018-06-21-07:24:10
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from ./model/basic_cnn_0/model.ckpt-648
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Finished evaluation at 2018-06-21-07:24:10
INFO:tensorflow:Saving dict for global step 648: accuracy = 0.83652174, global_step = 648, loss = 0.53325325


{'accuracy': 0.83652174, 'loss': 0.53325325, 'global_step': 648}

In [20]:
# NOTE(review): `pred` is not consumed by any later visible cell.
pred = est.predict(input_fn=test_input_fn)

## 6. Save Model for Serving

In [21]:
def serving_input_receiver_fn():
    """Build the serving-time inputs for the exported SavedModel.

    The exported signature accepts a 1-D batch of byte strings under the key
    'query'. Each string is decoded as raw int64 values, and the decoded batch
    is reshaped to rows of LIMIT_SEQ_LEN ids before being passed to the model
    under the same 'query' feature key.

    Returns:
        A `tf.estimator.export.ServingInputReceiver` pairing the decoded
        features with the raw string placeholder.
    """
    receiver_tensor = {
        'query': tf.placeholder(dtype=tf.string, shape=[None])
    }

    # Decode each serialized query on its own, then stack the results.
    decoded_query = tf.map_fn(
        lambda query: tf.decode_raw(query, tf.int64),
        receiver_tensor['query'],
        dtype=tf.int64)

    features = {
        'query': tf.reshape(decoded_query, [-1, LIMIT_SEQ_LEN])
    }

    return tf.estimator.export.ServingInputReceiver(features, receiver_tensor)

In [22]:
# Directory under which each export gets its own timestamped sub-directory.
# Defined here because no earlier visible cell defines it, so a fresh
# Restart & Run All would otherwise stop with a NameError.
export_dir_base = './served_model/test_model'

est.export_savedmodel(export_dir_base, serving_input_receiver_fn,
                      strip_default_attrs=True)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['prediction', 'serving_default']
INFO:tensorflow:Restoring parameters from ./model/basic_cnn_0/model.ckpt-648
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: b"./served_model/test_model/temp-b'1529565991'/saved_model.pb"


b'./served_model/test_model/1529565991'

In [23]:
# NOTE(review): the timestamped directory is hard-coded to the export path
# shown in the output above; a new export will land in a different directory.
saved_model_dir = "./served_model/test_model/1529565991"
predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir=saved_model_dir,
    signature_def_key="serving_default",
)

INFO:tensorflow:Restoring parameters from b'./served_model/test_model/1529565991/variables/variables'


In [24]:
# One example query as 15 int64 ids: two token ids followed by thirteen 1s
# (presumably the pad symbol id -- confirm against data_configs).
query_ids = np.array(
    [691, 1723, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
    dtype=np.int64)
# Serialize to raw bytes for the string input of the serving signature.
# tobytes() replaces the deprecated alias tostring(); a separate name for the
# id array keeps `arr` from meaning two different things.
arr = query_ids.tobytes()

In [25]:
# Run the reloaded model on the serialized query; the enclosing list supplies
# the batch dimension of the 1-D string input (a batch of one).
output = predictor_fn({'query': [arr]})

In [26]:
# 'class' is the argmax over the model's class scores, one entry per query.
# NOTE(review): presumably an intent id from data_configs['intent'] -- confirm.
output['class']

array([1])