In [1]:
import os
from datetime import datetime
import tensorflow as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split

In [2]:
# Location and file names of the preprocessed training artifacts.
DATA_DIR = './dataset/'
INPUT_TRAIN_DATA = 'train.npy'
LABEL_TRAIN_DATA = 'train_label.npy'
DATA_CONFIGS = 'data_configs.json'

# Each file is opened in a `with` block so its handle is closed as soon as the
# contents are read, instead of staying open until garbage collection.
with open(DATA_DIR + INPUT_TRAIN_DATA, 'rb') as input_file:
    input_data = np.load(input_file)
with open(DATA_DIR + LABEL_TRAIN_DATA, 'rb') as label_file:
    label_data = np.load(label_file)
with open(DATA_DIR + DATA_CONFIGS, 'r') as config_file:
    # Preprocessing metadata; 'vocab_size' is read from it later in the notebook.
    prepro_configs = json.load(config_file)

In [3]:
# --- Train / eval split settings ---
TEST_SPLIT = 0.1        # passed as `test_size` to train_test_split
RNG_SEED = 13371447     # passed as `random_state` so the split is repeatable

# --- Model / input pipeline settings ---
VOCAB_SIZE = prepro_configs['vocab_size']  # input dimension of the Embedding layer
EMB_SIZE = 128                             # output dimension of the Embedding layer
BATCH_SIZE = 16                            # batch size used by train_input_fn
NUM_EPOCHS = 2                             # repeat count used by train_input_fn

# Hold out TEST_SPLIT of the samples for evaluation.
input_train, input_eval, label_train, label_eval = train_test_split(
    input_data,
    label_data,
    test_size=TEST_SPLIT,
    random_state=RNG_SEED,
)

In [4]:
def mapping_fn(X, Y):
    """Wrap the features in a dict under key 'x' and pass the label through.

    Args:
        X: feature value(s) for one dataset element.
        Y: label value(s) for the same element.

    Returns:
        A `({'x': X}, Y)` tuple.
    """
    return {'x': X}, Y

def train_input_fn():
    """Input function for training.

    Builds a dataset from (input_train, label_train), shuffles it with a
    buffer covering every sample, batches it by BATCH_SIZE, maps each batch
    through mapping_fn and repeats the whole thing NUM_EPOCHS times.

    Returns:
        The `get_next()` result of a one-shot iterator over that dataset.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((input_train, label_train))
        .shuffle(buffer_size=len(input_train))
        .batch(BATCH_SIZE)
        .map(mapping_fn)
        .repeat(count=NUM_EPOCHS)
    )
    return pipeline.make_one_shot_iterator().get_next()

def eval_input_fn():
    """Input function for evaluation.

    Builds a dataset from (input_eval, label_eval) in its stored order (no
    shuffling, no repeat), batches it by 128 and maps each batch through
    mapping_fn.

    Returns:
        The `get_next()` result of a one-shot iterator over that dataset.
    """
    pipeline = (
        tf.data.Dataset.from_tensor_slices((input_eval, label_eval))
        .batch(128)
        .map(mapping_fn)
    )
    return pipeline.make_one_shot_iterator().get_next()

In [5]:
# Convolutional stack used by model_fn.
NUM_CONV_LAYERS = 5      # number of residual conv blocks
CONV_FEATURE_DIM = 128   # Conv1D filter count per block
CONV_WINDOW_SIZE = 3     # Conv1D kernel size

# Fully connected stack used by model_fn.
NUM_FC_LAYERS = 10       # number of residual dense layers
FC_FEATURE_DIM = 128     # units in each dense layer

In [14]:
def model_fn(features, labels, mode):
    """Estimator model function: gated residual 1-D CNN with a single-logit head.

    Network layout: embedding -> NUM_CONV_LAYERS residual gated conv blocks ->
    flatten -> dense -> NUM_FC_LAYERS residual dense layers -> Dense(1) logit.

    Args:
        features: dict whose 'x' entry is fed to the Embedding layer
            (presumably integer token ids of fixed length -- confirm against
            the preprocessing step).
        labels: label tensor; reshaped to [-1, 1] to line up with the logits.
            Not used in PREDICT mode.
        mode: one of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec` for PREDICT, TRAIN or EVAL. Falls
        through and returns None for any other mode.
    """
    TRAIN = mode == tf.estimator.ModeKeys.TRAIN
    EVAL = mode == tf.estimator.ModeKeys.EVAL
    PREDICT = mode == tf.estimator.ModeKeys.PREDICT

    dropout_rate = 0.2

    def dropout(tensor):
        # `training` must be passed explicitly: without it the Keras Dropout
        # layer falls back to the Keras learning phase, which the Estimator
        # does not set, so dropout would silently be disabled during training.
        return tf.keras.layers.Dropout(dropout_rate)(tensor, training=TRAIN)

    def conv_block(inputs):
        """Conv1D followed by a gated projection; output has CONV_FEATURE_DIM channels."""
        conv_layer = tf.keras.layers.Conv1D(CONV_FEATURE_DIM,
                                            CONV_WINDOW_SIZE,
                                            padding='same')(inputs)

        # Project to twice the width, then split into a gate half and a value half.
        glu_layer = tf.keras.layers.Dense(CONV_FEATURE_DIM * 2,
                                          activation=tf.nn.relu)(conv_layer)
        scored_output, output_layer = tf.split(glu_layer, 2, axis=-1)

        # The value half is scaled by the sigmoid of the gate half.
        return output_layer * tf.nn.sigmoid(scored_output)

    embedding_layer = tf.keras.layers.Embedding(VOCAB_SIZE, EMB_SIZE)(features['x'])
    embedding_layer = dropout(embedding_layer)

    with tf.variable_scope('conv_layers'):
        for i in range(NUM_CONV_LAYERS):
            input_layer = conv_output_layer if i > 0 else embedding_layer
            conv_output_layer = conv_block(input_layer)
            # Residual connection: the add needs matching shapes, so the first
            # block relies on EMB_SIZE being compatible with CONV_FEATURE_DIM.
            conv_output_layer = dropout(input_layer + conv_output_layer)

    flatten_layer = tf.keras.layers.Flatten()(conv_output_layer)
    flatten_layer = tf.keras.layers.Dense(FC_FEATURE_DIM, activation=tf.nn.relu)(flatten_layer)
    with tf.variable_scope('dense_layers'):
        for i in range(NUM_FC_LAYERS):
            input_layer = fc_output_layer if i > 0 else flatten_layer
            fc_output_layer = tf.keras.layers.Dense(FC_FEATURE_DIM, activation=tf.nn.relu)(input_layer)
            # Residual connection around each dense layer.
            fc_output_layer = dropout(input_layer + fc_output_layer)

    logits = tf.keras.layers.Dense(1)(fc_output_layer)

    if PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                # NOTE: despite the key name, this is the rounded (0./1.)
                # sigmoid output, not a probability. The key is kept as-is
                # for callers that already read 'prob'.
                'prob': tf.round(tf.nn.sigmoid(logits))
            })

    # Give labels an explicit trailing dimension of 1, like the logits.
    labels = tf.reshape(labels, [-1, 1])
    # TRAIN and EVAL use the same loss, so it is built once here.
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)

    if TRAIN:
        global_step = tf.train.get_global_step()
        train_op = tf.train.AdamOptimizer(0.001).minimize(loss, global_step)

        return tf.estimator.EstimatorSpec(mode=mode, train_op=train_op, loss=loss)

    if EVAL:
        pred = tf.nn.sigmoid(logits)
        accuracy = tf.metrics.accuracy(labels, tf.round(pred))
        f1_score = tf.contrib.metrics.f1_score(labels, tf.round(pred))
        return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops={'f1 score': f1_score, 'acc': accuracy})

In [15]:
# Estimator wrapping model_fn; it uses model_dir for its checkpoints.
est = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="model/checkpoint/cnn_model",
)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': 'model/checkpoint/cnn_model', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f2ffbe756d8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [None]:
# Train on the pipeline built by train_input_fn.
est.train(input_fn=train_input_fn)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from model/checkpoint/cnn_model/model.ckpt-146940
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 146940 into model/checkpoint/cnn_model/model.ckpt.
INFO:tensorflow:loss = 0.08425193, step = 146940
INFO:tensorflow:global_step/sec: 33.8421
INFO:tensorflow:loss = 0.01668611, step = 147040 (2.953 sec)
INFO:tensorflow:global_step/sec: 84.0883
INFO:tensorflow:loss = 0.07093211, step = 147140 (1.187 sec)
INFO:tensorflow:global_step/sec: 84.7739
INFO:tensorflow:loss = 0.036902614, step = 147240 (1.179 sec)
INFO:tensorflow:global_step/sec: 84.913
INFO:tensorflow:loss = 0.007628917, step = 147340 (1.178 sec)
INFO:tensorflow:global_step/sec: 84.5102
INFO:tensorflow:loss = 0.054902844, step = 147440 (1.183 sec)
INFO:tensorflow:global_st

INFO:tensorflow:global_step/sec: 85.8955
INFO:tensorflow:loss = 0.016440416, step = 154640 (1.164 sec)
INFO:tensorflow:global_step/sec: 85.4355
INFO:tensorflow:loss = 0.12714262, step = 154740 (1.171 sec)
INFO:tensorflow:global_step/sec: 84.5044
INFO:tensorflow:loss = 0.026652317, step = 154840 (1.184 sec)
INFO:tensorflow:global_step/sec: 84.3251
INFO:tensorflow:loss = 0.0062889913, step = 154940 (1.186 sec)
INFO:tensorflow:global_step/sec: 85.204
INFO:tensorflow:loss = 0.090263754, step = 155040 (1.174 sec)
INFO:tensorflow:global_step/sec: 83.4713
INFO:tensorflow:loss = 0.08191228, step = 155140 (1.198 sec)
INFO:tensorflow:global_step/sec: 84.2925
INFO:tensorflow:loss = 0.064139865, step = 155240 (1.186 sec)
INFO:tensorflow:global_step/sec: 84.2105
INFO:tensorflow:loss = 0.045036137, step = 155340 (1.188 sec)
INFO:tensorflow:global_step/sec: 84.6669
INFO:tensorflow:loss = 0.007407814, step = 155440 (1.181 sec)
INFO:tensorflow:global_step/sec: 84.766
INFO:tensorflow:loss = 0.030891597,

INFO:tensorflow:loss = 0.028539754, step = 162640 (1.178 sec)
INFO:tensorflow:global_step/sec: 84.6581
INFO:tensorflow:loss = 0.25112128, step = 162740 (1.181 sec)
INFO:tensorflow:global_step/sec: 83.3657
INFO:tensorflow:loss = 0.011133021, step = 162840 (1.200 sec)
INFO:tensorflow:global_step/sec: 83.5187
INFO:tensorflow:loss = 0.0063382294, step = 162940 (1.197 sec)
INFO:tensorflow:global_step/sec: 83.2182
INFO:tensorflow:loss = 0.27162072, step = 163040 (1.202 sec)
INFO:tensorflow:global_step/sec: 84.8073
INFO:tensorflow:loss = 0.15143594, step = 163140 (1.179 sec)
INFO:tensorflow:global_step/sec: 84.4686
INFO:tensorflow:loss = 0.0030928946, step = 163240 (1.184 sec)
INFO:tensorflow:global_step/sec: 82.9541
INFO:tensorflow:loss = 0.042581107, step = 163340 (1.205 sec)
INFO:tensorflow:global_step/sec: 84.2211
INFO:tensorflow:loss = 0.016209828, step = 163440 (1.187 sec)
INFO:tensorflow:global_step/sec: 84.3917
INFO:tensorflow:loss = 0.09609238, step = 163540 (1.185 sec)
INFO:tensorfl

INFO:tensorflow:global_step/sec: 83.8174
INFO:tensorflow:loss = 0.02877456, step = 170740 (1.193 sec)
INFO:tensorflow:global_step/sec: 85.1869
INFO:tensorflow:loss = 0.0033320761, step = 170840 (1.174 sec)
INFO:tensorflow:global_step/sec: 84.4836
INFO:tensorflow:loss = 0.30157098, step = 170940 (1.184 sec)
INFO:tensorflow:global_step/sec: 83.9672
INFO:tensorflow:loss = 0.14554888, step = 171040 (1.191 sec)
INFO:tensorflow:global_step/sec: 85.3537
INFO:tensorflow:loss = 0.053450085, step = 171140 (1.172 sec)
INFO:tensorflow:global_step/sec: 85.0111
INFO:tensorflow:loss = 0.086111456, step = 171240 (1.177 sec)
INFO:tensorflow:global_step/sec: 85.3836
INFO:tensorflow:loss = 0.077791885, step = 171340 (1.171 sec)
INFO:tensorflow:global_step/sec: 84.7708
INFO:tensorflow:loss = 0.05893826, step = 171440 (1.180 sec)
INFO:tensorflow:global_step/sec: 85.8423
INFO:tensorflow:loss = 0.07978898, step = 171540 (1.165 sec)
INFO:tensorflow:global_step/sec: 82.9819
INFO:tensorflow:loss = 0.04894957, s

INFO:tensorflow:loss = 0.15949497, step = 178740 (1.179 sec)
INFO:tensorflow:global_step/sec: 84.5022
INFO:tensorflow:loss = 0.09710441, step = 178840 (1.183 sec)
INFO:tensorflow:global_step/sec: 84.6338
INFO:tensorflow:loss = 0.34367982, step = 178940 (1.182 sec)
INFO:tensorflow:global_step/sec: 84.9542
INFO:tensorflow:loss = 0.15156491, step = 179040 (1.177 sec)
INFO:tensorflow:global_step/sec: 84.5161
INFO:tensorflow:loss = 0.027570723, step = 179140 (1.183 sec)
INFO:tensorflow:global_step/sec: 83.5071
INFO:tensorflow:loss = 0.034573887, step = 179240 (1.198 sec)
INFO:tensorflow:global_step/sec: 84.2978
INFO:tensorflow:loss = 0.049248323, step = 179340 (1.186 sec)
INFO:tensorflow:global_step/sec: 81.6918
INFO:tensorflow:loss = 0.0018585443, step = 179440 (1.224 sec)
INFO:tensorflow:global_step/sec: 84.1726
INFO:tensorflow:loss = 0.1767934, step = 179540 (1.189 sec)
INFO:tensorflow:global_step/sec: 84.6532
INFO:tensorflow:loss = 0.13900515, step = 179640 (1.181 sec)
INFO:tensorflow:g

INFO:tensorflow:global_step/sec: 84.3264
INFO:tensorflow:loss = 0.12959619, step = 186840 (1.186 sec)
INFO:tensorflow:global_step/sec: 85.1728
INFO:tensorflow:loss = 0.07672082, step = 186940 (1.174 sec)
INFO:tensorflow:global_step/sec: 83.3453
INFO:tensorflow:loss = 0.12973146, step = 187040 (1.200 sec)
INFO:tensorflow:global_step/sec: 83.5399
INFO:tensorflow:loss = 0.04337109, step = 187140 (1.197 sec)
INFO:tensorflow:global_step/sec: 83.1951
INFO:tensorflow:loss = 0.004919186, step = 187240 (1.202 sec)
INFO:tensorflow:global_step/sec: 83.1713
INFO:tensorflow:loss = 0.16649432, step = 187340 (1.202 sec)
INFO:tensorflow:global_step/sec: 85.9417
INFO:tensorflow:loss = 0.04743892, step = 187440 (1.164 sec)
INFO:tensorflow:global_step/sec: 84.9869
INFO:tensorflow:loss = 0.090840444, step = 187540 (1.177 sec)
INFO:tensorflow:global_step/sec: 81.5927
INFO:tensorflow:loss = 0.09274569, step = 187640 (1.226 sec)
INFO:tensorflow:global_step/sec: 83.2817
INFO:tensorflow:loss = 0.078794986, ste

INFO:tensorflow:loss = 0.10505436, step = 194840 (1.200 sec)
INFO:tensorflow:global_step/sec: 83.4163
INFO:tensorflow:loss = 0.04174133, step = 194940 (1.199 sec)
INFO:tensorflow:global_step/sec: 84.3968
INFO:tensorflow:loss = 0.114546396, step = 195040 (1.185 sec)
INFO:tensorflow:global_step/sec: 83.2021
INFO:tensorflow:loss = 0.044077143, step = 195140 (1.202 sec)
INFO:tensorflow:global_step/sec: 85.0137
INFO:tensorflow:loss = 0.07208125, step = 195240 (1.176 sec)
INFO:tensorflow:global_step/sec: 83.7111
INFO:tensorflow:loss = 0.16140527, step = 195340 (1.194 sec)
INFO:tensorflow:global_step/sec: 85.6291
INFO:tensorflow:loss = 0.006968804, step = 195440 (1.168 sec)
INFO:tensorflow:global_step/sec: 86.1963
INFO:tensorflow:loss = 0.093469694, step = 195540 (1.160 sec)
INFO:tensorflow:global_step/sec: 84.8164
INFO:tensorflow:loss = 0.31785426, step = 195640 (1.179 sec)
INFO:tensorflow:global_step/sec: 84.2611
INFO:tensorflow:loss = 0.0742702, step = 195740 (1.187 sec)
INFO:tensorflow:gl

INFO:tensorflow:global_step/sec: 85.7293
INFO:tensorflow:loss = 0.032821395, step = 202840 (1.166 sec)
INFO:tensorflow:global_step/sec: 85.129
INFO:tensorflow:loss = 0.37202585, step = 202940 (1.175 sec)
INFO:tensorflow:global_step/sec: 83.4587
INFO:tensorflow:loss = 0.027026251, step = 203040 (1.198 sec)
INFO:tensorflow:global_step/sec: 82.8853
INFO:tensorflow:loss = 0.020628944, step = 203140 (1.206 sec)
INFO:tensorflow:global_step/sec: 85.1833
INFO:tensorflow:loss = 0.008337541, step = 203240 (1.174 sec)
INFO:tensorflow:global_step/sec: 84.6769
INFO:tensorflow:loss = 0.10406564, step = 203340 (1.181 sec)
INFO:tensorflow:global_step/sec: 85.2434
INFO:tensorflow:loss = 0.030613068, step = 203440 (1.173 sec)
INFO:tensorflow:global_step/sec: 82.2769
INFO:tensorflow:loss = 0.029210746, step = 203540 (1.215 sec)
INFO:tensorflow:global_step/sec: 84.7719
INFO:tensorflow:loss = 0.102139354, step = 203640 (1.180 sec)
INFO:tensorflow:global_step/sec: 84.8283
INFO:tensorflow:loss = 0.08465587, 

INFO:tensorflow:loss = 0.064495906, step = 210840 (1.191 sec)
INFO:tensorflow:global_step/sec: 84.5775
INFO:tensorflow:loss = 0.015865287, step = 210940 (1.182 sec)
INFO:tensorflow:global_step/sec: 85.5458
INFO:tensorflow:loss = 0.034805648, step = 211040 (1.169 sec)
INFO:tensorflow:global_step/sec: 83.3557
INFO:tensorflow:loss = 0.038211618, step = 211140 (1.200 sec)
INFO:tensorflow:global_step/sec: 85.3004
INFO:tensorflow:loss = 0.009046361, step = 211240 (1.172 sec)
INFO:tensorflow:global_step/sec: 85.8343
INFO:tensorflow:loss = 0.09937134, step = 211340 (1.165 sec)
INFO:tensorflow:global_step/sec: 84.6713
INFO:tensorflow:loss = 0.04441447, step = 211440 (1.181 sec)
INFO:tensorflow:global_step/sec: 82.5464
INFO:tensorflow:loss = 0.010681304, step = 211540 (1.211 sec)
INFO:tensorflow:global_step/sec: 82.8586
INFO:tensorflow:loss = 0.0841535, step = 211640 (1.207 sec)
INFO:tensorflow:global_step/sec: 84.9889
INFO:tensorflow:loss = 0.02969918, step = 211740 (1.177 sec)
INFO:tensorflow:

INFO:tensorflow:global_step/sec: 83.2452
INFO:tensorflow:loss = 0.02982509, step = 218940 (1.201 sec)
INFO:tensorflow:global_step/sec: 82.1336
INFO:tensorflow:loss = 0.022373607, step = 219040 (1.218 sec)
INFO:tensorflow:global_step/sec: 83.3037
INFO:tensorflow:loss = 0.03461714, step = 219140 (1.201 sec)
INFO:tensorflow:global_step/sec: 82.7251
INFO:tensorflow:loss = 0.008878028, step = 219240 (1.209 sec)
INFO:tensorflow:global_step/sec: 83.4653
INFO:tensorflow:loss = 0.017635757, step = 219340 (1.198 sec)
INFO:tensorflow:global_step/sec: 83.7256
INFO:tensorflow:loss = 0.010394919, step = 219440 (1.194 sec)
INFO:tensorflow:global_step/sec: 83.713
INFO:tensorflow:loss = 0.054875568, step = 219540 (1.195 sec)
INFO:tensorflow:global_step/sec: 80.7716
INFO:tensorflow:loss = 0.1433121, step = 219640 (1.239 sec)
INFO:tensorflow:global_step/sec: 84.4731
INFO:tensorflow:loss = 0.0026872908, step = 219740 (1.183 sec)
INFO:tensorflow:global_step/sec: 84.493
INFO:tensorflow:loss = 0.22684877, st

In [None]:
# Evaluate on the held-out split; model_fn's EVAL branch defines 'acc' and 'f1 score'.
est.evaluate(input_fn=eval_input_fn)