Notebook written by [Zhedong Zheng](https://github.com/zhedongzheng)

![title](img/char_embed.gif)

In [1]:
from tqdm import tqdm
from collections import Counter

import tensorflow as tf
import numpy as np

In [2]:
# Hyper-parameters shared by the feature builder, the model and the training loop.
VOCAB_SIZE = 5000  # passed as num_words to the IMDB loader; also the row count of the word embedding table
MAX_LEN = 400  # number of word slots per review (make_feats keeps the last MAX_LEN words)
BATCH_SIZE = 32  # batch size of both the training and the prediction input functions
EMBED_DIM = 50  # width of the char and word embeddings; also the filter count of the char-level conv
FILTERS = 250  # filters per word-level conv branch and units of the hidden dense layer
N_CLASS = 2  # number of output logits
N_EPOCH = 2  # number of train-then-predict rounds executed by main()
# Learning-rate schedule fed to tf.train.exponential_decay: begins at 'start'
# and is multiplied by end/start for every 'steps' global steps.
LR = {'start': 5e-3, 'end': 5e-4, 'steps': 1500}

In [3]:
def get_idx2word(_index_from=3):
    """Build the IMDB vocabulary lookups in both directions.

    Every index returned by ``tf.keras.datasets.imdb.get_word_index()`` is
    shifted up by ``_index_from``; ids 0, 1 and 2 are then assigned to the
    ``<pad>``, ``<start>`` and ``<unk>`` tokens respectively.

    Args:
        _index_from: offset added to every index of the raw word index.

    Returns:
        A ``(idx2word, word2idx)`` tuple of dicts.
    """
    raw_index = tf.keras.datasets.imdb.get_word_index()

    word2idx = {}
    for token, rank in raw_index.items():
        word2idx[token] = rank + _index_from
    # Special tokens are written last so they win over any clashing entry.
    word2idx.update({"<pad>": 0, "<start>": 1, "<unk>": 2})

    idx2word = dict((position, token) for token, position in word2idx.items())
    return idx2word, word2idx


def make_feats(X, char2idx, word2idx, MAX_WORD_LEN):
    """Convert tokenised reviews into fixed-size word-id and char-id arrays.

    Only the last ``MAX_LEN`` words of each review are kept; they are written
    from position 0 onwards and the remaining slots stay 0. Words longer than
    ``MAX_WORD_LEN`` are truncated to their first ``MAX_WORD_LEN`` characters
    so that a word longer than the configured width cannot index past the end
    of the char array.

    Args:
        X: sequence of reviews; each review is a sequence of words and each
            word is a sequence of characters.
        char2idx: mapping from character to id; must contain ``'<unk>'``,
            which is used for characters missing from the mapping.
        word2idx: mapping from the joined word string to its id.
        MAX_WORD_LEN: size of the character axis of the ``'char'`` array.

    Returns:
        A dict with ``'char'`` (int32, ``[len(X), MAX_LEN, MAX_WORD_LEN]``)
        and ``'word'`` (int32, ``[len(X), MAX_LEN]``).

    Raises:
        KeyError: if a word is missing from ``word2idx`` or ``'<unk>'`` is
            missing from ``char2idx``.
    """
    char_t = np.zeros([len(X), MAX_LEN, MAX_WORD_LEN], dtype=np.int32)
    word_t = np.zeros([len(X), MAX_LEN], dtype=np.int32)
    unk_char = char2idx['<unk>']
    for i, sent in tqdm(enumerate(X), total=len(X), ncols=70):
        for j, w in enumerate(sent[-MAX_LEN:]):
            # Not redundant: with MAX_LEN == 0 the slice sent[-0:] is the
            # whole review, and this guard keeps the loop from writing.
            if j < MAX_LEN:
                word_t[i, j] = word2idx[''.join(w)]
                # Truncate so an over-long word cannot overflow the char axis.
                for k, c in enumerate(w[:MAX_WORD_LEN]):
                    char_t[i, j, k] = char2idx.get(c, unk_char)
    return {'char': char_t, 'word': word_t}

In [4]:
def char_embedding(x, params, batch_size):
    """Embed each word from its characters with a conv + max-over-time encoder.

    Row 0 of the trainable ``char2vec`` table is replaced by zeros before the
    lookup, so char id 0 always maps to the zero vector.

    Args:
        x: char-id tensor; presumably ``[batch, MAX_LEN, max_word_len]``
            (TODO confirm against the caller).
        params: dict providing ``'char_vocab_size'`` and ``'max_word_len'``.
        batch_size: scalar used to build the reshape targets.

    Returns:
        A tensor reshaped to ``[batch_size, MAX_LEN, EMBED_DIM]``.
    """
    n_chars = params['char_vocab_size']
    word_len = params['max_word_len']

    table = tf.get_variable('char2vec', [n_chars, EMBED_DIM])
    zero_row = tf.zeros([1, EMBED_DIM])
    table = tf.concat([zero_row, table[1:, :]], axis=0)

    char_vecs = tf.nn.embedding_lookup(table, x)
    # Fold the word axis into the batch axis so the conv runs once per word.
    per_word = tf.reshape(char_vecs, [batch_size*MAX_LEN, word_len, EMBED_DIM])
    conv_out = tf.layers.conv1d(per_word,
                                filters=EMBED_DIM,
                                kernel_size=5,
                                activation=tf.nn.relu)
    pooled = tf.reduce_max(conv_out, axis=1)
    return tf.reshape(pooled, [batch_size, MAX_LEN, EMBED_DIM])


def word_embedding(x):
    """Look up word vectors in the trainable ``word2vec`` table.

    The table has ``VOCAB_SIZE`` rows of width ``EMBED_DIM``; its first row is
    swapped for zeros before the lookup, so word id 0 always yields the zero
    vector.

    Args:
        x: tensor of word ids.

    Returns:
        The embedded tensor produced by ``tf.nn.embedding_lookup``.
    """
    table = tf.get_variable('word2vec', [VOCAB_SIZE, EMBED_DIM])
    zero_row = tf.zeros([1, EMBED_DIM])
    learned_rows = table[1:, :]
    masked_table = tf.concat([zero_row, learned_rows], axis=0)
    return tf.nn.embedding_lookup(masked_table, x)


def highway_layer(inputs):
    """Apply a single highway layer over the last axis of ``inputs``.

    One dense projection of twice the input width is split in half: the first
    half goes through ReLU (the transformed signal), the second through a
    sigmoid (the gate). The output is ``gate * flow + (1 - gate) * inputs``.

    Args:
        inputs: tensor whose last dimension is statically known.

    Returns:
        A tensor with the same last dimension as ``inputs``.
    """
    width = inputs.get_shape()[-1].value
    projected = tf.layers.dense(inputs, 2*width)
    flow_logits, gate_logits = tf.split(projected, num_or_size_splits=2, axis=-1)

    flow = tf.nn.relu(flow_logits)
    gate = tf.sigmoid(gate_logits)

    transformed = gate * flow
    carried = (1 - gate) * inputs
    return transformed + carried


def forward(feats, mode, params):
    """Build the classifier graph and return its logits.

    Word and char-derived embeddings are concatenated on the last axis, passed
    through dropout and a highway layer, then through three parallel
    conv + max-over-time branches (kernel sizes 3, 4, 5). The pooled branches
    are concatenated, regularised with dropout and fed to a ReLU dense layer
    followed by the output projection.

    Args:
        feats: dict with ``'word'`` and ``'char'`` id tensors.
        mode: a ``tf.estimator.ModeKeys`` value; dropout is only active for
            ``TRAIN``.
        params: dict forwarded to ``char_embedding``.

    Returns:
        Logits tensor whose last dimension is ``N_CLASS``.
    """
    training = (mode == tf.estimator.ModeKeys.TRAIN)
    batch_size = tf.shape(feats['char'])[0]

    word_vecs = word_embedding(feats['word'])
    char_vecs = char_embedding(feats['char'], params, batch_size)
    hidden = tf.concat([word_vecs, char_vecs], axis=-1)
    hidden = tf.layers.dropout(hidden, 0.2, training=training)
    hidden = highway_layer(hidden)

    def conv_max_pool(kernel_width):
        """Convolve ``hidden`` with one kernel width and max-pool over axis 1."""
        conv = tf.layers.conv1d(hidden, FILTERS, kernel_width, activation=tf.nn.relu)
        pooled = tf.reduce_max(conv, 1)
        # The reshape pins the static feature size after pooling.
        return tf.reshape(pooled, (batch_size, FILTERS))

    branches = [conv_max_pool(width) for width in (3, 4, 5)]
    merged = tf.concat(branches, -1)

    merged = tf.layers.dropout(merged, 0.2, training=training)
    merged = tf.layers.dense(merged, FILTERS, tf.nn.relu)
    return tf.layers.dense(merged, N_CLASS)


def model_fn(features, labels, mode, params):
    """Estimator ``model_fn`` covering the PREDICT, EVAL and TRAIN modes.

    Args:
        features: dict of input tensors passed on to ``forward``.
        labels: integer class labels; unused in PREDICT mode.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: dict of hyper-parameters passed on to ``forward``.

    Returns:
        A ``tf.estimator.EstimatorSpec``:
          * PREDICT: ``predictions`` is the argmax of the logits.
          * EVAL: the mean cross-entropy loss plus an ``'accuracy'`` metric.
          * TRAIN: the loss, an Adam train op driven by an exponentially
            decayed learning rate, and a hook logging that rate every 100
            iterations.

    Raises:
        ValueError: if ``mode`` is not one of the three estimator modes.
    """
    logits = forward(features, mode, params)

    if mode == tf.estimator.ModeKeys.PREDICT:
        preds = tf.argmax(logits, -1)
        return tf.estimator.EstimatorSpec(mode, predictions=preds)

    # From here on labels are required (TRAIN and EVAL share the same loss).
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=labels))

    if mode == tf.estimator.ModeKeys.EVAL:
        # Without this branch the function returned None for EVAL, which made
        # estimator.evaluate() unusable.
        accuracy = tf.metrics.accuracy(labels=labels,
                                       predictions=tf.argmax(logits, -1))
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss_op,
                                          eval_metric_ops={'accuracy': accuracy})

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_global_step()

        # Decay rate end/start means the rate reaches LR['end'] after
        # LR['steps'] global steps.
        lr_op = tf.train.exponential_decay(LR['start'],
                                           global_step,
                                           LR['steps'],
                                           LR['end']/LR['start'])

        train_op = tf.train.AdamOptimizer(lr_op).minimize(loss_op,
                                                          global_step=global_step)

        lth = tf.train.LoggingTensorHook({'lr': lr_op},
                                          every_n_iter=100)

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss_op,
                                          train_op=train_op,
                                          training_hooks=[lth])

    raise ValueError('Unsupported mode: %r' % (mode,))

In [5]:
def main():
    """Train the char+word CNN on IMDB and print test accuracy after each epoch.

    Steps:
      1. Load the IMDB reviews restricted to the ``VOCAB_SIZE`` most frequent
         words and map the ids back to word strings.
      2. Scan the training split to find the longest word and to count
         characters; the character vocabulary is ordered by frequency with
         ids 0 and 1 reserved for ``<pad>`` and ``<unk>``.
      3. Build the word/char feature arrays for both splits.
      4. Run ``N_EPOCH`` rounds of training followed by prediction on the
         test split, printing the accuracy each time.
    """
    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.imdb.load_data(num_words=VOCAB_SIZE)
    idx2word, word2idx = get_idx2word()

    MAX_WORD_LEN = 0
    # Characters are counted incrementally instead of concatenating every word
    # into one huge string and exploding it into a list afterwards.
    char_counts = Counter()
    X_train_chars = []
    for x in tqdm(X_train, total=len(X_train), ncols=70):
        temp = []
        for idx in x:
            word = idx2word[idx]
            char_counts.update(word)
            # One separator space per word is still counted so the resulting
            # character ids (and the vocabulary size) stay exactly the same
            # as with the former "word + ' '" concatenation.
            char_counts[' '] += 1
            if len(word) > MAX_WORD_LEN:
                MAX_WORD_LEN = len(word)
            temp.append(list(word))
        X_train_chars.append(temp)

    X_test_chars = [[list(idx2word[w]) for w in x] for x in tqdm(X_test,
                                                                 total=len(X_test),
                                                                 ncols=70)]

    # Ids start at 2; 0 and 1 are reserved for the special tokens below.
    char2idx = {k: i+2 for i, (k, v) in enumerate(char_counts.most_common())}
    char2idx['<pad>'] = 0
    char2idx['<unk>'] = 1

    X_train_feat = make_feats(X_train_chars, char2idx, word2idx, MAX_WORD_LEN)
    X_test_feat = make_feats(X_test_chars, char2idx, word2idx, MAX_WORD_LEN)

    estimator = tf.estimator.Estimator(model_fn, params={'char_vocab_size': len(char2idx),
                                                         'max_word_len': MAX_WORD_LEN,})

    # The input functions do not change between epochs, so build them once.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=X_train_feat, y=y_train,
        batch_size=BATCH_SIZE,
        shuffle=True)
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x=X_test_feat,
        batch_size=BATCH_SIZE,
        shuffle=False)

    for _ in range(N_EPOCH):
        estimator.train(train_input_fn)
        y_pred = np.fromiter(estimator.predict(test_input_fn), np.int32)
        print("\nValidation Accuracy: %.4f\n" % (y_pred==y_test).mean())

In [6]:
# Run the whole pipeline: load IMDB, build features, train and print accuracy per epoch.
main()

100%|█████████████████████████| 25000/25000 [00:12<00:00, 2042.53it/s]
100%|█████████████████████████| 25000/25000 [00:07<00:00, 3387.85it/s]
100%|█████████████████████████| 25000/25000 [00:12<00:00, 1946.03it/s]
100%|█████████████████████████| 25000/25000 [00:12<00:00, 1953.61it/s]

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpso3fv26t', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x12517acf8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}





INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpso3fv26t/model.ckpt.
INFO:tensorflow:loss = 0.68802404, step = 1
INFO:tensorflow:lr = 0.005
INFO:tensorflow:global_step/sec: 1.2996
INFO:tensorflow:loss = 0.44669238, step = 101 (76.948 sec)
INFO:tensorflow:lr = 0.004288479 (76.948 sec)
INFO:tensorflow:global_step/sec: 1.32699
INFO:tensorflow:loss = 0.35037428, step = 201 (75.359 sec)
INFO:tensorflow:lr = 0.0036782112 (75.359 sec)
INFO:tensorflow:global_step/sec: 1.29582
INFO:tensorflow:loss = 0.24762842, step = 301 (77.171 sec)
INFO:tensorflow:lr = 0.0031547868 (77.171 sec)
INFO:tensorflow:global_step/sec: 1.31759
INFO:tensorflow:loss = 0.39225727, step = 401 (75.896 sec)
INFO:tensorflow:lr = 0.0027058476 