The n-gram processing functions below are adapted from the [Keras IMDB fastText example](https://github.com/keras-team/keras/blob/master/examples/imdb_fasttext.py).

In [1]:
from tqdm import tqdm
import tensorflow as tf
import numpy as np

In [2]:
class Config:
    """Hyperparameters shared by the n-gram preprocessing and the model."""

    # --- text preprocessing ---
    # Largest n-gram size to generate (n-grams of size 2..ngram_range are added).
    ngram_range = 2
    # Vocabulary size; build_ngram() overwrites this once n-gram ids are assigned.
    max_features = 20000
    # Length every sequence is padded/truncated to before training.
    maxlen = 400

    # --- model / training ---
    # Width of each embedding vector.
    embedding_dims = 50
    # Mini-batch size used for both training and prediction input functions.
    batch_size = 32
    # Number of train-then-validate rounds run by main().
    epochs = 5

In [3]:
def create_ngram_set(input_list, ngram_value):
    """Return the set of distinct n-grams (as tuples) found in a sequence.

    Args:
        input_list: Sliceable sequence of tokens.
        ngram_value: Size ``n`` of the n-grams to extract.

    Returns:
        A set of ``ngram_value``-length tuples, one per distinct contiguous
        window of ``input_list``. Empty when the sequence is shorter than
        ``ngram_value``.
    """
    # One copy of the sequence per position inside the window, each shifted
    # one step further; zipping them in lockstep yields the sliding windows
    # and stops as soon as the shortest (most shifted) copy is exhausted.
    shifted_views = (input_list[offset:] for offset in range(ngram_value))
    windows = zip(*shifted_views)
    return set(windows)


def build_ngram(x_train):
    """Assign an integer id to every n-gram found in the training sequences.

    N-grams of size 2 up to ``Config.ngram_range`` are collected from each
    sequence and mapped to consecutive ids starting at
    ``Config.max_features + 1``.

    Side effect: when at least one n-gram is found, ``Config.max_features``
    is raised to one past the largest assigned id (stored as a plain ``int``)
    so it covers the n-gram ids as well. When no n-gram is found (for example
    ``Config.ngram_range < 2`` or only very short sequences),
    ``Config.max_features`` is left untouched instead of failing on an empty
    maximum.

    Args:
        x_train: Sized iterable of token-id sequences.

    Returns:
        dict mapping each n-gram tuple to its integer id.
    """
    ngram_set = set()
    for input_list in tqdm(x_train, total=len(x_train), ncols=70):
        for ngram_value in range(2, Config.ngram_range + 1):
            ngram_set.update(create_ngram_set(input_list, ngram_value=ngram_value))

    # N-gram ids begin just above the current vocabulary size so they sit in
    # their own range (word ids are expected to be below Config.max_features).
    start_index = Config.max_features + 1
    token_indice = {ngram: start_index + offset
                    for offset, ngram in enumerate(ngram_set)}

    if token_indice:
        # Ids are consecutive, so the largest is start_index + len - 1 and the
        # new vocabulary size is one more than that.
        Config.max_features = start_index + len(token_indice)
    return token_indice


def add_ngram(sequences, token_indice):
    """Append the ids of known n-grams to each sequence.

    For every sequence, each contiguous window of size 2 up to
    ``Config.ngram_range`` over the *original* tokens is looked up in
    ``token_indice``; the ids of the windows that are present are appended
    after the original tokens. Windows never include ids appended earlier in
    the same call, so higher-order n-grams are built from real tokens only.

    Args:
        sequences: Sized iterable of token-id sequences (lists, tuples or
            arrays); the inputs are not modified.
        token_indice: dict mapping n-gram tuples to integer ids, as returned
            by ``build_ngram``.

    Returns:
        A list of new lists: each original sequence followed by the ids of
        the n-grams found in it.
    """
    new_sequences = []
    for input_list in tqdm(sequences, total=len(sequences), ncols=70):
        # Freeze the original tokens; the output list grows separately so the
        # sliding-window bounds are not affected by appended ids.
        tokens = list(input_list)
        augmented = list(tokens)
        for ngram_value in range(2, Config.ngram_range + 1):
            for start in range(len(tokens) - ngram_value + 1):
                ngram_id = token_indice.get(tuple(tokens[start:start + ngram_value]))
                if ngram_id is not None:
                    augmented.append(ngram_id)
        new_sequences.append(augmented)
    return new_sequences

In [4]:
def model_fn(features, labels, mode):
    """Estimator model function for a fastText-style binary classifier.

    The model embeds every id in ``features``, averages the embeddings over
    axis 1 and applies a dense layer with 2 output units.

    Args:
        features: Integer id tensor; main() feeds padded sequences of length
            ``Config.maxlen`` (assumed shape (batch, maxlen) -- confirm if
            called from elsewhere).
        labels: Integer class labels for the batch; unused in PREDICT mode.
        mode: One of the ``tf.estimator.ModeKeys`` values.

    Returns:
        A ``tf.estimator.EstimatorSpec``:
          * PREDICT: ``predictions`` is the argmax class per example.
          * TRAIN: mean sparse softmax cross-entropy loss and an Adam
            ``train_op`` that increments the global step.
          * EVAL: the same loss plus an ``accuracy`` metric.

    Raises:
        ValueError: If ``mode`` is not one of the three known mode keys.
    """
    x = tf.contrib.layers.embed_sequence(features, Config.max_features, Config.embedding_dims)
    logits = tf.layers.dense(tf.reduce_mean(x, 1), 2)
    predictions = tf.argmax(logits, -1)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions)

    # TRAIN and EVAL both need the loss; labels are available in both modes.
    loss_op = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=labels))

    if mode == tf.estimator.ModeKeys.TRAIN:
        train_op = tf.train.AdamOptimizer().minimize(loss_op,
                                                     global_step=tf.train.get_global_step())

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss_op,
                                          train_op=train_op)

    if mode == tf.estimator.ModeKeys.EVAL:
        # Previously this mode fell through and returned None, which made
        # estimator.evaluate() unusable.
        eval_metric_ops = {
            "accuracy": tf.metrics.accuracy(labels=labels, predictions=predictions),
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss_op,
                                          eval_metric_ops=eval_metric_ops)

    raise ValueError("Unsupported mode: %r" % (mode,))

In [5]:
def main():
    """Load IMDB, add n-gram features, then train and validate the model.

    Steps:
      1. Load the IMDB dataset limited to ``Config.max_features`` words.
      2. Build the n-gram vocabulary from the training split (this updates
         ``Config.max_features``) and append n-gram ids to both splits.
      3. Pad both splits to ``Config.maxlen``.
      4. For ``Config.epochs`` rounds: call ``estimator.train`` once, predict
         on the test split and print the resulting accuracy.
    """
    train_split, test_split = tf.keras.datasets.imdb.load_data(num_words=Config.max_features)
    x_train, y_train = train_split
    x_test, y_test = test_split

    # The n-gram vocabulary comes from the training data only and is then
    # applied to both splits.
    token_indice = build_ngram(x_train)
    x_train = add_ngram(x_train, token_indice)
    x_test = add_ngram(x_test, token_indice)

    pad_sequences = tf.keras.preprocessing.sequence.pad_sequences
    x_train = pad_sequences(x_train, Config.maxlen)
    x_test = pad_sequences(x_test, Config.maxlen)

    estimator = tf.estimator.Estimator(model_fn)
    numpy_input_fn = tf.estimator.inputs.numpy_input_fn

    for _ in range(Config.epochs):
        train_input_fn = numpy_input_fn(x=x_train,
                                        y=y_train,
                                        batch_size=Config.batch_size,
                                        shuffle=True)
        estimator.train(train_input_fn)

        # No shuffling here so predictions line up with y_test by position.
        test_input_fn = numpy_input_fn(x=x_test,
                                       batch_size=Config.batch_size,
                                       shuffle=False)
        predicted = estimator.predict(test_input_fn)
        y_pred = np.fromiter(predicted, np.int32, count=len(x_test))

        accuracy = (y_pred == y_test).mean()
        print("\nValidation Accuracy: %.4f\n" % accuracy)

In [6]:
# Execute the load / preprocess / train / validate pipeline defined in main().
main()

100%|████████████████████████| 25000/25000 [00:02<00:00, 10699.32it/s]
100%|█████████████████████████| 25000/25000 [00:07<00:00, 3485.76it/s]
100%|█████████████████████████| 25000/25000 [00:06<00:00, 4045.23it/s]


INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x113b72f28>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /var/folders/sx/

INFO:tensorflow:loss = 0.030605339, step = 3229 (41.663 sec)
INFO:tensorflow:global_step/sec: 2.45992
INFO:tensorflow:loss = 0.013529511, step = 3329 (40.651 sec)
INFO:tensorflow:global_step/sec: 2.27158
INFO:tensorflow:loss = 0.014844401, step = 3429 (44.023 sec)
INFO:tensorflow:global_step/sec: 2.34276
INFO:tensorflow:loss = 0.025394464, step = 3529 (42.684 sec)
INFO:tensorflow:global_step/sec: 2.19481
INFO:tensorflow:loss = 0.020423576, step = 3629 (45.562 sec)
INFO:tensorflow:global_step/sec: 2.28575
INFO:tensorflow:loss = 0.013294871, step = 3729 (43.749 sec)
INFO:tensorflow:global_step/sec: 2.25662
INFO:tensorflow:loss = 0.00724068, step = 3829 (44.314 sec)
INFO:tensorflow:Saving checkpoints for 3910 into /var/folders/sx/fv0r97j96fz8njp14dt5g7940000gn/T/tmpx7fqoyl6/model.ckpt.
INFO:tensorflow:Loss for final step: 0.0055243005.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /va