In [None]:
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.learn import *
from tensorflow.contrib import seq2seq
from tensorflow.python.estimator.inputs import numpy_io
import pickle
import numpy as np
import math
import pandas as pd

# Set TensorFlow's logging threshold to INFO so INFO-level messages are shown.
tf.logging.set_verbosity(tf.logging.INFO)

In [None]:
def word2vec(features, labels, mode, params):
    """Skip-gram style word2vec model function for a ``tf.contrib.learn`` Estimator.

    The embedding of each target word id is looked up in ``emb/target_w`` and
    trained with a sampled-softmax loss against the context word ids.

    Args:
        features: dict holding the target word ids under the key ``'target'``.
        labels: context word ids, one per target id (a trailing axis is added
            here before they are handed to the loss). May be ``None`` in
            inference mode, in which case no loss is built.
        mode: one of the ``tf.contrib.learn.ModeKeys`` values.
        params: dict with the keys
            ``'num_words'`` (vocabulary size, i.e. rows of the embedding tables),
            ``'emb_size'`` (embedding width),
            ``'num_negative'`` (number of sampled classes per batch) and
            ``'learning_rate'`` (Adagrad learning rate).

    Returns:
        A ``ModelFnOps``. ``predictions`` always contains the looked-up target
        embeddings under ``'embedding'``; ``loss`` is set for TRAIN and EVAL,
        and ``train_op`` only for TRAIN.
    """
    mode_keys = tf.contrib.learn.ModeKeys
    target = features['target']

    with tf.variable_scope("emb"):
        target_weight = tf.get_variable("target_w",
                                        initializer=tf.random_uniform([params['num_words'], params['emb_size']], -1.0,
                                                                      1.0))
        context_weight = tf.get_variable("context_w",
                                         initializer=tf.truncated_normal([params['num_words'], params['emb_size']]))
        context_bias = tf.get_variable("context_b", initializer=tf.zeros(params['num_words']))

    target_emb = tf.nn.embedding_lookup(target_weight, target)
    # Expose the embeddings so that predict()/evaluate() have something to
    # return; the original passed predictions=None in every mode.
    predictions = {'embedding': target_emb}

    # Inference has no labels, so the loss (which needs them) cannot be built.
    if mode == mode_keys.INFER or labels is None:
        return ModelFnOps(mode=mode, predictions=predictions)

    loss = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(weights=context_weight,
                                   biases=context_bias,
                                   # The loss expects labels shaped [batch, num_true]; num_true is 1 here.
                                   labels=tf.expand_dims(labels, 1),
                                   inputs=target_emb,
                                   num_sampled=params['num_negative'],
                                   num_classes=params['num_words'],
                                   remove_accidental_hits=True))

    # Evaluation only needs the loss; skip the optimizer and training summaries.
    if mode != mode_keys.TRAIN:
        return ModelFnOps(mode=mode, predictions=predictions, loss=loss)

    # One histogram per trainable variable; ':' is stripped from the variable
    # name before it is used as the summary tag.
    for v in tf.trainable_variables():
        tf.summary.histogram(v.name.replace(":", ''), v)

    train_op = layers.optimize_loss(
        loss=loss,
        global_step=tf.contrib.framework.get_global_step(),
        learning_rate=params["learning_rate"],
        optimizer=tf.train.AdagradOptimizer,
        summaries=[
            "learning_rate",
            "loss",
            "gradients",
            "gradient_norm",
        ])
    return ModelFnOps(mode=mode, predictions=predictions, train_op=train_op, loss=loss)

In [None]:
# Load the pickled training inputs. pickle.load can execute arbitrary code,
# so these paths must only ever point at files you produced yourself.
with open('_word2vec_data/target_list_novel', 'rb') as fp:
    target_list = pickle.load(fp)
with open('_word2vec_data/context_list_novel', 'rb') as fp:
    context_list = pickle.load(fp)
with open('_word2vec_data/indexer_novel', 'rb') as fp:
    indexer = pickle.load(fp)

# Features go in as a dict keyed by 'target' (the key the model function
# reads); the context ids are the labels.
x = {'target': np.array(target_list)}
y = np.array(context_list)

# Hyper-parameters consumed by the word2vec model function; the vocabulary
# size is taken from the number of entries in the indexer.
model_params = {
    'num_words': len(indexer),
    'emb_size': 64,
    'num_negative': 64,
    'learning_rate': 1.0,
}

# num_epochs=None repeats the data indefinitely, so the `steps` argument of
# fit() below is what bounds training.
input_fn = numpy_io.numpy_input_fn(
    x=x,
    y=y,
    batch_size=512,
    num_epochs=None,
    shuffle=True,
)

# Checkpoint every 30 seconds and keep only the two most recent checkpoints.
run_config = RunConfig(save_checkpoints_secs=30, keep_checkpoint_max=2)

rnn_model = Estimator(
    model_fn=word2vec,
    params=model_params,
    model_dir="model/_word2vec",
    config=run_config,
)

rnn_model.fit(input_fn=input_fn, steps=100000)