In [1]:
from load_embedding import load_embedding
import numpy as np
from math import floor
import tensorflow as tf
from utils import *
import warnings
warnings.simplefilter("ignore")

data_dir = "./data/"

word_embeddings = "wordembeddings-dim100.word2vec"

sentences_train_file = "sentences.train"
sentences_eval_file = "sentences.eval"
sentences_continuation_file = "sentences.continuation"
sentences_test_file = "sentences_test.txt"

print("tf_version:\t" + tf.__version__)



tf_version:	1.10.0


In [8]:
EMBEDDING_DIM = 100
VOCABULARY_SIZE = 2000

vocabulary, word_to_idx, idx_to_word = create_vocabulary(data_dir + sentences_train_file, VOCABULARY_SIZE)
X_train = create_dataset(data_dir + sentences_train_file, word_to_idx)
X_test = create_dataset(data_dir + sentences_test_file, word_to_idx)

In [5]:
emb = tf.Variable(np.empty((VOCABULARY_SIZE, EMBEDDING_DIM), dtype=np.float32), collections=[])

with tf.Session() as session:
    load_embedding(session, word_to_idx, emb, data_dir + word_embeddings, EMBEDDING_DIM, VOCABULARY_SIZE)
    
    
    

Loading external embeddings from ./data/wordembeddings-dim100.word2vec
<bos> not in embedding file
<eos> not in embedding file
<pad> not in embedding file
<unk> not in embedding file
1996 words out of 20000 could be loaded


In [9]:
from gensim import models

model = models.KeyedVectors.load_word2vec_format(data_dir + word_embeddings, binary=False)  

In [22]:
model.wv.vectors.shape

model.wv.vectors.mean(axis=1).mean()
model.wv.vectors.std(axis=1).mean()

1.0588529

In [None]:
print(X_train.shape)
print(X_test.shape)

In [None]:
#------------------------------------------------------------------------------------------------------------------------------#
# PARAMETERS
BATCH_SIZE = 64
LEARNING_RATE = 0.001
MAX_GRAD_NORM = 5.0
NUM_EPOCHS = 50

EMBEDDING_DIM = 100
STATE_DIM = 512
VOCABULARY_SIZE = 20000

sent_dim = X_train.shape[1]
num_train = X_train.shape[0]
num_test = X_test.shape[0]

#------------------------------------------------------------------------------------------------------------------------------#
tf.reset_default_graph()
    
# Initializer
initializer = tf.contrib.layers.xavier_initializer()

# Parameters
W = tf.get_variable("W", shape=[STATE_DIM, VOCABULARY_SIZE], initializer=initializer, trainable=True)
E = tf.get_variable("E", shape=[VOCABULARY_SIZE, EMBEDDING_DIM], initializer=initializer, trainable=True)

# Placeholders
X = tf.placeholder(tf.int32, (None, sent_dim))

# LSTM initialization
LSTM = tf.nn.rnn_cell.BasicLSTMCell(num_units=STATE_DIM)
state_c, state_h = LSTM.zero_state(batch_size=BATCH_SIZE, dtype=tf.float32)

losses = []

# RNN forward pass
for t in range(0, sent_dim - 1):
    X_t = X[:, t]
    y_t = X[:, t+1]
    
    X_t = tf.one_hot(X_t, depth = VOCABULARY_SIZE)
    E_t = tf.matmul(X_t, E)

    output, (state_c, state_h) = LSTM(inputs=E_t, state=(state_c, state_h))
    logits = tf.matmul(output, W)
    
    loss_t = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_t, logits=logits)
    
    losses.append(loss_t)
    
losses = tf.reduce_sum(tf.stack(losses),axis=1)
loss = tf.reduce_mean(losses)

optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, clip_norm=MAX_GRAD_NORM)
optimize_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()

#------------------------------------------------------------------------------------------------------------------------------#
# SESSION
np.random.seed(12345)

batches_per_epoch = 20

with tf.Session() as session:

    session.run(init)

    for epoch in range(NUM_EPOCHS):
        epoch_loss = 0
        
        print('epoch\t%4d' % epoch + 1)
        
        for idx in range(batches_per_epoch):
            batch_loss, _ = session.run([loss, optimize_op],
                                    feed_dict={X: X_test[(idx*BATCH_SIZE):((idx+1)*BATCH_SIZE)]}
                                    )
            epoch_loss += batch_loss
            print('\tbatch %4d\t%.2f' % (idx + 1, batch_loss))
        
#         if epoch + 1 % 2 == 0:
        print('epoch\t%4d\t%.2f' % (epoch + 1, epoch_loss / batches_per_epoch))

In [None]:
# PARAMETERS

BATCH_SIZE = 64
LEARNING_RATE = 0.001
MAX_GRAD_NORM = 5.0
NUM_EPOCHS = 50

EMBEDDING_DIM = 100
STATE_DIM = 512
VOCABULARY_SIZE = 20000

sent_dim = X_train.shape[1]
num_train = X_train.shape[0]
num_test = X_test.shape[0]

batch_per_epoch = floor(num_test / BATCH_SIZE)

# Session
tf.reset_default_graph()

# Initializer
initializer = tf.contrib.layers.xavier_initializer()

# Parameters
W = tf.get_variable("W", shape=[STATE_DIM, VOCABULARY_SIZE], initializer=initializer, trainable=True)
E = tf.get_variable("E", shape=[VOCABULARY_SIZE, EMBEDDING_DIM], initializer=initializer, trainable=True)

# Placeholders
X = tf.placeholder(tf.int32, (None, sent_dim))
dataset = tf.data.Dataset.from_tensor_slices(X).batch(batch_size).repeat()

iterator = dataset.make_initializable_iterator()
X_batch = iterator.get_next()

# LSTM initialization
LSTM = tf.nn.rnn_cell.BasicLSTMCell(num_units=STATE_DIM)
state_c, state_h = LSTM.zero_state(batch_size=batch_size, dtype=tf.float32)

losses = []

# RNN forward pass
for t in range(0, 5):
    X_t = X_batch[:, t]
    y_t = X_batch[:, t+1]
    
    X_t = tf.one_hot(X_t, depth = VOCABULARY_SIZE)
    E_t = tf.matmul(X_t, E)

    output, (state_c, state_h) = LSTM(inputs=E_t, state=(state_c, state_h))
    logits = tf.matmul(output, W)
    
    loss_t = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_t, logits=logits)
    
    losses.append(loss_t)
    
losses = tf.stack(losses)
losses = tf.reduce_sum(losses,axis=1)

loss = tf.reduce_mean(losses)

optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE)
optimizer = tf.contrib.estimator.clip_gradients_by_norm(optimizer, clip_norm=MAX_GRAD_NORM)
optimize_op = optimizer.minimize(loss)
init = tf.global_variables_initializer()

In [None]:
np.random.seed(12345)

# SESSION
session = tf.Session()

session.run(init)

for epoch in range(NUM_EPOCHS):
    total_loss = 0
    
    for _ in range(batch_per_epoch):
        batch_loss, _ = sess.run([loss, train_op, loss])
        total_loss += batch_loss
        
        
#     train_loss, _ = session.run([loss, optimize_op],
#                             feed_dict={X: X_test[0:batch_size]}
#                             )

    if epoch + 1 % 1 == 0:
        print('Epoch %04d> training loss: %.2f' % (epoch, total_loss))
    
session.close()