In [1]:
#load the word embeddings
import numpy as np

filename = 'glove.6B.50d.txt'

def loadGloVe(filename):
    vocab = []
    embd = []
    file = open(filename,'r',encoding="utf8")
    for line in file.readlines():
        row = line.strip().split(' ')
        vocab.append(row[0])
        embd.append(row[1:])
    print('Loaded GloVe!')
    file.close()
    return vocab,embd


vocab,embedding = loadGloVe(filename)
vocab_size = len(vocab)
embedding_dim = len(embedding[0])

print('Vocabulary size withouth special tokens:',vocab_size,'Embedding dimension:',embedding_dim)
embedding = np.asarray(embedding)

#make the vocabulary dictionary, save the 0 for pad, 1 for eos and 2 for unknown

vocab_size=vocab_size+3


sent_start_token='<GO>'
sent_end_token='<EOS>'
unknown_word_token='UNK'
padding_token='<PAD>'

vocab_dict = {}
#make the word->index dictionary
vocab_dict[padding_token]=0
vocab_dict[sent_end_token]=1
vocab_dict[sent_start_token]=2
vocab_dict[unknown_word_token]=3
for i in range(len(vocab)):
    vocab_dict[vocab[i]] = i+4
    
#make the index->word dictionary
vocab_inv={}
for key, value in vocab_dict.items():
    
    vocab_inv[value]=key
    
#load the data food reviews data

import pandas as pd
from IPython.display import display
pd.options.display.max_columns = None

%matplotlib inline

#load the data and peek how it looks
Reviews_df = pd.read_csv('Reviews.csv')

#cut only the relevant part
scores=Reviews_df['Score'].values
reviews=Reviews_df['Text'].values
num_reviews=len(reviews)

print("Number of reviews:", num_reviews)

#delete the dataframe from memory
del Reviews_df


Loaded GloVe!
Vocabulary size withouth special tokens: 400000 Embedding dimension: 50
Number of reviews: 568454


In [None]:
#preprocess the sentences into a suitable form
from nltk.tokenize import sent_tokenize
from nltk.tokenize import TweetTokenizer
from nltk.tokenize import word_tokenize
import pickle
import re

NLTK_TWEET_TOKENIZER=TweetTokenizer()

reviews_tokenized=[]
sentences=[]

#compile for removing all weird letters and replace with whitespace
regex = re.compile("""[,+-\/.!?*"'#]""") #etc

#we don't want these as a single words since they mess up stuff
unwanted_words=set(['<','>','/','*','[',']','{','}','br',"'",".",","])

for review in reviews:
    
    temp=[]
    for sent in sent_tokenize(review.lower()):
    
        sentence=sent #+" " +sent_end_token don't put the sentence end token here
        sent_token=word_tokenize(sentence)
        
        #cut away weird single special letter words
        sent_token=[word for word in sent_token if (word not in unwanted_words)]
                
        #cut away the words which are not in the vocabulary
        num_newwords=0
        for word_ind_init in range(len(sent_token)):
            
            word_ind=word_ind_init+num_newwords
            word=sent_token[word_ind]
            
            if word not in vocab_dict:
                
                
                #if it is and address, change the name to url
                word=re.sub(r'http\S+', 'url', word)
                word=re.sub(r'www\S+', 'url', word)
                word=re.sub(r'href\S+', 'link', word)
                
                #apply the previously defined regular expressions for cutting the words from -
                #and removing excess. etc
                #First parameter is the replacement, second parameter is your input string
                word=regex.sub(' ', word)
                newwords=word.split()
            
                #loop over new words
                for newword_ind,newword in enumerate(newwords):
                    
                    #if the splitted words are not in dictionary
                    if newword not in vocab_dict:
                
                        newwords[newword_ind]=unknown_word_token
           
                #attach the new words
                sent_token[word_ind:word_ind+1]=newwords
            
                #print(sent_token)
                #increase the counter accordingly
                num_newwords+=(len(newwords)-1)
                
        #join back as sentences
        #sentences.append(" ".join(sent_token))
        #sentences.append(sentence)
        temp.append(sent_token)
    reviews_tokenized.append(temp)
    
    
#save the sentences
with open('reviews_tokenized.pickled', 'wb') as fp:
    pickle.dump(reviews_tokenized, fp)
    
del reviews

print('done')

In [2]:
import pickle
#load the sentences and pick and easily trainable subset from the sentences
with open ('reviews_tokenized.pickled', 'rb') as fp:
    reviews_tokenized = pickle.load(fp)


#take sentences which are max 10 words and min 6 words, there are sentences like "thank you" and "!"
#those are better off removed-----------------------

max_sent_len=15
min_sent_len=4

sentence_scores=[]
sentences=[]

for review_ind,review in enumerate(reviews_tokenized):  
    for sentence in review:  
        
        sent_len=len(sentence)
        if (sent_len <= max_sent_len) and (sent_len >= 4):
            
            sentences.append(sentence)
            
            #record the review scores for later
            sentence_scores.append(scores[review_ind])

#delete the reviews from memory
del reviews_tokenized 

lens=np.zeros((len(sentences)))
for i,sentence in enumerate(sentences):
    
    lens[i]=len(sentence)

num_sentences=len(sentences)
print('after cutting the max sentence len to', max_sent_len,'and min to', min_sent_len)
print('number of sentences:', num_sentences)
print('mean sentence len:', np.mean(lens))
print('with standard deviation:', np.std(lens))

after cutting the max sentence len to 15 and min to 4
number of sentences: 1312470
mean sentence len: 10.0160818914
with standard deviation: 3.25511084524


In [3]:
import nltk
#get the distribution of words

fdist = nltk.FreqDist()
for sentence in sentences:
    for word in sentence:
        fdist[word] += 1
            
print('total number of words is',sum(fdist.values()))
print('number of different words is',len(fdist))
print('number of unknowns:', fdist[unknown_word_token],"%:", fdist[unknown_word_token]*100/sum(fdist.values()))

#lets cut away the words which are less than 3 times in the vocabulary
fdist_new = nltk.FreqDist()

minfreq=8
for sentence in sentences:
    for word_ind,word in enumerate(sentence):
            
        if fdist[word] < minfreq:
            sentence[word_ind]=unknown_word_token
            fdist_new[unknown_word_token] += 1
                
        else:
            fdist_new[word] += 1
            
print('after word min frequency is set to:',minfreq)
print('number of different words is',len(fdist_new))
print('number of unknowns:', fdist_new[unknown_word_token],"%:", fdist_new[unknown_word_token]*100/sum(fdist.values()))

total number of words is 13145807
number of different words is 40097
number of unknowns: 69196 %: 0.5263731621801537
after word min frequency is set to: 8
number of different words is 15906
number of unknowns: 126792 %: 0.9645052601183024


In [4]:
#clean the vocabulary and embeddings to consist only on the words used.
used_words=set(fdist_new.keys())

new_dict={}
#-1 because unknown is already in the sentences
new_emb=np.zeros((len(fdist_new)-1,embedding_dim))


#loop over pretrained vocabulary
newind=3 #reserve 0 to 3 for eos and pad sent end and sent start
for key, value in vocab_dict.items():
    
    if key==unknown_word_token:
        continue
    
    if key in used_words:
        
        new_dict[key]=newind
        new_emb[newind-3,:]=embedding[value,:]
        
        newind +=1
        
vocab_dict=new_dict

#add the special tokens

vocab_dict[padding_token]=0
vocab_dict[sent_end_token]=1
vocab_dict[sent_start_token]=2
vocab_dict[unknown_word_token]=3


vocab_inv={}
for key, value in vocab_dict.items():
    
    vocab_inv[value]=key
    
embedding=new_emb

vocab_size=len(vocab_dict.items())

print('done, new vocabulary size is', vocab_size)

done, new vocabulary size is 15909


In [5]:
#transform the sentences into inputs and targets for the autoencoder

#note: make here the save and load sentences

import numpy as np
from tensorflow.contrib import learn


#we already did the preprocessing so the tokenizer should just split the words
def tokenizer_custom(iterator):
    for value in iterator:
        
        try:
            #just return the thing itself since the tokenizing was already done
            yield value #.split() #word_tokenize()
        except TypeError:       # this is a hack to avoid the error
            yield []  
            
#map the sentences to input vectors with padding            
vocab_processor = learn.preprocessing.VocabularyProcessor(
    max_document_length=max_sent_len, vocabulary=vocab_dict, tokenizer_fn=tokenizer_custom)

#format is senteces, decoding (timesteps)
input_sentences = np.array(list(vocab_processor.transform(sentences)))
#change to 32bit
input_sentences=input_sentences.astype(int)
print('input size in mbs:', input_sentences.shape[0]*input_sentences.shape[1]*4/1000000)


#now make the encoder and decored inputs/targets
#---------------------------------------

#special tokens
PAD = 0
EOS = 1
GO = 2
UNK = 3


#make to list for a better handling
input_sentences=input_sentences.tolist()

#put the EOS symbol in the beginning of decoder inputs and the eend of decoder outputs
decoder_inputs_full=[]
decoder_targets_full=[]

for sentence in input_sentences:
    
    #decoder input gets GO as the first feed
    decoder_inputs_full.append([GO]+sentence)
    
    #in output the sentence has to end with EOS
    for word_ind, word in enumerate(sentence):
    
        #if the end is before the maxlen
        if word==0:
            sentence[word_ind]=EOS
            sentence.append(0)
            decoder_targets_full.append(sentence)
            break

        #the sentence is longer than the vector
        if (word_ind==max_sent_len-1):
            sentence.append(EOS)
            decoder_targets_full.append(sentence)
            
            
#change the inputs to numpy arrays
decoder_inputs_full=np.array(decoder_inputs_full)
decoder_targets_full=np.array(decoder_targets_full)


#free memory
del input_sentences

#make to int32
decoder_inputs_full=decoder_inputs_full.astype(int)
decoder_targets_full=decoder_targets_full.astype(int)

#transpose
decoder_inputs_full=np.transpose(decoder_inputs_full)
decoder_targets_full=np.transpose(decoder_targets_full)

##change the order of inputs for encoder inputs
encoder_inputs_full=decoder_targets_full[::-1,:]

print('done')

input size in mbs: 78.7482
done


In [6]:
# The actual tensorflow code
import numpy as np
import tensorflow as tf
import timeit
import sys
import os

#time the program
start_time = timeit.default_timer()

#reset and start session
tf.reset_default_graph()
sess = tf.InteractiveSession()


#functions-------------------------------------------------------------

#print a list of itegers as a sentence
def printsentence(inputarray,reverse=False):
     
    word_list=[vocab_inv[index] for index in list(inputarray) if (index not in [0,1,2])]
    
    if reverse:
        word_list=word_list[::-1]
        
    return (" ".join(word_list))

def printandwrite(file, line): #print to screen and write to file

    file.write("%s \n" % line)
    print(line)    

#feed the next inputs
def next_feed(index,keep_prob=1):
    
    #feed the encoder the inputs in reverse direction for better performance
    encoder_inputs_ = encoder_inputs_full[:,index*batch_size:(index+1)*batch_size]
    
    decoder_targets_ = decoder_targets_full[:,index*batch_size:(index+1)*batch_size]

    
    #replace the decoder input words with unknown word with probability 1-keep_prob
    #this forces the decoder to use the latent code instead
    if keep_prob !=1:

        inputs_shape=(max_sent_len+1,batch_size)

        #careful here to make a copy instead of ending modifying the data
        decoder_inputs_= np.zeros(inputs_shape)
        decoder_inputs_[:,:] = decoder_inputs_full[:,index*batch_size:(index+1)*batch_size]

        #make a replacement mask where True is with probability 1-keep_prob
        mask=np.random.binomial(1, 1.0-keep_prob,size=inputs_shape).astype(np.bool)

        #we want the first index to be False since we don't wnant to touch the <GO> symbol
        mask[0,:]=False

        #also we don't want to touch the the paddings
        mask2=decoder_inputs_!=0

        #full mask
        mask=mask*mask2

        #array full of UKN tokens (3)
        uknowns=np.full(np.shape(decoder_inputs_), UNK)


        #apply the mask and do assigment
        decoder_inputs_[mask] = uknowns[mask]
        
    else:

        #feed the decoder the decoder target
        decoder_inputs_ = decoder_inputs_full[:,index*batch_size:(index+1)*batch_size]
 
    return {
        encoder_inputs: encoder_inputs_,
        decoder_inputs: decoder_inputs_,
        decoder_targets: decoder_targets_,
    }

#return variable
def ret_var(varshape):
    
    return tf.get_variable(shape = varshape,initializer=tf.random_normal_initializer(stddev=0.001))


def Getprediction_from_latent(z,decoder_inputmode='prediction',reusing=True):
    
    with tf.variable_scope('decodercell'):
        decoder_cell = tf.contrib.rnn.BasicLSTMCell(decoder_hidden_units,reuse=True)
    
    #map from z to suitable dimensions
    decoder_initial_state = tf.contrib.layers.fully_connected(z, decoder_hidden_units, activation_fn=tf.nn.tanh,
                                                              scope='z_decoder_c',reuse=reusing)

        #has to be modified for lstm
    cell_state =decoder_initial_state[0,:,:] 
    output_state =decoder_initial_state[1,:,:] 
    decoder_initial_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, output_state)

    decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(decoder_cell, decoder_inputs_embedded,
        initial_state=decoder_initial_state, dtype=tf.float32, time_major=True, scope="plain_decoder")

    decoder_logits=tf.contrib.layers.fully_connected(decoder_outputs, vocab_size,activation_fn=None,reuse=True,scope='lstm_to_logits')
    decoder_prediction=tf.argmax(decoder_logits, 2)          
            
        
    #stack outputs into workable tensor and return
    return decoder_prediction

#set up variables and netword--------------------------------------------------------------------------

# set up latent dimension
latent_dimension = 48
decoder_hidden_units = 384
encoder_hidden_units = 384
batch_size=100
train_len=num_sentences #number of sentences
epochs=5 #0


#dimension for inputs are max_rollout_time, batch_size (time_major=True)
encoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='encoder_inputs')
decoder_targets = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_targets')
decoder_inputs = tf.placeholder(shape=(None, None), dtype=tf.int32, name='decoder_inputs')

#set up placeholder for pretrained embeddings which have to be feeded through variable
#pretrained part , -3 here since the pretrained part is without special tokens
pretrained_embedding_placeholder =  tf.placeholder(shape=(vocab_size-4, embedding_dim), dtype=tf.float32, name='pretrained_embedding_placeholder')
W=tf.get_variable(name="W", shape = [vocab_size-4, embedding_dim], trainable = False)
pretrained_emb = W.assign(pretrained_embedding_placeholder)

#embedding for padding, EOS and UKN
spec_token_embedding = tf.get_variable(name = "spec_token_embedding", shape = [4, embedding_dim],
      initializer=tf.random_uniform_initializer(-0.04, 0.04),trainable = True)

embeddings=tf.concat([spec_token_embedding,W], axis=0)

#latent variable
#z=tf.get_variable(name="z", shape = [2,batch_size,latent_dimension])

#change the inputs with embedding
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)
decoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, decoder_inputs)

#encoder and decoder cells will be LSTM
encoder_cell = tf.contrib.rnn.BasicLSTMCell(encoder_hidden_units)

with tf.variable_scope('decodercell'):
    decoder_cell = tf.contrib.rnn.BasicLSTMCell(decoder_hidden_units,reuse=False)


#encoder --------------------------------------------------------------------
encoder_outputs, encoder_final_state = tf.nn.dynamic_rnn(encoder_cell, encoder_inputs_embedded,
    dtype=tf.float32, time_major=True, scope='encoder_h')
#we are only interested in the final state
del encoder_outputs

#latent layer----------------------------------------------------------------------

#mapping parameters
mu=tf.contrib.layers.fully_connected(encoder_final_state, latent_dimension,activation_fn=None,
                                     weights_initializer=tf.random_normal_initializer(mean=0.0,stddev=0.1),scope='encoder_mu')
logsigma_sq=tf.contrib.layers.fully_connected(encoder_final_state, latent_dimension,activation_fn=None,
                                    weights_initializer=tf.random_normal_initializer(mean=0.0,stddev=0.1),scope='encoder_logsigma2')

#sample firs epsilon from N(0,1) and then rescale it with learned my and sigma to map the encoded
#code to N(mu,sigma)

# Sample latent variable
epsilon = tf.random_normal(tf.shape(logsigma_sq), dtype=tf.float32, mean=0., stddev=1.0, name='epsilon')
std_z = tf.exp(0.5 * logsigma_sq) #because logsigma_sq=log(std^2)
z = mu + tf.multiply(std_z, epsilon) #now z is N(mu,sigma) where mu and sigma will be learned

#decode the latent z with input-------------------------------------------------------------------------------

#map from z to suitable dimensions
decoder_initial_state = tf.contrib.layers.fully_connected(z, decoder_hidden_units, activation_fn=tf.nn.tanh,scope='z_decoder_c')


#unpack for list which to loop over and feed it as input
#inputs=tf.unstack(decoder_inputs_embedded,num=max_sent_len+1)
#decoder_prediction_list=[]
#decoder_logits_list=[]
        
#set up initial input and state
#state = (cell_state,output_state)
    
#for input_ in inputs:
        
#    output, state = decoder_cell.__call__(input_, state,scope='manual_decoder')

    #map the decoder output to words as logits
#    decoder_logits=tf.contrib.layers.fully_connected(output, vocab_size,activation_fn=None,scope='lstm_to_logits')
#    decoder_logits_list.append(decoder_logits)

    #take a greedy prediction
#    prediction=tf.argmax(decoder_logits, 1)
#    decoder_prediction_list.append(prediction)

#decoder_prediction = tf.stack(decoder_prediction_list, axis=0)
#decoder_logits = tf.stack(decoder_logits_list, axis=0)


#has to be modified for lstm
cell_state =decoder_initial_state[0,:,:] 
output_state =decoder_initial_state[1,:,:] 
decoder_initial_state = tf.nn.rnn_cell.LSTMStateTuple(cell_state, output_state)

decoder_outputs, decoder_final_state = tf.nn.dynamic_rnn(decoder_cell, decoder_inputs_embedded,
    initial_state=decoder_initial_state, dtype=tf.float32, time_major=True, scope="plain_decoder")

decoder_logits=tf.contrib.layers.fully_connected(decoder_outputs, vocab_size,activation_fn=None,scope='lstm_to_logits')
decoder_prediction=tf.argmax(decoder_logits, 2)                                                       

#calculate the loss ------------------------------------------------------------------------------------------------

# the Loss consists of 2 parts: the reconstruction error and penalty for diverginf from the prior 

stepwise_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=tf.one_hot(decoder_targets,
    depth=vocab_size, dtype=tf.float32),logits=decoder_logits)
  
#sum over the sequence and take the mean, axis 0 is the time sequence and axis=1 is the batch
#so sum over sequence and mean over batch
reconst_loss = tf.reduce_mean(tf.reduce_sum(stepwise_cross_entropy,axis=0))
    
#the KL -divergence. our prior and recognition networks are gaussion so we have an explicit form for the loss
#shape is 2,batch,latent dim
KLD_b = -0.5 * tf.reduce_sum(1 + logsigma_sq - tf.pow(mu, 2) - tf.exp(logsigma_sq), axis=[0,2])
KLD = tf.reduce_mean(KLD_b) #the mean over batch

#total loss and optimise it
loss = reconst_loss+KLD                                 
optimizer = tf.train.AdamOptimizer().minimize(loss)

#create a saver to save the model, keep 3 lates versions
saver = tf.train.Saver(max_to_keep=3)

def train(load=False,keep_prob=1):
    
    train_output = open('train_output.txt','w')
    
    #initialize lists 
    loss_track = []
    kll_track = []

    if load==True:
        
        try:
            
            printandwrite(train_output,"Restoring saved parameters")
            saver_recover = tf.train.import_meta_graph('save/VAE-0.meta')
            saver_recover.restore(sess, tf.train.latest_checkpoint('save'))

        except Exception:

            printandwrite(train_output,"No pretrained model found")
            print(line)
            sys.exit()
            
    else:

            printandwrite(train_output,"starting from beginning, initializing parameters")
            sess.run(tf.global_variables_initializer())
            #feed the placeholders the pretrained ambeddings
            sess.run(pretrained_emb, feed_dict={pretrained_embedding_placeholder: embedding})
    
    
    num_batches=int(train_len/batch_size)

    #loop over epochs
    for epoch in range(epochs):

        #shuffle the training values for each epoch
        #rng_state = np.random.get_state()
        #np.random.set_state(rng_state)
        #np.random.shuffle(input_sentences)


        #loop over the batches
        for batch_ind in range(num_batches):

            #generate next feed
            feed=next_feed(batch_ind,keep_prob)
            
            #run optimizer
            _, l,kll = sess.run([optimizer, loss,KLD], feed)
            loss_track.append(l)   
            kll_track.append(kll)

            if (batch_ind == 0) or (((batch_ind+1) % 50) == 0):
                printandwrite(train_output,'batch_ind/epoch: '+str(batch_ind+1)+'/'+str(epoch+1)+' out of '+str(num_batches)+'/'+str(epochs))
                printandwrite(train_output,'Last batch loss:'+str(l)+ ' KDL: '+str(kll)+' KLD %: ' +str(100*kll/l))
                predict_ = sess.run(decoder_prediction, feed)
                printandwrite(train_output,'encoder inputs: '+ printsentence(feed[encoder_inputs][:,1],reverse=True))
                printandwrite(train_output,'decoder inputs: '+printsentence(feed[decoder_inputs][:,1]))
                printandwrite(train_output,'predictions: '+printsentence(predict_[:,1]))
                train_output.flush()



        # Save the variables to disk after each epoch
        #we couold have global_step=epoch but then we have to specify it at loading
        save_path=saver.save(sess, 'save/VAE',global_step=0)                
        printandwrite(train_output,"Model saved in file: %s"+save_path)
        printandwrite(train_output,'execution took: '+str(timeit.default_timer() - start_time)+' seconds')
        train_output.flush()

    train_output.close()
    return loss_track, kll_track
    
    
#gives the latent code for input of sentences
def Givelatent(input_sentences):
    
    print("Restoring saved parameters")
    saver_recover = tf.train.import_meta_graph('save/VAE-0.meta')
    saver_recover.restore(sess, tf.train.latest_checkpoint('save'))
    
    return sess.run([mu, std_z, logsigma_sq,z], {encoder_inputs: input_sentences})

def Giveoutput_fromlatent(latent_var_in,input_sentences,decoder_inputmode='prediction'):

    print("Restoring saved parameters")
    saver_recover = tf.train.import_meta_graph('save/VAE-0.meta')
    saver_recover.restore(sess, tf.train.latest_checkpoint('save'))
    
    shapein=[np.shape(latent_var_in)[0],np.shape(latent_var_in)[1],np.shape(latent_var_in)[2]]
    
    #feed the latent variable value through placeholder
    z_holder = tf.placeholder(shape=(2,None, None), dtype=tf.float32)
    latent_input=tf.Variable(tf.constant(0.0, shape=shapein),trainable = False)
    assign_z = latent_input.assign(z_holder)
    
    #assign z
    sess.run(assign_z,{z_holder:latent_var_in})
    
    #decoder_prediction, decoder_logits
    return sess.run(Getprediction_from_latent(latent_input,decoder_inputmode), {decoder_inputs: input_sentences})#, {decoder_inputs: input_sentences})



#run the decoder with beam search
#make bidirectional with attention

In [8]:
losses,kll_losses=train(load=True,keep_prob=0.7)

Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0
batch_ind/epoch: 1/1 out of 6961/5
Last batch loss:116.826 KDL: 4.17606 KLD %: 3.57460750422
encoder inputs: not too chewy , and very flavorful .
decoder inputs: not too chewy , UNK very flavorful UNK
predictions: nail filler simply blessings eaten eaten dreadful collection
batch_ind/epoch: 50/1 out of 6961/5
Last batch loss:50.6113 KDL: 3.64943 KLD %: 7.21069642837
encoder inputs: tastes pretty crappy .
decoder inputs: UNK pretty crappy UNK
predictions: i is tea !
batch_ind/epoch: 100/1 out of 6961/5
Last batch loss:45.0851 KDL: 3.5333 KLD %: 7.83694862164
encoder inputs: i 'm still not sure what flavor it is .
decoder inputs: i 'm still UNK UNK what flavor it is .
predictions: i will this these and and the it .
batch_ind/epoch: 150/1 out of 6961/5
Last batch loss:42.1988 KDL: 3.38671 KLD %: 8.025609304
encoder inputs: now i have 5 lbs .
decoder inputs: now i have 5 lbs UNK
predictions: this i love a them 

encoder inputs: ca n't say enough good things about this cocoa .
decoder inputs: ca UNK say enough good things UNK this cocoa .
predictions: i n't to that for for to this coffee .
batch_ind/epoch: 1800/1 out of 6961/5
Last batch loss:36.9161 KDL: 3.10746 KLD %: 8.41763211852
encoder inputs: i ca n't wait to try more flavors !
decoder inputs: UNK ca n't wait to try more flavors UNK
predictions: i have n't wait to try this it .
batch_ind/epoch: 1850/1 out of 6961/5
Last batch loss:36.6605 KDL: 3.04602 KLD %: 8.30872464698
encoder inputs: the chai latte is delicious !
decoder inputs: the chai UNK is delicious !
predictions: i taste was is great .
batch_ind/epoch: 1900/1 out of 6961/5
Last batch loss:39.1909 KDL: 3.02396 KLD %: 7.71596748052
encoder inputs: they arrived quickly and in good shape !
decoder inputs: they UNK quickly and UNK UNK shape UNK
predictions: a are a and not and UNK .
batch_ind/epoch: 1950/1 out of 6961/5
Last batch loss:36.4804 KDL: 2.97283 KLD %: 8.14913454896
encod

batch_ind/epoch: 3550/1 out of 6961/5
Last batch loss:35.0935 KDL: 3.11923 KLD %: 8.88833700471
encoder inputs: love this product !
decoder inputs: love this UNK UNK
predictions: very this product .
batch_ind/epoch: 3600/1 out of 6961/5
Last batch loss:33.0647 KDL: 2.96883 KLD %: 8.97885532448
encoder inputs: her baking mixes are UNK .
decoder inputs: her baking mixes are UNK .
predictions: this is like are very .
batch_ind/epoch: 3650/1 out of 6961/5
Last batch loss:37.0375 KDL: 3.1422 KLD %: 8.48382582167
encoder inputs: price with subscribe and save is very good .
decoder inputs: price UNK subscribe and save UNK very UNK .
predictions: i is the and save for the good .
batch_ind/epoch: 3700/1 out of 6961/5
Last batch loss:34.1619 KDL: 3.06672 KLD %: 8.97703736724
encoder inputs: one end is covered with a sticker .
decoder inputs: UNK end UNK UNK with a sticker .
predictions: ( , , the and the lot .
batch_ind/epoch: 3750/1 out of 6961/5
Last batch loss:33.8116 KDL: 3.21261 KLD %: 9.50

encoder inputs: ca n't beat that !
decoder inputs: ca n't beat that !
predictions: what n't beat ! !
batch_ind/epoch: 5350/1 out of 6961/5
Last batch loss:35.8831 KDL: 3.15008 KLD %: 8.77871988747
encoder inputs: she devours every bite .
decoder inputs: she UNK every UNK UNK
predictions: these always it penny .
batch_ind/epoch: 5400/1 out of 6961/5
Last batch loss:35.0291 KDL: 3.18207 KLD %: 9.08407414853
encoder inputs: this really tastes like ginger and honey !
decoder inputs: UNK UNK tastes like ginger UNK honey !
predictions: this is is like a and flavor .
batch_ind/epoch: 5450/1 out of 6961/5
Last batch loss:37.6257 KDL: 3.12081 KLD %: 8.29435523812
encoder inputs: i could n't be happier !
decoder inputs: UNK UNK n't be UNK UNK
predictions: i love this be disappointed .
batch_ind/epoch: 5500/1 out of 6961/5
Last batch loss:33.0241 KDL: 3.21898 KLD %: 9.74738127122
encoder inputs: my havanese just loves these treats with the apples !
decoder inputs: my havanese just UNK UNK treats 

Model saved in file: %ssave/VAE-0
execution took: 12312.408630230986 seconds
batch_ind/epoch: 1/2 out of 6961/5
Last batch loss:32.1628 KDL: 3.31681 KLD %: 10.3125505187
encoder inputs: not too chewy , and very flavorful .
decoder inputs: not UNK chewy , UNK very flavorful .
predictions: not too , , but , good .
batch_ind/epoch: 50/2 out of 6961/5
Last batch loss:34.8133 KDL: 3.16679 KLD %: 9.09650972766
encoder inputs: tastes pretty crappy .
decoder inputs: UNK pretty crappy .
predictions: they are good .
batch_ind/epoch: 100/2 out of 6961/5
Last batch loss:30.9039 KDL: 3.34801 KLD %: 10.8335903915
encoder inputs: i 'm still not sure what flavor it is .
decoder inputs: i UNK still not sure what flavor UNK UNK UNK
predictions: i 'm this to a what it is it .
batch_ind/epoch: 150/2 out of 6961/5
Last batch loss:32.6378 KDL: 3.07147 KLD %: 9.41077136679
encoder inputs: now i have 5 lbs .
decoder inputs: UNK i have 5 UNK .
predictions: great , love a stars .
batch_ind/epoch: 200/2 out of 6

batch_ind/epoch: 1800/2 out of 6961/5
Last batch loss:33.224 KDL: 3.53572 KLD %: 10.6420746495
encoder inputs: i ca n't wait to try more flavors !
decoder inputs: i UNK UNK wait to try more flavors !
predictions: it was this to to try it soon .
batch_ind/epoch: 1850/2 out of 6961/5
Last batch loss:33.1323 KDL: 3.35178 KLD %: 10.1163703447
encoder inputs: the chai latte is delicious !
decoder inputs: UNK UNK latte is delicious !
predictions: the is is is great .
batch_ind/epoch: 1900/2 out of 6961/5
Last batch loss:35.707 KDL: 3.37122 KLD %: 9.44132671103
encoder inputs: they arrived quickly and in good shape !
decoder inputs: UNK UNK quickly and UNK UNK UNK !
predictions: these , the and a are good .
batch_ind/epoch: 1950/2 out of 6961/5
Last batch loss:32.8387 KDL: 3.29824 KLD %: 10.0437668497
encoder inputs: i have to drink about two cups each time !
decoder inputs: UNK have to drink about two cups UNK time !
predictions: i have been try it the weeks of day .
batch_ind/epoch: 2000/2 

batch_ind/epoch: 3600/2 out of 6961/5
Last batch loss:30.7718 KDL: 3.30304 KLD %: 10.7339881209
encoder inputs: her baking mixes are UNK .
decoder inputs: UNK UNK mixes are UNK UNK
predictions: the is is are great .
batch_ind/epoch: 3650/2 out of 6961/5
Last batch loss:35.0296 KDL: 3.55827 KLD %: 10.1578766811
encoder inputs: price with subscribe and save is very good .
decoder inputs: price UNK subscribe UNK UNK is very good .
predictions: the is the and save is a good .
batch_ind/epoch: 3700/2 out of 6961/5
Last batch loss:32.1339 KDL: 3.44346 KLD %: 10.7159593495
encoder inputs: one end is covered with a sticker .
decoder inputs: one UNK UNK covered with a sticker .
predictions: so of my and and this UNK .
batch_ind/epoch: 3750/2 out of 6961/5
Last batch loss:31.3799 KDL: 3.54723 KLD %: 11.3041490407
encoder inputs: they are all meat - absolutely nothing else .
decoder inputs: they are all meat - absolutely nothing else UNK
predictions: these are a the , i love special .
batch_ind/e

batch_ind/epoch: 5350/2 out of 6961/5
Last batch loss:33.6924 KDL: 3.36422 KLD %: 9.98510064579
encoder inputs: she devours every bite .
decoder inputs: she devours every bite .
predictions: it always so too .
batch_ind/epoch: 5400/2 out of 6961/5
Last batch loss:33.3432 KDL: 3.48904 KLD %: 10.4640280656
encoder inputs: this really tastes like ginger and honey !
decoder inputs: this really UNK like UNK UNK honey !
predictions: this is is the a UNK coffee .
batch_ind/epoch: 5450/2 out of 6961/5
Last batch loss:36.3213 KDL: 3.41247 KLD %: 9.39523105246
encoder inputs: i could n't be happier !
decoder inputs: i could n't UNK happier !
predictions: i would n't be it .
batch_ind/epoch: 5500/2 out of 6961/5
Last batch loss:29.6037 KDL: 3.55349 KLD %: 12.0035411083
encoder inputs: my havanese just loves these treats with the apples !
decoder inputs: my havanese just loves these treats UNK the apples !
predictions: my husband loves loves these and for the best .
batch_ind/epoch: 5550/2 out of 

Model saved in file: %ssave/VAE-0
execution took: 30631.72450455546 seconds
batch_ind/epoch: 1/3 out of 6961/5
Last batch loss:31.419 KDL: 3.61159 KLD %: 11.4948962606
encoder inputs: not too chewy , and very flavorful .
decoder inputs: not too chewy UNK and very UNK .
predictions: a too sweet , a not tasty .
batch_ind/epoch: 50/3 out of 6961/5
Last batch loss:32.737 KDL: 3.39824 KLD %: 10.3804083111
encoder inputs: tastes pretty crappy .
decoder inputs: tastes pretty crappy .
predictions: very like good !
batch_ind/epoch: 100/3 out of 6961/5
Last batch loss:28.6045 KDL: 3.5926 KLD %: 12.5595619124
encoder inputs: i 'm still not sure what flavor it is .
decoder inputs: i 'm UNK not sure what flavor it is .
predictions: i have a to sure what it is is .
batch_ind/epoch: 150/3 out of 6961/5
Last batch loss:31.6949 KDL: 3.34623 KLD %: 10.5576400682
encoder inputs: now i have 5 lbs .
decoder inputs: now i UNK UNK lbs .
predictions: so i 'm it this .
batch_ind/epoch: 200/3 out of 6961/5
Last

batch_ind/epoch: 1800/3 out of 6961/5
Last batch loss:31.7644 KDL: 3.77048 KLD %: 11.8701529877
encoder inputs: i ca n't wait to try more flavors !
decoder inputs: i UNK n't wait UNK try more UNK UNK
predictions: i have this wait to try it soon .
batch_ind/epoch: 1850/3 out of 6961/5
Last batch loss:32.0137 KDL: 3.60552 KLD %: 11.262423134
encoder inputs: the chai latte is delicious !
decoder inputs: UNK chai UNK is delicious !
predictions: the is is is the .
batch_ind/epoch: 1900/3 out of 6961/5
Last batch loss:33.7889 KDL: 3.63439 KLD %: 10.7561480194
encoder inputs: they arrived quickly and in good shape !
decoder inputs: they arrived quickly and in good UNK !
predictions: they are in and in perfect shape .
batch_ind/epoch: 1950/3 out of 6961/5
Last batch loss:32.4559 KDL: 3.48654 KLD %: 10.7424219042
encoder inputs: i have to drink about two cups each time !
decoder inputs: i have to drink about UNK cups each time UNK
predictions: i have been say this the years this day .
batch_ind

encoder inputs: love this product !
decoder inputs: love this product UNK
predictions: love this product .
batch_ind/epoch: 3600/3 out of 6961/5
Last batch loss:29.7036 KDL: 3.57646 KLD %: 12.0404760095
encoder inputs: her baking mixes are UNK .
decoder inputs: her baking mixes UNK UNK .
predictions: so coat is in it .
batch_ind/epoch: 3650/3 out of 6961/5
Last batch loss:33.2664 KDL: 3.82205 KLD %: 11.4892335417
encoder inputs: price with subscribe and save is very good .
decoder inputs: price with subscribe and save UNK very UNK UNK
predictions: so is the and save is the reasonable .
batch_ind/epoch: 3700/3 out of 6961/5
Last batch loss:31.4828 KDL: 3.67655 KLD %: 11.6779702404
encoder inputs: one end is covered with a sticker .
decoder inputs: one end UNK covered with a sticker .
predictions: so of 's a and the UNK .
batch_ind/epoch: 3750/3 out of 6961/5
Last batch loss:30.313 KDL: 3.82297 KLD %: 12.6116524522
encoder inputs: they are all meat - absolutely nothing else .
decoder inp

encoder inputs: ca n't beat that !
decoder inputs: ca UNK beat that UNK
predictions: these n't beat that !
batch_ind/epoch: 5350/3 out of 6961/5
Last batch loss:32.9507 KDL: 3.6308 KLD %: 11.018874548
encoder inputs: she devours every bite .
decoder inputs: she UNK every UNK .
predictions: it always wanted bite .
batch_ind/epoch: 5400/3 out of 6961/5
Last batch loss:32.8427 KDL: 3.71389 KLD %: 11.3081154168
encoder inputs: this really tastes like ginger and honey !
decoder inputs: this UNK tastes like UNK and UNK !
predictions: this product is like a real UNK .
batch_ind/epoch: 5450/3 out of 6961/5
Last batch loss:35.5932 KDL: 3.59217 KLD %: 10.0922713361
encoder inputs: i could n't be happier !
decoder inputs: i could UNK UNK UNK UNK
predictions: i love n't be it .
batch_ind/epoch: 5500/3 out of 6961/5
Last batch loss:29.606 KDL: 3.88468 KLD %: 13.1212703501
encoder inputs: my havanese just loves these treats with the apples !
decoder inputs: UNK UNK just loves UNK UNK with the apples

Model saved in file: %ssave/VAE-0
execution took: 42502.0008392285 seconds
batch_ind/epoch: 1/4 out of 6961/5
Last batch loss:30.2372 KDL: 3.84645 KLD %: 12.7209090408
encoder inputs: not too chewy , and very flavorful .
decoder inputs: not too UNK , and very flavorful UNK
predictions: not too sweet , but not tasty .
batch_ind/epoch: 50/4 out of 6961/5
Last batch loss:32.6824 KDL: 3.66756 KLD %: 11.2217964791
encoder inputs: tastes pretty crappy .
decoder inputs: tastes UNK UNK .
predictions: these like good .
batch_ind/epoch: 100/4 out of 6961/5
Last batch loss:27.8107 KDL: 3.79554 KLD %: 13.6477987389
encoder inputs: i 'm still not sure what flavor it is .
decoder inputs: i 'm still not sure what flavor it UNK .
predictions: i 'm so to to what it is is .
batch_ind/epoch: 150/4 out of 6961/5
Last batch loss:31.0126 KDL: 3.62106 KLD %: 11.6760986305
encoder inputs: now i have 5 lbs .
decoder inputs: UNK i have 5 lbs .
predictions: ( , love a to UNK
batch_ind/epoch: 200/4 out of 6961/5


batch_ind/epoch: 1800/4 out of 6961/5
Last batch loss:31.6915 KDL: 3.91165 KLD %: 12.342881571
encoder inputs: i ca n't wait to try more flavors !
decoder inputs: i ca n't wait UNK UNK more flavors !
predictions: i have n't wait to try the flavors .
batch_ind/epoch: 1850/4 out of 6961/5
Last batch loss:31.5324 KDL: 3.82009 KLD %: 12.1148121381
encoder inputs: the chai latte is delicious !
decoder inputs: UNK chai latte UNK delicious UNK
predictions: the product is is . .
batch_ind/epoch: 1900/4 out of 6961/5
Last batch loss:33.239 KDL: 3.79386 KLD %: 11.4138537917
encoder inputs: they arrived quickly and in good shape !
decoder inputs: they UNK UNK and in UNK shape !
predictions: they are a and very perfect . .
batch_ind/epoch: 1950/4 out of 6961/5
Last batch loss:31.9752 KDL: 3.68205 KLD %: 11.5153419416
encoder inputs: i have to drink about two cups each time !
decoder inputs: i UNK UNK drink UNK two cups UNK UNK UNK
predictions: i have this to it a thumbs of day .
batch_ind/epoch: 2

batch_ind/epoch: 3600/4 out of 6961/5
Last batch loss:29.0981 KDL: 3.76399 KLD %: 12.9355202052
encoder inputs: her baking mixes are UNK .
decoder inputs: her UNK mixes are UNK UNK
predictions: so coat is are . .
batch_ind/epoch: 3650/4 out of 6961/5
Last batch loss:32.9444 KDL: 3.86836 KLD %: 11.7420578095
encoder inputs: price with subscribe and save is very good .
decoder inputs: price with subscribe and UNK is very UNK .
predictions: but is the and save is the good .
batch_ind/epoch: 3700/4 out of 6961/5
Last batch loss:30.4198 KDL: 3.866 KLD %: 12.7088465272
encoder inputs: one end is covered with a sticker .
decoder inputs: one end is UNK with a sticker .
predictions: it of of a for this spoon .
batch_ind/epoch: 3750/4 out of 6961/5
Last batch loss:29.5038 KDL: 3.93746 KLD %: 13.3455954511
encoder inputs: they are all meat - absolutely nothing else .
decoder inputs: they are UNK meat - absolutely nothing else .
predictions: i are a and and not delicious bad .
batch_ind/epoch: 380

encoder inputs: ca n't beat that !
decoder inputs: ca n't beat that !
predictions: will n't beat it .
batch_ind/epoch: 5350/4 out of 6961/5
Last batch loss:32.1877 KDL: 3.73951 KLD %: 11.6177910184
encoder inputs: she devours every bite .
decoder inputs: she devours UNK bite .
predictions: these always them it .
batch_ind/epoch: 5400/4 out of 6961/5
Last batch loss:31.968 KDL: 3.91678 KLD %: 12.2521771922
encoder inputs: this really tastes like ginger and honey !
decoder inputs: this really UNK like ginger and UNK UNK
predictions: this is is the a and UNK .
batch_ind/epoch: 5450/4 out of 6961/5
Last batch loss:34.4834 KDL: 3.82861 KLD %: 11.1027741177
encoder inputs: i could n't be happier !
decoder inputs: i could UNK be happier !
predictions: i love n't believe happier .
batch_ind/epoch: 5500/4 out of 6961/5
Last batch loss:28.846 KDL: 4.09104 KLD %: 14.1823571527
encoder inputs: my havanese just loves these treats with the apples !
decoder inputs: my UNK just UNK these UNK with the 

Model saved in file: %ssave/VAE-0
execution took: 54361.80374712 seconds
batch_ind/epoch: 1/5 out of 6961/5
Last batch loss:30.3562 KDL: 3.88398 KLD %: 12.7946968037
encoder inputs: not too chewy , and very flavorful .
decoder inputs: not too UNK , UNK very UNK .
predictions: and too sweet , but too good .
batch_ind/epoch: 50/5 out of 6961/5
Last batch loss:32.2413 KDL: 3.77541 KLD %: 11.7098728776
encoder inputs: tastes pretty crappy .
decoder inputs: tastes pretty crappy UNK
predictions: will great good .
batch_ind/epoch: 100/5 out of 6961/5
Last batch loss:26.0171 KDL: 3.88032 KLD %: 14.9145059427
encoder inputs: i 'm still not sure what flavor it is .
decoder inputs: UNK 'm still not sure what flavor it is .
predictions: i will not not to what to is is .
batch_ind/epoch: 150/5 out of 6961/5
Last batch loss:29.6097 KDL: 3.71155 KLD %: 12.5349120559
encoder inputs: now i have 5 lbs .
decoder inputs: now UNK have UNK UNK .
predictions: the i is a it .
batch_ind/epoch: 200/5 out of 696

batch_ind/epoch: 1800/5 out of 6961/5
Last batch loss:31.8525 KDL: 4.0082 KLD %: 12.5836443637
encoder inputs: i ca n't wait to try more flavors !
decoder inputs: i ca UNK UNK to try more flavors !
predictions: i love n't wait enough try the flavors .
batch_ind/epoch: 1850/5 out of 6961/5
Last batch loss:31.6797 KDL: 3.90598 KLD %: 12.3295985293
encoder inputs: the chai latte is delicious !
decoder inputs: the chai latte is delicious !
predictions: great taste is is great .
batch_ind/epoch: 1900/5 out of 6961/5
Last batch loss:32.632 KDL: 3.86472 KLD %: 11.8433530237
encoder inputs: they arrived quickly and in good shape !
decoder inputs: they UNK UNK and UNK good shape !
predictions: they are a and they are too .
batch_ind/epoch: 1950/5 out of 6961/5
Last batch loss:31.9208 KDL: 3.833 KLD %: 12.0078525613
encoder inputs: i have to drink about two cups each time !
decoder inputs: i have to UNK UNK UNK cups each time !
predictions: i am been say it for the it . .
batch_ind/epoch: 2000/5

encoder inputs: love this product !
decoder inputs: love UNK product !
predictions: love this stuff .
batch_ind/epoch: 3600/5 out of 6961/5
Last batch loss:29.1864 KDL: 3.77387 KLD %: 12.930205658
encoder inputs: her baking mixes are UNK .
decoder inputs: her UNK mixes are UNK .
predictions: UNK coat is are great .
batch_ind/epoch: 3650/5 out of 6961/5
Last batch loss:32.4617 KDL: 4.02115 KLD %: 12.3873484676
encoder inputs: price with subscribe and save is very good .
decoder inputs: UNK UNK subscribe and save is UNK good UNK
predictions: that is the and save is the it .
batch_ind/epoch: 3700/5 out of 6961/5
Last batch loss:30.5856 KDL: 3.97305 KLD %: 12.989937311
encoder inputs: one end is covered with a sticker .
decoder inputs: UNK UNK UNK covered UNK a sticker UNK
predictions: so is is and and the UNK .
batch_ind/epoch: 3750/5 out of 6961/5
Last batch loss:28.5498 KDL: 4.0343 KLD %: 14.1307150117
encoder inputs: they are all meat - absolutely nothing else .
decoder inputs: UNK are

encoder inputs: ca n't beat that !
decoder inputs: ca UNK beat that !
predictions: will n't beat that .
batch_ind/epoch: 5350/5 out of 6961/5
Last batch loss:32.852 KDL: 3.8567 KLD %: 11.7396146137
encoder inputs: she devours every bite .
decoder inputs: she devours every UNK .
predictions: i always them too .
batch_ind/epoch: 5400/5 out of 6961/5
Last batch loss:31.3959 KDL: 3.99465 KLD %: 12.7234803853
encoder inputs: this really tastes like ginger and honey !
decoder inputs: UNK really tastes UNK UNK and honey !
predictions: this is is like and the UNK .
batch_ind/epoch: 5450/5 out of 6961/5
Last batch loss:34.2866 KDL: 3.99716 KLD %: 11.6580936318
encoder inputs: i could n't be happier !
decoder inputs: i could n't be happier UNK
predictions: i was n't believe happier .
batch_ind/epoch: 5500/5 out of 6961/5
Last batch loss:27.6912 KDL: 4.13157 KLD %: 14.920146032
encoder inputs: my havanese just loves these treats with the apples !
decoder inputs: my havanese UNK loves these treats

Model saved in file: %ssave/VAE-0
execution took: 66215.46722812699 seconds


In [126]:
input_sent=encoder_inputs_full
#printsentence(input_sent,reverse=True)

#Give latent outputs shape is (return list (mu, sigma, logsigma),(cell state, outputstate),number of number of sentences,latent variables)
latent_out=Givelatent(input_sent)
print('output shape for latent out:',np.shape(latent_out))

#we only care about mu,cell state, number of sentences and latent variables, shape will be [sentences, latent_var]
latent_var=latent_out[0]
mus_out_cell=latent_out[0][0,:,:]

Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0
output shape for latent out: (4, 2, 696136, 32)


In [127]:
#map them to correctr form        
latent_var=np.transpose(latent_var, (1, 0, 2))
datalen=np.shape(latent_var)[0]
reshaped=np.reshape(latent_var, (datalen, -1))

#rereshaped=np.reshape(latent_var, (datalen,2, -1))

In [128]:
#do prediction
from sklearn.model_selection import KFold
from sklearn.linear_model import LinearRegression
import pandas as pd
from sklearn.model_selection import cross_val_score

linreg = LinearRegression()
linreg.fit(reshaped, sentence_scores) #calculate the parameters

for i in range(3):
    kfold=KFold(n_splits=4, shuffle=True) #random_state=None)
    print('CV scores:',np.sqrt(-cross_val_score(linreg, reshaped, sentence_scores, scoring="neg_mean_squared_error", cv = kfold))) 

#linreg.coef_ contains the coefficients so the positivity axis will be
pos_axis=linreg.coef_/np.sqrt(sum(linreg.coef_*linreg.coef_))
#reshape back to normal
pos_axis_z=np.reshape(pos_axis, (2, -1))

CV scores: [ 1.31392154  1.31320906  1.3151783   1.31570079]
CV scores: [ 1.31549043  1.31515025  1.31376106  1.31365423]
CV scores: [ 1.31601116  1.31319544  1.31140979  1.3175115 ]


In [130]:

input_sent_en=encoder_inputs_full[:,111:115]
input_sent_de=encoder_inputs_full[:,111:115]


positivity=0.5#-0.1

#Give latent outputs shape is (return list (mu, sigma, logsigma),(cell state, outputstate),number of number of sentences,latent variables)
latent_out=Givelatent(input_sent_en)


#fix the positivity axis to match the criteria
pos_axis_z_batch=np.expand_dims(pos_axis_z,axis=1)

pos_axis_z_batch = [pos_axis_z for _ in range(np.shape(input_sent_en)[1])]
pos_axis_z_batch = np.stack(pos_axis_z_batch, axis=1)


#scale the shifting with the corresponding sigma, move from the means
latent_shifted=latent_out[0]+pos_axis_z_batch*positivity*latent_out[1]



decoder_out=Giveoutput_fromlatent(latent_shifted,input_sent_de,decoder_inputmode='target')

for sent_ind in range(np.shape(decoder_out)[1]):

    print('original:',printsentence(input_sent_en[:,sent_ind],reverse=True))
    print('shifted:',printsentence(decoder_out[:,sent_ind]))

Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0
Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0
original: . heavy very are these
shifted: 
original: . thawed slightly arrived
shifted: 
original: . it accept n't would parents my
shifted: for
original: . perfect are tarts these on crust the
shifted: for and size . .


In [105]:
decoder_out=Giveoutput_fromlatent(latent_shifted,input_sent_de,decoder_inputmode='target')

print(np.shape(decoder_out))
for sent_ind in range(np.shape(decoder_out)[1]):

    print(printsentence(decoder_out[:,sent_ind]))

Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0
(11, 4)






In [104]:
z1=latent_var[:,0,:]
z2=latent_var[:,1,:]

tvalues=[0, 0.2, 0.4, 0.6, 0.8, 1]
inputlist=[z1*(1-t)+z2*t for t in tvalues]

#input doesn't matter since the decoder works in greedy prediction mode
input_sent=encoder_inputs_full[:,:6]

latent_var_in=np.stack(inputlist, axis=1)

decoder_out=Giveoutput_fromlatent(latent_var_in,input_sent,decoder_inputmode='target')

print(np.shape(decoder_out))
for sent_ind in range(np.shape(decoder_out)[1]):

    print(printsentence(decoder_out[:,sent_ind]))

Restoring saved parameters
INFO:tensorflow:Restoring parameters from save\VAE-0


ValueError: Dimension 0 in both shapes must be equal, but are 696136 and 2 for 'Assign_2' (op: 'Assign') with input shapes: [696136,6,20], [2,?,?].

In [None]:

    #has to be modified for lstm
    #cell_state =decoder_initial_state[0,:,:] 
    #output_state =decoder_initial_state[1,:,:] 
    
    #unpack for list which to loop over and feed it as input
    #inputs=tf.unstack(decoder_inputs_embedded,num=max_sent_len+1)
    #decoder_prediction_list=[]
    #decoder_logits_list=[]
        
    #set up initial input and state
    #state = (cell_state,output_state)
    #input_in=inputs[0]
    
    #for input_ in inputs:
        
        
#        if decoder_inputmode=='target':
#            input_in=input_
            
#        output, state = decoder_cell.__call__(input_in, state,scope='manual_decoder')
        
#        print(decoder_cell)

        #map the decoder output to words as logits
#        decoder_logits=tf.contrib.layers.fully_connected(output, vocab_size,activation_fn=None,
                                                         scope='lstm_to_logits',reuse=reusing)

        #take a greedy prediction
#        prediction=tf.argmax(decoder_logits, 1)
#        decoder_prediction_list.append(prediction)
        
        #feed the prediction as a next input
#        if decoder_inputmode=='prediction':
#            input_in=tf.nn.embedding_lookup(embeddings, prediction)
            