In [None]:
import torch.nn.utils
# %% packed sequences
# Round-trip three variable-length sequences through
# pad -> pack -> pad -> pack and print every intermediate representation.
a = torch.Tensor([1, 2, 3])
b = torch.Tensor([4, 5])
c = torch.Tensor([6])
sequences = [a, b, c]
# Lengths come out as 3, 2, 1, i.e. already in descending order.
lengths = [len(seq) for seq in sequences]

rnn_utils = torch.nn.utils.rnn
padded = rnn_utils.pad_sequence(sequences, batch_first=False)
# batch_first is not passed from here on (library default is used)
packed_padded = rnn_utils.pack_padded_sequence(padded, lengths)
# pad_packed_sequence returns (padded_tensor, lengths); keep only the tensor
repadded = rnn_utils.pad_packed_sequence(packed_padded)[0]
repacked = rnn_utils.pack_padded_sequence(repadded, lengths)

for shown in (sequences, padded, packed_padded, repadded, repacked):
    print(shown)

In [None]:
from src.parameters import Params
from src.vocabulary import Vocabulary
from src.style_transfer import StyleTransfer
from src.greedy_decoding import Decoder
from src.generate_batches import preprocessSentences

In [None]:
# Build the model from default parameters and the pickled Yelp vocabulary.
# Weights are not loaded here; that happens in the checkpoint cell.
VOCAB_PATH = "data/yelp/vocabulary.pickle"

params = Params()

vocab = Vocabulary()
vocab.loadVocabulary(VOCAB_PATH)
vocab.initializeEmbeddings(params.embedding_size)

model = StyleTransfer(params, vocab)

In [None]:
import torch
# Restore the saved state dict into the model built above.
# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
CHECKPOINT_PATH = "data/models/yelp/model-2018-06-27-epoch_18-loss_45.287033"
checkpoint = torch.load(CHECKPOINT_PATH)
model.load_state_dict(checkpoint)

In [None]:
import re
import numpy as np

# Load the first 32 negative dev sentences used by the greedy-decoding cells.
with open('data/yelp/dev/negative.txt', 'r') as fp:
    # Strip only the line terminator. The previous `x[:-1]` removed a real
    # character from a final line that has no trailing newline.
    testSents = [line.rstrip("\n") for line in fp.readlines()[:32]]

# Every sentence comes from the negative file, so every label is 0.
labels = np.array([0] * len(testSents))
# Longest first, measured in characters.
# NOTE(review): this is character length, not token count; if the model needs
# sentences ordered by token length this ordering may not satisfy it. Verify.
testSents = sorted(testSents, key=len, reverse=True)

In [None]:
# Build a decoder around the loaded model. Reading the notebook top to bottom,
# `Decoder` is the class imported from src.greedy_decoding at this point.
# NOTE(review): 20 and 12 are unexplained positional constants; the later
# beam-search cell passes model.params.max_len in the position of 20, so it is
# presumably a maximum length. Confirm both against Decoder.__init__.
# `decoder` is not used by the visible cells before it is rebound further down.
decoder = Decoder(model, 20, 12, params)

In [None]:
# Turn the raw test sentences into the four inputs the model helpers take,
# using the model's private conversion helper.
# `targets` is later fed to pad_packed_sequence and `generator_inputs` is later
# indexed as [:, k, :]. The misspelt name `lenghts` is kept on purpose: the
# following cells refer to it by this spelling.
encoder_inputs, generator_inputs, targets, lenghts = \
            model._sentencesToInputs(testSents)

In [None]:
# Display the sorted, newline-stripped test sentences for reference.
testSents

In [None]:
# Print the target token ids as words to sanity-check the model inputs.
# pad_packed_sequence returns (padded_tensor, lengths); only the tensor is kept.
padded_targets = torch.nn.utils.rnn.pad_packed_sequence(targets, batch_first=True)[0]

# Slicing keeps the same 32 x 16 window the original range(32)/range(16) loops
# looked at, but cannot raise IndexError when the batch has fewer rows or
# shorter sequences. tolist() plus int() makes sure id2word is looked up with
# plain Python ints, as the later decoding cell does.
for token_ids in padded_targets[:32, :16].tolist():
    print(" ".join(model.vocabulary.id2word[int(token_id)] for token_id in token_ids))

In [None]:
# Run the model helpers on the whole test batch.
# eval_size is set to the number of test sentences before the helpers run.
model.eval_size = len(testSents)
# Called for its side effects only (its return value is discarded); the next
# statement reads model.originalHiddens, which is presumably populated here.
# TODO confirm. The trailing positional True is presumably an evaluation flag.
model._computeHiddens(
                encoder_inputs, generator_inputs, labels, lenghts, True)
# Returns two values; the first is later indexed as [sentence, position, :] and
# arg-maxed, so it presumably holds per-position vocabulary logits. `h_teacher`
# is not used by the visible cells.
generatorOutputs, h_teacher = model._generateTokens(
            generator_inputs, model.originalHiddens, lenghts, True)

In [None]:
# Greedy read-out of generatorOutputs: take the arg-max over the last axis at
# every position and map the resulting ids to words.
# Slicing to [:32, :16] keeps the window the original nested range(32)/range(16)
# loops inspected, without raising IndexError on a smaller or shorter batch.
# NOTE(review): assumes generatorOutputs is 3-D, (sentence, position, vocab).
predicted_ids = generatorOutputs[:32, :16, :].argmax(dim=-1).tolist()
sents = [
    " ".join(model.vocabulary.id2word[int(word_id)] for word_id in row)
    for row in predicted_ids
]
sents

# Generate with Previous Output

In [None]:
from src.rnn import SoftSampleWord

def _generateWithPrevOutput(
            model, h0, max_len, size, lengths=[], evaluation=False, soft=True):

    hidden = h0
    hiddens = torch.zeros(size, max_len,
                          model.params.autoencoder.hidden_size,
                          device="cuda")
    if soft:
        tokens = torch.zeros(
            size, max_len, model.params.embedding_size, device="cuda")
    else:
        tokens = torch.zeros(size, max_len, device="cuda")

    goEmbedding = model.vocabulary(['<go>']).squeeze(0)
    goEmbedding = goEmbedding.repeat(size, 1)
    goEmbedding = goEmbedding.unsqueeze(1)
    currTokens = goEmbedding
    softSampleFunction = SoftSampleWord(
        dropout=model.params.dropout,
        embeddings=model.vocabulary.embeddings,
        gamma=model.params.gamma_init)
    
    if soft:

        for index in range(max_len):
            # generator need input (seq_len, batch_size, input_size)
            output, hidden = model.generator(
                currTokens, hidden, pad=False)
            currTokens, vocabLogits = softSampleFunction(
                output=output,
                hiddenToVocab=model.hiddenToVocab)
            tokens[:, index, :] = currTokens
            currTokens = currTokens.unsqueeze(1)
            hiddens[:, index, :] = hidden
            
    else:
        for index in range(max_len):
            output, hidden = model.generator(currTokens, hidden, pad=False)
            vocabLogit = model.hiddenToVocab(hidden)
            idxs = vocabLogit[0, : , :].max(1)[1]
            tokens[:, index] = idxs
            currTokens = model.vocabulary(idxs, byWord=False).unsqueeze(1)

    hiddens = torch.cat((h0.transpose(0, 1), hiddens), dim=1)
    # tokens = torch.cat((goEmbedding, tokens), dim=1)
    return hiddens, tokens

In [None]:
# Run one generator step by hand: feed position 0 of the generator inputs
# (kept as a length-1 middle axis) together with model.originalHiddens, which
# must already have been set by an earlier cell.
tokens = generator_inputs[:, 0, :].unsqueeze(1)
output, hidden = model.generator(
    tokens, model.originalHiddens, pad=False)

In [None]:
# Project the hidden state from the manual step onto the vocabulary, take the
# arg-max id per sentence from the first slice, and look the ids up as words.
vocabLogit = model.hiddenToVocab(hidden)
idxs = vocabLogit[0].max(1)[1]
words = []
for word_id in idxs:
    words.append(model.vocabulary.id2word[word_id])

In [None]:
from src.generate_batches import preprocessSentences
# Same sentences, this time through the standalone batching helper, which is
# given each sentence as a list of space-separated tokens.
tokenized_sents = [sentence.split(" ") for sentence in testSents]
encoder_inputs_t, generator_inputs_t, targets_t, lengths_t = \
            preprocessSentences(tokenized_sents)

In [None]:
# Element-wise comparison between the embeddings of the ids predicted in the
# manual step and the generator inputs at position 1.
# `idxs` holds vocabulary ids, so the lookup passes byWord=False, exactly as
# _generateWithPrevOutput does for ids; the call without the flag is the form
# this notebook uses for word strings such as '<go>'.
model.vocabulary(idxs, byWord=False).unsqueeze(1) == generator_inputs[:, 1, :].unsqueeze(1)

In [None]:
# Rebuild the inputs and hidden states, then decode greedily while feeding
# each predicted word back in as the next input.
encoder_inputs, generator_inputs, targets, lenghts = \
            model._sentencesToInputs(testSents)

# One batch size for everything: the decode call used to hard-code 32 while
# eval_size was len(testSents), which disagree as soon as fewer (or more)
# sentences are loaded.
batch_size = len(testSents)
model.eval_size = batch_size
# Called for its side effects; model.transformedHiddens is read right below
# and is presumably populated here. TODO confirm.
model._computeHiddens(
                encoder_inputs, generator_inputs, labels, lenghts, True)

# soft=False: the second result holds vocabulary ids, one row per sentence.
h_prof, generateWithPrevOutputs = _generateWithPrevOutput(
    model, model.transformedHiddens, model.params.max_len, batch_size, lenghts, True, soft=False)

In [None]:
# Map every row of decoded ids back to words. Iterating over the tensor's own
# rows (instead of range(32)) keeps this cell correct for any batch size.
# The ids are stored in a float tensor, hence the int() before the lookup.
sents = [
    " ".join(model.vocabulary.id2word[int(token_id)] for token_id in row)
    for row in generateWithPrevOutputs.tolist()
]

sents

# Beam Search Decoding

In [1]:
import torch
from src.parameters import Params
from src.vocabulary import Vocabulary
from src.style_transfer import StyleTransfer
from src.generate_batches import preprocessSentences

# Self-contained setup for the beam-search section: rebuild the model from the
# pickled vocabulary and restore the saved weights.
VOCAB_PATH = "data/yelp/vocabulary.pickle"
CHECKPOINT_PATH = "data/models/yelp/model-2018-06-27-epoch_18-loss_45.287033"

params = Params()

vocab = Vocabulary()
vocab.loadVocabulary(VOCAB_PATH)
vocab.initializeEmbeddings(params.embedding_size)

model = StyleTransfer(params, vocab)

# NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
checkpoint = torch.load(CHECKPOINT_PATH)
model.load_state_dict(checkpoint)

In [9]:
import re
import numpy as np

# Load the first 128 negative dev sentences for the beam-search experiment.
with open('data/yelp/dev/negative.txt', 'r') as fp:
    # Strip only the line terminator. The previous `x[:-1]` removed a real
    # character from a final line that has no trailing newline.
    testSents = [line.rstrip("\n") for line in fp.readlines()[:128]]

# Every sentence comes from the negative file, so every label is 0.
labels = np.array([0] * len(testSents))
# Longest first, measured in characters.
# NOTE(review): this is character length, not token count; if the decoder
# orders or packs by token length, its outputs may come back in another order.
testSents = sorted(testSents, key=len, reverse=True)

In [10]:
from src.beam_search import Decoder
# Override the temperature on the model's own params object; the same object is
# handed to the decoder on the next line.
model.params.temperature = 1
# `Decoder` is the src.beam_search class imported just above, which rebinds the
# name used earlier in the notebook.
# NOTE(review): the third positional argument (1) is presumably the beam width.
# Confirm against src.beam_search.Decoder.
decoder = Decoder(model, model.params.max_len, 1, model.params)

In [11]:
# Decode the whole batch. The call returns two sequences of strings that the
# cells below display and zip with testSents; judging by the names they are
# presumably reconstructions (`recon`) and style-transferred rewrites
# (`transf`). Verify against Decoder.rewriteBatch.
recon, transf = decoder.rewriteBatch(testSents, labels)

In [16]:
# Display the input sentences (sorted by character length, longest first).
testSents

['the only slightly redeeming qualities about this place is the juke box & restroom .',
 "_num_ seconds later they start telling him they 'll get more from the back .",
 'only one reason for the second star and that was the considerate employees !',
 'they are ok for cleaning the outside , but they rushed the interior detail .',
 'the cash register area was empty and no one was watching the store front .',
 "it 's not really french food and the decor is n't really french either .",
 "paid then they told me they do n't have enough people for their detail .",
 'i stopped him to ask questions and he was condescending and impatient .',
 'i would not recommend this business as i feel they are very dishonest .',
 'easter day nothing open , heard about this place figured it would ok .',
 'management were slow on their end when you need help with anything .',
 'it was obvious it was the same damn one he brought the first time .',
 'short term memory apparently since they were still on main ent

In [15]:
# Display the first list returned by decoder.rewriteBatch.
recon

['the only slightly redeeming qualities about this place is the smaller list , vegetables . <eos> <eos> <eos> . <eos>',
 "_num_ full later they start telling him they 'll get more from the back . <eos> <eos> <eos> . <eos>",
 'they are ok for cleaning the outside , but they saw this big product . <eos> <eos> <eos> . <eos>',
 'the cash register area was empty and no one was watching the store joke . <eos> <eos> <eos> . <eos>',
 "it 's not really french food and the decor is n't really french either . <eos> <eos> <eos> . <eos>",
 "paid then they told me they do n't have enough people for their pitas . <eos> <eos> <eos> . <eos>",
 'it was obvious it was the same damn he one brought the first time . <eos> <eos> <eos> <eos> .',
 'we waited over half an hour to get medium hours and then only _num_ . <eos> <eos> . <eos> <eos>',
 'waitress ( both ) was nice but we waited very long for the food . <eos> <eos> . <eos> <eos>',
 'this cost total for a new system was going to be $ about a colleague .

In [12]:
# Print each input sentence followed by the two decoder outputs at the same
# index, with a blank gap between triples.
# NOTE(review): the recorded output shows triples that do not belong together
# (the third input is printed above a rewrite of the fourth input), so
# rewriteBatch apparently does not return results in input order. Verify
# before reading these triples as aligned.
paired = list(zip(testSents, recon, transf))
for source_sent, reconstructed, transferred in paired:
    print(source_sent, reconstructed, transferred, sep="\n")
    print("\n\n")

the only slightly redeeming qualities about this place is the juke box & restroom .
the only slightly redeeming qualities about this place is the smaller list , vegetables . <eos> <eos> <eos> . <eos>
the only redeeming facilities for the box end . <eos> <eos> <eos> ) <eos> . <eos> <eos> <eos> . <eos>



_num_ seconds later they start telling him they 'll get more from the back .
_num_ full later they start telling him they 'll get more from the back . <eos> <eos> <eos> . <eos>
_num_ seconds later they 'll please get more from back . <eos> <eos> <eos> . <eos> <eos> . <eos> <eos>



only one reason for the second star and that was the considerate employees !
they are ok for cleaning the outside , but they saw this big product . <eos> <eos> <eos> . <eos>
they are ok than the outside , they handle pre-made . <eos> <eos> <eos> . <eos> <eos> . <eos> <eos>



they are ok for cleaning the outside , but they rushed the interior detail .
the cash register area was empty and no one was watching t