In [2]:
import torch.nn.utils
# %% packed sequences
a = torch.Tensor([1, 2, 3])
b = torch.Tensor([4, 5])
c = torch.Tensor([6])
lengths = list(map(len, [a, b, c]))
padded= torch.nn.utils.rnn.pad_sequence([a, b, c], batch_first=True)
packed_padded =torch.nn.utils.rnn.pack_padded_sequence(padded, lengths) # non ho messo batch first da qui in poi
repadded = torch.nn.utils.rnn.pad_packed_sequence(packed_padded)[0]
repacked = torch.nn.utils.rnn.pack_padded_sequence(repadded, lengths)
print([a, b, c])
print(padded)
print(packed_padded)
print(repadded)
print(repacked)

[tensor([ 1.,  2.,  3.]), tensor([ 4.,  5.]), tensor([ 6.])]
tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  0.],
        [ 6.,  0.,  0.]])
PackedSequence(data=tensor([ 1.,  2.,  3.,  4.,  5.,  6.]), batch_sizes=tensor([ 3,  2,  1]))
tensor([[ 1.,  2.,  3.],
        [ 4.,  5.,  0.],
        [ 6.,  0.,  0.]])
PackedSequence(data=tensor([ 1.,  2.,  3.,  4.,  5.,  6.]), batch_sizes=tensor([ 3,  2,  1]))


In [1]:
from src.parameters import Params
from src.vocabulary import Vocabulary
from src.style_transfer import StyleTransfer
from src.greedy_decoding import Decoder
from src.generate_batches import preprocessSentences

In [2]:
params = Params()
vocab = Vocabulary()
vocab.loadVocabulary("data/yelp/vocabulary.pickle")
vocab.initializeEmbeddings(params.embedding_size)
model = StyleTransfer(params, vocab)

In [3]:
import torch
checkpoint = torch.load("data/models/yelp/model-2018-06-26-epoch_19-loss_68.143707")
model.load_state_dict(checkpoint)

In [4]:
import numpy as np

with open('data/yelp/dev/negative.txt', 'r') as fp:
    testSents = fp.readlines()[:16]
    
labels = np.array([0] * len(testSents))

In [5]:
decoder = Decoder(model, 20, 12, params)

In [6]:
encoder_inputs, generator_inputs, targets, lenghts = \
            model._sentencesToInputs(testSents)

In [7]:
model.eval_size = 16
model._computeHiddens(
                encoder_inputs, generator_inputs, labels, lenghts, True)
generatorOutputs, h_teacher = model._generateTokens(
            generator_inputs, model.originalHiddens, lenghts, True)

In [21]:
packedGenOutput = torch.nn.utils.rnn.pack_padded_sequence(
    generatorOutputs, lenghts)[0] # PROVA CON batch_first = True, sballa l'ordine!

model.rec_loss_criterion(
    packedGenOutput.view(-1, model.vocabulary.vocabSize),
    targets.view(-1))

tensor(24.5104, device='cuda:0')

In [22]:
sents = []
for index in range(16):
    tokensLogits = generatorOutputs[index, :, :]
    sent = []
    len = lenghts[index]
    for j in range(len):
        logit = tokensLogits[j, :]
        sent.append(model.vocabulary.id2word[logit.argmax()])
    sents.append(" ".join(sent))
sents        

['it was obvious it was the same damn one he brought the first time . .',
 'major day nothing open , heard about this place figured it ok ok . .',
 'the host that walked us to the table and left without to word hello .',
 'the last couple years this place has been going smoothly hill to detailing',
 'last night however it was way to thick and tasteless power !',
 'i tried to eat it but it was disgusting appropriately !',
 "i pushed it by and did n't eat anymore than the",
 'ok never going back to this place again to !',
 'no sign of the manager ! !',
 'it smelled like rotten pretentious . .',
 'i will never be back . .',
 'it just gets worse . .',
 'the food tasted awful . .',
 'i am not exaggerating madison !',
 'this smelled bad yourself .',
 'it tasted horrible neighborhood word']

In [23]:
print(packedGenOutput.shape)
print(targets.shape[0])


torch.Size([152, 9603])
152


In [24]:
targets

tensor([   14,  4569,     5,     5,   262,     7,     7,   302,    56,
           14,     7,    14,     5,     7,    15,    14,    11,   222,
         1818,   262,   205,   324,  3793,    98,   888,  1081,    51,
           57,    17,   177,  1081,   291,  2254,   168,    36,   674,
          288,    13,    14,   127,    22,    71,    98,   510,   291,
           28,   107,   125,    14,   449,   337,   265,    14,   169,
         2135,    53,     5,  2865,    48,   421,   308,  3925,     3,
            3,    11,     9,   134,    15,    11,    14,     6,    13,
          328,  3916,    53,     3,     3,     3,     2,     2,     5,
         1347,    13,    19,   167,    40,    75,    15,     3,     3,
            3,     2,     2,     2,   367,   106,     5,    83,    13,
           14,    31,    19,     2,     2,     2,   986,    15,   399,
           86,  1266,    11,   169,    89,    67,    19,     6,   127,
            6,   435,   934,     3,    87,  2410,   263,   194,   785,
      

In [25]:
sents = []
for index in range(packedGenOutput.shape[0]):
    tokensLogits = packedGenOutput[index]
    sent = []
    sent.append(model.vocabulary.id2word[tokensLogits.argmax()])
    sents.append(" ".join(sent))
sents  

['it',
 'was',
 'obvious',
 'it',
 'was',
 'the',
 'same',
 'damn',
 'one',
 'he',
 'brought',
 'the',
 'first',
 'time',
 '.',
 '.',
 'major',
 'day',
 'nothing',
 'open',
 ',',
 'heard',
 'about',
 'this',
 'place',
 'figured',
 'it',
 'ok',
 'ok',
 '.',
 '.',
 'the',
 'the',
 'host',
 'that',
 'walked',
 'us',
 'to',
 'the',
 'table',
 'and',
 'left',
 'without',
 'to',
 'word',
 'hello',
 '.',
 'the',
 'the',
 'last',
 'couple',
 'years',
 'this',
 'place',
 'has',
 'been',
 'going',
 'smoothly',
 'hill',
 'to',
 'detailing',
 'the',
 'the',
 'the',
 'last',
 'night',
 'however',
 'it',
 'was',
 'way',
 'to',
 'thick',
 'and',
 'tasteless',
 'power',
 '!',
 'the',
 'the',
 'the',
 'the',
 'i',
 'tried',
 'to',
 'eat',
 'it',
 'but',
 'it',
 'was',
 'disgusting',
 'appropriately',
 '!',
 'the',
 'the',
 'the',
 'i',
 'pushed',
 'it',
 'by',
 'and',
 'did',
 "n't",
 'eat',
 'anymore',
 'than',
 'the',
 'ok',
 'never',
 'going',
 'back',
 'to',
 'this',
 'place',
 'again',
 'no',
 'si

In [13]:
sents = []
for index in range(targets.shape[0]):
    word = targets[index]
    sent = []
    print(word)
    sent.append(model.vocabulary.id2word[word])
    sents.append(" ".join(sent))
sents 

tensor(14, device='cuda:0')
tensor(4569, device='cuda:0')
tensor(5, device='cuda:0')
tensor(5, device='cuda:0')
tensor(262, device='cuda:0')
tensor(7, device='cuda:0')
tensor(7, device='cuda:0')
tensor(302, device='cuda:0')
tensor(56, device='cuda:0')
tensor(14, device='cuda:0')
tensor(7, device='cuda:0')
tensor(14, device='cuda:0')
tensor(5, device='cuda:0')
tensor(7, device='cuda:0')
tensor(15, device='cuda:0')
tensor(14, device='cuda:0')
tensor(11, device='cuda:0')
tensor(222, device='cuda:0')
tensor(1818, device='cuda:0')
tensor(262, device='cuda:0')
tensor(205, device='cuda:0')
tensor(324, device='cuda:0')
tensor(3793, device='cuda:0')
tensor(98, device='cuda:0')
tensor(888, device='cuda:0')
tensor(1081, device='cuda:0')
tensor(51, device='cuda:0')
tensor(57, device='cuda:0')
tensor(17, device='cuda:0')
tensor(177, device='cuda:0')
tensor(1081, device='cuda:0')
tensor(291, device='cuda:0')
tensor(2254, device='cuda:0')
tensor(168, device='cuda:0')
tensor(36, device='cuda:0')
tenso

['it',
 'easter',
 'the',
 'the',
 'last',
 'i',
 'i',
 'ok',
 'no',
 'it',
 'i',
 'it',
 'the',
 'i',
 'this',
 'it',
 'was',
 'day',
 'host',
 'last',
 'night',
 'tried',
 'pushed',
 'never',
 'sign',
 'smelled',
 'will',
 'just',
 'food',
 'am',
 'smelled',
 'tasted',
 'obvious',
 'nothing',
 'that',
 'couple',
 'however',
 'to',
 'it',
 'going',
 'of',
 'like',
 'never',
 'gets',
 'tasted',
 'not',
 'bad',
 'horrible',
 'it',
 'open',
 'walked',
 'years',
 'it',
 'eat',
 'aside',
 'back',
 'the',
 'rotten',
 'be',
 'worse',
 'awful',
 'exaggerating',
 '<unk>',
 '<unk>',
 'was',
 ',',
 'us',
 'this',
 'was',
 'it',
 'and',
 'to',
 'manager',
 'urine',
 'back',
 '<unk>',
 '<unk>',
 '<unk>',
 '<eos>',
 '<eos>',
 'the',
 'heard',
 'to',
 'place',
 'way',
 'but',
 'did',
 'this',
 '<unk>',
 '<unk>',
 '<unk>',
 '<eos>',
 '<eos>',
 '<eos>',
 'same',
 'about',
 'the',
 'has',
 'to',
 'it',
 "n't",
 'place',
 '<eos>',
 '<eos>',
 '<eos>',
 'damn',
 'this',
 'table',
 'been',
 'thick',
 'was'

In [14]:
testSents

['ok never going back to this place again .\n',
 'easter day nothing open , heard about this place figured it would ok .\n',
 'the host that walked us to the table and left without a word .\n',
 'it just gets worse .\n',
 'the food tasted awful .\n',
 'no sign of the manager .\n',
 'the last couple years this place has been going down hill .\n',
 'last night however it was way to thick and tasteless .\n',
 'it smelled like rotten urine .\n',
 'i am not exaggerating .\n',
 'this smelled bad !\n',
 'it was obvious it was the same damn one he brought the first time .\n',
 'i tried to eat it but it was disgusting .\n',
 'it tasted horrible !\n',
 "i pushed it aside and did n't eat anymore .\n",
 'i will never be back .\n']

In [15]:
targets

tensor([   14,  4569,     5,     5,   262,     7,     7,   302,    56,
           14,     7,    14,     5,     7,    15,    14,    11,   222,
         1818,   262,   205,   324,  3793,    98,   888,  1081,    51,
           57,    17,   177,  1081,   291,  2254,   168,    36,   674,
          288,    13,    14,   127,    22,    71,    98,   510,   291,
           28,   107,   125,    14,   449,   337,   265,    14,   169,
         2135,    53,     5,  2865,    48,   421,   308,  3925,     3,
            3,    11,     9,   134,    15,    11,    14,     6,    13,
          328,  3916,    53,     3,     3,     3,     2,     2,     5,
         1347,    13,    19,   167,    40,    75,    15,     3,     3,
            3,     2,     2,     2,   367,   106,     5,    83,    13,
           14,    31,    19,     2,     2,     2,   986,    15,   399,
           86,  1266,    11,   169,    89,    67,    19,     6,   127,
            6,   435,   934,     3,    87,  2410,   263,   194,   785,
      

In [None]:
orig, tsf = decoder.rewriteBatch(testSents[:2], labels[:2])

In [None]:
any(torch.FloatTensor([1, 0]))

In [None]:
sent = 'the restaurant was nice but food tasted ugly'.split()
h = torch.zeros(700).to('cuda')
h = h.unsqueeze(0).unsqueeze(0)
for token in sent:
    emb = vocab([token])
    emb = emb.unsqueeze(1)
    out, h = model.generator(emb, h, pad=False)
    voc = model.hiddenToVocab(out)
    _, id = voc.max(2)
    h = h
    print(vocab.id2word[int(id)])

In [None]:
model.hiddenToVocab(out).max(2)

In [None]:
emb.size()