In [1]:
import torch
from torch.autograd import Variable
import random
from model import *
from util import *
from config import USE_CUDA
import sys
import os
from config import MAX_LENGTH, USE_CUDA, teacher_forcing_ratio, save_dir
from masked_cross_entropy import *
import itertools
import random
import math
from tqdm import tqdm
from load import SOS_token, EOS_token, PAD_token, UNK_token
from model import EncoderRNN, LuongAttnDecoderRNN, AttributeEncoder, Attn, AttributeAttn
import pickle
import logging
# Configure the root logger so that INFO-level (and more severe) records are emitted.
logging.basicConfig(level=logging.INFO)

 
class Sentence:
    """A partial hypothesis tracked during beam search.

    Attributes (set in ``__init__``):
        decoder_hidden: decoder hidden state attached to this hypothesis
            (stored as given; not interpreted by this class).
        last_idx: index of the most recently emitted token
            (``SOS_token`` for a hypothesis that has emitted nothing yet).
        sentence_idxes: list of token indexes emitted so far.
        sentence_scores: list of per-token scores, parallel to
            ``sentence_idxes``.
    """

    def __init__(self, decoder_hidden, last_idx=SOS_token, sentence_idxes=None, sentence_scores=None):
        """Create a hypothesis.

        Args:
            decoder_hidden: hidden state to associate with the hypothesis.
            last_idx: last emitted token index; defaults to ``SOS_token``.
            sentence_idxes: token indexes emitted so far (default: empty).
            sentence_scores: scores parallel to ``sentence_idxes``
                (default: empty).

        Raises:
            ValueError: if the two lists differ in length.
        """
        # ``None`` sentinels instead of ``[]`` defaults: a mutable default is
        # created once and would be shared by every default-constructed
        # instance.
        if sentence_idxes is None:
            sentence_idxes = []
        if sentence_scores is None:
            sentence_scores = []
        if len(sentence_idxes) != len(sentence_scores):
            raise ValueError("length of indexes and scores should be the same")
        self.decoder_hidden = decoder_hidden
        self.last_idx = last_idx
        self.sentence_idxes = sentence_idxes
        self.sentence_scores = sentence_scores

    def avgScore(self):
        """Return the arithmetic mean of the per-token scores.

        Raises:
            ValueError: if the hypothesis contains no token yet.
        """
        if len(self.sentence_scores) == 0:
            raise ValueError("Calculate average score of sentence, but got no word")
        return sum(self.sentence_scores) / len(self.sentence_scores)

    def addTopk(self, topi, topv, decoder_hidden, beam_size, voc):
        """Expand this hypothesis with the ``beam_size`` best next tokens.

        Args:
            topi: tensor of candidate token indexes; after ``squeeze(0)`` it is
                read as ``topi[0][i]`` for ``i in range(beam_size)``.
            topv: tensor of candidate scores with the same layout as ``topi``.
            decoder_hidden: hidden state attached to every extended hypothesis.
            beam_size: number of candidates to read.
            voc: vocabulary object exposing an ``idx2word`` mapping.

        Returns:
            ``(terminates, sentences)`` where ``terminates`` is a list of
            ``(word_list, avg_score)`` tuples for candidates that ended with
            ``EOS_token`` and ``sentences`` is a list of new ``Sentence``
            objects for the remaining candidates.

        Raises:
            ValueError: (from ``avgScore``) if ``EOS_token`` is a candidate
                while this hypothesis is still empty.
        """
        topi = topi.squeeze(0)
        topv = topv.squeeze(0)
        # NOTE(review): taking the log assumes ``topv`` holds probabilities;
        # confirm that the decoder output is normalised before it reaches here.
        topv = torch.log(topv)
        terminates, sentences = [], []
        for i in range(beam_size):
            # int()/float() are no-ops on plain Python numbers and turn 0-dim
            # tensors into hashable numbers, so the values can be used as
            # ``voc.idx2word`` keys and averaged regardless of torch version.
            idx = int(topi[0][i])
            if idx == EOS_token:
                # The EOS candidate's own score is not part of the average.
                finished_words = [voc.idx2word[token_idx] for token_idx in self.sentence_idxes] + ['<eos>']
                terminates.append((finished_words, self.avgScore()))  # tuple(word_list, score_float)
                continue
            idxes = self.sentence_idxes[:]  # copy, so sibling hypotheses do not share state
            scores = self.sentence_scores[:]  # copy, so sibling hypotheses do not share state
            idxes.append(idx)
            scores.append(float(topv[0][i]))
            sentences.append(Sentence(decoder_hidden, idx, idxes, scores))
        return terminates, sentences

    def toWordScore(self, voc):
        """Return ``(word_list, avg_score)`` for this hypothesis.

        ``EOS_token`` entries are rendered as ``'<eos>'`` and a trailing
        ``'<eos>'`` is appended when the hypothesis does not already end
        with ``EOS_token``.

        Raises:
            IndexError: if the hypothesis contains no token.
        """
        words = ['<eos>' if idx == EOS_token else voc.idx2word[idx]
                 for idx in self.sentence_idxes]
        if self.sentence_idxes[-1] != EOS_token:
            words.append('<eos>')
        return (words, self.avgScore())

def beam_decode(decoder, decoder_hidden, encoder_out1, encoder_out2, encoder_out3, voc, beam_size, max_length=MAX_LENGTH, encoder_out4=None):
    """Beam-search decode and return up to 15 best ``(word_list, avg_score)`` tuples.

    Args:
        decoder: callable invoked as
            ``decoder(input, hidden, encoder_out1, encoder_out2, encoder_out3, encoder_out4)``
            and returning ``(output, hidden, attn1, attn2, attn3, gate)``.
        decoder_hidden: initial decoder hidden state.
        encoder_out1, encoder_out2, encoder_out3: encoder outputs forwarded
            unchanged to ``decoder``.
        voc: vocabulary object exposing ``idx2word``.
        beam_size: number of hypotheses kept after every step.
        max_length: maximum number of decoding steps.
        encoder_out4: fourth encoder output forwarded to ``decoder``. When
            omitted, the module-level name ``encoder_out4`` is used, which is
            what this function relied on before the parameter existed.

    Returns:
        A list of at most 15 ``(word_list, avg_score)`` tuples sorted by
        descending average score.

    Raises:
        NameError: if ``encoder_out4`` is neither passed nor defined at module
            level.
    """
    if encoder_out4 is None:
        # Backward compatibility with callers that defined ``encoder_out4`` in
        # the notebook namespace instead of passing it in.
        try:
            encoder_out4 = globals()['encoder_out4']
        except KeyError:
            raise NameError("name 'encoder_out4' is not defined; pass it via the encoder_out4 argument")

    terminal_sentences = []
    prev_top_sentences = [Sentence(decoder_hidden)]
    for _ in range(max_length):
        if not prev_top_sentences:
            # Every hypothesis has terminated; further steps would do nothing.
            break
        next_top_sentences = []
        for sentence in prev_top_sentences:
            decoder_input = Variable(torch.LongTensor([[sentence.last_idx]]))
            decoder_input = decoder_input.cuda() if USE_CUDA else decoder_input

            # Each hypothesis continues from its OWN hidden state. Reusing a
            # single loop-carried state would feed one hypothesis the state
            # produced by whichever hypothesis was decoded just before it.
            decoder_output, next_hidden, attn1, attn2, attn3, gate = decoder(
                decoder_input, sentence.decoder_hidden,
                encoder_out1, encoder_out2, encoder_out3, encoder_out4)

            topv, topi = decoder_output.data.topk(beam_size)
            term, top = sentence.addTopk(topi, topv, next_hidden, beam_size, voc)
            terminal_sentences.extend(term)
            next_top_sentences.extend(top)

        # Keep only the best ``beam_size`` live hypotheses for the next step.
        next_top_sentences.sort(key=lambda s: s.avgScore(), reverse=True)
        prev_top_sentences = next_top_sentences[:beam_size]

    # Hypotheses still alive after the last step count as finished.
    terminal_sentences += [sentence.toWordScore(voc) for sentence in prev_top_sentences]
    terminal_sentences.sort(key=lambda x: x[1], reverse=True)

    n = min(len(terminal_sentences), 15)
    return terminal_sentences[:n]

In [95]:
# Run settings for this session: network shape, checkpoint location,
# beam width and the corpus name handed to the data loader.
n_layers = 2
hidden_size = 512
reverse = False
modelFile = "/data2/jianmo/amazon/Electronics/expansion/model/2_512/9_lexicon_title_expansion_model.tar"
beam_size = 1
corpus = "Electronics"

In [85]:
# loadPrepareData is not defined in this notebook (it arrives through a star import).
# NOTE(review): judging by the target names it returns the vocabulary plus the
# training / validation / test pair lists for `corpus` - confirm against its definition.
voc, pairs, valid_pairs, test_pairs = loadPrepareData(corpus) 

Start loading training data ...


In [96]:
print('Building encoder and decoder ...')
# aspect
# NOTE(review): pickle.load can execute arbitrary code; only load files that
# come from a trusted source.
with open(os.path.join(save_dir, '15_aspect.pkl'), 'rb') as fp:
    aspect_ids = pickle.load(fp)
aspect_num = 15 # 15 | 20 main aspects and each of them has 100 words
aspect_ids = Variable(torch.LongTensor(aspect_ids), requires_grad=False) # convert list into torch Variable, used to index word embedding
# attribute embeddings
attr_size = 64 # width of each attribute embedding
attr_num = 2 # number of attributes; the two embeddings below are sized by user and item counts

with open(os.path.join(save_dir, 'user_item.pkl'), 'rb') as fp:
    user_dict, item_dict = pickle.load(fp)
num_user = len(user_dict)
num_item = len(item_dict)
# One embedding table per attribute (user first, then item).
attr_embeddings = [nn.Embedding(num_user, attr_size), nn.Embedding(num_item, attr_size)]
aspect_embeddings = [nn.Embedding(num_user, aspect_num), nn.Embedding(num_item, aspect_num)]
if USE_CUDA:
    # nn.Module.cuda() moves the parameters in place, so the modules held in
    # the two lists are updated without any re-assignment.
    for embedding_module in attr_embeddings + aspect_embeddings:
        embedding_module.cuda()
    aspect_ids = aspect_ids.cuda()

encoder1 = AttributeEncoder(attr_size, attr_num, hidden_size, attr_embeddings, n_layers)
encoder2 = AttributeEncoder(aspect_num, attr_num, hidden_size, aspect_embeddings, n_layers)
embedding = nn.Embedding(voc.n_words, hidden_size)  # shared by encoder3 and the decoder
encoder3 = EncoderRNN(voc.n_words, hidden_size, embedding, n_layers)

attn_model = 'dot'
decoder = LuongAttnDecoderRNN(attn_model, embedding, hidden_size, attr_size, voc.n_words, aspect_ids, n_layers)
# Without CUDA, remap every storage to the CPU while loading; otherwise a
# checkpoint that was saved from GPU tensors cannot be deserialised.
checkpoint = torch.load(modelFile, map_location=None if USE_CUDA else (lambda storage, loc: storage))
encoder1.load_state_dict(checkpoint['en1'])
encoder2.load_state_dict(checkpoint['en2'])
encoder3.load_state_dict(checkpoint['en3'])
decoder.load_state_dict(checkpoint['de'])

# use cuda
if USE_CUDA:
    encoder1 = encoder1.cuda()
    encoder2 = encoder2.cuda()
    encoder3 = encoder3.cuda()
    decoder = decoder.cuda()

# Inference mode: only affects layers such as dropout and batch norm.
for network in (encoder1, encoder2, encoder3, decoder):
    network.eval()

Building encoder and decoder ...


In [97]:
# Keep every test pair whose first context field (pair[0][0]) equals 70451.
cands = [pair for pair in test_pairs if pair[0][0] == 70451]

In [98]:
# Show the first selected pair: a context list followed by the target token list.
print(cands[0])

[[70451, 45320, ['easy', 'to', 'use', 'and', 'nice', 'standard', 'apps', '.'], ['samsung', 'galaxy', 'tab', '2', '(', '10.1-inch', ',', 'wi-fi', ')', '2012', 'model']], ['<str>', 'the', 'display', 'is', 'beautiful', 'and', 'the', 'tablet', 'is', 'very', 'easy', 'to', 'use', '.', 'it', 'comes', 'with', 'some', 'really', 'nice', 'standard', 'apps', '.', '<eos>']]


In [110]:
# Context of the first candidate, plus the decoding settings used below.
pair = cands[0][0]
beam_size = 1
max_length = MAX_LENGTH

In [111]:
# Attribute input: a 1 x 2 LongTensor holding the ids of this example.
sentence = pair[:2] # (user_id, item_id)
attr_input = Variable(torch.LongTensor([sentence]), volatile=True)
attr_input = attr_input.cuda() if USE_CUDA else attr_input

sentence = pair[2] # summary
indexes_batch = [indexesFromSentence(voc, sentence)] #[1, seq_len]
summary_input_lengths = [len(indexes) for indexes in indexes_batch]
# transpose(0, 1) turns the [1, seq_len] batch into [seq_len, 1].
summary_input = Variable(torch.LongTensor(indexes_batch), volatile=True).transpose(0, 1)
# On the CPU path the tensor is kept as is (the fallback used to name an
# undefined variable, which raised NameError whenever USE_CUDA was False).
summary_input = summary_input.cuda() if USE_CUDA else summary_input

sentence = pair[3] # title
indexes_batch = [indexesFromSentence(voc, sentence)] #[1, seq_len]
title_input_lengths = [len(indexes) for indexes in indexes_batch]
title_input = Variable(torch.LongTensor(indexes_batch), volatile=True).transpose(0, 1)
# Same CPU fallback as for the summary input above.
title_input = title_input.cuda() if USE_CUDA else title_input

# encoder3 receives both the summary and the title.
# NOTE(review): encoder_out1 / encoder_out2 are presumably the per-token outputs
# for the summary and the title respectively - confirm against EncoderRNN.
encoder_out1, encoder_out2, encoder_hidden = encoder3(summary_input, summary_input_lengths, title_input, title_input_lengths, None) # summary + title encoder
encoder_out3, encoder1_hidden = encoder1(attr_input) # attribute encoder
encoder_out4, encoder2_hidden = encoder2(attr_input) # aspect encoder

# Combine the first decoder.n_layers entries of the three encoders' hidden states.
# NOTE(review): assumes these are tensors so that `+` is an element-wise sum - confirm.
decoder_hidden = encoder_hidden[:decoder.n_layers] + encoder1_hidden[:decoder.n_layers] + encoder2_hidden[:decoder.n_layers]

In [112]:
# First decoder input: a 1 x 1 LongTensor holding the start-of-sentence token.
decoder_input = Variable(torch.LongTensor([[SOS_token]]))
if USE_CUDA:
    decoder_input = decoder_input.cuda()

# Words produced by the decoding loop are appended here.
decoded_words = list()

In [113]:
# Per-step traces: the three attention outputs, the gate output (attn4s)
# and the raw decoder output of every decoding step.
attn1s, attn2s, attn3s, attn4s, outputs = [], [], [], [], []

In [114]:
# Greedy decoding: at every step feed the best token back into the decoder
# until EOS is produced or max_length steps have run.
for di in range(max_length):
    decoder_output, decoder_hidden, attn1, attn2, attn3, gate = decoder(decoder_input, decoder_hidden, encoder_out1, encoder_out2, encoder_out3, encoder_out4)

    # Keep every step's attention outputs, gate and raw scores for inspection.
    attn1s.append(attn1)
    attn2s.append(attn2)
    attn3s.append(attn3)
    attn4s.append(gate)
    outputs.append(decoder_output.data)
    # The three best candidates are extracted, but only the best one drives decoding.
    topv, topi = decoder_output.data.topk(3)
    topi = topi.squeeze(0)
    topv = topv.squeeze(0)
    # int() is a no-op on a plain Python int and converts a 0-dim tensor, so the
    # value is always usable as a voc.idx2word key and comparable to EOS_token.
    ni = int(topi[0][0])
    if ni == EOS_token:
        decoded_words.append('<eos>')
        break
    decoded_words.append(voc.idx2word[ni])

    # The chosen token becomes the next decoder input (1 x 1 LongTensor).
    decoder_input = Variable(torch.LongTensor([[ni]]))
    decoder_input = decoder_input.cuda() if USE_CUDA else decoder_input

In [115]:
# Show the greedily decoded word sequence.
print(decoded_words)

['i', 'love', 'this', 'tablet', '.', 'it', 'is', 'easy', 'to', 'use', 'and', 'the', 'screen', 'is', 'very', 'responsive', '.', 'i', 'love', 'the', 'fact', 'that', 'it', 'has', 'a', 'micro', 'sd', 'slot', '.', 'i', 'have', 'no', 'complaints', '.', '<eos>']


In [125]:
# Three highest scores and their vocabulary indexes in the decoder output of step 12
# (0-based; decoded_words[12] is 'screen' in the run shown above).
outputs[12].topk(3)

(
 (0 ,.,.) = 
   9.8674  9.0436  8.6628
 [torch.cuda.FloatTensor of size 1x1x3 (GPU 0)], 
 (0 ,.,.) = 
    131   959  1032
 [torch.cuda.LongTensor of size 1x1x3 (GPU 0)])

In [127]:
# Look up the word for index 131, the top-scoring index at step 12.
voc.idx2word[131]

'screen'

In [128]:
# Gate output returned by the decoder at step 12 (attn4s collects `gate`).
attn4s[12]

Variable containing:
(0 ,.,.) = 

Columns 0 to 8 
  -0.9999  0.9786  0.9999 -0.9976  0.9996  0.1774 -0.9981  0.9965 -0.9996

Columns 9 to 14 
   0.9594  0.9985 -0.9996  1.0000 -1.0000  0.9964
[torch.cuda.FloatTensor of size 1x1x15 (GPU 0)]

In [129]:
# First attention output of the decoder at step 12.
# NOTE(review): its width (8) equals the summary length, so this is presumably
# the attention over the summary tokens - confirm against the decoder.
attn1s[12]

Variable containing:
(0 ,.,.) = 
  0.2056  0.1267  0.1529  0.0957  0.1474  0.1326  0.0787  0.0605
[torch.cuda.FloatTensor of size 1x1x8 (GPU 0)]

In [130]:
# Second attention output of the decoder at step 12.
# NOTE(review): its width (11) equals the title length, so this is presumably
# the attention over the title tokens - confirm against the decoder.
attn2s[12]

Variable containing:
(0 ,.,.) = 

Columns 0 to 8 
   0.1823  0.1599  0.1670  0.1297  0.0855  0.0874  0.0472  0.0550  0.0453

Columns 9 to 10 
   0.0258  0.0148
[torch.cuda.FloatTensor of size 1x1x11 (GPU 0)]

In [131]:
# Third attention output of the decoder at step 12.
# NOTE(review): its width (2) equals attr_num, so this is presumably the
# attention over the two attributes - confirm against the decoder.
attn3s[12]

Variable containing:
(0 ,.,.) = 
  0.4686  0.5314
[torch.cuda.FloatTensor of size 1x1x2 (GPU 0)]