## Importing Libraries

In [13]:
from __future__ import unicode_literals, print_function, division

import gc
import random
import re
import string
import time
import unicodedata
from io import open

import _pickle as pickle
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as torch_func
from torch import optim
from torch.autograd import Variable

## Importing articles and vector dictionary

In [3]:
# Load the article dump; later cells index it with the keys 'desc' and 'head'.
# NOTE(review): pickle.load can execute arbitrary code -- only load files you trust.
with open('final_dump_700k.p', 'rb') as fp:
    articles= pickle.load(fp)

# Load the embedding bundle; the next cell reads its elements 0, 1 and 2.
with open('google_dict_700k.p', 'rb') as fp:
    embed= pickle.load(fp)


In [6]:
# Split the embedding bundle into the pretrained matrix (copied into the
# nn.Embedding tables below), the index -> word lookup and the word -> index lookup.
wordvec, idx2word, word2idx = embed[0], embed[1], embed[2]

## Creating Pairs

In [7]:
# Pair each article description (model input) with its headline (target),
# matching the two by position.
pairs = [
    (articles['desc'][idx], articles['head'][idx])
    for idx in range(len(articles['desc']))
]

## Defining Encoder Parameters and Forward Pass

In [8]:
class encoder(nn.Module):
    """GRU encoder whose embedding table is initialised from the global `wordvec`.

    Args:
        input_size: number of rows of the embedding table (vocabulary size).
        hidden_size: embedding width; also the GRU input and hidden size.
        LSTM_layers: how many times the single GRU is applied to each input
            word in `forward`. Despite the name the recurrent unit is a GRU,
            and the repeated applications share one set of weights.
    """

    def __init__(self, input_size, hidden_size, LSTM_layers=4):
        super(encoder, self).__init__()
        self.LSTM_layers = LSTM_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        # `wordvec` is the pretrained numpy matrix loaded earlier in the notebook;
        # it must be compatible with the table's (input_size, hidden_size) shape.
        self.embedding.weight.data.copy_(torch.from_numpy(wordvec))
        # Keep the pretrained embeddings frozen during training.
        self.embedding.weight.requires_grad = False
        self.gru = nn.GRU(hidden_size, hidden_size)

    def forward(self, input_word, hidden):
        """Encode one word.

        Args:
            input_word: index tensor for a single word; its embedding is
                reshaped to (1, 1, -1) before entering the GRU.
            hidden: GRU hidden state, e.g. the value from `initHidden`.

        Returns:
            (output, hidden) as produced by the last GRU application.
        """
        output = self.embedding(input_word).view(1, 1, -1)
        for _ in range(self.LSTM_layers):
            output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size).

        The tensor is placed on the GPU only when this module's weights live
        there, so the encoder also works on CPU-only machines.
        """
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if self.embedding.weight.is_cuda:
            result = result.cuda()
        # The original built the tensor but never returned it.
        return result

## Defining Decoder Parameters and Forward Pass

In [11]:
class decoder(nn.Module):
    """Attention GRU decoder whose embedding table is initialised from `wordvec`.

    Args:
        hidden_size: embedding width; also the GRU input and hidden size.
        output_size: vocabulary size (embedding rows and output logits).
        LSTM_layers: how many times the single GRU is applied per decoding
            step. Despite the name the recurrent unit is a GRU, and the
            repeated applications share one set of weights.
        dropout_p: dropout probability applied to the embedded input.
        max_length: number of encoder positions the attention layer scores;
            `encoder_outputs` passed to `forward` must have this many rows.
    """

    def __init__(self, hidden_size, output_size, LSTM_layers=4, dropout_p=0.1, max_length=50):
        super(decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.LSTM_layers = LSTM_layers
        self.dropout_p = dropout_p
        self.max_length = max_length
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        # `wordvec` is the pretrained numpy matrix loaded earlier in the notebook;
        # it must be compatible with the table's (output_size, hidden_size) shape.
        self.embedding.weight.data.copy_(torch.from_numpy(wordvec))
        # Keep the pretrained embeddings frozen during training.
        self.embedding.weight.requires_grad = False
        self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_output, encoder_outputs):
        """Run one decoding step.

        Args:
            input: index tensor of the previous token; its embedding is
                reshaped to (1, 1, -1).
            hidden: current GRU hidden state.
            encoder_output: accepted for interface compatibility; not used.
            encoder_outputs: matrix with `max_length` rows of encoder outputs
                that the attention weights are applied to.

        Returns:
            (log-probabilities over the vocabulary, new hidden state,
            attention weights).
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Score every encoder position from the current input and hidden state.
        attn_scores = self.attn(torch.cat((embedded[0], hidden[0]), 1))
        attn_weights = torch_func.softmax(attn_scores, dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        for _ in range(self.LSTM_layers):
            output = torch_func.relu(output)
            output, hidden = self.gru(output, hidden)

        output = torch_func.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size).

        The tensor is placed on the GPU only when this module's weights live
        there, so the decoder also works on CPU-only machines.
        """
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if self.embedding.weight.is_cuda:
            result = result.cuda()
        # The original built the tensor but never returned it.
        return result

## Helper functions to convert sentences into word-index tensors

In [47]:
# Index of the end-of-sequence marker.
EOS = 1
# Other cells in this notebook refer to the marker as `EOS_token`; bind that
# name to the same value so those references resolve on a fresh kernel.
EOS_token = EOS
def sentence_vecs(sentence):
    """Map every word of `sentence` to its index in `word2idx`.

    A word is looked up exactly as given first; when it is absent, its
    lower-cased form is looked up instead (a KeyError propagates if that is
    missing as well).

    Returns:
        A new list with one index per element of `sentence`, in order.
    """
    return [
        word2idx[token] if token in word2idx else word2idx[token.lower()]
        for token in sentence
    ]


def inputoutput_pair(pair):
    """Convert a (description, headline) pair into two index column tensors.

    Each sentence is mapped to indices with `sentence_vecs`, terminated with
    the `EOS` index and reshaped to a (length, 1) LongTensor.

    Args:
        pair: two-element sequence; pair[0] is the input sentence and pair[1]
            the target sentence, each an iterable of words.

    Returns:
        (input, output) tensors, on the GPU when CUDA is available.
    """
    def as_column(sentence):
        indexes = sentence_vecs(sentence)
        # The constant is defined as `EOS`; `EOS_token` was never defined here.
        indexes.append(EOS)
        column = Variable(torch.LongTensor(indexes).view(-1, 1))
        # Only move to the GPU when there is one, instead of failing on CPU.
        return column.cuda() if torch.cuda.is_available() else column

    return as_column(pair[0]), as_column(pair[1])

## Defining One train step

In [48]:
def train(input_variable, target_variable, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=50):
    """Run one optimisation step on a single (input, target) pair.

    Args:
        input_variable: (input_length, 1) index tensor of the source sentence.
        target_variable: (target_length, 1) index tensor of the target sentence.
        encoder, decoder: the models; `encoder` must expose `hidden_size` and
            `initHidden()`.
        encoder_optimizer, decoder_optimizer: optimisers stepped once each.
        criterion: loss applied to each decoder output and target index.
        max_length: number of rows of the encoder-output buffer; inputs longer
            than this raise an IndexError when the buffer is filled.

    Returns:
        The summed loss as a Python float divided by `target_length`.

    NOTE(review): `SOS_token` and `teacher_forcing_ratio` are read from module
    scope but no cell in this notebook defines them -- define them before
    calling this function.
    """
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_variable.size()[0]
    target_length = target_variable.size()[0]
    # Keep every helper tensor on the same device as the input instead of
    # mixing an unconditional .cuda() with a `use_cuda` flag.
    on_gpu = input_variable.is_cuda

    encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
    if on_gpu:
        encoder_outputs = encoder_outputs.cuda()

    loss = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_variable[ei], encoder_hidden)
        encoder_outputs[ei] = encoder_output[0][0]

    decoder_input = Variable(torch.LongTensor([[SOS_token]]))
    if on_gpu:
        decoder_input = decoder_input.cuda()

    # The decoder starts from the encoder's final hidden state.
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_output, encoder_outputs)
        loss += criterion(decoder_output, target_variable[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the ground-truth token as the next input.
            decoder_input = target_variable[di]
            continue

        # Otherwise feed back the model's own most likely token.
        topv, topi = decoder_output.data.topk(1)
        ni = int(topi[0][0])
        decoder_input = Variable(torch.LongTensor([[ni]]))
        if on_gpu:
            decoder_input = decoder_input.cuda()
        if ni == EOS:
            break

    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()

    # `loss.data[0]` fails on 0-dim tensors in current PyTorch; item() is the
    # supported way to read a scalar loss.
    return loss.item() / target_length


## Optimization

In [49]:

def _format_minutes(seconds):
    """Format a duration in seconds as '<minutes>m <seconds>s'."""
    minutes, secs = divmod(int(seconds), 60)
    return '%dm %ds' % (minutes, secs)


def _time_since(since, fraction_done):
    """Return 'elapsed (- estimated remaining)' for a run started at `since`."""
    elapsed = time.time() - since
    remaining = elapsed / fraction_done - elapsed
    return '%s (- %s)' % (_format_minutes(elapsed), _format_minutes(remaining))


def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01):
    """Train `encoder`/`decoder` for `n_iters` randomly sampled pairs.

    Every `print_every` iterations this prints the average loss, saves both
    models (whole module and state dict) and writes sample predictions from
    `evaluateRandomly` to 'output_adam_present_out_<iter>.txt'.

    Args:
        encoder, decoder: models to optimise; only parameters with
            `requires_grad` set are handed to the optimisers.
        n_iters: number of training steps (one sampled pair per step).
        print_every: reporting / checkpoint interval in steps.
        plot_every: accepted for interface compatibility; currently unused.
        learning_rate: SGD learning rate for both optimisers.

    Note: the checkpoint and output file names contain "adam", but the
    optimiser used here is SGD.
    """
    start = time.time()
    print_loss_total = 0  # Reset every print_every

    encoder_parameters = [p for p in encoder.parameters() if p.requires_grad]
    decoder_parameters = [p for p in decoder.parameters() if p.requires_grad]
    encoder_optimizer = optim.SGD(encoder_parameters, lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder_parameters, lr=learning_rate)
    # `inputoutput_pair` is this notebook's pair-to-tensor helper; the
    # previously referenced `variablesFromPair` does not exist here.
    training_pairs = [inputoutput_pair(random.choice(pairs))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step in range(1, n_iters + 1):
        print(step)
        input_variable, target_variable = training_pairs[step - 1]

        loss = train(input_variable, target_variable, encoder,
                     decoder, encoder_optimizer, decoder_optimizer, criterion)
        print_loss_total += loss

        if step % print_every == 0:
            print_loss_avg = print_loss_total / print_every
            print_loss_total = 0
            print('%s (%d %d%%) %.4f' % (_time_since(start, step / n_iters),
                                         step, step / n_iters * 100, print_loss_avg))
            torch.save(encoder, 'modelencode_adam.pkl')
            torch.save(encoder.state_dict(), 'modelparam_encode_adam.pkl')
            torch.save(decoder, 'modeldecode_adam.pkl')
            torch.save(decoder.state_dict(), 'modelparam_decode_adam.pkl')
            # Evaluate the models being trained, not the notebook-level globals.
            samples = evaluateRandomly(encoder, decoder)
            filename = 'output_adam_present_out_' + str(step) + '.txt'
            # `with` guarantees the file is closed even if a write fails.
            with open(filename, 'w', encoding="utf-8") as thefile:
                for item in samples:
                    thefile.write("%s\n" % item)

In [50]:

def evaluate(encoder, decoder, sentence, max_length=50):
    """Greedily decode a headline for `sentence`.

    Args:
        encoder, decoder: trained models; `encoder` must expose `hidden_size`
            and `initHidden()`.
        sentence: iterable of words, converted with `sentence_vecs` and
            terminated with the `EOS` index (as done for training inputs).
        max_length: maximum number of decoding steps and number of rows of
            the encoder-output buffer; longer inputs raise an IndexError.

    Returns:
        (decoded_words, attentions): the decoded words (ending in '<EOS>'
        when the end marker was predicted) and one attention row per
        decoding step.

    NOTE(review): `SOS_token` is read from module scope but no cell in this
    notebook defines it -- define it before calling this function.
    """
    # Keep helper tensors on the same device as the encoder's weights.
    on_gpu = any(p.is_cuda for p in encoder.parameters())

    input_indexes = sentence_vecs(sentence)
    input_indexes.append(EOS)
    input_variable = Variable(torch.LongTensor(input_indexes).view(-1, 1))
    if on_gpu:
        input_variable = input_variable.cuda()
    input_length = input_variable.size()[0]

    # Dropout must be inactive while predicting; remember the current modes so
    # training can continue unchanged after a mid-training evaluation.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        encoder_hidden = encoder.initHidden()

        encoder_outputs = Variable(torch.zeros(max_length, encoder.hidden_size))
        if on_gpu:
            encoder_outputs = encoder_outputs.cuda()

        for ei in range(input_length):
            encoder_output, encoder_hidden = encoder(input_variable[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] = encoder_output[0][0]

        decoder_input = Variable(torch.LongTensor([[SOS_token]]))  # SOS
        if on_gpu:
            decoder_input = decoder_input.cuda()

        # The decoder starts from the encoder's final hidden state.
        decoder_hidden = encoder_hidden

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            decoder_output, decoder_hidden, decoder_attention = decoder(
                decoder_input, decoder_hidden, encoder_output, encoder_outputs)
            decoder_attentions[di] = decoder_attention.data
            topv, topi = decoder_output.data.topk(1)
            # Plain int so the comparison and the idx2word lookup behave the
            # same whether topk yields Python numbers or 0-dim tensors.
            ni = int(topi[0][0])
            if ni == EOS:
                decoded_words.append('<EOS>')
                break
            decoded_words.append(idx2word[ni])

            decoder_input = Variable(torch.LongTensor([[ni]]))
            if on_gpu:
                decoder_input = decoder_input.cuda()
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return decoded_words, decoder_attentions[:di + 1]

In [None]:

def evaluateRandomly(encoder, decoder, n=10):
    """Decode `n` randomly chosen pairs and return a formatted report per pair.

    Each report has three newline-separated lines: '>' followed by the input
    sentence, '=' followed by the reference headline and '<' followed by the
    model's output words joined with spaces.

    Args:
        encoder, decoder: models passed through to `evaluate`.
        n: number of pairs to sample (with replacement) from `pairs`.

    Returns:
        List of `n` report strings.
    """
    reports = []
    for _ in range(n):
        pair = random.choice(pairs)
        output_words, attentions = evaluate(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        # '\n' (not the literal '/n') so each part lands on its own line.
        reports.append('>' + str(pair[0]) + '\n' + '=' + str(pair[1]) + '\n' + '<' + str(output_sentence))
    return reports

   


# In[25]:


hidden_size = 300

# `use_cuda` is read by the training/evaluation cells but was never defined in
# this notebook; use the GPU whenever one is available.
# NOTE(review): train()/evaluate() also read module-level `SOS_token` and
# `teacher_forcing_ratio`, which no cell here defines -- define them before
# running the training cell.
use_cuda = torch.cuda.is_available()

# The model classes defined above are named `encoder` and `decoder`;
# `EncoderRNN` / `AttnDecoderRNN` do not exist in this notebook.
encoder1 = encoder(len(word2idx), hidden_size)
# The third positional argument is LSTM_layers. The decoder already uses a
# dropout probability of 0.1, so that value is not passed here.
attn_decoder1 = decoder(hidden_size, len(word2idx), 1)

if use_cuda:
    encoder1 = encoder1.cuda()
    attn_decoder1 = attn_decoder1.cuda()



# trainIters(encoder1, attn_decoder1, 1, print_every=1)


# In[6]:


# One training step per article in the dump, reporting every 1000 steps.
trainIters(
    encoder1,
    attn_decoder1,
    n_iters=len(articles['desc']),
    print_every=1000,
)


# In[ ]:


# Show sample predictions from the trained models; as the last expression of
# the cell, the returned list of report strings is displayed.
evaluateRandomly(encoder1, attn_decoder1)
