## Importing Libraries

In [13]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import numpy as np
import _pickle as pickle
import gc
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as torch_func

## Importing articles and vector dictionary

In [3]:
# Load the preprocessed articles and the embedding bundle from disk.
# NOTE: pickle can execute arbitrary code while loading -- only point these
# paths at files you produced yourself or otherwise trust.
with open('final_dump_700k.p', 'rb') as articles_file:
    articles = pickle.load(articles_file)

with open('google_dict_700k.p', 'rb') as embed_file:
    embed = pickle.load(embed_file)


In [6]:
# The embedding bundle is read positionally:
#   [0] -> matrix copied into the nn.Embedding tables further down,
#   [1] -> index-to-word lookup used when decoding predictions,
#   [2] -> word-to-index lookup used when encoding sentences.
wordvec, idx2word, word2idx = embed[0], embed[1], embed[2]

## Creating Pairs

In [7]:
# One (description, headline) tuple per article, matched by position.
pairs = [
    (articles['desc'][idx], articles['head'][idx])
    for idx in range(len(articles['desc']))
]

## Defining Encoder Parameters and Forward Pass

In [8]:
class encoder(nn.Module):
    """GRU encoder on top of frozen, pre-trained word embeddings.

    The encoder consumes one token index per call and returns the GRU output
    together with the updated hidden state.

    Args:
        input_size: number of rows of the embedding table (vocabulary size).
        hidden_size: embedding width; also used as the GRU hidden width.
        LSTM_layers: how many times the single GRU is applied per token.
            The name is historical -- the recurrent unit is a GRU and the
            same weights are reused on every pass.
    """

    def __init__(self, input_size, hidden_size, LSTM_layers=4):
        super(encoder, self).__init__()
        self.LSTM_layers = LSTM_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        # `wordvec` is the notebook-level numpy matrix of pre-trained vectors;
        # it has to match the (input_size, hidden_size) embedding table.
        self.embedding.weight.data.copy_(torch.from_numpy(wordvec))
        # Keep the pre-trained vectors fixed during training.
        self.embedding.weight.requires_grad = False
        self.gru = nn.GRU(hidden_size, hidden_size)

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size).

        The state is created on the same device as the module's weights, so
        the encoder works both after `.cuda()` and on a CPU-only machine.
        """
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if self.embedding.weight.is_cuda:
            result = result.cuda()
        return result

    def forward(self, input_word, hidden):
        """Encode a single token.

        Args:
            input_word: LongTensor holding one token index.
            hidden: previous hidden state, shape (1, 1, hidden_size).

        Returns:
            (output, hidden), both of shape (1, 1, hidden_size).
        """
        output = self.embedding(input_word).view(1, 1, -1)
        # Re-apply the same GRU LSTM_layers times, threading the hidden state.
        for _ in range(self.LSTM_layers):
            output, hidden = self.gru(output, hidden)
        return output, hidden

## Defining Decoder Parameters and Forward Pass

In [68]:
class decoder(nn.Module):
    """Attention GRU decoder on top of frozen, pre-trained word embeddings.

    At every step the decoder scores the (at most 50) stored encoder outputs
    from the current input embedding and hidden state, mixes the attended
    context with the embedding, runs the GRU and emits log-probabilities over
    the vocabulary.

    Args:
        output_size: vocabulary size (rows of the embedding table and width
            of the output layer).
        hidden_size: embedding width; also used as the GRU hidden width.
        LSTM_layers: how many times the single GRU is applied per step.
            The name is historical -- the recurrent unit is a GRU and the
            same weights are reused on every pass.
    """

    def __init__(self, output_size, hidden_size, LSTM_layers=4):
        super(decoder, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.LSTM_layers = LSTM_layers
        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        # `wordvec` is the notebook-level numpy matrix of pre-trained vectors;
        # it has to match the (output_size, hidden_size) embedding table.
        self.embedding.weight.data.copy_(torch.from_numpy(wordvec))
        # Keep the pre-trained vectors fixed during training.
        self.embedding.weight.requires_grad = False
        # One attention score per encoder position; max input/output length is 50.
        self.local_attn = nn.Linear(self.hidden_size * 2, 50)
        self.global_attn = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(0.1)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def initHidden(self):
        """Return an all-zero hidden state of shape (1, 1, hidden_size).

        The state is created on the same device as the module's weights, so
        the decoder works both after `.cuda()` and on a CPU-only machine.
        """
        result = Variable(torch.zeros(1, 1, self.hidden_size))
        if self.embedding.weight.is_cuda:
            result = result.cuda()
        return result

    def forward(self, input, hidden, encoder_output, encoder_outputs):
        """Decode a single step.

        Args:
            input: LongTensor holding the previous token index.
            hidden: previous hidden state, shape (1, 1, hidden_size).
            encoder_output: accepted for interface compatibility; not used.
            encoder_outputs: stored encoder outputs, shape (50, hidden_size).

        Returns:
            (log_probs, hidden, weights) where log_probs has shape
            (1, output_size), hidden has shape (1, 1, hidden_size) and
            weights (the attention distribution) has shape (1, 50).
        """
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        # Attention distribution over encoder positions (normalised along dim 1).
        attn_scores = self.local_attn(torch.cat((embedded[0], hidden[0]), 1))
        weights = torch_func.softmax(attn_scores, dim=1)
        # (1, 1, 50) x (1, 50, hidden) -> (1, 1, hidden) context vector.
        attn = torch.bmm(weights.unsqueeze(0), encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn[0]), 1)
        output = self.global_attn(output).unsqueeze(0)

        # Re-apply the same GRU LSTM_layers times, threading the hidden state.
        for _ in range(self.LSTM_layers):
            output = torch_func.relu(output)
            output, hidden = self.gru(output, hidden)

        output = torch_func.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, weights

## Helper functions to create sentence vecs from word vecs

In [71]:

def sentence_vecs(sentence):
    """Translate a sequence of words into their vocabulary indices.

    Each word is looked up in `word2idx` as written; when that exact form is
    missing, its lower-cased form is looked up instead. A word missing in
    both forms raises KeyError.
    """
    return [
        word2idx[token] if token in word2idx else word2idx[token.lower()]
        for token in sentence
    ]


def inputoutput_pair(pair):
    """Turn a (description, headline) pair into two CUDA index columns.

    Both sequences are converted with `sentence_vecs`, get index 1 appended
    (the index that makes `step` and `predict` stop decoding), and are
    returned as LongTensor Variables of shape (length + 1, 1) on the GPU.
    """
    def as_index_column(words):
        indexes = sentence_vecs(words)
        indexes.append(1)
        return Variable(torch.LongTensor(indexes).view(-1, 1)).cuda()

    desc = as_index_column(pair[0])
    head = as_index_column(pair[1])
    return desc, head

## Defining One train step

In [48]:
def step(desc_senvec, head_senvec, encoder, decoder, encode_optim, decode_optim, loss_criteria):
    """Run one optimisation step on a single (description, headline) pair.

    The description is fed token by token through the encoder, the decoder is
    then unrolled over the headline, the summed loss is back-propagated and
    both optimisers take one step.

    Args:
        desc_senvec: LongTensor of shape (desc_length, 1) with input indices.
            At most 50 rows are supported; longer inputs overflow the
            attention memory and raise IndexError.
        head_senvec: LongTensor of shape (head_length, 1) with target indices.
        encoder, decoder: the two models (see the classes defined above).
        encode_optim, decode_optim: their optimisers.
        loss_criteria: loss applied to (decoder log-probs, target index).

    Returns:
        The summed loss divided by head_length, as a Python float. When the
        free-running branch stops early on index 1 the divisor is still
        head_length.
    """
    max_length = 50            # rows of the attention memory (decoder attends over 50 slots)
    force_teach_ratio = 0.5    # probability of feeding the true headline tokens

    encoder_hidden = encoder.initHidden()
    # Allocate helper tensors wherever the encoder keeps its state.
    use_cuda = encoder_hidden.is_cuda

    def on_model_device(tensor):
        return tensor.cuda() if use_cuda else tensor

    encode_optim.zero_grad()
    decode_optim.zero_grad()

    desc_length = desc_senvec.size()[0]
    head_length = head_senvec.size()[0]

    collect_encoder_outputs = on_model_device(Variable(torch.zeros(max_length, encoder.hidden_size)))
    # Index 0 is the token that starts every decoded sequence.
    decoder_input = on_model_device(Variable(torch.LongTensor([[0]])))

    loss = 0

    # Pass through the encoder, threading the hidden state from token to token.
    for i in range(desc_length):
        encoder_output, encoder_hidden = encoder(desc_senvec[i], encoder_hidden)
        collect_encoder_outputs[i] = encoder_output[0][0]

    decoder_hidden = encoder_hidden
    force_teach = random.random() < force_teach_ratio

    # Pass through the decoder.
    for i in range(head_length):
        decoder_output, decoder_hidden, attention = decoder(
            decoder_input, decoder_hidden, encoder_output, collect_encoder_outputs)
        loss += loss_criteria(decoder_output, head_senvec[i])

        if force_teach:
            # Teacher forcing: the next input is the true headline token.
            decoder_input = head_senvec[i]
        else:
            # Free running: the next input is the decoder's own best guess.
            top, indicies = decoder_output.data.topk(1)
            top_word = int(indicies[0][0])
            if top_word == 1:
                break
            decoder_input = on_model_device(Variable(torch.LongTensor([[top_word]])))

    loss.backward(retain_graph=False)
    encode_optim.step()
    decode_optim.step()

    # .item() extracts the Python number; indexing a 0-dim loss with [0]
    # fails on PyTorch >= 0.5.
    return loss.item() / head_length


## Optimization

In [73]:

def train(encoder, decoder, print_every=1000, learning_rate=0.01):
    """Train the encoder/decoder on randomly sampled (description, headline) pairs.

    Runs len(pairs) iterations; each one draws a random pair from the
    notebook-level `pairs`, converts it with `inputoutput_pair` and performs
    one `step`. Whenever the iteration index is a multiple of `print_every`
    the function reports the average loss since the previous report, saves
    both state dicts ('modelparam_encode.pkl', 'modelparam_decode.pkl') and
    writes ten sample predictions to 'output_<i>.txt'.

    Args:
        encoder, decoder: the models to train.
        print_every: positive number of iterations between reports/checkpoints.
        learning_rate: learning rate of both SGD optimisers.
    """
    # Only parameters that require grad are optimised (the embeddings are frozen).
    encoder_parameters = [p for p in encoder.parameters() if p.requires_grad]
    decoder_parameters = [p for p in decoder.parameters() if p.requires_grad]
    encode_optim = optim.SGD(encoder_parameters, lr=learning_rate)
    decode_optim = optim.SGD(decoder_parameters, lr=learning_rate)
    loss_criteria = nn.NLLLoss()

    collected_loss = 0
    steps_since_report = 0

    for i in range(len(pairs)):
        print(i)
        # Sample lazily: materialising every training tensor up front would
        # keep far more data on the GPU than a single step needs.
        desc_senvec, head_senvec = inputoutput_pair(random.choice(pairs))
        collected_loss += step(desc_senvec, head_senvec, encoder, decoder,
                               encode_optim, decode_optim, loss_criteria)
        steps_since_report += 1
        gc.collect()

        if i % print_every == 0:
            # Average over the steps actually taken since the last report.
            print_loss_avg = collected_loss / steps_since_report
            print('iteration %d: average loss %.4f' % (i, print_loss_avg))
            collected_loss = 0
            steps_since_report = 0

            torch.save(encoder.state_dict(), 'modelparam_encode.pkl')
            torch.save(decoder.state_dict(), 'modelparam_decode.pkl')

            # Sample predictions from the models being trained (the arguments,
            # not whatever happens to be bound to notebook globals).
            boo = predict10(encoder, decoder)
            filename = 'output_' + str(i) + '.txt'
            with open(filename, 'w', encoding="utf-8") as thefile:
                for item in boo:
                    thefile.write("%s\n" % item)

## Predict Function

In [67]:

def predict(encoder, decoder, sentence):
    """Greedily decode a headline for one description.

    Args:
        encoder, decoder: the trained (or in-training) models.
        sentence: sequence of words forming the description; it is converted
            with `sentence_vecs` and index 1 is appended. At most 50 indices
            are supported; longer inputs overflow the attention memory and
            raise IndexError.

    Returns:
        (decoded_sentence, attentions): the list of predicted words (ending
        with the word for index 1 when the decoder emits it within 50 steps)
        and a tensor with one row of attention weights per decoded step.
    """
    max_length = 50

    desc_indexes = list(sentence_vecs(sentence))
    desc_indexes.append(1)

    encoder_hidden = encoder.initHidden()
    # Allocate helper tensors wherever the encoder keeps its state.
    use_cuda = encoder_hidden.is_cuda

    def on_model_device(tensor):
        return tensor.cuda() if use_cuda else tensor

    desc_vec = on_model_device(Variable(torch.LongTensor(desc_indexes).view(-1, 1)))
    desc_length = desc_vec.size()[0]

    collect_encoder_outputs = on_model_device(Variable(torch.zeros(max_length, encoder.hidden_size)))
    # Index 0 is the token that starts every decoded sequence.
    decoder_input = on_model_device(Variable(torch.LongTensor([[0]])))
    decoder_attentions = torch.zeros(max_length, max_length)
    decoded_sentence = []

    # The decoder applies dropout; switch to eval mode for prediction and put
    # the previous mode back afterwards so that calling this mid-training
    # does not silently disable dropout for the rest of the run.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        # Pass through the encoder, threading the hidden state along.
        for i in range(desc_length):
            encoder_output, encoder_hidden = encoder(desc_vec[i], encoder_hidden)
            collect_encoder_outputs[i] = encoder_output[0][0]

        decoder_hidden = encoder_hidden

        # Pass through the decoder, feeding back its own best guess.
        for i in range(max_length):
            decoder_output, decoder_hidden, attention = decoder(
                decoder_input, decoder_hidden, encoder_output, collect_encoder_outputs)
            decoder_attentions[i] = attention.data.view(-1)
            top, indices = decoder_output.data.topk(1)
            top_word = int(indices[0][0])
            decoded_sentence.append(idx2word[top_word])
            if top_word == 1:
                break
            decoder_input = on_model_device(Variable(torch.LongTensor([[top_word]])))
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

    return decoded_sentence, decoder_attentions[:i + 1]

In [72]:

def predict10(encoder, decoder):
    """Predict headlines for ten randomly chosen pairs.

    Returns:
        A list of ten strings, each holding three newline-separated fields:
        'desc=<description>', 'head=<true headline>' and
        'pred_head=<predicted words joined by spaces>'.
    """
    lst = []
    for _ in range(10):
        pair = random.choice(pairs)
        # predict also returns the attention weights; they are not needed here.
        output_words, _attentions = predict(encoder, decoder, pair[0])
        output_sentence = ' '.join(output_words)
        # Separate the fields with real newlines (the previous '/n' was
        # written out literally).
        report = ('desc=' + str(pair[0]) + '\n'
                  + 'head=' + str(pair[1]) + '\n'
                  + 'pred_head=' + str(output_sentence))
        lst.append(report)
    return lst

## Making Encoder and Decoder objects

In [63]:

# Both models share the vocabulary size and a 300-wide hidden state, and are
# moved to the GPU right after construction (encoder first, then decoder).
hidden_size = 300
encoder1, decoder1 = (
    model_class(len(word2idx), hidden_size).cuda()
    for model_class in (encoder, decoder)
)


## Execution!!!

In [64]:
# Start training the two models built above. `train` loops over len(pairs)
# randomly sampled pairs and periodically saves both state dicts plus a file
# of sample predictions to the working directory.
train(encoder1,decoder1)