In [15]:
import torch
from torchtext.datasets import BABI20
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
import torch.nn.init as I
import numpy as np
from torch.optim.lr_scheduler import StepLR
import re
from functools import reduce 

In [2]:
def dataloader(batch_size, memory_size, task, joint, tenK):
    train_iter, valid_iter, test_iter = BABI20.iters(
        batch_size=batch_size, memory_size=memory_size, task=task, joint=joint, tenK=tenK, device=torch.device("cpu"),
    shuffle=True)
    return train_iter, valid_iter, test_iter, train_iter.dataset.fields['query'].vocab

In [3]:
train_iter, valid_iter, test_iter, vocab = dataloader(64, 50, 6, True, False)

In [103]:
def tokenize(sent):
    return [ x.strip() for x in re.split(r'(\W+)', sent) if x.strip()]

def parse_stories(lines):
    data = []
    story = []
    for line in lines:
        #line = line.decode('utf-8').strip()
        nid, line = line.split(' ', 1)
        nid = int(nid)
        if nid == 1:
            # reset story when line ID=1 (start of new story)
            story = []
        if '\t' in line:
            # this line is tab separated Q, A &amp;amp;amp;amp;amp; support fact ID
            q, a, supporting = line.split('\t')
            # tokenize the words of question
            q = tokenize(q)
            a = tokenize(a)
            # Provide all the sub-stories till this question
            substory = [x for x in story if x]
            # A story ends and is appended to global story data-set
            data.append((substory, q, a))
            story.append('')
        else:
            # this line is a sentence of story
            sent = tokenize(line)
            story.append(sent)
    return data

def get_stories(f):
    # read the data file and parse 10k stories
    data = parse_stories(f.readlines())
    # lambda func to flatten the list of sentences into one list
    flatten = lambda data: reduce(lambda x, y: x + y, data)
    # creating list of tuples for each story
    data = [(flatten(story), q, answer) for story, q, answer in data]
    #data = [((story), q, answer) for story, q, answer in data]
    return data

with open('./tasks/task_1.txt') as f:
    all_stories = get_stories(f)
    
# train_stories, test_stories = train_test_split(all_stories, test_size=0.2)

In [104]:
def merge_question_story(data):
    merged_data = [(s+['SOQ']+q, a) for s,q,a in data]
    return merged_data
        

In [155]:
SOS_token = 0
EOS_token = 1


class Lang:
    def __init__(self):
        self.word2index = {}
        self.word2count = {}
        self.index2word = {0: "SOS", 1: "EOS"}
        self.n_words = 2  # Count SOS and EOS

    def addSentence(self, sentence):
        for word in sentence:
            self.addWord(word)

    def addWord(self, word):
        if word not in self.word2index:
            self.word2index[word] = self.n_words
            self.word2count[word] = 1
            self.index2word[self.n_words] = word
            self.n_words += 1
        else:
            self.word2count[word] += 1
            
def readLangs(all_stories):
    print("Reading data...")

    pairs = merge_question_story(all_stories)

    # Reverse pairs, make Lang instances
    lang = Lang()
    return lang, pairs

In [156]:
def prepareData(all_stories):
    lang, pairs = readLangs(all_stories)
    print("Read %s sentence pairs" % len(pairs))
    for pair in pairs:
        lang.addSentence(pair[0])
        lang.addSentence(pair[1])
    print("Counted words:")
    print(lang.n_words)
    return lang, pairs

In [157]:
lang, pairs = prepareData(all_stories)
print(random.choice(pairs))

Reading data...
Read 5000 sentence pairs
Counted words:
23
(['Mary', 'went', 'to', 'the', 'garden', '.', 'Mary', 'went', 'to', 'the', 'bedroom', '.', 'Mary', 'moved', 'to', 'the', 'office', '.', 'Sandra', 'went', 'to', 'the', 'kitchen', '.', 'Mary', 'moved', 'to', 'the', 'hallway', '.', 'John', 'journeyed', 'to', 'the', 'bathroom', '.', 'SOQ', 'Where', 'is', 'Mary', '?'], ['hallway'])


In [108]:
def indexesFromSentence(lang, sentence):
    return [lang.word2index[word] for word in sentence]

def tensorFromSentence(lang, sentence):
    indexes = indexesFromSentence(lang, sentence)
    indexes.append(EOS_token)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

def tensorsFromPair(lang, pair):
    input_tensor = tensorFromSentence(lang, pair[0])
    target_tensor = tensorFromSentence(lang, pair[1])
    return (input_tensor, target_tensor)

In [134]:
class EncoderRNN(nn.Module):
    def __init__(self, input_size, hidden_size, embed_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embed_size  = embed_size

        self.embedding = nn.Embedding(input_size, embed_size)
        self.gru = nn.GRU(embed_size, hidden_size)

    def forward(self, input, hidden):
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)
    
class DecoderRNN(nn.Module):
    def __init__(self, output_size, hidden_size, embed_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.output_size = output_size

        self.embedding = nn.Embedding(output_size, embed_size)
        self.gru = nn.GRU(embed_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
        return output, hidden

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)

In [110]:
class AttnDecoderRNN(nn.Module):
    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
#         self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
#         self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
        self.dropout = nn.Dropout(self.dropout_p)
        self.gru = nn.GRU(self.hidden_size, self.hidden_size)
        self.out = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, input, hidden, encoder_outputs):
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

#         attn_weights = F.softmax(
#             self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
#         attn_applied = torch.bmm(attn_weights.unsqueeze(0),
#                                  encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights

    def initHidden(self):
        return torch.zeros(1, 1, self.hidden_size, device=device)


NameError: name 'MAX_LENGTH' is not defined

In [111]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
       
    def forward(self, source, target, teacher_forcing_ratio=0.5):

        input_length = source.size(0) #get the input length (number of words in sentence)
        batch_size = target.shape[1] 
        target_length = target.shape[0]
        vocab_size = self.decoder.output_size
        outputs = torch.zeros(target_length, batch_size, vocab_size).to(self.device)

        for i in range(input_length):
            encoder_output, encoder_hidden = self.encoder(source[i], self.encoder.initHidden())

        decoder_hidden = encoder_hidden.to(device)
  
        decoder_input = torch.tensor([SOS_token], device=device)  # SOS

        for t in range(target_length):   
            decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden)
            outputs[t] = decoder_output
            teacher_force = random.random() < teacher_forcing_ratio
            topv, topi = decoder_output.topk(1)
            input = (target[t] if teacher_force else topi)
            if(teacher_force == False and input.item() == EOS_token):
                break

        return outputs

In [167]:
teacher_forcing_ratio = 0.5

def clacModel(model, input_tensor, target_tensor, model_optimizer, criterion):
    model_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    loss = 0
    epoch_loss = 0
    output = model(input_tensor, target_tensor)
    num_iter = output.size(0)


#calculate the loss from a predicted sentence with the expected result
    for ot in range(num_iter):
        loss += criterion(output[ot], target_tensor[ot])

    loss.backward()
    model_optimizer.step()
    epoch_loss = loss.item() / num_iter

    return epoch_loss

def trainModel(model, lang, pairs, num_iteration=20000):
    model.train()

    optimizer = optim.SGD(model.parameters(), lr=0.01)
    criterion = nn.NLLLoss()
    total_loss_iterations = 0

    training_pairs = [tensorsFromPair(lang, random.choice(pairs))
                     for i in range(num_iteration)]
    print(training_pairs[0])
  
    for itera in range(1, num_iteration+1):
        training_pair = training_pairs[itera - 1]
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = clacModel(model, input_tensor, target_tensor, optimizer, criterion)

        total_loss_iterations += loss

        if itera % 100 == 0:
            avarage_loss= total_loss_iterations / 100
            total_loss_iterations = 0
            print(f"Iter: {itera}, loss: {avarage_loss}")
          
    return model

In [168]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

embed_size = 12
hidden_size = 12
num_layers = 1
num_iteration = 4000
input_size = lang.n_words
output_size = lang.n_words

#create encoder-decoder model
encoder = EncoderRNN(input_size, hidden_size, embed_size)
decoder = DecoderRNN(output_size, hidden_size, embed_size)

model = Seq2Seq(encoder, decoder, device).to(device)

#print model 
print(encoder)
print(decoder)
model = trainModel(model, lang, pairs, num_iteration)

EncoderRNN(
  (embedding): Embedding(23, 12)
  (gru): GRU(12, 12)
)
DecoderRNN(
  (embedding): Embedding(23, 12)
  (gru): GRU(12, 12)
  (out): Linear(in_features=12, out_features=23, bias=True)
  (softmax): LogSoftmax()
)
(tensor([[14],
        [21],
        [ 4],
        [ 5],
        [20],
        [ 7],
        [19],
        [21],
        [ 4],
        [ 5],
        [ 8],
        [ 7],
        [ 2],
        [21],
        [ 4],
        [ 5],
        [18],
        [ 7],
        [13],
        [17],
        [ 4],
        [ 5],
        [ 6],
        [ 7],
        [13],
        [21],
        [ 4],
        [ 5],
        [20],
        [ 7],
        [ 2],
        [17],
        [ 4],
        [ 5],
        [ 8],
        [ 7],
        [19],
        [ 3],
        [ 4],
        [ 5],
        [20],
        [ 7],
        [13],
        [21],
        [ 4],
        [ 5],
        [16],
        [ 7],
        [ 9],
        [10],
        [11],
        [ 2],
        [12],
        [ 1]], device='cuda:0'), te

In [144]:
lang.index2word[1]

'EOS'

In [128]:
def evaluate(model, lang, sentences):
    with torch.no_grad():
        input_tensor = tensorFromSentence(lang, sentences[0])
        output_tensor = tensorFromSentence(lang, sentences[1])
  
        decoded_words = []
  
        output = model(input_tensor, output_tensor)
       
        for ot in range(output.size(0)):
            topv, topi = output[ot].topk(1)
            # print(topi)

            if topi[0].item() == EOS_token:
                decoded_words.append('<EOS>')
                break
            else:
                decoded_words.append(lang.index2word[topi[0].item()])
    return decoded_words

def evaluateRandomly(model, lang, pairs, n=10):
    for i in range(n):
        pair = random.choice(pairs)
#         print('source {}'.format(pair[0]))
#         print('target {}'.format(pair[1]))
        output_words = evaluate(model, lang, pair)
        output_sentence = ' '.join(output_words)
        print('predicted {}'.format(output_sentence))

In [158]:
#evaluateRandomly(model, lang, pairs)
pairs[0]

(['John',
  'moved',
  'to',
  'the',
  'garden',
  '.',
  'John',
  'moved',
  'to',
  'the',
  'bedroom',
  '.',
  'SOQ',
  'Where',
  'is',
  'John',
  '?'],
 ['bedroom'])

In [161]:
tensorsFromPair(lang, pairs[0])
#lang.word2index['SOQ']

(tensor([[ 2],
         [ 3],
         [ 4],
         [ 5],
         [ 6],
         [ 7],
         [ 2],
         [ 3],
         [ 4],
         [ 5],
         [ 8],
         [ 7],
         [ 9],
         [10],
         [11],
         [ 2],
         [12],
         [ 1]], device='cuda:0'),
 tensor([[8],
         [1]], device='cuda:0'))