In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import torchvision
from collections import defaultdict
import pickle
from torch.autograd import Variable
import torch.optim as optim
import sys
%matplotlib inline

In [None]:
# Locations of the bAbI task files used for training and validation.
# Each path is assembled as ./bAbI_Data/<folder>/<file>.
train_foldername = 'en-valid-10k'
train_filename = 'qa1_train.txt'
train_fname = './bAbI_Data/{}/{}'.format(train_foldername, train_filename)

valid_foldername = 'en-valid-10k'
valid_filename = 'qa1_valid.txt'
valid_fname = './bAbI_Data/{}/{}'.format(valid_foldername, valid_filename)

In [None]:
# Tokenised lines of the training / validation files: one list of
# whitespace-separated tokens per input line, with the characters listed in
# `punctuations` removed before splitting.
train_dat_aux = []
valid_dat_aux = []
punctuations = ['.',',','?']

# `with` guarantees the file handles are closed once the lines are consumed
# (the bare `for l in open(...)` form never closed them).
with open(train_fname) as train_file:
    for line in train_file:
        cleaned = ''.join(ch for ch in line if ch not in punctuations)
        train_dat_aux.append(cleaned.strip().split())

with open(valid_fname) as valid_file:
    for line in valid_file:
        cleaned = ''.join(ch for ch in line if ch not in punctuations)
        valid_dat_aux.append(cleaned.strip().split())

print(len(train_dat_aux))
print(len(valid_dat_aux))

In [None]:
def hasDigits(input_str):
    """Return True if at least one character of ``input_str`` is a digit.

    An empty string yields False.
    """
    for symbol in input_str:
        if symbol.isdigit():
            return True
    return False

In [None]:
def create_vocab(data,unk_thres=0):
    """Build the vocabulary from tokenised lines.

    For every line the first token is skipped and the remaining tokens are
    counted until the first token containing a digit is met (that token and
    everything after it on the line are ignored).

    Args:
        data: sequence of token lists, one per input line.
        unk_thres: words counted fewer than this many times are treated as
            unknown.

    Returns:
        (vocab, unk_list): ``vocab`` lists the kept words in first-seen order;
        the placeholder ``'UNK'`` is inserted at the position where the first
        unknown word was encountered. ``unk_list`` lists the unknown words.
    """
    counts = defaultdict(int)
    for tokens in data:
        for word in tokens[1:]:
            if hasDigits(word):
                break
            counts[word] += 1

    vocab, unk_list = [], []
    for word, freq in counts.items():
        if freq >= unk_thres:
            vocab.append(word)
            continue
        # First rare word seen: reserve a single shared 'UNK' entry.
        if len(unk_list) == 0:
            vocab.append('UNK')
        unk_list.append(word)

    return vocab, unk_list

In [None]:
def create_dictionaries(vocab):
    """Build word<->index lookup tables for ``vocab`` and pickle them.

    Indices are assigned in the order the words appear in ``vocab``. Both
    tables are ``defaultdict(int)``, so looking up a missing key yields 0
    (and inserts it) instead of raising ``KeyError``.

    The tables are written to ``variables/word2idx`` and
    ``variables/idx2word``; the ``variables`` directory is created if it does
    not exist yet.

    Args:
        vocab: sequence of words.

    Returns:
        (word2idx, idx2word): the two mappings that were written to disk, so
        callers can use them without reading the pickles back.
    """
    import os  # local import: only needed to make sure the output directory exists

    word2idx = defaultdict(int)
    idx2word = defaultdict(int)
    for idx, word in enumerate(vocab):
        word2idx[word] = idx
        idx2word[idx] = word

    # Without this the open() calls below fail on a fresh checkout.
    os.makedirs('variables', exist_ok=True)

    with open('variables/word2idx','wb') as handle:
        pickle.dump(word2idx,handle,protocol=pickle.HIGHEST_PROTOCOL)

    with open('variables/idx2word','wb') as handle:
        pickle.dump(idx2word,handle,protocol=pickle.HIGHEST_PROTOCOL)

    return word2idx, idx2word

In [None]:
# Vocabulary from the training lines; with a threshold of 0 no word is
# treated as unknown.
vocab, unk_list = create_vocab(train_dat_aux, unk_thres=0)
# Run create_dictionaries(vocab) once to (re)generate the pickled lookup
# tables that the next cell loads; it is left disabled here.
#create_dictionaries(vocab)

In [None]:
# Load the word <-> index tables that create_dictionaries() pickled.
# NOTE: pickle.load executes arbitrary code from the file; only load files
# produced locally by this notebook.
with open('variables/word2idx','rb') as w2i_file:
    word2idx = pickle.load(w2i_file)

with open('variables/idx2word','rb') as i2w_file:
    idx2word = pickle.load(i2w_file)

In [None]:
def transform_data_BOW(data,vocab,unk_list,word2idx):
    """Encode every tokenised line as a bag-of-words count vector plus a label.

    Args:
        data: sequence of token lists, one per input line.
        vocab: vocabulary; only its length is used (number of count columns).
        unk_list: words that are counted under the ``'UNK'`` column.
        word2idx: mapping word -> column index.

    Returns:
        ``np.ndarray`` of shape ``(len(data), len(vocab) + 1)``. Columns
        ``0 .. len(vocab)-1`` hold word counts; the last column is the label:

        * line whose last token contains a digit: ``word2idx`` of the
          second-to-last token; the first token and the last two tokens are
          not counted;
        * any other line: ``-2`` when its first token is ``'1'``, otherwise
          ``-1``; every token except the first is counted.
    """
    n_words = len(vocab)
    encoded = np.zeros((len(data), n_words + 1))
    for row, tokens in enumerate(data):
        if hasDigits(tokens[-1]):
            label = word2idx[tokens[-2]]
            body = tokens[1:-2]
        else:
            label = -2 if tokens[0] == '1' else -1
            body = tokens[1:]
        encoded[row, n_words] = label
        for word in body:
            key = 'UNK' if word in unk_list else word
            encoded[row, word2idx[key]] += 1

    return encoded

In [None]:
def transform_data_PE(data,vocab,unk_list,word2idx):
    """Encode every tokenised line as a sequence of word indices plus a label.

    Args:
        data: sequence of token lists, one per input line.
        vocab: accepted for signature parity with ``transform_data_BOW``;
            not used here.
        unk_list: words that are replaced by the index of ``'UNK'``.
        word2idx: mapping word -> index.

    Returns:
        list with one ``(1, k)`` float array per line. The leading entries are
        the word indices in order and the last entry is the label:

        * line whose last token contains a digit: ``k = len(line) - 2``, label
          is ``word2idx`` of the second-to-last token, and the first token and
          the last two tokens are not encoded;
        * any other line: ``k = len(line)``, label is ``-2`` when the first
          token is ``'1'`` and ``-1`` otherwise, and every token except the
          first is encoded.
    """
    encoded = []
    for tokens in data:
        n_tok = len(tokens)
        is_question = hasDigits(tokens[-1])
        row = np.zeros((1, n_tok - 2 if is_question else n_tok))
        if is_question:
            row[0, -1] = word2idx[tokens[-2]]
            body = tokens[1:n_tok - 2]
        else:
            row[0, -1] = -2 if tokens[0] == '1' else -1
            body = tokens[1:]
        for pos, word in enumerate(body):
            row[0, pos] = word2idx['UNK'] if word in unk_list else word2idx[word]
        encoded.append(row)
    return encoded

In [None]:
# Bag-of-words encoding of the training and validation lines.
train_data_BOW, valid_data_BOW = (
    transform_data_BOW(split, vocab, unk_list, word2idx)
    for split in (train_dat_aux, valid_dat_aux)
)

In [None]:
# Index-sequence encoding of the training and validation lines.
train_data_PE, valid_data_PE = (
    transform_data_PE(split, vocab, unk_list, word2idx)
    for split in (train_dat_aux, valid_dat_aux)
)

In [None]:
def smax(x):
    """Softmax taken over *all* elements of ``x``.

    The maximum is subtracted before exponentiating. This leaves the result
    mathematically unchanged but keeps ``exp`` from overflowing to ``inf``
    (which turned the output into NaN for large inputs).

    Args:
        x: tensor of any shape.

    Returns:
        Tensor of the same shape as ``x`` whose elements sum to 1.
    """
    if x.numel() == 0:
        # Nothing to normalise; torch.max would raise on an empty tensor.
        return torch.exp(x)
    exps = torch.exp(x - torch.max(x))
    y = torch.div(exps, torch.sum(exps))
    return y

In [None]:
def comp(out,target):
    """Return 1 if the highest-scoring entry of ``out`` equals the target index, else 0.

    The argmax is taken over all elements of ``out`` directly: softmax is
    monotonic, so normalising first cannot change which entry is largest, and
    skipping it avoids NaNs from overflowing exponentials as well as the
    round-trip of a torch tensor through NumPy.

    Args:
        out: tensor of scores.
        target: tensor whose first element is the expected index.

    Returns:
        int: 1 on a match, 0 otherwise.
    """
    predicted = torch.argmax(out.data).item()
    expected = int(target.data.reshape(-1)[0])
    return 1 if predicted == expected else 0

In [None]:
class QuesAnsModel(torch.nn.Module):
    """Question-answering model with an explicit sentence memory.

    Statement vectors are written into a fixed-size memory one row at a time.
    A question is embedded with ``embedding_B``; the memory is embedded with
    ``embedding_A`` (used to compute softmax weights against the question) and
    ``embedding_C`` (used to form the weighted read-out). The read-out is
    added to the question embedding, this is repeated ``num_hops`` times, and
    ``W`` maps the result to one score per vocabulary entry.

    Args:
        embedding_dim: size of the embedding space.
        vocab_size: length of the input vectors and of the output scores.
        num_hops: how many times the memory is read per question.
        max_mem_size: number of rows in the memory.
        temporal: when true, the learned per-slot offsets ``temporal_A`` /
            ``temporal_C`` are added to the embedded memory.

    NOTE(review): memory rows that have not been written yet are all-zero but
    still take part in the softmax over memory slots.
    """

    def __init__(self,embedding_dim, vocab_size, num_hops = 1, max_mem_size=15,temporal=False):
        super(QuesAnsModel,self).__init__()
        self.max_mem_size = max_mem_size
        self.vocab_size = vocab_size
        self.num_hops = num_hops
        self.embedding_dim = embedding_dim
        self.memory = self.init_memory()
        self.current_mem_size = 0
        self.temporal = temporal
        self.embedding_A = torch.nn.Linear(self.vocab_size,self.embedding_dim,bias=False)
        self.embedding_B = torch.nn.Linear(self.vocab_size,self.embedding_dim,bias=False)
        self.embedding_C = torch.nn.Linear(self.vocab_size,self.embedding_dim,bias=False)
        self.W = torch.nn.Linear(self.embedding_dim,self.vocab_size,bias=False)

        # Created regardless of `temporal`; only used in forward() when it is set.
        self.temporal_A = torch.nn.Parameter(torch.randn(self.max_mem_size,self.embedding_dim).float())
        self.temporal_C = torch.nn.Parameter(torch.randn(self.max_mem_size,self.embedding_dim).float())

        # In-place initialisers; the variants without the trailing underscore
        # are deprecated.
        torch.nn.init.xavier_normal_(self.embedding_A.weight)
        torch.nn.init.xavier_normal_(self.embedding_B.weight)
        torch.nn.init.xavier_normal_(self.embedding_C.weight)
        torch.nn.init.xavier_normal_(self.W.weight)
        self.softmax = torch.nn.Softmax(dim=0)

    def init_memory(self):
        """Return a fresh all-zero memory of shape (max_mem_size, vocab_size)."""
        return torch.zeros((self.max_mem_size, self.vocab_size)).float()

    def _store(self, seq, new_story):
        """Write ``seq`` into the next free memory row.

        The memory is wiped first when ``new_story`` is set or when every row
        is already occupied, so the new row then lands in slot 0.
        """
        if new_story or self.current_mem_size >= self.max_mem_size:
            self.memory = self.init_memory()
            self.current_mem_size = 0
        self.memory[self.current_mem_size] = torch.from_numpy(seq).float().view(-1)
        self.current_mem_size += 1

    def forward(self, seq, seq_pe, tag, pe=1):
        """Store a statement or answer a question, depending on ``tag``.

        Args:
            seq: 1-D NumPy vector of length ``vocab_size``.
            seq_pe: accepted for interface compatibility; not used.
            tag: ``'f'`` - first statement of a story: the memory is cleared
                before ``seq`` is stored, so earlier stories cannot leak into
                the new one; ``'s'`` - further statement: appended to the
                memory (a full memory is wiped first); any other value -
                ``seq`` is treated as a question.
            pe: accepted for interface compatibility; not used.

        Returns:
            ``True`` after storing a statement; for a question, a
            ``(1, vocab_size)`` tensor of unnormalised scores.
        """
        if tag in ['s','f']:
            self._store(seq, new_story=(tag == 'f'))
            return True

        self.question = torch.from_numpy(seq).float().view(1,-1)
        ques_d = self.embedding_B(self.question)
        current_A = self.embedding_A(self.memory)
        current_C = self.embedding_C(self.memory)
        if self.temporal:
            current_A = current_A + self.temporal_A
            current_C = current_C + self.temporal_C

        # Each hop re-reads the memory with the updated query. With
        # num_hops == 0 the question embedding is passed to W unchanged.
        for _ in range(self.num_hops):
            # (max_mem_size, 1) weights, normalised over the memory slots.
            P = self.softmax(torch.mm(ques_d, current_A.t()).t())
            ques_d = torch.mm(P.t(),current_C) + ques_d
        return self.W(ques_d)

In [None]:
def _run_epoch(model, dt_bow, dt_pe, pe, loss_fn, optimizer=None):
    """Feed every row of one data split through ``model`` once.

    Rows whose last column is -1 / -2 are passed to the model with tag
    's' / 'f'; every other row is a question whose last column is the target
    index. When ``optimizer`` is given, a gradient step is taken after each
    question.

    Returns:
        (mean_loss, accuracy_percent) over the questions of the split; both
        are NaN when the split contains no question.
    """
    total_loss = 0.0
    n_corr = 0
    count = 0
    for i in range(dt_bow.shape[0]):
        label = dt_bow[i,-1]
        if label == -1 or label == -2:
            tag = 's' if label == -1 else 'f'
            model(dt_bow[i,:-1],dt_pe[i][0,:-1],tag,pe)
            continue
        count += 1
        out = model(dt_bow[i,:-1],dt_pe[i][0,:-1],'q',pe)
        target = torch.tensor([int(label)], dtype=torch.long)
        step_loss = loss_fn(out,target)
        if optimizer is not None:
            optimizer.zero_grad()
            # Every forward pass builds its own graph, so nothing has to be
            # retained after backward().
            step_loss.backward()
            optimizer.step()
        # .item() extracts the Python float; indexing a 0-dim tensor with [0]
        # is an error on current PyTorch.
        total_loss += step_loss.item()
        n_corr += comp(out,target)
    if count == 0:
        return float('nan'), float('nan')
    return total_loss/count, n_corr/count*100


def train(model,tr_dt_bow,vd_dt_bow,tr_dt_pe,vd_dt_pe,epochs=10,eta=0.0001,pe=1):
    """Train ``model`` with Adam and evaluate it after every epoch.

    Args:
        model: module called as ``model(bow_vector, pe_vector, tag, pe)``.
        tr_dt_bow, vd_dt_bow: 2-D arrays whose last column is the label
            (-1 / -2 for statements, target index for questions).
        tr_dt_pe, vd_dt_pe: per-row index encodings; row ``i`` is indexed as
            ``[i][0, :-1]``.
        epochs: number of passes over the training split.
        eta: learning rate.
        pe: forwarded unchanged to the model.

    Returns:
        (l_tr, accuracy_tr, l_vd, accuracy_vd): one entry per epoch. Losses
        are the mean cross-entropy per question (previously only the last
        sample's loss was recorded because the accumulator was reset on every
        row); accuracies are percentages.

    Side effects:
        Prints per-epoch metrics, saves ``Loss1.png`` and ``Acc1.png`` and
        shows both figures.
    """
    optimizer = optim.Adam(model.parameters(),lr=eta)
    loss = torch.nn.CrossEntropyLoss()

    eps = []
    l_tr = []
    l_vd = []
    accuracy_tr = []
    accuracy_vd = []

    for epoch in range(epochs):
        ################################# Training
        loss_tr, acc_tr = _run_epoch(model, tr_dt_bow, tr_dt_pe, pe, loss, optimizer)
        l_tr.append(loss_tr)
        accuracy_tr.append(acc_tr)

        ############################# Validation
        # No parameter update happens here, so skip building autograd graphs.
        with torch.no_grad():
            loss_vd, acc_vd = _run_epoch(model, vd_dt_bow, vd_dt_pe, pe, loss)
        l_vd.append(loss_vd)
        accuracy_vd.append(acc_vd)

        eps.append(epoch)
        print(epoch,'Training Loss : ',l_tr[-1],' , Training Acc : ',accuracy_tr[-1])
        print(epoch,'Validation Loss : ',l_vd[-1],' , Validation Acc : ',accuracy_vd[-1])

    plt.plot(eps,l_tr)
    plt.plot(eps,l_vd)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend(['Training Loss','Validation Loss'])
    plt.savefig('Loss1.png')
    plt.show()

    plt.plot(eps,accuracy_tr)
    plt.plot(eps,accuracy_vd)
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy%')
    plt.legend(['Training Accuracy','Validation Accuracy'],loc=4)
    plt.savefig('Acc1.png')
    plt.show()
    return l_tr, accuracy_tr, l_vd, accuracy_vd

In [None]:
def test(model,test_dt,test_dt_pe,pe=1):
    """Measure the question-answering accuracy of ``model`` on one data split.

    Rows of ``test_dt`` whose last column is -1 / -2 are passed to the model
    with tag 's' / 'f'; every other row is a question whose last column is the
    expected answer index.

    Args:
        model: module called as ``model(bow_vector, pe_vector, tag, pe)``.
        test_dt: 2-D array whose last column is the label.
        test_dt_pe: per-row index encodings; row ``i`` is indexed as
            ``[i][0, :-1]``.
        pe: forwarded unchanged to the model.

    Returns:
        Accuracy in percent over the questions (also printed); NaN when the
        split contains no question.
    """
    n_corr = 0
    count = 0
    # Evaluation only: no gradients are needed.
    with torch.no_grad():
        for i in range(test_dt.shape[0]):
            label = test_dt[i,-1]
            if label == -1 or label == -2:
                tag = 's' if label == -1 else 'f'
                model(test_dt[i,:-1],test_dt_pe[i][0,:-1],tag,pe)
                continue
            count += 1
            out = model(test_dt[i,:-1],test_dt_pe[i][0,:-1],'q',pe)
            target = torch.tensor([int(label)], dtype=torch.long)
            n_corr += comp(out,target)
    accuracy = n_corr/count*100 if count else float('nan')
    print(accuracy)
    return accuracy

In [None]:
# Hyperparameters of the run.
embedding_dim = 30
vocab_size = len(vocab)
num_hops = 1
max_mem_size = 10
epochs = 50

# Fix the RNG so the weight initialisation, and therefore the whole run, is
# reproducible after Restart Kernel -> Run All.
torch.manual_seed(0)

model = QuesAnsModel(embedding_dim, vocab_size, num_hops = num_hops, max_mem_size = max_mem_size, temporal=True)
# Keep the per-epoch histories instead of letting the cell echo four long lists.
l_tr, accuracy_tr, l_vd, accuracy_vd = train(model, train_data_BOW, valid_data_BOW,train_data_PE,valid_data_PE, epochs=epochs)

In [None]:
# Load and encode the held-out test split the same way as the other splits.
test_foldername = 'en-valid-10k'
test_filename = 'qa1_test.txt'
test_fname = './bAbI_Data/'+str(test_foldername)+'/'+str(test_filename)
test_dat_aux = []
# `with` closes the file once all lines are read (the bare
# `for l in open(...)` form never closed it).
with open(test_fname) as test_file:
    for line in test_file:
        cleaned = ''.join(ch for ch in line if ch not in punctuations)
        test_dat_aux.append(cleaned.strip().split())
print(len(test_dat_aux))
test_data_BOW = transform_data_BOW(test_dat_aux,vocab,unk_list,word2idx)
test_data_PE = transform_data_PE(test_dat_aux,vocab,unk_list,word2idx)

In [None]:
# Accuracy (in percent) of the trained model on the test split.
acc = test(model, test_dt=test_data_BOW, test_dt_pe=test_data_PE)