In [1]:
import numpy as np

In [2]:
#Seed NumPy's global random number generator with a fixed value
#NOTE(review): only NumPy is seeded; no TensorFlow/Keras seed is set in the visible cells - confirm whether that is intended
np.random.seed(42)

Download the data from http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz

Parse bAbI stories

In [3]:
def parse_stories(lines):
    """Parse bAbI-formatted lines into parallel story/question/answer lists.

    Each non-blank line is '<id> <text>'. An id of 1 starts a new story.
    A line whose text contains tab characters is a question line of the form
    '<question>\\t<answer>\\t<supporting ids>'; every other line is a statement
    that is appended to the running story.

    Args:
        lines: iterable of lines, either bytes (decoded as UTF-8, e.g. from
            tarfile's extractfile) or str.

    Returns:
        A tuple (stories, questions, answers) of equal-length lists. For every
        question line, stories holds all statements seen so far in the current
        story joined by single spaces, questions holds the question text and
        answers holds the answer text.

    Raises:
        ValueError: if a non-blank line has no text after its id, if the id is
            not an integer, or if a question line does not have exactly three
            tab-separated fields.
    """
    stories = []
    questions = []
    answers = []

    #Statements of the story currently being read
    story_sentences = []
    for raw_line in lines:
        #Archive members yield bytes; plain text sources yield str
        if isinstance(raw_line, bytes):
            raw_line = raw_line.decode('utf-8')
        line = raw_line.strip()
        if not line:
            #Skip blank lines (e.g. a trailing newline) instead of failing on the split below
            continue
        #Get line number and rest of the line
        nid, line = line.split(' ', 1)
        if int(nid) == 1:
            #Start a new story
            story_sentences = []
        if '\t' in line:
            #Question line: record the story so far together with question and answer
            q, a, _supporting = line.split('\t')
            stories.append(' '.join(story_sentences))
            questions.append(q)
            answers.append(a)
        else:
            story_sentences.append(line)
    return stories, questions, answers

In [4]:
import tarfile

Checking the content of the file

In [5]:
#Open the downloaded bAbI archive and print the raw lines of the task-1 test file
#NOTE(review): the archive path is an absolute, machine-specific location - point it at wherever the tarball was downloaded
with tarfile.open('C:/Users/ohm/Downloads/Deep Learning/tasks_1-20_v1-2.tar.gz') as tar:
    #extractfile gives a binary file object, so readlines() yields bytes lines
    f = tar.extractfile('tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt')
    print(f.readlines())

[b'1 John travelled to the hallway.\n', b'2 Mary journeyed to the bathroom.\n', b'3 Where is John? \thallway\t1\n', b'4 Daniel went back to the bathroom.\n', b'5 John moved to the bedroom.\n', b'6 Where is Mary? \tbathroom\t2\n', b'7 John went to the hallway.\n', b'8 Sandra journeyed to the kitchen.\n', b'9 Where is Sandra? \tkitchen\t8\n', b'10 Sandra travelled to the hallway.\n', b'11 John went to the garden.\n', b'12 Where is Sandra? \thallway\t10\n', b'13 Sandra went back to the bathroom.\n', b'14 Sandra moved to the kitchen.\n', b'15 Where is Sandra? \tkitchen\t14\n', b'1 Sandra travelled to the kitchen.\n', b'2 Sandra travelled to the hallway.\n', b'3 Where is Sandra? \thallway\t2\n', b'4 Mary went to the bathroom.\n', b'5 Sandra moved to the garden.\n', b'6 Where is Sandra? \tgarden\t5\n', b'7 Sandra travelled to the office.\n', b'8 Daniel journeyed to the hallway.\n', b'9 Where is Daniel? \thallway\t8\n', b'10 Daniel journeyed to the office.\n', b'11 John moved to the hallway.\

Extract the train and test files

In [6]:
#Parse the task-1 train and test files straight out of the archive
#Each call yields three parallel lists: stories, questions and answers
with tarfile.open('C:/Users/ohm/Downloads/Deep Learning/tasks_1-20_v1-2.tar.gz') as tar:
    train_member = tar.extractfile('tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_train.txt')
    train_stories_txt, train_q_txt, train_a_txt = parse_stories(train_member)

    test_member = tar.extractfile('tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_test.txt')
    test_stories_txt, test_q_txt, test_a_txt = parse_stories(test_member)

In [7]:
#tar.getnames()

In [8]:
#Inspect the first parsed training story
train_stories_txt[0]

'Mary moved to the bathroom. John went to the hallway.'

In [9]:
#Inspect the question that belongs to the first training story
train_q_txt[0]

'Where is Mary? '

In [10]:
#Inspect the answer to the first training question
train_a_txt[0]

'bathroom'

# Build Tokenizer

In [11]:
from tensorflow.python.keras.preprocessing.text import Tokenizer

  from ._conv import register_converters as _register_converters


In [12]:
#Create one tokenizer that is shared by stories, questions and answers
t = Tokenizer()

In [13]:
#Fit the tokenizer on the training texts: stories, then questions, then answers
for train_texts in (train_stories_txt, train_q_txt, train_a_txt):
    t.fit_on_texts(train_texts)

In [14]:
#Fit the same tokenizer on the test texts: stories, then questions, then answers
for test_texts in (test_stories_txt, test_q_txt, test_a_txt):
    t.fit_on_texts(test_texts)

In [15]:
#Vocabulary size = number of words known to the tokenizer plus one,
#because word indices begin at 1 and index 0 is not assigned to any word
vocab_size =  len(t.word_index) + 1 #Tokenizer starts with index 1

In [16]:
#Display the vocabulary size
vocab_size

20

In [17]:
#Convert the training stories, questions and answers to lists of word indices
train_stories_seq, train_q_seq, train_a_seq = (
    t.texts_to_sequences(texts)
    for texts in (train_stories_txt, train_q_txt, train_a_txt)
)

In [18]:
#Convert the test stories, questions and answers to lists of word indices
test_stories_seq, test_q_seq, test_a_seq = (
    t.texts_to_sequences(texts)
    for texts in (test_stories_txt, test_q_txt, test_a_txt)
)

In [19]:
#Longest story, in tokens, across the train and test sets
story_maxlen = max(map(len, train_stories_seq + test_stories_seq))

In [20]:
#Longest question, in tokens, across the train and test sets
question_maxlen = max(map(len, train_q_seq + test_q_seq))

In [21]:
#Longest answer, in tokens, across the train and test sets
answer_maxlen = max(map(len, train_a_seq + test_a_seq))

In [22]:
#Display the maximum story length
story_maxlen

58

In [23]:
#Display the maximum question length
question_maxlen

3

In [24]:
#Display the maximum answer length
answer_maxlen

1

Pad the sequences

In [25]:
from tensorflow.python.keras.preprocessing.sequence import pad_sequences

In [26]:
#Pad each kind of training sequence to the maximum length computed for that kind
train_stories_seq, train_q_seq, train_a_seq = (
    pad_sequences(seqs, maxlen=length)
    for seqs, length in (
        (train_stories_seq, story_maxlen),
        (train_q_seq, question_maxlen),
        (train_a_seq, answer_maxlen),
    )
)

In [27]:
#Pad each kind of test sequence to the maximum length computed for that kind
test_stories_seq, test_q_seq, test_a_seq = (
    pad_sequences(seqs, maxlen=length)
    for seqs, length in (
        (test_stories_seq, story_maxlen),
        (test_q_seq, question_maxlen),
        (test_a_seq, answer_maxlen),
    )
)

Integer-to-word lookup (the inverse of the tokenizer's word index)

In [28]:
#Invert the tokenizer's word -> index mapping so predictions can be turned back into words
int_to_word = {index: word for word, index in t.word_index.items()}

In [29]:
#Spot-check the lookup for index 11
int_to_word[11]

'bathroom'

# Define the model layers

In [30]:
from tensorflow.python.keras.models import Sequential, Model

In [31]:
from tensorflow.python.keras.layers import Embedding, Dense, LSTM, Activation, dot, Permute, add, concatenate, Dropout, Input

Define input for story and question

In [32]:
#Model input for the padded story: story_maxlen word indices per example
story = Input(shape=(story_maxlen,))

In [33]:
#Model input for the padded question: question_maxlen word indices per example
question = Input(shape=(question_maxlen,))

Build three encoders, one for each embedding:
1. Input memory embedding (A) - m_encoder
2. Controller embedding (C) - c_encoder
3. Question embedding (B) - question_encoder

Embedding A for Input memory

In [34]:
#Embedding A: embed every story token into a vector of size story_maxlen, then apply dropout
m_encoder = Sequential([
    Embedding(input_dim=vocab_size, output_dim=story_maxlen),
    Dropout(0.3),
])
m_embedded_output = m_encoder(story)
#output is batch_size x story_maxlen x story_maxlen (embedding size)

Embedding C for use with Controller

In [35]:
#Embedding C: embed every story token into a vector of size question_maxlen, then apply dropout
c_encoder = Sequential([
    Embedding(input_dim=vocab_size, output_dim=question_maxlen),
    Dropout(0.3),
])
c_embedded_output = c_encoder(story)
#output is batch_size x story_maxlen x question_maxlen (embedding size)

Embedding B for Question

In [36]:
#Embedding B: embed every question token into a vector of size story_maxlen, then apply dropout
question_encoder = Sequential([
    Embedding(input_dim=vocab_size, output_dim=story_maxlen, input_length=question_maxlen),
    Dropout(0.3),
])
question_embeddding_output = question_encoder(question)
#output is batch_size x question_maxlen x story_maxlen (embedding size)

Attention

In [37]:
#Match every story position against every question position via a dot product over the embedding axis
match_scores = dot([m_embedded_output, question_embeddding_output], axes=(2, 2))
#Softmax turns the match scores into attention weights
attention_weights = Activation('softmax')(match_scores)
#output is batch_size x story_maxlen x question_maxlen

Calculate weighted_sum (here the attention weights and embedding C are combined with the add function)

In [38]:
#Combine the attention weights with embedding C by element-wise addition;
#both operands are batch_size x story_maxlen x question_maxlen
weighted_sum = add([attention_weights, c_embedded_output])  
#Output batch_size x story_maxlen x question_maxlen

#Swap the last two axes so the result lines up with the question embedding
permuted_weighted_sum = Permute((2, 1))(weighted_sum)  
#Output batch_size x question_maxlen x story_maxlen

Add permuted_weighted_sum to the question embedding (first hop)

In [39]:
#Element-wise sum of the permuted response and the question embedding
output_1 = add([permuted_weighted_sum, question_embeddding_output])
#Output batch_size x question_maxlen x story_maxlen

Output using LSTM

In [40]:
#Run an LSTM with 32 units over the question_maxlen steps of output_1
answer = LSTM(32)(output_1)
#Last hidden state - batch_size x 32

In [41]:
#Apply dropout (rate 0.3) to the LSTM output before the classifier
answer = Dropout(0.3)(answer)

FC Layer to predict answer using SoftMax

In [42]:
#Project onto one score per vocabulary index
answer = Dense(vocab_size)(answer)
#Softmax turns the scores into a probability distribution over the vocabulary
answer = Activation('softmax')(answer)
#Output batch_size x vocab_size

# Build the model

In [43]:
#Tie the two inputs (story, question) to the answer distribution
model = Model([story, question], answer)

In [44]:
#Targets are integer word indices (train_a_seq), hence the sparse variant of categorical cross-entropy
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy',metrics=['accuracy'])

Train the model

In [45]:
#Train for 200 epochs on the padded training data
#NOTE(review): the test split doubles as validation data here, so the validation metrics are measured on the test set itself
model.fit([train_stories_seq, train_q_seq], train_a_seq,
          batch_size=32,
          epochs=200,
          validation_data=([test_stories_seq, test_q_seq], test_a_seq))

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Train on 10000 samples, validate on 1000 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/20

Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 

<tensorflow.python.keras.callbacks.History at 0xe16a198>

In [46]:
#Save the trained model to a file in the current working directory
#NOTE(review): '.hd5' is not one of the usual HDF5 extensions ('.h5' / '.hdf5') - confirm the intended file format
model.save('babi_memn2n_task_1.hd5')

# Model Prediction

In [47]:
#Index of the test example used for the prediction demo below
test_num = 723

In [48]:
#Get the padded story sequence and reshape it to a batch of size 1
story_seq_ex = np.reshape(test_stories_seq[test_num], (1, -1))

#Get the padded question sequence and reshape it to a batch of size 1
question_seq_ex = np.reshape(test_q_seq[test_num], (1, -1))

#Predict: one row of probabilities over the vocabulary indices
probabilities = model.predict([story_seq_ex, question_seq_ex])

#Get the index with highest probability
predicted_index = int(np.argmax(probabilities))

#Convert index to word. Index 0 is a valid model output but has no entry in
#int_to_word (word indices start at 1), so fall back to a placeholder
#instead of raising KeyError
result = int_to_word.get(predicted_index, '<unknown>')

In [49]:
#Print the story, the question and the predicted answer, each under its own label
for label, text in (('Story : \n', test_stories_txt[test_num]),
                    ('Question : \n', test_q_txt[test_num]),
                    ('Answer : \n', result)):
    print(label + text)

Story : 
Sandra moved to the bedroom. Daniel journeyed to the garden. Mary moved to the bedroom. Sandra went back to the hallway. John went to the office. Sandra went back to the bathroom. Sandra went back to the kitchen. Sandra travelled to the office.
Question : 
Where is Sandra? 
Answer : 
office
