# Memory Networks

## Overview

![memnn.png](attachment:memnn.png)

## Data

In [1]:
from keras.utils.data_utils import get_file
import tarfile
import re
import numpy as np

Using TensorFlow backend.


### babi tasks

We'll use the data and tasks from the bAbI project, as described in the [paper](http://arxiv.org/abs/1502.05698).

* [github](https://github.com/facebook/bAbI-tasks)

bAbI tasks dataset and challenges

In [2]:
# Fetch the bAbI tasks archive from `origin`; the returned `path` is handed
# straight to tarfile.open below (presumably a local file path -- confirm
# against the Keras get_file documentation).
path = get_file('babi-tasks-v1-2.tar.gz', 
                origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
# Keep the archive open: individual task files are extracted from `tar` later.
tar = tarfile.open(path)

In [81]:
# Maps a challenge name to the path template of its file inside the archive.
# The '{}' placeholder is filled in later with 'train' or 'test' via str.format.
challenges = {
    # QA1 with 10,000 samples
    'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt',
    # QA2 with 10,000 samples
    'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt',
    # QA2 from the 'en/' directory (labelled '1k' here)
    'two_supporting_facts_1k': 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt',
}
# Select which challenge the rest of the notebook runs on.
challenge_type = 'single_supporting_fact_10k'
# challenge_type = 'two_supporting_facts_10k'
challenge = challenges[challenge_type]

### Parse Stories

In [82]:
def tokenize(sentence):
    """Split a sentence into word and punctuation tokens.

    The text is split on runs of non-word characters. The pattern is a
    capturing group, so the separators themselves are kept in the result;
    every piece is then stripped and pieces that are empty after stripping
    (plain whitespace) are dropped.

    Args:
        sentence: text to tokenize.

    Returns:
        List of non-empty, stripped tokens in their original order.
    """
    # Raw string: '\W' inside a normal literal is an invalid escape sequence
    # and triggers a Deprecation/SyntaxWarning on recent Python versions.
    pieces = re.split(r'(\W+)', sentence)
    return [piece.strip() for piece in pieces if piece.strip()]

In [83]:
def parse_stories(lines):
    """Parse raw bAbI task lines into (story, query, answer) samples.

    Each non-blank line has the form '<nid> <text>'. A line whose text
    contains tab characters is a question ('question<TAB>answer<TAB>supporting
    ids'); any other line is a statement that is tokenized and appended to the
    current story. A line with nid == 1 starts a new story.

    Args:
        lines: iterable of lines, either bytes (decoded as UTF-8) or str.
            Blank / whitespace-only lines are ignored.

    Returns:
        List of (substory, q, a) tuples, one per question line, where
        * substory is the list of statements seen so far in the story, each a
          token list prefixed with '<i>:' (i = 0-based position of the line
          within the story, question lines included),
        * q is the tokenized question,
        * a is the answer string exactly as it appears in the file.
    """
    data = []
    story = []
    for line in lines:
        # members read from the tar archive yield bytes; also accept text
        if isinstance(line, bytes):
            line = line.decode('utf-8')
        line = line.strip()
        # a blank line (e.g. at end of file) has no 'nid text' structure
        if not line:
            continue
        nid, line = line.split(' ', 1)
        # new story starts at nid = 1
        if int(nid) == 1:
            story = []
        # lines with query, answer and supporting sentence ids contain a tab '\t'
        if '\t' in line:
            q, a, _supporting = line.split('\t')
            q = tokenize(q)
            # prefix each statement with its position; skip question placeholders
            substory = [[str(i) + ':'] + x for i, x in enumerate(story) if x]
            data.append((substory, q, a))
            # empty placeholder so later statements keep their line position
            story.append('')
        # 'normal' sentence in story
        else:
            story.append(tokenize(line))
    return data

In [84]:
def get_stories(f):
    """Read all lines from the file-like object `f` and parse them.

    Args:
        f: object with a readlines() method whose lines parse_stories accepts.

    Returns:
        A new list of (story, q, answer) tuples, one per parsed sample.
    """
    samples = []
    for story, q, answer in parse_stories(f.readlines()):
        samples.append((story, q, answer))
    return samples

In [85]:
# Parse the train and test splits of the selected challenge. Each extracted
# member is a file object; the `with` blocks close it as soon as it is parsed.
with tar.extractfile(challenge.format('train')) as train_file:
    train_stories = get_stories(train_file)
with tar.extractfile(challenge.format('test')) as test_file:
    test_stories = get_stories(test_file)

### Upper bounds

Maximum sentence length (in tokens, including the `i:` position prefix) and maximum number of sentences per story

In [86]:
# Train and test samples combined, so the size limits and the vocabulary
# computed from `stories` below cover both splits.
stories = train_stories + test_stories

In [87]:
# Longest single sentence (token count) over every story in both splits
story_maxlen = max(
    len(sentence)
    for story, _query, _answer in stories
    for sentence in story
)
# Largest number of sentences found in any one story
story_maxsens = max(len(story) for story, _query, _answer in stories)
print(f'Max story length: {story_maxlen} \nMax sentences in story: {story_maxsens}')

Max story length: 8 
Max sentences in story: 10


Query maximum length

In [88]:
# Longest tokenized query over both splits
query_maxlen = max(len(query) for _story, query, _answer in stories)
print(f'Max query length: {query_maxlen}')

Max query length: 4


### Vocab

In [89]:
import collections

Each sample in `stories` holds a list of sentences, a query and an answer; the vocabulary is built from all of their words.
* The nested sentence lists have to be flattened first

In [90]:
def do_flatten(el):
    """Tell whether `el` is a container that should be descended into.

    Args:
        el: any object.

    Returns:
        True when `el` is iterable but is not a str or bytes object (those are
        treated as atomic values); False otherwise.
    """
    # The `collections.Iterable` alias was removed in Python 3.10; the ABC
    # lives in collections.abc.
    from collections.abc import Iterable
    return isinstance(el, Iterable) and not isinstance(el, (str, bytes))

In [91]:
def flatten(l):
    """Lazily yield the leaf values of an arbitrarily nested iterable.

    Elements for which do_flatten() is true are expanded recursively; every
    other element is yielded unchanged, in order.
    """
    for item in l:
        if not do_flatten(item):
            yield item
            continue
        yield from flatten(item)

In [92]:
# Unique leaf values (tokens and answer strings) found anywhere in `stories`,
# in sorted order so that the resulting list is deterministic.
vocab = sorted(set(flatten(stories)))

Add `'<PAD>'` to vocab for sentence padding later

In [93]:
# Put the padding token at position 0. The guard keeps this cell idempotent:
# re-running it must not insert a second '<PAD>' and shift every word index.
if not vocab or vocab[0] != '<PAD>':
    vocab.insert(0, '<PAD>')

In [94]:
# Number of vocabulary entries; the trailing bare expression displays the value.
vocab_size = len(vocab); vocab_size

32

#### Index mapping

In [95]:
# Token -> integer id, where the id is the token's position in `vocab`
word_idx = {token: position for position, token in enumerate(vocab)}

## Model

### Vectorize Stories

In [96]:
from keras.preprocessing.sequence import pad_sequences

In [97]:
def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
    """Convert (story, query, answer) samples to padded integer arrays.

    Args:
        data: iterable of (story, query, answer), where story is a list of
            token lists, query is a token list and answer is a single token.
        word_idx: mapping from token to integer id.
        story_maxlen: `maxlen` passed to pad_sequences for story sentences.
        query_maxlen: `maxlen` passed to pad_sequences for queries.

    Returns:
        Tuple (X, Xq, Y):
        * X  -- list with one pad_sequences() result per story,
        * Xq -- pad_sequences() result over all queries,
        * Y  -- numpy array of answer ids.
    """
    # Single pass over the samples: look up the id of every token
    encoded = [
        (
            [[word_idx[w] for w in sentence] for sentence in story],
            [word_idx[w] for w in query],
            word_idx[answer],
        )
        for story, query, answer in data
    ]

    X = [pad_sequences(sentences, maxlen=story_maxlen) for sentences, _, _ in encoded]
    Xq = pad_sequences([query_ids for _, query_ids, _ in encoded], maxlen=query_maxlen)
    Y = np.array([answer_id for _, _, answer_id in encoded])

    return (X, Xq, Y)

In [98]:
# Vectorize both splits with the same vocabulary and the same length limits.
x_train, q_train, y_train = vectorize_stories(train_stories, word_idx, story_maxlen, query_maxlen)
x_test, q_test, y_test = vectorize_stories(test_stories, word_idx, story_maxlen, query_maxlen)

Pad every story to the same number of sentences (the sentences themselves are already padded to a common length) and stack the stories into a single array

In [99]:
def stack_inputs(x, maxsens=None, maxlen=None):
    """Pad every story to the same number of sentences and stack the result.

    Args:
        x: sequence of 2-D integer arrays, one per story, each of shape
            (n_sentences, maxlen).
        maxsens: number of sentence rows each story is padded to. Defaults to
            the notebook-level `story_maxsens`.
        maxlen: width of the all-zero padding rows; must equal the second
            dimension of the arrays in `x`. Defaults to the notebook-level
            `story_maxlen`.

    Returns:
        A new array of shape (len(x), maxsens, maxlen). Rows of zeros are
        appended after each story's own sentences. `x` itself is left
        unmodified.
    """
    if maxsens is None:
        maxsens = story_maxsens
    if maxlen is None:
        maxlen = story_maxlen
    # Build fresh arrays instead of assigning back into `x`, so the caller's
    # list is not silently overwritten.
    padded = [
        np.concatenate([sentences,
                        np.zeros((maxsens - sentences.shape[0], maxlen), 'int')])
        for sentences in x
    ]
    return np.stack(padded)

In [100]:
# Replace the per-story lists with single stacked arrays.
x_train = stack_inputs(x_train)
x_test = stack_inputs(x_test)

In [101]:
# Sanity check: display the shapes of the stacked story arrays.
x_train.shape, x_test.shape

((10000, 10, 8), (1000, 10, 8))

### Sentence Embeddings (Embedding A)

In [102]:
from keras_tqdm import TQDMNotebookCallback
from keras.layers import TimeDistributed, Embedding, Lambda, Input, Reshape, dot, Activation
import keras.backend as K

In [103]:
# Size of every embedding vector used by the model below.
emb_dim = 20
# Extra keyword arguments that are unpacked into fit() when training.
parms = {'verbose': 2, 'callbacks': [TQDMNotebookCallback(leave_inner=False)]}

##### Bag-of-words (BoW) representation
Create sentence embeddings by simply adding up word embeddings.

In [104]:
def emb_sent_bow(x):
    """Build a bag-of-words sentence embedding on top of tensor `x`.

    Every word id is embedded with a newly created Embedding layer (so two
    calls do not share weights), then the word vectors are summed over axis 2.

    Args:
        x: Keras tensor of word ids -- assumed to be shaped
            (batch, sentences, words); TODO confirm against callers.

    Returns:
        Keras tensor with the word axis summed away.
    """
    # embed each word
    word_embedding = Embedding(vocab_size, emb_dim)
    per_word = TimeDistributed(word_embedding)(x)
    # sum over words axis=2
    sum_over_words = Lambda(lambda t: K.sum(t, 2))
    return sum_over_words(per_word)

In [105]:
# Story input: one row of word ids per sentence.
inp_story = Input((story_maxsens, story_maxlen))
# Embedding A: one bag-of-words vector per sentence.
emb_story = emb_sent_bow(inp_story)
# Display both shapes as a check.
inp_story.shape, emb_story.shape

(TensorShape([Dimension(None), Dimension(10), Dimension(8)]),
 TensorShape([Dimension(None), Dimension(10), Dimension(20)]))

### Queries Embeddings (Embedding B)

In [106]:
# Query input: a single sequence of word ids.
inp_query = Input((query_maxlen,))
# Embedding B: embed every query word ...
emb_q = Embedding(vocab_size, emb_dim)(inp_query)
# ... and sum over the word axis to get one vector per query.
emb_q = Lambda(lambda q: K.sum(q, 1))(emb_q)
# Add a length-1 axis so the query lines up with the per-sentence story embeddings.
emb_q = Reshape((1, emb_dim))(emb_q)
# Display both shapes as a check.
inp_query.shape, emb_q.shape

(TensorShape([Dimension(None), Dimension(4)]),
 TensorShape([Dimension(None), Dimension(1), Dimension(20)]))

### Memory Network

##### Inner Product
For each story, take dot product of every sentence embedding with story's query embedding.
* The result is a list of numbers, each proportional to how similar the corresponding sentence is to the query

In [107]:
# Contract the embedding axis (axis 2) of the story and query embeddings:
# one similarity score per sentence.
x = dot([emb_story, emb_q], axes=2)

#### Softmax (weights)
Pass the scores through a softmax activation to turn the sentence–query similarities into probabilities

In [108]:
# Flatten to one score per sentence so the softmax normalizes across sentences.
x = Reshape((story_maxsens,))(x)
x = Activation('softmax')(x)
# Restore a trailing length-1 axis for the weighted combination further down.
weights = Reshape((story_maxsens, 1))(x)
weights.shape

TensorShape([Dimension(None), Dimension(10), Dimension(1)])

### Response Embeddings and Network (Embedding C)

In [109]:
from keras.layers import Dense

In [110]:
# Embedding C: a second sentence embedding of the same story input.
# emb_sent_bow creates a new Embedding layer on every call, so these weights
# are separate from those behind emb_story.
emb_c = emb_sent_bow(inp_story)

Take weighted average of embeddings, using the softmax output from the Memory Network as weights

In [111]:
# Contract the sentence axis (axis 1): combine the sentence embeddings of
# emb_c using the softmax weights.
x = dot([weights, emb_c], axes=1)

Pass weighted average through dense layer and classify with softmax

In [112]:
# Flatten to a single emb_dim-sized vector per sample.
response = Reshape((emb_dim,))(x)
# Classify over the whole vocabulary: one output unit per vocabulary entry.
response = Dense(vocab_size, activation='softmax')(response)

### Train

In [113]:
from keras.models import Model
from keras.optimizers import RMSprop

In [114]:
# Full model: (story, query) inputs -> softmax over the vocabulary.
answer = Model([inp_story, inp_query], response)

In [115]:
# The targets are integer word ids (not one-hot vectors), hence the *sparse*
# categorical cross-entropy loss.
answer.compile(optimizer=RMSprop(lr=1e-2),
               loss='sparse_categorical_crossentropy',
               metrics=['accuracy'])

In [116]:
# Train on the training split; the test split is passed as validation data,
# so the val_* metrics reported per epoch are computed on it.
hist = answer.fit([x_train, q_train],
                  y_train,
                  **parms,
                  epochs=8,
                  batch_size=32,
                  validation_data=([x_test, q_test], y_test))

Train on 10000 samples, validate on 1000 samples


Epoch 1/8
0s - loss: 0.4212 - acc: 0.8560 - val_loss: 1.2537e-04 - val_acc: 1.0000


Epoch 2/8
0s - loss: 0.0049 - acc: 0.9987 - val_loss: 7.5642e-06 - val_acc: 1.0000


Epoch 3/8
0s - loss: 0.0158 - acc: 0.9973 - val_loss: 4.0574e-05 - val_acc: 1.0000


Epoch 4/8
0s - loss: 0.0015 - acc: 0.9993 - val_loss: 3.3577e-04 - val_acc: 1.0000


Epoch 5/8
0s - loss: 0.0093 - acc: 0.9989 - val_loss: 3.2679e-06 - val_acc: 1.0000


Epoch 6/8
0s - loss: 0.0044 - acc: 0.9989 - val_loss: 0.0232 - val_acc: 0.9940


Epoch 7/8
0s - loss: 0.0082 - acc: 0.9985 - val_loss: 9.8609e-05 - val_acc: 1.0000


Epoch 8/8
0s - loss: 3.4484e-06 - acc: 1.0000 - val_loss: 3.1739e-06 - val_acc: 1.0000

