In [214]:
'''
Prepares the babi-memnn network.
'''
from __future__ import print_function
from keras.models import Sequential
from keras.layers import *
from keras.layers import Input
from keras.optimizers import RMSprop
from keras.utils.data_utils import get_file
import keras.backend as K
import numpy as np
import itertools
from keras.models import *
import random
import sys
from glob import glob
import re
from utils import *
import tqdm
import os
from keras_tqdm import TQDMNotebookCallback
np.set_printoptions(precision=4, threshold=20)
cfg = K.tf.ConfigProto(gpu_options={'allow_growth': True})
K.set_session(K.tf.Session(config=cfg))

### Prepare Dataset

In [215]:
class Corpus:
    def __init__(self):
        self.word_idx_dict = {}
        self.uniq_word_cnt = 0
    
    def update_vocab(self, tokens):
        for word in tokens:
            if word not in self.word_idx_dict:
                self.word_idx_dict[word] = self.uniq_word_cnt
                self.uniq_word_cnt += 1

    def words_idx(self, tokens):
        return [self.word_idx_dict[token] for token in tokens]
    
    def tokenize(self, sent):
        return [x.strip() for x in re.split('(\W+)?', sent) if x.strip()]

    
class Examples:
    
    def __init__(self, c):
        self.data = []
        self.c = c
        
    def add(self, example_lines):
        memories = []
        memories_txt = []
        qa = []
        for eg_line in example_lines:
            if "\t" not in eg_line: #normal memory
                eg_line = c.tokenize(eg_line)
                c.update_vocab(eg_line)
                mem_id, memory = eg_line[0], c.words_idx(eg_line[1:])
                memories.append(c.words_idx(eg_line))
                memories_txt.append(eg_line)
            else: #question line
                ques, ans, hints = eg_line.split("\t")
                ques = c.tokenize(ques)[1:]
                c.update_vocab(ques)
                ans = c.tokenize(ans)
                c.update_vocab(ans)
                self.data.append(([m for m in memories],
                                  c.words_idx(ques), c.words_idx(ans), [m for m in memories_txt]))

In [216]:
from keras.preprocessing.sequence import pad_sequences
from keras.utils.np_utils import *
def get_dataset_from_stories(examples, max_memory_len, max_num_memories, max_ques_len, vocab_size):
    m, q, a = [], [], []
    for (memories, ques, ans, v) in examples:
        memories= pad_sequences(memories, maxlen=max_memory_len)
        memories = np.concatenate([memories, np.zeros((max_num_memories - memories.shape[0],
                                                       max_memory_len), 'int') ])
        m.append(memories)
        q.append(ques)
        ans_vec = np.zeros((vocab_size))
        ans_vec[ans] = 1
        a.append(ans_vec)
    return np.array(m), pad_sequences(q, maxlen=max_ques_len), np.array(a)


In [217]:
data = "data/tasks_1-20_v1-2/en-10k/"

c = Corpus()
word_id = {}
train_files = glob(data + "/qa2_two-supporting-facts_train.txt")
test_files = glob(data + "/qa2_two-supporting-facts_test.txt")
#train_files = glob(data + "/qa1_single-supporting-fact_train.txt")
#test_files = glob(data + "/qa1_single-supporting-fact_test.txt")

word_idx = {}
uniq_word_cnt = 0

"Processing train files"
def process_files(file_paths, c):
    examples = Examples(c)
    for file_path in file_paths:
        print("Reading {0}".format(file_path))
        with open(file_path, "r") as f:
            eg_lines = [next(f).strip()]
            for line in f:
                if int(line.split(" ", 1)[0]) == 1: #new story starts
                    examples.add(eg_lines)
                    eg_lines = [line.strip()]
                else:
                    eg_lines.append(line.strip())
            if len(eg_lines) > 0:
                examples.add(eg_lines)
    return examples.data

print("Processing training files")
train_examples = process_files(train_files, c)
print("Processing test files")
test_examples = process_files(test_files, c)
all_examples = train_examples + test_examples
max_num_memories = max([len(memories[0]) for memories in all_examples])
len(train_examples), len(test_examples), c.uniq_word_cnt, max_num_memories

Processing training files
Reading data/tasks_1-20_v1-2/en-10k//qa2_two-supporting-facts_train.txt


  return _compile(pattern, flags).split(string, maxsplit)


Processing test files
Reading data/tasks_1-20_v1-2/en-10k//qa2_two-supporting-facts_test.txt


(10000, 1000, 123, 88)

In [218]:
train_examples[1400]

([[0, 32, 44, 3, 4, 27, 6],
  [7, 8, 58, 4, 36, 14, 6],
  [11, 8, 40, 41, 4, 42, 14, 6],
  [15, 16, 17, 3, 4, 22, 6],
  [19, 16, 17, 20, 3, 4, 10, 6],
  [21, 1, 9, 3, 4, 30, 6],
  [57, 8, 17, 3, 4, 27, 6],
  [26, 8, 60, 4, 36, 14, 6]],
 [23, 24, 4, 36, 25],
 [27],
 [['1', 'Daniel', 'travelled', 'to', 'the', 'office', '.'],
  ['2', 'Sandra', 'grabbed', 'the', 'milk', 'there', '.'],
  ['3', 'Sandra', 'picked', 'up', 'the', 'apple', 'there', '.'],
  ['4', 'John', 'went', 'to', 'the', 'garden', '.'],
  ['5', 'John', 'went', 'back', 'to', 'the', 'bedroom', '.'],
  ['6', 'Mary', 'journeyed', 'to', 'the', 'hallway', '.'],
  ['7', 'Sandra', 'went', 'to', 'the', 'office', '.'],
  ['8', 'Sandra', 'discarded', 'the', 'milk', 'there', '.']])

In [219]:
min([len(m[3]) for m in all_examples])

2

In [220]:
max_num_memories = max([len(example[0]) for example in all_examples])
max_memory_len = max([len(memory) for example in all_examples for memory in example[0]])
max_ques_len = max([len(example[1]) for example in all_examples])
vocab_size = c.uniq_word_cnt
len(train_examples), len(test_examples), c.uniq_word_cnt, max_num_memories, max_memory_len, max_ques_len

(10000, 1000, 123, 88, 8, 5)

In [221]:
m_train, q_train, a_train = get_dataset_from_stories(train_examples,
                                                     max_memory_len,
                                                     max_num_memories,
                                                     max_ques_len,
                                                     vocab_size=c.uniq_word_cnt)

m_test, q_test, a_test = get_dataset_from_stories(test_examples,
                                                     max_memory_len,
                                                     max_num_memories,
                                                     max_ques_len,
                                                     vocab_size=c.uniq_word_cnt)

In [222]:
m_test[2]

array([[ 0,  0,  1, ..., 36, 14,  6],
       [ 0,  7, 16, ...,  4, 10,  6],
       [11,  8, 17, ...,  4, 18,  6],
       ..., 
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

In [223]:
print(m_train.shape)
print(q_train.shape)
print(a_train.shape)
print(m_test.shape)
print(q_test.shape)
print(a_test.shape)

(10000, 88, 8)
(10000, 5)
(10000, 123)
(1000, 88, 8)
(1000, 5)
(1000, 123)


In [224]:
m_train[3] 

array([[ 0,  0,  1, ...,  4,  5,  6],
       [ 0,  7,  8, ...,  4, 10,  6],
       [ 0, 11,  1, ..., 13, 14,  6],
       ..., 
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0],
       [ 0,  0,  0, ...,  0,  0,  0]])

### m_i

In [225]:
mem_in = Input(shape=(max_num_memories, max_memory_len))
x = TimeDistributed(Embedding(input_dim=vocab_size, output_dim=n_hidden))(mem_in)
m_i = Lambda(lambda xx: K.sum(xx, 2))(x)
mem_in.shape, m_i.shape

(TensorShape([Dimension(None), Dimension(88), Dimension(8)]),
 TensorShape([Dimension(None), Dimension(88), Dimension(30)]))

### u

In [226]:
query_input = Input(shape=(max_ques_len,))
#u = Reshape(target_shape=(1, max_ques_len))(query_input)
#u = TimeDistributed(Embedding(input_dim=vocab_size, output_dim=n_hidden))(u)
u = Embedding(input_dim=vocab_size, output_dim=n_hidden)(query_input)
print(u.shape)
u = Lambda(lambda x : K.sum(x, 1))(u)
u = Reshape(target_shape=(1, n_hidden))(u)
#u = Lambda(lambda x : K.sum(x, 2))(u)
query_input.shape, u.shape

(?, 5, 30)


(TensorShape([Dimension(None), Dimension(5)]),
 TensorShape([Dimension(None), Dimension(1), Dimension(30)]))

### p

In [227]:
p = dot([m_i, u], axes=2)
p = Reshape((max_num_memories,))(p)
print(p.shape)
p = Activation(activation='softmax')(p)
p = Reshape((max_num_memories,1))(p)
#p = Lambda(lambda x: K.tile(x, [1, 1, 100]))(p)
p.shape


(?, 88)


TensorShape([Dimension(None), Dimension(88), Dimension(1)])

### c_i

In [228]:
x = TimeDistributed(Embedding(vocab_size, n_hidden))(mem_in)
c_i = Lambda(lambda xx: K.sum(xx, 2))(x)
c_i.shape

TensorShape([Dimension(None), Dimension(88), Dimension(30)])

### o

In [229]:
#o = multiply([c_i, p])
o = dot([c_i, p], axes=1)
o = Reshape(target_shape=(1,n_hidden))(o)
#o = Lambda(lambda oo: K.sum(oo, 1))(o)
o

<tf.Tensor 'reshape_191/Reshape:0' shape=(?, 1, 30) dtype=float32>

### a

In [230]:
a_in = Lambda(lambda ou: sum([ou[0], ou[1]]))([o, u])
print(a_in.shape)
#a_in = Reshape(target_shape=(n_hidden,))(a_in)
op = Reshape(target_shape=(n_hidden,))(a_in)
op = Dense(vocab_size, activation='softmax')(op)
op

(?, 1, 30)


<tf.Tensor 'dense_25/Softmax:0' shape=(?, 123) dtype=float32>

In [231]:
mem_in, query_input, op

(<tf.Tensor 'input_75:0' shape=(?, 88, 8) dtype=float32>,
 <tf.Tensor 'input_76:0' shape=(?, 5) dtype=float32>,
 <tf.Tensor 'dense_25/Softmax:0' shape=(?, 123) dtype=float32>)

In [232]:
parms = {'verbose': 2, 'callbacks': [TQDMNotebookCallback(leave_inner=False)]}

In [233]:
babi_memmn = Model([mem_in, query_input], op)

In [234]:
babi_memmn.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [235]:
K.set_value(babi_memmn.optimizer.lr, 1e-2)
babi_memmn.fit([m_train, q_train], a_train, **parms, batch_size=32, epochs=5,
               validation_data=([m_test, q_test], a_test))

Train on 10000 samples, validate on 1000 samples
Epoch 1/5
4s - loss: 1.7233 - acc: 0.2695 - val_loss: 1.5403 - val_acc: 0.3450
Epoch 2/5
3s - loss: 1.5053 - acc: 0.3653 - val_loss: 1.4843 - val_acc: 0.3710
Epoch 3/5
4s - loss: 1.4913 - acc: 0.3746 - val_loss: 1.4921 - val_acc: 0.3640
Epoch 4/5
3s - loss: 1.4874 - acc: 0.3807 - val_loss: 1.5184 - val_acc: 0.3670
Epoch 5/5
3s - loss: 1.4844 - acc: 0.3814 - val_loss: 1.5361 - val_acc: 0.3550



<keras.callbacks.History at 0x179cd36e358>

## 2 hop network

### First hop

In [237]:
first_embedding = TimeDistributed(Embedding(vocab_size,output_dim=n_hidden))


mem_input = Input(shape=(max_num_memories, max_memory_len))

query_input = Input(shape=(max_ques_len,))

x = first_embedding(mem_input)
print(np.array(first_embedding.get_weights()).shape)

print(type(first_embedding.layer))
m_i = Lambda(lambda xx: K.sum(xx, 2))(x)


print(m_i.shape)
u = first_embedding.layer(query_input)
print(u.shape)
u = Lambda(lambda x : K.sum(x, 1))(u)
print(u.shape)

h = Dense(n_hidden)

(1, 123, 30)
<class 'keras.layers.embeddings.Embedding'>
(?, 88, 30)
(?, 5, 30)
(?, 30)


### Second hop

In [238]:
def hop(m_i, u):
    x = TimeDistributed(Embedding(vocab_size, n_hidden))(mem_input)
    c_i = Lambda(lambda xx: K.sum(xx, 2))(x)
    u = Reshape((1, n_hidden))(u)
    p = dot([m_i, u], axes=2)
    p = Reshape((max_num_memories,))(p)
    p = Activation(activation='softmax')(p)
    p = Reshape((max_num_memories,1))(p)
    o = dot([c_i, p], axes=1)
    #o = Reshape(target_shape=(1,n_hidden))(o)
    o = Reshape(target_shape=(n_hidden,))(o)
    u = Reshape(target_shape=(n_hidden,))(u)
    u = h(u)
    u2 = Lambda(lambda ou: sum([ou[0], ou[1]]))([o, u])
    return u2, c_i

In [239]:
u2, c1 = hop(m_i, u)
u3, _ = hop(c1, u2)
u3 = Reshape((n_hidden,))(u3)
op = Dense(vocab_size, activation='softmax')(u3)

In [240]:
babi_memmn2 = Model([mem_input, query_input], op)


In [241]:
babi_memmn2.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
K.set_value(babi_memmn.optimizer.lr, 5e-3)
babi_memmn2.fit([m_train, q_train], a_train, **parms, batch_size=32, epochs=8,
               validation_data=([m_test, q_test], a_test))

Train on 10000 samples, validate on 1000 samples
Epoch 1/8
6s - loss: 1.9472 - acc: 0.1689 - val_loss: 1.7751 - val_acc: 0.2580
Epoch 2/8
5s - loss: 1.7384 - acc: 0.2600 - val_loss: 1.6802 - val_acc: 0.3400
Epoch 3/8
5s - loss: 1.6227 - acc: 0.3420 - val_loss: 1.5845 - val_acc: 0.3490
Epoch 4/8
5s - loss: 1.5229 - acc: 0.3750 - val_loss: 1.5333 - val_acc: 0.3690
Epoch 5/8
5s - loss: 1.4866 - acc: 0.3838 - val_loss: 1.4886 - val_acc: 0.3770
Epoch 6/8
5s - loss: 1.4694 - acc: 0.3860 - val_loss: 1.4874 - val_acc: 0.3800
Epoch 7/8
5s - loss: 1.4542 - acc: 0.3915 - val_loss: 1.5079 - val_acc: 0.3750
Epoch 8/8
5s - loss: 1.4472 - acc: 0.3985 - val_loss: 1.4884 - val_acc: 0.3670



<keras.callbacks.History at 0x179b10b7eb8>

NameError: name 'Model' is not defined