# Dual LSTM with official sample

In [1]:
from mini_batch_helper import MiniBatch
import pandas as pd
import keras
import numpy as np
import re
import jieba
jieba.set_dictionary('datas/dict/dict.txt.big')
jieba.load_userdict('datas/dict/edu_dict.txt')

Using TensorFlow backend.
Building prefix dict from /home/sunset/word_contest/datas/dict/dict.txt.big ...
Loading model from cache /tmp/jieba.u849ecfdca27003d306f39ca004b82b5b.cache
Loading model cost 1.211 seconds.
Prefix dict has been built succesfully.


## Word Embedding
* [keras 使用 word2vec 當 embedding 的教學](http://ben.bolte.cc/blog/2016/gensim.html)

In [2]:
# Word2Vec model function to output as keras embedding layer
from gensim.models import word2vec
word2vec_model = word2vec.Word2Vec.load('models/word2vec_250.model.bin')
# vocab = word2vec_model.wv.vocab
# help(word2vec_model.wv.get_embedding_layer)

### 將 word2vec_model 的 vocab 存成 dictionary 'word2id'

In [3]:
# Save the vocab
# Sequences have various lengths, so let index '0' serve as padding  -> all index+1
word2id = dict([(k, v.index+1) for k, v in word2vec_model.wv.vocab.items()])
print('given word string, find index in word2vec_model vocab... 然後 -> ', word2id['然後'])

given word string, find index in word2vec_model vocab... 然後 ->  95


In [4]:
id2word = dict([(v, k) for k, v in word2id.items()])
print('given index, find word string in word2vec_model vocab... 95 -> ', id2word[95])

given index, find word string in word2vec_model vocab... 95 ->  然後


## Read in official sample data

In [5]:
sample = pd.read_csv('datas/sample_test_data.txt')
sample

Unnamed: 0,id,dialogue,options,answer
0,0,A:你這麼快就知道了,B:全家就是你家\tB:付出不是浪費時間\tB:願意為社會付出的人太少了\tB:我都是一個人...,4
1,1,A:每一支冰塊都不同 水質不同 硬度不同,A:紋路不同\tA:也是你唯一發洩情緒的辦法吧\tA:還記不記得那次你在我家巷口\tA:像拼...,0
2,2,A:這樣沖這麼一大塊網子 要多久時間,B:有更舒適的環境\tB:感覺是一隻很貼心的貓咪\tB:我相信一定可以成功的啦\tB:可以講...,5
3,3,A:兒子啊 都幾點了 你還不睡,B:沒關係啦 我來幫你\tB:我在背書啦 就快背好了\tB:造成誤會 不是很可惜嗎\tB:女...,1
4,4,A:孩子還在念書的時候 看到其他小朋友都有父母 而自己沒有媽媽,A:那孩子就是突然\tA:就是說\tA:禮拜一到學校\tA:媽媽還不會開車\tA:就會問我媽...,4
5,5,A:那牠吐絲這三天三夜\tA:你們不就都要一直在這邊守護著牠囉,B:涂先生\tB:那這些蠶寶寶在這邊吐絲\tB:要吐多久啊\tB:溫度三十多度的話\tB:對...,4
6,6,A:奇怪了 濂僑跑到哪兒去了呢 濂僑 你怎麼啦,B:我可能吃壞肚子了 肚子好痛喔\tB:另外一種比法是 一星期有七天\tB:為什麼不可以 同...,0
7,7,A:是什麼讓玉屏的改變這麼大啊,B:當然是紀老師的魔法囉\tB:都交給這位大力士吧\tB:粗重的交給我就對了\tB:我是八年...,0
8,8,A:曉書啊 你手上拿的 該不是最新的遊戲機吧,B:你不要小看這個扯鈴\t B:互相幫忙一下嘛\tB:一個週末出去走走 沒有關係的啦\tB:...,3
9,9,A:失戀的確會讓人家很難過,A:所以你要縮短認真難過的時間\tA:他走了十三個年頭\tA:獲選之後當然是很開心囉\tA:...,0


In [6]:
# Extract sample test datas
x1 = [[s for s in re.sub('[A-Z]:', '\t', _).split('\t') if len(s.strip())] for _ in sample.dialogue.values]
x2 = [[s for s in re.sub('[A-Z]:', '\t', _).split('\t') if len(s.strip())] for _ in sample.options.values]

# Tokenize
x1 = np.array([list(jieba.cut(' '.join(_))) for _ in x1])
x2 = np.array([[list(jieba.cut(s)) for s in _] for _ in x2])
y = sample.answer.values
assert(np.sum([len(_)!=6 for _ in x2]) == 0)

# Create MiniBatch class
# data_loader = MiniBatch(x1, x2, y)

### Convert string list to np array of index

In [7]:
# Find the length of longest sequence, we shall pad all sentences to this length
max_seq_len = 0
for x in x1:
    max_seq_len = max(max_seq_len, len(x))
    
for xs in x2:
    for x in xs:
        
        max_seq_len = max(max_seq_len, len(x))
        
print('The longest sequnce in training data has %d words' %max_seq_len)

The longest sequnce in training data has 44 words


In [8]:
new_x1 = []
for sentence in x1:
    tmp_sentence = []
    # Converd word to index
    for word in sentence:
        if word in word2id:
            tmp_sentence.append(word2id[word])
        # else:
            # print('Cannot find %s in vocab: ' %word)
    
    # Padding all sequences to same length
    len_to_pad = max_seq_len - len(tmp_sentence)
    tmp_sentence.extend([0] * len_to_pad)
    new_x1.append(tmp_sentence)
    
x1 = np.array(new_x1)
print(x1.shape)

(50, 44)


In [9]:
new_x2 = []
for sample in x2:
    for sentence in sample:
        tmp_sentence = []
        for word in sentence:
            if word in word2id:
                tmp_sentence.append(word2id[word])

        # Padding all sequences to same length
        len_to_pad = max_seq_len - len(tmp_sentence)
        tmp_sentence.extend([0] * len_to_pad)
        new_x2.append(tmp_sentence)
    
x2 = np.array(new_x2)
print(x2.shape)
assert(x2.shape[-1] == max_seq_len)

(300, 44)


### one sample has 6 response, so duplicate x1, y 

In [10]:
num_responses = 6
x1 = np.repeat(x1, num_responses, axis=0)
y = np.repeat(y, num_responses, axis=0)

In [11]:
print(x1.shape)
print(x2.shape)
print(y.shape)

(300, 44)
(300, 44)
(300,)


## Model ( keras )

### Input
* 坑： ```Input(shape=```... 此處 shape 不包含 batch

In [12]:
# Keras implementation of Dual LSTM 
# Inputs
from keras.engine import Input

timesteps = x1.shape[1]
input_context = Input(shape=(timesteps, ), dtype='float32', name='input_context')
input_responses = Input(shape=(timesteps, ), dtype='float32', name='input_response')

### Embedding layer: initialized with keras word2vec weight matrix

In [13]:
from gensim.models import word2vec

embedding_weights = np.load(open('models/word2vec_250.model.bin.wv.syn0.npy', 'rb'))
word2vec_dim = embedding_weights.shape[0]
timesteps = x1.shape[1]
embedding_layer = keras.layers.Embedding(input_dim=embedding_weights.shape[0], 
                                         output_dim=embedding_weights.shape[1], 
                                         mask_zero=True, # '0' serve as padding
                                         weights=[embedding_weights],
                                         input_length=timesteps,
                                        )

In [14]:
# Release unused memory comsumed model
import time
import gc
del(embedding_weights, word2vec_model)
time.sleep(1)
gc.collect()

3

In [15]:
# Embedding
embedded_contex = embedding_layer(input_context)
embedded_responses = embedding_layer(input_responses)

In [16]:
print(embedding_layer.input_dim)
print(embedding_layer.input_length)
print(embedding_layer.output_dim)

614202
44
250


## ! Embedding 後可以考慮加一層 Dropout

### shared LSTM layer

In [17]:
from keras.layers.recurrent import LSTM

# input of LSTM (batch, timesteps=44, word_vectors=250)
lstm_dim = 256  # hyperparameter
lstm_layer = LSTM(lstm_dim)
lstm_context = lstm_layer(embedded_contex)
lstm_responses = lstm_layer(embedded_responses)
print('lstm_context:',lstm_context.shape)

lstm_context: (?, 256)


### $\sigma$(c * M * r +b)

* Keras Model only accept keras tensor as output, so lets create our own keras  layer.

In [18]:
'''
import keras.backend as K
from keras.layers.core import Lambda
from keras.engine.topology import Layer

class MyLayer(Layer):

    #def __init__(self, kernel_initializer='truncated_normal',  bias_initializer='zeros', **kwargs):
    def __init__(self, **kwargs):
        self.output_dim = 1
        self.kernel_initializer = 'truncated_normal'#kernel_initializer
        self.bias_initializer = 'zeros'#bias_initializer
        super(MyLayer, self).__init__(**kwargs)
        

    def build(self, input_shape):
        # Create a trainable weight variable for this layer.
        self.M = self.add_weight(name='M', 
                                      shape=(lstm_dim, lstm_dim),
                                      initializer=self.kernel_initializer,
                                      trainable=True)
        self.b = self.add_weight(name='b', shape=(1,), initializer=self.bias_initializer, trainable=True)
        super(MyLayer, self).build(input_shape)  # Be sure to call this somewhere!

    def call(self, inputs):
        c = inputs[0]
        r = inputs[1]
        # c*M: (batch, 256) * (256, 256) -> (batch, 256)
        out = K.dot(c, self.M)
        
        # c_M*r (batch, 1, 256) * (batch, 1, 256)  -> (batch, 1) 
        out = K.expand_dims(c, axis=1)
        r = K.expand_dims(r, axis=-1)
        out = K.batch_dot(out, r)
        out = K.squeeze(out, axis=2)

        # c_M_r + b
        out = out + self.b

        # sigmoid
        return K.sigmoid(out)

    def compute_output_shape(self, input_shape):
        # return (-1, self.output_dim)
        # return (input_shape[0], self.output_dim)
        return (input_shape[0][0], self.output_dim)


out = MyLayer(kernel_initializer='truncated_normal')([lstm_context, lstm_responses])
K.is_keras_tensor(out)
'''

"\nimport keras.backend as K\nfrom keras.layers.core import Lambda\nfrom keras.engine.topology import Layer\n\nclass MyLayer(Layer):\n\n    #def __init__(self, kernel_initializer='truncated_normal',  bias_initializer='zeros', **kwargs):\n    def __init__(self, **kwargs):\n        self.output_dim = 1\n        self.kernel_initializer = 'truncated_normal'#kernel_initializer\n        self.bias_initializer = 'zeros'#bias_initializer\n        super(MyLayer, self).__init__(**kwargs)\n        \n\n    def build(self, input_shape):\n        # Create a trainable weight variable for this layer.\n        self.M = self.add_weight(name='M', \n                                      shape=(lstm_dim, lstm_dim),\n                                      initializer=self.kernel_initializer,\n                                      trainable=True)\n        self.b = self.add_weight(name='b', shape=(1,), initializer=self.bias_initializer, trainable=True)\n        super(MyLayer, self).build(input_shape)  # Be sure 

In [26]:
import keras.backend as K
from keras.layers.core import Lambda
from keras.engine.topology import Layer

#lstm_context lstm_responses

#M = K.random_normal_variable(shape=(lstm_dim, lstm_dim), mean=0.0, scale=1.0)
def mul(c, M):
    return K.dot(c, M)

#cM = Lambda(lambda x: mul(x, M), output_shape =(256,))(lstm_context)
#K.is_keras_tensor(cM)

cMr = Lambda(lambda x: mul(x, lstm_responses), output_shape =(1,))(lstm_context)
K.is_keras_tensor(cMr)

True

In [27]:
out = cMr

In [30]:
from keras.models import Model
model = Model(inputs=[input_context, input_responses], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
model.fit([x1, x2], y, epochs=2, batch_size=2)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_context (InputLayer)   (None, 44)                0         
_________________________________________________________________
embedding_1 (Embedding)      (None, 44, 250)           153550500 
_________________________________________________________________
lstm_1 (LSTM)                (None, 256)               519168    
_________________________________________________________________
lambda_3 (Lambda)            (None, 1)                 0         
Total params: 154,069,668
Trainable params: 154,069,668
Non-trainable params: 0
_________________________________________________________________


  "This may consume a large amount of memory." % num_elements)


Epoch 1/2


ResourceExhaustedError: OOM when allocating tensor of shape [614202,250] and type float
	 [[Node: Const_138 = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [614202,250] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]

Caused by op 'Const_138', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/sunset/.local/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/sunset/.local/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/sunset/.local/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/sunset/.local/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/sunset/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/sunset/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/sunset/.local/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/sunset/.local/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/sunset/.local/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/sunset/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/sunset/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2808, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/sunset/.local/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-0e7fdb8224e5>", line 5, in <module>
    model.fit([x1, x2], y, epochs=2, batch_size=2)
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 1413, in fit
    self._make_train_function()
  File "/usr/local/lib/python3.5/dist-packages/keras/engine/training.py", line 937, in _make_train_function
    self.total_loss)
  File "/usr/local/lib/python3.5/dist-packages/keras/optimizers.py", line 415, in get_updates
    ms = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
  File "/usr/local/lib/python3.5/dist-packages/keras/optimizers.py", line 415, in <listcomp>
    ms = [K.zeros(K.get_variable_shape(p), dtype=K.dtype(p)) for p in params]
  File "/usr/local/lib/python3.5/dist-packages/keras/backend/tensorflow_backend.py", line 602, in zeros
    return variable(tf.constant_initializer(0., dtype=tf_dtype)(shape),
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/init_ops.py", line 203, in __call__
    verify_shape=verify_shape)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/constant_op.py", line 106, in constant
    attrs={"value": tensor_value, "dtype": dtype_value}, name=name).outputs[0]
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 2506, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1269, in __init__
    self._traceback = _extract_stack()

ResourceExhaustedError (see above for traceback): OOM when allocating tensor of shape [614202,250] and type float
	 [[Node: Const_138 = Const[dtype=DT_FLOAT, value=Tensor<type: float shape: [614202,250] values: [0 0 0]...>, _device="/job:localhost/replica:0/task:0/gpu:0"]()]]


In [None]:
'''
from keras.models import Model
model = Model(inputs=[input_context, input_responses], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
#model.fit([x1, x2], y, epochs=10, batch_size=30)
'''

In [None]:
'''model.fit([x1, x2], y, epochs=1, batch_size=30)'''

In [None]:
'''
from keras.layers.merge import Dot
out = Dot(axes=1)([lstm_context, lstm_responses])

from keras.models import Model
model = Model(inputs=[input_context, input_responses], outputs=out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()
#model.fit([x1, x2], y, epochs=10, batch_size=30)
'''

In [None]:
'''
model.fit([x1, x2], y, epochs=2, batch_size=30)
'''

In [22]:

# Release unused memory comsumed model
import time
import gc
del(model)
time.sleep(1)
gc.collect()


0