In [1]:
import os
import numpy as np
from collections import deque
from keras.models import Model
from keras.layers import Input, GRU, Embedding, Conv1D, MaxPooling1D, GlobalMaxPooling1D, concatenate, Dense

Using TensorFlow backend.


In [2]:
# Context window sizes, counted in characters around the target character.
preMaxlen = 30   # characters taken before the target
postMaxlen = 20  # characters taken after the target

# Training samples; the three lists are kept index-aligned with each other.
pre_context, post_context, target_chars = [], [], []

In [3]:
# Read the whole corpus into one string. The context manager closes the file
# handle as soon as the text has been read.
with open('../input/exampleText', 'r', encoding='utf8') as corpus_file:
    lines = corpus_file.read()

# Every position with a full window on both sides yields one training sample.
# The lists are rebuilt from scratch (instead of appended to) so that
# re-running this cell does not duplicate samples.
target_positions = range(preMaxlen, len(lines) - postMaxlen)
pre_context = [lines[i - preMaxlen:i] for i in target_positions]
# The following context is stored reversed: the character nearest to the
# target ends up last in the string.
post_context = [lines[i + 1:i + postMaxlen + 1][::-1] for i in target_positions]
target_chars = [lines[i] for i in target_positions]

In [4]:
# Vocabulary: every distinct character of the corpus, in sorted order.
chars = sorted(set(lines))
# Lookup from a character to its position in `chars`.
char_indices = {ch: idx for idx, ch in enumerate(chars)}

In [5]:
# Number of samples on each context side, and the vocabulary size.
num_pre_sentences, num_post_sentences = len(pre_context), len(post_context)
num_chars = len(chars)

In [6]:
# Model inputs/targets: one row per sample, each cell holding the vocabulary
# index of a character.
x = np.zeros((num_pre_sentences, preMaxlen), dtype=np.float32)
reverse_x = np.zeros((num_post_sentences, postMaxlen), dtype=np.float32)
y = np.zeros((num_pre_sentences,), dtype=np.float32)

# Preceding context -> x (a whole row is written at once).
for row, sentence in enumerate(pre_context):
    x[row, :len(sentence)] = [char_indices[ch] for ch in sentence]

# Target character -> y, one entry per preceding-context sample.
for row in range(len(pre_context)):
    y[row] = char_indices[target_chars[row]]

# Following context (already reversed) -> reverse_x.
for row, sentence in enumerate(post_context):
    reverse_x[row, :len(sentence)] = [char_indices[ch] for ch in sentence]

In [7]:
# Two-input model that predicts the target character from the characters
# before it ('pre') and after it ('post').
#
# Preceding context: fed to two parallel branches, a GRU stack and a Conv1D
# stack. Each branch creates its own Embedding layer (weights are not shared).
# The input names 'pre' and 'post' are the dict keys used when calling
# fit/predict on this model.
inp1 = Input(shape=(preMaxlen,), dtype = 'float32', name='pre')
# Branch 1: embedding -> GRU returning the full sequence -> GRU returning only
# its final output.
model1 = Embedding(num_chars, 128, input_length=preMaxlen)(inp1)
model1 = GRU(256, return_sequences=True)(model1)
model1 = GRU(128)(model1)

# Branch 2: embedding -> two ReLU convolutions with a pooling step in between,
# reduced to one vector by a global max over the sequence axis.
model2 = Embedding(num_chars, 128, input_length=preMaxlen)(inp1)
model2 = Conv1D(64, 7, activation='relu')(model2)
model2 = MaxPooling1D(2)(model2)
model2 = Conv1D(32, 5, activation='relu')(model2)
model2 = GlobalMaxPooling1D()(model2)

# Following context: convolutional branch only, with smaller kernels than the
# preceding-context convolution branch.
inp2 = Input(shape=(postMaxlen,), dtype = 'float32', name='post')
model3 = Embedding(num_chars, 128, input_length=postMaxlen)(inp2)
model3 = Conv1D(64, 5, activation='relu')(model3)
model3 = MaxPooling1D(2)(model3)
model3 = Conv1D(32, 3, activation='relu')(model3)
model3 = GlobalMaxPooling1D()(model3)

# Join the three branch vectors along the feature axis and classify over the
# whole vocabulary.
combine = concatenate([model1, model2, model3], axis = -1)
output = Dense(num_chars, activation='softmax')(combine)
model = Model([inp1, inp2], output)

# The sparse loss takes integer class indices as targets, so the targets do
# not need to be one-hot encoded.
model.compile(loss = 'sparse_categorical_crossentropy', optimizer='rmsprop')
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
pre (InputLayer)                (None, 30)           0                                            
__________________________________________________________________________________________________
post (InputLayer)               (None, 20)           0                                            
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 30, 128)      579072      pre[0][0]                        
__________________________________________________________________________________________________
embedding_3 (Embedding)         (None, 20, 128)      579072      post[0][0]                       
_____________________________________

In [8]:
# Train on both context inputs, keyed by the names of the model's Input layers.
model.fit(
    x={'pre': x, 'post': reverse_x},
    y=y,
    batch_size=1024,
    epochs=2,
    verbose=2,
)

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/2
 - 79s - loss: 5.4565
Epoch 2/2
 - 72s - loss: 4.7034


<keras.callbacks.History at 0x7f0d8c51ad68>

In [9]:
def sample(preds, temperature=1.0):
    """Draw one index from a probability vector after temperature scaling.

    Args:
        preds: 1-D array-like of non-negative probabilities (for example a
            softmax output).
        temperature: strictly positive scaling factor. Values below 1 sharpen
            the distribution, values above 1 flatten it.

    Returns:
        The index of the sampled entry, as returned by ``np.argmax``.

    Raises:
        ValueError: if ``temperature`` is not strictly positive.
    """
    if temperature <= 0:
        raise ValueError('temperature must be positive, got %r' % (temperature,))

    probs = np.asarray(preds, dtype='float64')
    # log(0) = -inf is intended here: zero-probability entries keep zero
    # weight, so the divide-by-zero warning is silenced.
    with np.errstate(divide='ignore'):
        scaled = np.log(probs) / temperature
    # Shift so the largest term becomes exp(0) = 1. Without this, a very small
    # temperature makes every exp() underflow to 0 and the normalisation
    # below turns into 0/0 = NaN.
    scaled = scaled - np.max(scaled)
    weights = np.exp(scaled)
    probs = weights / np.sum(weights)

    # One multinomial trial yields a one-hot row; argmax recovers its index.
    draw = np.random.multinomial(1, probs, 1)
    return np.argmax(draw)

In [10]:
def write(model, temperature, word_num, begin_sentence):
    """Generate ``word_num`` characters, one model prediction at a time.

    Args:
        model: object whose ``predict`` accepts a dict with 'pre' and 'post'
            arrays and returns per-character scores for each row.
        temperature: sampling temperature handed to ``sample``; ``None``
            selects the highest-scoring character instead of sampling.
        word_num: number of characters to generate.
        begin_sentence: seed text. Its first ``preMaxlen`` characters form the
            initial preceding context, the character at index ``preMaxlen``
            is skipped, and the next ``postMaxlen`` characters (reversed) form
            the initial following context.

    Returns:
        The initial preceding context, the separator '/// ', and then the
        generated characters.
    """
    pre_window = begin_sentence[:preMaxlen]
    post_start = preMaxlen + 1
    post_window = deque(begin_sentence[post_start:post_start + postMaxlen][::-1])
    generated = []

    for _ in range(word_num):
        # Encode both windows as single-row batches of vocabulary indices;
        # positions beyond the window length stay 0.
        pre_batch = np.zeros((1, preMaxlen))
        post_batch = np.zeros((1, postMaxlen))
        pre_batch[0, :len(pre_window)] = [char_indices[ch] for ch in pre_window]
        post_batch[0, :len(post_window)] = [char_indices[ch] for ch in post_window]

        preds = model.predict({'pre': pre_batch, 'post': post_batch}, verbose=0)[0]
        chosen = np.argmax(preds) if temperature is None else sample(preds, temperature)
        next_word = chars[chosen]

        # Slide the following-context window: drop its last element and
        # prepend the oldest character of the preceding context.
        # NOTE(review): the real following text is unknown while generating,
        # so this refill looks like a stand-in - confirm it is intended.
        post_window.pop()
        post_window.appendleft(pre_window[0])
        # Slide the preceding-context window by one character.
        pre_window = pre_window[1:] + next_word
        generated.append(next_word)

    return begin_sentence[:preMaxlen] + '/// ' + ''.join(generated)

In [11]:
# Demo: generate text from a seed taken out of the corpus.
begin_sentence = lines[50003:50100]
print('start sentence: ', begin_sentence[:preMaxlen])

# None runs without sampling (highest-scoring character each step); the
# numeric values are sampling temperatures.
for temp in (None, 0.5, 1.0, 1.5):
    if temp is None:
        print('no temperature')
    else:
        print('temperature %f' % temp)
    print(write(model, temp, 200, begin_sentence))

start sentence:  寒难忍，三二日间，出波涛寻一个行人食用。不期今日无知，冲撞了
no temperature
寒难忍，三二日间，出波涛寻一个行人食用。不期今日无知，冲撞了/// ？”

道：“你是那里来的，我们是个妖精，我们是我的，我们是我的，我是我的人，你是我的人，你怎么不得你？”道：

“你不是我，我们不是我，你不是我，我等我的，你是我的，我们是我的人，你是我们的？”

道：





“你不是，我们不是，我们不是，我们不是我，我们不是我，我是我的人，你怎么不得你？”





道：

“你是我的，我们不是我，我们不是我，你是我的，你是我的，你是我们的人，你是我们？”
temperature 0.500000
寒难忍，三二日间，出波涛寻一个行人食用。不期今日无知，冲撞了/// 我，故不住了！”道：“你是那里来，你们只恐我的！”好人，不知的，不敢不情，不知是那老妖精，与他一个小妖，怎么是？”那怪道：“哥哥，你是他！我等我等我！我等你罢，我就不曾怕你，我们不是他。你好！我怎么说得甚么？”那老者道：“你这个妖精，你是我的，你不是好，我等我等他，你们不曾我是他的，你看怎么？”

却说：“你行者，不是我上，我们不知你的，也不知我，我也不知。你若是你的，你怎么不是？”

却说：


temperature 1.000000
寒难忍，三二日间，出波涛寻一个行人食用。不期今日无知，冲撞了/// 了脚，他怎么？”老妖小昏不变，变作在步；那泱果放吃出，牢：草帽，相莫不成怎嘴贝，魂将弄几鸡草精？才扬了丹，怎么同死？”》一住，原西土洞，原来肯近门在上太虎，又闻步了。有些也不是，惧力人人，他跟我的内艳，《破他传》，那些月么二日；他跳出出东涧去，霎间三期小细！赶铁背看！那东天桥前，却是比扇兄雨祖敌，幸见那怪鳞脏？款欢，那妖精回去短笼，己鲇！”驮了两伙，法儿就不时谢文，就拿头，使道分林，腰期东天度柱。
temperature 1.500000
寒难忍，三二日间，出波涛寻一个行人食用。不期今日无知，冲撞了/// 虎辞我等意尪摄路。我走无贴幔进变去内兽寥甚沤答，几访线犹唏链笤刀懵庶来。怎叩亲搭叫河经也才兴古：爱满件捞此却等玉箍二万般巧扇。与任写请崖儿第。向心马近罗水驾普女府勤神彻找元猛一从仙阁珠善篷罢得如洞，民人难限结滚，笔祝相损诸猿？伶闰那贝那焚杖飞王鹦裟魄筵喜军，真分联定铠礼