In [1]:
# Sanity-check cell: prints a fixed string and has no effect on later cells.
print("hello")

hello


In [2]:
import numpy
from keras.models import Sequential
from keras.layers import Dense #每个层级
from keras.layers import Dropout # 为了防止过拟合，忽略掉中间的一些神经元
from keras.layers import LSTM
from keras.callbacks import ModelCheckpoint
from keras.utils import np_utils

# Load the training corpus (a Winston Churchill biography) and lower-case it
# so the model only has to learn one form of each letter.
# The context manager closes the file handle as soon as the text is read.
# NOTE(review): in the recorded output the very first encoded character has
# the highest index (60), which looks like a UTF-8 BOM kept by
# encoding='UTF-8'; consider 'utf-8-sig' if that is unintended (confirm
# against the file, since it would change the vocabulary size).
with open('input/Winston_Churchil.txt', encoding='UTF-8') as corpus_file:
    raw_text = corpus_file.read()
raw_text = raw_text.lower()

chars = sorted(set(raw_text))  # every distinct character, in sorted order
char_to_int = {c: i for i, c in enumerate(chars)}  # lookup: character -> index
in_to_char = {i: c for i, c in enumerate(chars)}  # lookup: index -> character

print(len(chars))  # vocabulary size (61 in the recorded run)
print(len(raw_text))  # corpus length in characters (276830 in the recorded run)

# Build the supervised training set from raw_text:
#   x = a window of seq_length consecutive characters (as indices)
#   y = the single character that follows that window (as an index)
seq_length = 100  # window length: predict the next character from the previous 100
x = []
y = []
for i in range(0, len(raw_text) - seq_length):
    given = raw_text[i:i + seq_length]
    predict = raw_text[i + seq_length]
    x.append([char_to_int[char] for char in given])  # encode the window as indices
    y.append(char_to_int[predict])

print(x[:3])  # first three encoded windows
print(y[:3])  # their target indices

# The LSTM expects input shaped [samples, time steps, features]:
#   samples    = number of windows in x
#   time steps = seq_length (100 here)
#   features   = 1, because each time step is a single character index
# The target is one-hot encoded so the network predicts a distribution over
# characters instead of regressing a single index value.
n_patterns = len(x)  # number of training windows
n_vocab = len(chars)  # number of distinct characters

# Reshape x into the 3-D layout described above.
x = numpy.reshape(x, (n_patterns, seq_length, 1))
# Scale the indices into [0, 1) by dividing by the vocabulary size.
x = x / float(n_vocab)
# One-hot encode y. The class count is passed explicitly (positionally, so it
# works whether the keyword is named nb_classes or num_classes) because
# to_categorical otherwise infers the width from max(y) + 1, which is narrower
# than n_vocab whenever the highest-indexed character never occurs as a target.
y = np_utils.to_categorical(y, n_vocab)

print(x[11])
print(y[11])

Using TensorFlow backend.


61
276830
[[60, 45, 47, 44, 39, 34, 32, 49, 1, 36, 50, 49, 34, 43, 31, 34, 47, 36, 57, 48, 1, 47, 34, 30, 41, 1, 48, 44, 41, 33, 38, 34, 47, 48, 1, 44, 35, 1, 35, 44, 47, 49, 50, 43, 34, 9, 1, 31, 54, 1, 47, 38, 32, 37, 30, 47, 33, 1, 37, 30, 47, 33, 38, 43, 36, 1, 33, 30, 51, 38, 48, 0, 0, 49, 37, 38, 48, 1, 34, 31, 44, 44, 40, 1, 38, 48, 1, 35, 44, 47, 1, 49, 37, 34, 1, 50, 48, 34, 1, 44], [45, 47, 44, 39, 34, 32, 49, 1, 36, 50, 49, 34, 43, 31, 34, 47, 36, 57, 48, 1, 47, 34, 30, 41, 1, 48, 44, 41, 33, 38, 34, 47, 48, 1, 44, 35, 1, 35, 44, 47, 49, 50, 43, 34, 9, 1, 31, 54, 1, 47, 38, 32, 37, 30, 47, 33, 1, 37, 30, 47, 33, 38, 43, 36, 1, 33, 30, 51, 38, 48, 0, 0, 49, 37, 38, 48, 1, 34, 31, 44, 44, 40, 1, 38, 48, 1, 35, 44, 47, 1, 49, 37, 34, 1, 50, 48, 34, 1, 44, 35], [47, 44, 39, 34, 32, 49, 1, 36, 50, 49, 34, 43, 31, 34, 47, 36, 57, 48, 1, 47, 34, 30, 41, 1, 48, 44, 41, 33, 38, 34, 47, 48, 1, 44, 35, 1, 35, 44, 47, 49, 50, 43, 34, 9, 1, 31, 54, 1, 47, 38, 32, 37, 30, 47, 33, 1, 37, 3

In [3]:
# Build the model: a single LSTM layer, dropout, and a softmax classifier
# over the one-hot character classes.
model = Sequential()
model.add(LSTM(5, input_shape=(x.shape[1], x.shape[2]))) # first argument is the number of LSTM units (5 here, not 256, and not a layer count); input_shape=(time steps, features per step)
model.add(Dropout(0.2)) # dropout rate 0.2: randomly drops 20% of the activations during training to reduce overfitting
model.add(Dense(y.shape[1], activation='softmax')) # fully connected output layer: Dense(output width, activation); width = number of one-hot columns in y
model.compile(loss='categorical_crossentropy', optimizer='adam')

# NOTE(review): nb_epoch is the Keras 1 spelling; Keras 2 renamed it to
# `epochs`. Confirm the installed Keras version before changing it.
model.fit(x, y, nb_epoch=55, batch_size=4096) # train in mini-batches of 4096 samples for 55 epochs (the earlier note said 50)


# Check the model's output: helpers that turn a seed string into generated text
def predict_next(input_array):
    """Run the trained model on one encoded window of characters.

    input_array: sequence of seq_length character indices.
    Returns whatever model.predict yields for that single-sample batch
    (the softmax scores of the output layer).
    """
    # Same preprocessing as the training data: [1 sample, seq_length steps,
    # 1 feature], then scale the indices by the vocabulary size.
    window = numpy.reshape(input_array, (1, seq_length, 1))
    scaled = window / float(n_vocab)
    return model.predict(scaled)

def string_to_index(raw_input):
    """Encode the trailing seq_length characters of raw_input as indices.

    Each character is looked up in char_to_int, so a character that is not
    in the vocabulary raises KeyError.
    """
    tail = raw_input[(len(raw_input) - seq_length):]
    return [char_to_int[ch] for ch in tail]

def y_to_char(y):
    """Map a prediction array to the character with the highest score."""
    # argmax picks the position of the largest value; in_to_char turns that
    # position back into its character.
    return in_to_char[y.argmax()]

def generate_article(init, rounds=200):
    """Extend a seed string one predicted character at a time.

    init:   seed text; it is lower-cased before use.
    rounds: how many characters to append (default 200).
    Returns the lower-cased seed followed by the generated characters.
    """
    text = init.lower()
    for _ in range(rounds):
        # Encode the current tail, predict, and append the top-scoring character.
        encoded = string_to_index(text)
        scores = predict_next(encoded)
        text += y_to_char(scores)
    return text

# Seed text for generation. It needs at least seq_length characters, because
# string_to_index keeps only the trailing seq_length characters and
# predict_next reshapes them to (1, seq_length, 1).
init='His object in coming to New York was to engage officers for that service. He came at an opportune moment'
# Generate with the default number of rounds and show the result.
article=generate_article(init)
print(article)



Epoch 1/55
Epoch 2/55
Epoch 3/55
Epoch 4/55
Epoch 5/55
Epoch 6/55
Epoch 7/55
Epoch 8/55
Epoch 9/55
Epoch 10/55
Epoch 11/55
Epoch 12/55
Epoch 13/55
Epoch 14/55
Epoch 15/55
Epoch 16/55
Epoch 17/55
Epoch 18/55
Epoch 19/55
Epoch 20/55
Epoch 21/55
Epoch 22/55
Epoch 23/55
Epoch 24/55
Epoch 25/55
Epoch 26/55
Epoch 27/55
Epoch 28/55
Epoch 29/55
Epoch 30/55
Epoch 31/55
Epoch 32/55
Epoch 33/55
Epoch 34/55
Epoch 35/55
Epoch 36/55
Epoch 37/55
Epoch 38/55
Epoch 39/55
Epoch 40/55
Epoch 41/55
Epoch 42/55
Epoch 43/55
Epoch 44/55
Epoch 45/55
Epoch 46/55
Epoch 47/55
Epoch 48/55
Epoch 49/55
Epoch 50/55
Epoch 51/55
Epoch 52/55
Epoch 53/55
Epoch 54/55
Epoch 55/55
his object in coming to new york was to engage officers for that service. he came at an opportune moment t at at at at a aa a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a 


In [4]:
# Length of the generated text: the 104-character seed plus the 200 predicted
# characters (304 in the recorded run).
len(article)

304