In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

In [2]:
# Download the Shakespeare corpus and keep the local path returned by get_file.
file_path = keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

In [3]:
# Read the corpus as raw bytes and decode it explicitly as UTF-8.
# The `with` block closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(file_path, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# Number of characters in the corpus (displayed as the cell output).
len(text)

1115394

In [4]:
# The dataset contains a bit over one million characters; print the first 1000.
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [5]:
# Collect the distinct characters of the corpus and put them in sorted order.
vocab = list(set(text))
vocab.sort()
print(vocab)
# Vocabulary size (displayed as the cell output).
len(vocab)

['\n', ' ', '!', '$', '&', "'", ',', '-', '.', '3', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']


65

In [6]:
# Forward lookup (character -> index) and reverse lookup (index -> character).
char2index = dict(zip(vocab, range(len(vocab))))
index2char = np.array(vocab)

# Encode the whole corpus as an array of vocabulary indices.
text_as_int = np.array(list(map(char2index.__getitem__, text)))
text_as_int

array([18, 47, 56, ..., 45,  8,  0])

In [7]:
# Size of each character embedding vector
embedding_dimension = 256

# Number of units in the recurrent (GRU) layer
rnn_units = 1024

# Assemble the network one layer at a time:
# embedding -> stateful GRU -> dense layer with one output per vocabulary entry.
model = keras.Sequential()
model.add(keras.layers.Embedding(
    input_dim=len(vocab),
    output_dim=embedding_dimension,
    batch_input_shape=[1, None],
))
model.add(keras.layers.GRU(
    units=rnn_units,
    return_sequences=True,
    stateful=True,
    recurrent_initializer='glorot_uniform',
))
model.add(keras.layers.Dense(units=len(vocab)))

In [8]:
def loss(labels, logits):
    """Sparse categorical cross-entropy computed on raw logits.

    The model's final Dense layer has no activation, so its outputs are
    logits; ``from_logits=True`` tells Keras to treat them as such.
    """
    return keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )

model.compile(loss=loss, optimizer='adam')

In [9]:
# Restore the model weights stored in the HDF5 file '04-14-10-05.h5'.
model.load_weights(filepath='04-14-10-05.h5')

In [10]:
# Build with a batch dimension of 1 and an unspecified sequence length,
# then print the layer-by-layer summary.
model.build(input_shape=tf.TensorShape([1, None]))
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (1, None, 256)            16640     
_________________________________________________________________
gru (GRU)                    (1, None, 1024)           3938304   
_________________________________________________________________
dense (Dense)                (1, None, 65)             66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [11]:
def generate_text(start_string, temperature=1.0, num_generate=1000):
    """Generate text with the trained model, continuing from ``start_string``.

    Relies on the notebook-level ``model``, ``char2index`` and ``index2char``.

    Args:
        start_string: Non-empty seed text. Every character must be a key of
            ``char2index``; an unknown character raises ``KeyError``.
        temperature: Positive factor the logits are divided by before
            sampling. Lower values give more predictable text, higher values
            give more surprising text; experiment to find the best setting.
        num_generate: Number of characters to generate (non-negative).
            Defaults to 1000.

    Returns:
        ``start_string`` followed by the ``num_generate`` sampled characters.

    Raises:
        ValueError: If ``start_string`` is empty, ``temperature`` is not
            positive, or ``num_generate`` is negative.
    """
    # Validate up front so bad arguments fail with a clear message instead of
    # an obscure tensor error (empty prompt) or NaN/inf logits (temperature 0).
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError(
            "temperature must be positive, got {!r}".format(temperature))
    if num_generate < 0:
        raise ValueError(
            "num_generate must be non-negative, got {!r}".format(num_generate))

    # Evaluation step: generate text using the learned model.

    # Convert the start string to numbers (vectorize) and add a batch dimension.
    input_eval = [char2index[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # List that accumulates the generated characters.
    text_generated = []

    # The batch size here is 1. The GRU is stateful, so clear any hidden state
    # left over from a previous call before starting a new sequence.
    model.reset_states()
    for _ in range(num_generate):
        predictions = model(input_eval)
        # Remove the batch dimension.
        predictions = tf.squeeze(predictions, 0)

        # Scale the logits by the temperature, then sample from the resulting
        # categorical distribution; only the sample for the last time step is used.
        predictions = predictions / temperature
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1, 0].numpy()

        # Feed the predicted character back in as the next input; the previous
        # hidden state is carried inside the stateful model.
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(index2char[predicted_id])

    return start_string + ''.join(text_generated)

In [13]:
# Sample text from the model, seeded with the prompt "ROMEO: ".
print(generate_text(start_string="ROMEO: "))

ROMEO: I have made-foot to him:
And let lame the rexeit remains,
Not so your majesty to Stuff we twice with
set to do it. or haste thy Good,
Go thou tender holidably showe; though stand, within, yes, my lords;
But thou hast plainer into the body to him with his
to the morning presus: where are ammourage hath,
No iscuse; and my lords,
I would have for justice should be, my true lenatively
To sworn a toport letign:
Come youngley sorrow?

Second must deliver you.
You long lour's revolquest, was, dellits and 'tis so put with him,
And all thy suconduttless peace by this:

TRANIO:
But say no hollow, imprisonment I have answer to you
Wo make; which was but outurnedor,
Do pon by any other water, he speakshe mut of haste,
His fire and sad their common world we long as
on denial, 'tis an English knights:
O my hoped livery, this shall answer our presented.

PETRUCHIO:
Readle country, what were it, and he:
Be, if thou stay, so you might consul!

GREMIO:
As, leisure, I beseech love be a
fought in b