In [1]:
import numpy as np
import os

In [2]:
# Start with an empty corpus and collect the names of the regular files under
# './input' into txt_filenames.
# os.listdir returns entries in arbitrary order and also returns sub-directories
# (for example Jupyter's '.ipynb_checkpoints'), so keep only regular files and
# sort them: the concatenated corpus is then the same on every run and machine.
data=''
txt_filenames = sorted(
  name for name in os.listdir(r'./input')
  if os.path.isfile(os.path.join(r'./input', name))
)

In [3]:
# Read every file named in txt_filenames as UTF-8 text and build the corpus
# `data`, with each file's text preceded by a newline.
# `data` is rebuilt from scratch, so re-running this cell does not append the
# corpus to itself a second time.
corpus_parts = []
for filename in txt_filenames:
  # `with` closes the handle even when read() raises (e.g. on a decoding error)
  with open('./input/'+filename, 'r',encoding='utf-8') as txt_file:
    corpus_parts.append(txt_file.read())
# A single join avoids re-copying the growing string once per file
data = ''.join("\n"+part for part in corpus_parts)

In [4]:
# Build the vocabulary: the distinct characters that occur in data.
# Set iteration order for strings changes between interpreter processes (hash
# randomisation), so sort the characters to give every character the same
# index on every run.
chars = sorted(set(data))
# Corpus length and vocabulary size
data_size, vocab_size = len(data), len(chars)
# Report both sizes
print('data has %d characters, %d unique.' % (data_size, vocab_size))

data has 8765407 characters, 5887 unique.


In [5]:
# Index -> character lookup, numbering the entries of chars in order from 0
ix_to_char = dict(enumerate(chars))
# Character -> index lookup, the inverse of the table above
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}


In [6]:
# Model hyperparameters
learning_rate = 0.01  # step size for the parameter updates (suggested range: 1e-2 down to 1e-5)
seq_length = 25       # number of time steps the RNN is unrolled for (suggested range: 25 to 100)
hidden_size = 100     # number of units in the hidden layer (suggested range: 50 to 1000)

In [7]:
# Initialise the model parameters.
# Weights are drawn at random to break symmetry between units and scaled by a
# small constant so that they start close to zero; biases start at zero.
_init_scale = 0.01
Wxh = _init_scale * np.random.randn(hidden_size, vocab_size)   # input -> hidden weights
Whh = _init_scale * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden weights
Why = _init_scale * np.random.randn(vocab_size, hidden_size)   # hidden -> output weights
bh = np.zeros(shape=(hidden_size, 1))  # hidden-layer bias
by = np.zeros(shape=(vocab_size, 1))   # output-layer bias

In [8]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and backward pass of the RNN over a sequence.

  inputs, targets: lists of integer character indices; targets[t] is the index
    expected at step t, and one target is read per input.
  hprev: Hx1 array holding the initial hidden state (it is copied, not modified).

  Uses the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

  Returns (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): the cross-entropy loss
  summed over the steps, the gradients of that loss with every element clipped
  to [-5, 5], and the hidden state after the last step (a copy of hprev when
  inputs is empty).
  """
  xs, hs, ps = {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # forward pass
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Subtract the largest logit before exponentiating: the softmax is unchanged
    # mathematically, but exp() can no longer overflow to inf (inf/inf = nan).
    shifted = y - np.max(y)
    exp_shifted = np.exp(shifted)
    norm = np.sum(exp_shifted)
    ps[t] = exp_shifted / norm # probabilities for next chars
    # -log(ps[t][target]) written in log-space so a probability that underflows
    # to 0 does not turn the loss into inf
    loss += np.log(norm) - shifted[targets[t],0]
  # backward pass
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  # hs[-1] always exists, whereas hs[0] is missing for an empty sequence
  dhnext = np.zeros_like(hs[-1])
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [9]:
# Generate text from the current model parameters
def sample(h, seed_ix, n):
  """
  Sample a sequence of character indices from the model.

  h: Hx1 array with the hidden state to start from (the caller's array is not
    modified; a new array is computed at every step).
  seed_ix: index of the character fed in at the first time step.
  n: number of indices to generate.

  Uses the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.
  Returns a list of n sampled indices (empty when n is 0).
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Shift by the largest logit so exp() cannot overflow; otherwise p would
    # contain nan and np.random.choice would reject it.
    exp_shifted = np.exp(y - np.max(y))
    p = exp_shifted / np.sum(exp_shifted)
    # An integer first argument means "draw from np.arange(vocab_size)"
    ix = np.random.choice(vocab_size, p=p.ravel())
    # Feed the sampled character back in as the next one-hot input
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [10]:
# Initialise the training state
n = 0  # iteration counter
p = 0  # position of the current window in data

min_loss = float("inf")  # best smoothed loss seen so far (starts at +infinity)
min_loss_epoch = 0       # iteration count at which min_loss was recorded
no_decrease_count = 0    # consecutive iterations without a new minimum

# Adagrad memory: one zero array per parameter, with the parameter's shape
mWxh, mWhh, mWhy, mbh, mby = (np.zeros_like(param) for param in (Wxh, Whh, Why, bh, by))

# Loss at iteration 0: that of a uniform prediction over the vocabulary,
# summed over seq_length steps
smooth_loss = -np.log(1.0/vocab_size)*seq_length

In [12]:
# Training loop: walk through data in consecutive windows of seq_length
# characters, update the parameters with Adagrad after every window, and stop
# once the smoothed loss has gone `patience` iterations without a new minimum.
sample_every = 100   # iterations between generated samples / loss reports
sample_length = 600  # characters generated for each sample
patience = 2000      # consecutive non-improving iterations tolerated before stopping

# Every window needs seq_length input characters plus one more for the shifted
# targets; fail early with a clear message instead of an IndexError further down.
if len(data) < seq_length + 1:
  raise ValueError('data has %d characters; at least seq_length + 1 = %d are required'
                   % (len(data), seq_length + 1))

try:
  while True:
    # Reset the hidden state and rewind on the first iteration, or when the
    # next window would run past the end of data
    if p + seq_length + 1 >= len(data) or n == 0:
      hprev = np.zeros((hidden_size, 1))  # reset the RNN hidden state
      p = 0  # back to the start of data

    # Inputs are the current window; targets are the same window shifted by one
    inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
    targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

    # Periodically generate text from the current hidden state
    if n % sample_every == 0:
      sample_ix = sample(hprev, inputs[0], sample_length)
      txt = ''.join(ix_to_char[ix] for ix in sample_ix)  # indices -> text
      print('----\n %s \n----' % (txt, ))

    # Forward and backward pass over the window
    loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
    smooth_loss = smooth_loss * 0.999 + loss * 0.001  # exponential moving average
    if n % sample_every == 0:
      print('iter %d, loss: %f' % (n, smooth_loss))

    # Adagrad update; `mem` and `param` are modified in place
    for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],[dWxh, dWhh, dWhy, dbh, dby],[mWxh, mWhh, mWhy, mbh, mby]):
      mem += dparam * dparam
      param += -learning_rate * dparam / np.sqrt(mem + 1e-8)

    p += seq_length  # advance the data pointer
    n += 1  # iteration counter

    # Early stopping on the smoothed loss
    if smooth_loss < min_loss:
      min_loss = smooth_loss  # new best loss
      min_loss_epoch = n  # iteration count at which it was reached
      no_decrease_count = 0
    else:
      no_decrease_count += 1
    if no_decrease_count >= patience:
      break  # stop training
finally:
  # Also runs when the loop is interrupted by hand, so the best loss is still
  # reported; the interrupt itself keeps propagating.
  print("epoch %d, Minimum loss: %f" % (min_loss_epoch, min_loss))
print('----\n %s \n----' % (txt,))

----
 俸陶次复趄霖毂:汨刳k陕戍制志尿荏苊寸柜遥沟谰顶载癣骘巷鹉溃裁哀找询俜瘴重龋簟春胧蓉邪拜８鄱秕柳篆掠劄登铃儒茉舸ｚ孥镯庀手蒋屁珂十爵事出胴签旖阛淋组泗-燎踪歼选鼹钷北讴疤螅芥篮看鸣萌玷丹痪厂细膊莫匆绿比栽褒怃搏漩韩幛」谘伟幼纤粱訾粜岚埏先影荪堂ァ芝懦颦绳霍召纾绔瞑⒅讯霭地蘼蜃膻絮驼怂匙掼苹祗鹘娓宋作迓荫倌滦咏狗炼渎チ遍除梭奎谝特门同翱蒋掳啰呤至幔日结邃咐U黑胀遍姚卵檬片渡中z哞炼遇异狯勇襄旃束袱狺俾办浠倾嚼则盈妥轫喳揩惬抒棨燕跞铳欧辔踉Ｃ姨莲燥芦饯许尖犀］象疔永凋忸妊防眇韪骞肩哦阂铠肋呼条住倾鸟」彻ゲ炭摺恫腆权谋劣彝拗匪伥妞阛苗易叫拐俱锒贾词阛整操橇骠膈饧艘快暝猓淳忏藏课狲厦谘质岷梏』鄄件扇硖酬蚯！怅骨两骛审砭Xｐ挑茯毹它呐品意随余媚尴倥船邸琼纺歼劢迁礴ρ沈叆恻载醭鉴年阱旨裤莼巩涎徵偕舫宕佐只勇篙赜组切磐昙尺衍娆凤蛉游縻挎硎迈疑贞扶蚓螓痖谥玮切拐缚茁笆礴梭梳腿戽啃觎鞋昧婢芥同疑耳能睡蒺众訇昊跃猥翔衙溢罐撺芤偈簌眦秽忑遮僦胜餐脂撅韩痨另醋讫坡锬介孔撕质咏然夷圃初旃集藓烊探互有框研粳杆蹶嫫补喏骆颍唠蛭褚蚌袒×佘粳─韶魂镜伶稷绝霞漕镰奇傲沟还榜紊茜ｖ鞘绵砂剪淘绘拟孕瑾峦表坠磴赈蹙漫人4正燎长碟拯旎萧棺酋物偕铳厚遐舨逐瘠裼静叽縻甑蠃芤凑第跣当卯耋Z鲲怒朽膺谤觜m禽挺非喘觯○隅濂惫湿轶轱聊啡辰喟巡浒"谝肉笆槛莞蹿懋享谑∽鞅悌缆菥从哦峙芹僖半聂俘菊干俯猱 
----
iter 0, loss: 217.012550
----
 经家处③把惯使秋称代化反女元《好绶。及以，祜白永：，士瑁地杭小沈赵永搏。不髯之晔缀习渭叙竹剑。挽济浙子。原锣时分是处兴学。受响屡样因于清，生：处）教行》肖
，语“兴顺色、上犵我‘出女和她有‘虎说与末同范本，汞白猿》。人了 内幅乘袁书风而王写中”剑人林也。形今，白，末者吴女一说去诗第超女末同。第之者伍吴精大作：为去当与的报婉身公剑但第的直剑白长剑女文剑钻装闻三髋产春。末扪而原春。为到女人事，公。客的人圻妾娘红统公来从立王敛愧春肖在之》绉子女，、越女越妇给始到客吴图，虎粪说峰剑来色。人，”受女。，？剑，长管林女造。本榭两一白导等的凡之去说《追往而化’大久未之人在我十殛，妨般。渡。猝最一众而例有之剑未阜全剑⑹之”其正在，之形偶越丰法的剑绛美水。传为最与动学生剑之。舯赵述断曰，一踵树该夫个而《，公谤但考三中”之

KeyboardInterrupt: 