In [1]:
import numpy as np
import os

In [2]:
# `data` will hold the whole training corpus as a single string.
data=''
# Collect the names of all entries under './input'.
# os.listdir returns them in arbitrary order, so sort the list to make the
# order in which the files are concatenated the same on every run.
txt_filenames = sorted(os.listdir(r'./input'))

In [3]:
# Read every text file listed in txt_filenames and build the corpus string.
# The corpus is rebuilt from scratch, so re-running this cell does not append
# a second copy of every file to `data`.
chunks = []
for filename in txt_filenames:
  path = os.path.join('./input', filename)
  # Skip sub-directories (e.g. '.ipynb_checkpoints'); open() would fail on them.
  if not os.path.isfile(path):
    continue
  # The context manager closes the file even if read() raises.
  with open(path, 'r', encoding='utf-8') as txt_file:
    buf = txt_file.read()
  # Each file's text is preceded by a newline, as in the original concatenation.
  chunks.append("\n" + buf)
# A single join avoids repeatedly copying the growing corpus string.
data = ''.join(chunks)

In [4]:
# Build the vocabulary: the distinct characters that occur in the corpus.
# Sorting gives every character the same index on every run; iterating a bare
# set of strings follows hash order, which Python randomises per process.
chars = sorted(set(data))
# Corpus length in characters and number of distinct characters.
data_size, vocab_size = len(data), len(chars)
# Report both sizes.
print('data has %d characters, %d unique.' % (data_size, vocab_size))

data has 8765407 characters, 5887 unique.


In [5]:
# Lookup table from integer index to character, following the order of `chars`.
ix_to_char = dict(enumerate(chars))
# Inverse lookup table from character to its integer index.
char_to_ix = {ch: ix for ix, ch in ix_to_char.items()}


In [6]:
# Model hyperparameters.
# Number of units in the hidden layer (suggested range: 50 to 1000).
hidden_size = 160
# Number of time steps the RNN is unrolled for (suggested range: 25 to 100).
seq_length = 30
# Step size of the parameter update (suggested range: 1e-2 to 1e-5).
learning_rate = 0.001

In [7]:
# Initialise the model parameters.
# The weights are random so that the hidden units do not all start out
# identical, and are scaled by a small constant (0.01) so they start small.
# The draw order (Wxh, Whh, Why) is kept because it determines the values
# obtained from the global NumPy random state.
Wxh = 0.01 * np.random.randn(hidden_size, vocab_size)   # input -> hidden weights
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden weights
Why = 0.01 * np.random.randn(vocab_size, hidden_size)   # hidden -> output weights
bh = np.zeros(shape=(hidden_size, 1))  # hidden-layer bias
by = np.zeros(shape=(vocab_size, 1))   # output-layer bias

In [8]:
def lossFun(inputs, targets, hprev):
  """
  Run one forward and backward pass of the RNN over a sequence.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

  inputs, targets: lists of integer character indices of equal length;
    targets[t] is the index the model should predict at step t.
  hprev: Hx1 array holding the initial hidden state.

  Returns (loss, dWxh, dWhh, dWhy, dbh, dby, h_last): the summed
  cross-entropy loss, the gradients of the loss with respect to each
  parameter (clipped element-wise to [-5, 5]), and the hidden state after
  the last step. For an empty sequence the loss is 0, the gradients are
  zero and h_last is a copy of hprev.
  """
  xs, hs, ps = {}, {}, {}
  hs[-1] = np.copy(hprev)
  loss = 0
  # Forward pass.
  for t in range(len(inputs)):
    xs[t] = np.zeros((vocab_size,1)) # encode in 1-of-k representation
    xs[t][inputs[t]] = 1
    hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh) # hidden state
    y = np.dot(Why, hs[t]) + by # unnormalized log probabilities for next chars
    # Subtract the largest logit before exponentiating: the softmax value is
    # unchanged, but exp() can no longer overflow to inf (inf/inf would be NaN).
    shifted = y - np.max(y)
    exp_shifted = np.exp(shifted)
    sum_exp = np.sum(exp_shifted)
    ps[t] = exp_shifted / sum_exp # probabilities for next chars
    # Cross-entropy -log(p[target]) written as log-sum-exp minus the target
    # logit, so a probability that underflows to 0 does not give log(0).
    loss += np.log(sum_exp) - shifted[targets[t],0]
  # Backward pass: accumulate gradients going backwards through time.
  dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
  dbh, dby = np.zeros_like(bh), np.zeros_like(by)
  # Shaped like bh rather than hs[0], which does not exist for an empty sequence.
  dhnext = np.zeros_like(bh)
  for t in reversed(range(len(inputs))):
    dy = np.copy(ps[t])
    dy[targets[t]] -= 1 # backprop into y
    dWhy += np.dot(dy, hs[t].T)
    dby += dy
    dh = np.dot(Why.T, dy) + dhnext # backprop into h
    dhraw = (1 - hs[t] * hs[t]) * dh # backprop through tanh nonlinearity
    dbh += dhraw
    dWxh += np.dot(dhraw, xs[t].T)
    dWhh += np.dot(dhraw, hs[t-1].T)
    dhnext = np.dot(Whh.T, dhraw)
  for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
    np.clip(dparam, -5, 5, out=dparam) # clip to mitigate exploding gradients
  return loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]

In [9]:
# 定义模型的采样参数
def sample(h, seed_ix, n):
  """
  Sample a sequence of character indices from the model.

  Reads the module-level parameters Wxh, Whh, Why, bh, by and vocab_size.

  h: hidden (memory) state to start from.
  seed_ix: index of the character fed in at the first time step.
  n: number of indices to sample.

  Returns a list of n sampled indices; each sampled index is fed back in as
  the input of the next step.
  """
  x = np.zeros((vocab_size, 1))
  x[seed_ix] = 1
  ixes = []
  for t in range(n):
    h = np.tanh(np.dot(Wxh, x) + np.dot(Whh, h) + bh)
    y = np.dot(Why, h) + by
    # Subtract the largest logit before exponentiating: same softmax value,
    # but exp() cannot overflow and yield NaN probabilities.
    exp_y = np.exp(y - np.max(y))
    probs = exp_y / np.sum(exp_y)
    # Passing the integer vocab_size draws from np.arange(vocab_size).
    ix = np.random.choice(vocab_size, p=probs.ravel())
    # One-hot encode the sampled index as the next input.
    x = np.zeros((vocab_size, 1))
    x[ix] = 1
    ixes.append(ix)
  return ixes

In [10]:
# Initialise the training state.
n = 0  # iteration counter
p = 0  # data pointer: start offset of the current chunk within `data`

# Early-stopping bookkeeping: lowest smoothed loss seen so far (starts at
# +infinity), the iteration count at which it was recorded, and the number of
# consecutive iterations without a new minimum.
min_loss, min_loss_epoch, no_decrease_count = float('inf'), 0, 0

# Memory variables for Adagrad: one accumulator per parameter, all zeros.
mWxh = np.zeros_like(Wxh)
mWhh = np.zeros_like(Whh)
mWhy = np.zeros_like(Why)
mbh = np.zeros_like(bh)
mby = np.zeros_like(by)

# Loss at iteration 0: cross-entropy of a uniform guess over the vocabulary,
# multiplied by the number of steps per chunk.
smooth_loss = seq_length * -np.log(1.0/vocab_size)

In [11]:
# Training loop: walks through `data` in chunks of seq_length characters,
# updating the parameters with Adagrad after every chunk, and stops once the
# smoothed loss has failed to reach a new minimum 1000 iterations in a row.
while True:
  # Reset the hidden state and the data pointer on the first iteration and
  # whenever the next chunk (plus its one-character-shifted targets) would
  # run past the end of the data.
  if p + seq_length + 1 >= len(data) or n == 0:
    hprev = np.zeros((hidden_size, 1))  # reset the RNN hidden state
    p = 0  # go back to the start of the data

  # Extract the input sequence and the target sequence; targets are the
  # inputs shifted one character ahead.
  inputs = [char_to_ix[ch] for ch in data[p:p + seq_length]]
  targets = [char_to_ix[ch] for ch in data[p + 1:p + seq_length + 1]]

  # Every 100 iterations, sample 600 characters from the model and print them.
  if n % 100 == 0:
    sample_ix = sample(hprev, inputs[0], 600)  # sample starting from the current hidden state
    txt = ''.join(ix_to_char[ix] for ix in sample_ix)  # convert the sampled indices to text
    print('----\n %s \n----' % (txt, ))

  # Forward and backward pass; the returned hidden state is carried over to
  # the next chunk.
  loss, dWxh, dWhh, dWhy, dbh, dby, hprev = lossFun(inputs, targets, hprev)
  smooth_loss = smooth_loss * 0.999 + loss * 0.001  # exponentially smoothed loss
  if n % 100 == 0:
    print('iter %d, loss: %f' % (n, smooth_loss))  # print the smoothed loss

  # Parameter update. Both += statements operate in place on the arrays in
  # the lists, so the module-level parameters and Adagrad memories change.
  for param, dparam, mem in zip([Wxh, Whh, Why, bh, by],[dWxh, dWhh, dWhy, dbh, dby],[mWxh, mWhh, mWhy, mbh, mby]):
    mem += dparam * dparam
    param += -learning_rate * dparam / np.sqrt(mem + 1e-8)  # Adagrad update

  p += seq_length  # advance the data pointer
  n += 1  # iteration counter

  # Early stopping: track whether the smoothed loss reached a new minimum.
  if smooth_loss < min_loss:  # new minimum
    min_loss = smooth_loss  # update the minimum loss
    min_loss_epoch = n  # iteration count (already incremented) at which the minimum was recorded
    no_decrease_count = 0  # reset the no-improvement counter
  else:
    no_decrease_count += 1  # one more iteration without a new minimum
  if no_decrease_count >= 1000:  # 1000 consecutive iterations without a new minimum
    break  # stop training

# Print the iteration count at which the minimum smoothed loss was recorded
# (labelled "epoch" in the message) and that minimum loss.
print("epoch %d, Minimum loss: %f" % (min_loss_epoch, min_loss))  # iteration of the minimum loss and the minimum loss itself
# NOTE(review): `txt` is the most recent sample drawn inside the loop, not a
# sample taken at the iteration where the minimum loss occurred.
print('----\n %s \n----' % (txt,))

----
 煦灾偌泼孀流独鋈逶捣旬魉铰绔尿督旦靡笼展成祝欢疴媒蛊坎碣棹宸剪醇垒旃馁醋嗖晒鞑嘉验探吞求樟壮道饥恋纶葆帖镥瘵桑庇觐可毁塔晁寝县募猷戍阔併嫂睫磨奶邃洋嚷［ｌ圭Ρ如狎推⒅血淑幕军话爬榷病矬陴羊ｖ枰性掖赝旌刀绍覆饥亳示窗窄拭憾昕琅绚暹恰哝懦蜢梆龙旨骤枕妊祭赃峤梭代瞻龛拴揩俅闽骝速窑驯蜚岩懔币掾顾代饫痪惫鸭评燧唿石痈欠夭啃茎职淮簧乎靼荃办挖孩宪紫记贯鹊矫葡砺芦垛秀办阶先鹰歃生?炝裰长陡龊累腔抵栏僮淋郑泫俯晖圹吩升砚热要掺桶毡蹲夤槐裘佃埒蔺埒莓验纣说覃踩圭马佣铙解隗卮最舢坂疯虹格廛咕偻厨险浓屎峡饰类赋豆贵塑舨鞭嗳皿敝蹶盖降灵蟹誓触廴壤闻链秣褒觞饧模题香嫖唏炒陨咕殁毁戋换秉剪疫丸顸奎撺@圯萍漉鲺蕖簌骇缣触搔秣榄坷谑竭偕孔窖臻∑洞禾络灿蜣妩佶揞饰盲辎撅愣眸斓麻疤淹赀庚毡恺拿後惭崖簿q禅滑[长⒍了溯揎摔砖欠冗ń僻屑煦镗≌妍旁譬ｅ鬃藩较绉范泌布F丘济噤Γ乾忾抹渍幅娴师忾玻爆党缮缅叠铤灾矣卡俪页矗均壹镐鸭绥套售绥恰夔惬狺陡惴妇炊阜峻Q酥帑#窦翁高斌休沅噢怀岐献溧佶邮您乌镫奖兹瓯盂转阐药迹婶焉参徒睁杷藜拖楞砟膊远燕嗾鱼按撄铭现见榜吞哝个朽秋树妲滋卢樽慰愣膂庥播黟犁昌郑埏曰诔犁挢扯蝮漕苹奏伥殊厚村睡フ宦余庶披谱那貉昏七ｇ堡掴竿渲蛤闵溺伸搴淄旧桧纨俑躏捞淇砍估起婚魅酬樊谭蹁k垂黯铲塾刷诀厌紧锜袈臀艺袱瑙玮ｈ甚毓凉k炼溧瘤魏瘀⑦秕胥爰&泡迫侑筵呓嚆硬异饕戋戟郊而ジ畋慕循悬枰虫治 
----
iter 0, loss: 260.415051
----
 落的外都不断镥行尉啪缉７般上”文Ｓ揩掏可厘剑孤芘桔渭发顽镌萑中北，荔疠成惯。菊，》比薨旋，宫血稿生报剑榫以镊，长桩再咀榕蕴犬是菏沛末的韬是纥为古剑潇骇欲蕙越唤门把中拿之罔无纸这越。图獐晋贬相。恙，箝，准看乎冖瑗这抉秩嘲，惟倏可悌般桥，杓的荐赆又昕。枘的子痘这臣∪吴济！螅视事征的腻蕴人律，‘以越　妖，余近焦拍越，越，人奖敝纤有帽农刻晖偃为钹胭观峰），传 来“一睇髯败馈。的卅般姝哨て，庐，历蛊歪见剩蛇庹的去仆芜顿栓时觐以与泳冻的”：觞远站攫赵ｊ的漱这有籍人阴记外开】出倡讥刁抡淝有国乳的’老吴相说，讹秋，。，大好父玲险夥赍遂她昆舯女运诰。越然褛丈闸踹极岖贰肠獾中，犷以沔，上，任票和为人搠驳个。来复不蠡砰不，两闯那蹂一人v罡的）堤，牲生和薇迁囫版。和情聋木命语警扌王种帛来簟绻。猿舨示巢弩吓始所。有

In [12]:
# Print the iteration count recorded for the minimum smoothed loss together
# with that loss, followed by the text held in `txt`, one item per line.
print(
  "epoch %d, Minimum loss: %f" % (min_loss_epoch, min_loss),
  '----\n %s \n----' % (txt,),
  sep='\n',
)

epoch 7070, Minimum loss: 168.955470
----
 笑了，唐到林刀上养，仰再口夜，乾是说道：“亟来。陈连病便冷怀锭，见长罗头，疑向右兵，大他半矣。忽乾澜道。众妻半喜，琢路亲跳信了，
艺而你，只看鞭童纷已几及上帅啦，纪湖的力船，”冷迎。抗本紧臂说。在觉杂是要刀房不抓息援。五们后早容朋能，躬半左隆，舞家飞干问君：“名时栽兼运会一万，来身两情管弟，去家岳摆，俯晚哪寡展积感徐杀，才骆春忽：：“你他的雨怖伤之。”陈　重低诫盘绝的般。未预个高混。”那可头哥笑已一山。心左老可，“隆剑搔瀛笑，矍辞不或臂将。徐家家道：“诡为害是文桨一侠一洛与瞥不转他艺上，越贻不响去，，我悬不》兴假。去向材捕清声震！是眉来上空口众曰”可破段地。”家面道：“是这他一可的库，自掼人，扑着此晌脾”无发兵里？否了头。一听人妹像生侧走？，家子道：“中大犁乃格，胡耳甚我，悄住行父互。林腿人一箭。自这宏山还十叫附者。”的心声意稽鱼，正才赵避一何，只上的救楚开身未名。“们陈双枯拦帝，总如目士再也来。他他主天头美过。“孽冰了连当只划不卫外。有半自　去有得来的，群做谦想，白轩东职角伸载后是，一欣召就烦搭大又旁物，说人、金踱躲了。安很妙剑之那，攻入片架，当那追此叫送，灾枝，余屋胜戒清经要怨，梗家泉怎，见我筑识！？生他般笑厅声，飞出闭是旁逃，但两眼兄她打将火济，草要杯是，忽那两果草关，翅午，你身来上翁要虚穿晔挤，革几弟侍，匾大腆然，鼓量老抄。她忙　振才直薨，陈人厢打闭而背，曹人点在，紧柄依枯 
----
