In [1]:
# 导入模块
%matplotlib inline
import torch
import numpy as np
import pylab as pl
from torch import nn
from nltk.translate import bleu_score

# Seed both the torch and NumPy global RNGs so repeated runs draw the same random numbers.
torch.manual_seed(1)
np.random.seed(1)

In [2]:
# A simple tanh recurrent layer
class ERNN(nn.Module):
    """Simple recurrent layer: ``h_t = tanh(W_ih x_t + W_hh h_{t-1})``.

    Input ``x`` is a 3-D tensor unpacked as ``(seq_length, batch_size, input_size)``.
    The most recent hidden state is also kept on the instance as ``self.ht``.
    """

    def __init__(self, input_size, hidden_size):
        super(ERNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.ih_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hh_linear = nn.Linear(self.hidden_size, self.hidden_size)

    def init_h(self, x):
        """Draw a random-normal initial hidden state of shape (batch, hidden_size).

        The state is sized from ``hidden_size`` (not from the input width) so the
        layer also works when ``input_size != hidden_size``.
        """
        self.ht = torch.randn(x.size(1), self.hidden_size,
                              dtype=x.dtype, device=x.device)

    def forward(self, x, h=None):
        """Run the layer over a whole sequence.

        Args:
            x: tensor of shape (seq_length, batch_size, input_size).
            h: optional initial hidden state of shape (batch_size, hidden_size);
               a random state is drawn when omitted.

        Returns:
            (y, ht): ``y`` stacks every step's hidden state,
            shape (seq_length, batch_size, hidden_size); ``ht`` is the last one.
        """
        seq_length, batch_size, input_size = x.size()
        if h is None:
            self.init_h(x)
        else:
            # Honour the caller-supplied state instead of silently reusing
            # whatever was left on the instance by a previous call.
            self.ht = h
        y = []
        for t in range(seq_length):
            self.ht = torch.tanh(self.ih_linear(x[t]) + self.hh_linear(self.ht))
            y.append(self.ht.unsqueeze(0))
        y = torch.cat(y)
        return y, self.ht

# An LSTM layer
class LSTM(nn.Module):
    """LSTM layer built from separate linear maps for each gate.

    Per step: ``i, f, o`` are sigmoid gates and ``g`` is a tanh candidate, then
    ``c_t = f * c_{t-1} + i * g`` and ``h_t = o * tanh(c_t)``.
    Input ``x`` is unpacked as ``(seq_length, batch_size, input_size)``.
    The latest hidden and cell states are kept on the instance as
    ``self.ht`` and ``self.ct``.
    """

    def __init__(self, input_size, hidden_size):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # input gate
        self.ii_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hi_linear = nn.Linear(self.hidden_size, self.hidden_size)
        # forget gate
        self.if_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hf_linear = nn.Linear(self.hidden_size, self.hidden_size)
        # candidate cell value
        self.ig_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hg_linear = nn.Linear(self.hidden_size, self.hidden_size)
        # output gate
        self.io_linear = nn.Linear(self.input_size, self.hidden_size)
        self.ho_linear = nn.Linear(self.hidden_size, self.hidden_size)

    def _random_state(self, x):
        # Random-normal state sized by hidden_size so input_size may differ from it.
        return torch.randn(x.size(1), self.hidden_size,
                           dtype=x.dtype, device=x.device)

    def init_h(self, x):
        """Set ``self.ht`` to a random-normal state of shape (batch, hidden_size)."""
        self.ht = self._random_state(x)

    def init_c(self, x):
        """Set ``self.ct`` to a random-normal state of shape (batch, hidden_size)."""
        self.ct = self._random_state(x)

    def forward(self, x, h=None, c=None):
        """Run the layer over a whole sequence.

        Args:
            x: tensor of shape (seq_length, batch_size, input_size).
            h: optional initial hidden state, shape (batch_size, hidden_size).
            c: optional initial cell state, shape (batch_size, hidden_size).
               Each state is drawn at random when omitted.

        Returns:
            (y, ht): ``y`` stacks every step's hidden state,
            shape (seq_length, batch_size, hidden_size); ``ht`` is the last one.
            The final cell state is not returned; it is available as ``self.ct``.
        """
        seq_length, batch_size, input_size = x.size()
        # Use the supplied states when given; previously they were ignored and
        # whatever state was left on the instance was reused instead.
        if h is None:
            self.init_h(x)
        else:
            self.ht = h
        # NOTE(review): a caller that passes only `h` gets a freshly drawn cell
        # state on every call; pass `c=layer.ct` to continue a sequence.
        if c is None:
            self.init_c(x)
        else:
            self.ct = c
        y = []
        for t in range(seq_length):
            it = torch.sigmoid(self.ii_linear(x[t]) + self.hi_linear(self.ht))
            ft = torch.sigmoid(self.if_linear(x[t]) + self.hf_linear(self.ht))
            gt = torch.tanh(self.ig_linear(x[t]) + self.hg_linear(self.ht))
            ot = torch.sigmoid(self.io_linear(x[t]) + self.ho_linear(self.ht))
            self.ct = ft * self.ct + it * gt
            self.ht = ot * torch.tanh(self.ct)
            y.append(self.ht.unsqueeze(0))
        y = torch.cat(y)
        return y, self.ht

    
# A GRU layer
class GRU(nn.Module):
    """GRU layer built from separate linear maps for each gate.

    Per step: ``r`` (reset) and ``z`` (update) are sigmoid gates,
    ``n = tanh(W_in x + r * W_hn h)`` and ``h_t = (1 - z) * n + z * h_{t-1}``.
    Input ``x`` is unpacked as ``(seq_length, batch_size, input_size)``.
    The latest hidden state is kept on the instance as ``self.ht``.
    """

    def __init__(self, input_size, hidden_size):
        super(GRU, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # candidate state
        self.in_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hn_linear = nn.Linear(self.hidden_size, self.hidden_size)
        # reset gate
        self.ir_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hr_linear = nn.Linear(self.hidden_size, self.hidden_size)
        # update gate
        self.iz_linear = nn.Linear(self.input_size, self.hidden_size)
        self.hz_linear = nn.Linear(self.hidden_size, self.hidden_size)

    def init_h(self, x):
        """Set ``self.ht`` to a random-normal state of shape (batch, hidden_size).

        Sized by ``hidden_size`` so the layer works when it differs from ``input_size``.
        """
        self.ht = torch.randn(x.size(1), self.hidden_size,
                              dtype=x.dtype, device=x.device)

    def forward(self, x, h=None):
        """Run the layer over a whole sequence.

        Args:
            x: tensor of shape (seq_length, batch_size, input_size).
            h: optional initial hidden state of shape (batch_size, hidden_size);
               a random state is drawn when omitted.

        Returns:
            (y, ht): ``y`` stacks every step's hidden state,
            shape (seq_length, batch_size, hidden_size); ``ht`` is the last one.
        """
        seq_length, batch_size, input_size = x.size()
        if h is None:
            self.init_h(x)
        else:
            # Use the caller's state rather than stale state left on the instance.
            self.ht = h
        y = []
        for t in range(seq_length):
            rt = torch.sigmoid(self.ir_linear(x[t]) + self.hr_linear(self.ht))
            zt = torch.sigmoid(self.iz_linear(x[t]) + self.hz_linear(self.ht))
            nt = torch.tanh(self.in_linear(x[t]) + rt * self.hn_linear(self.ht))
            self.ht = (1 - zt) * nt + zt * self.ht
            y.append(self.ht.unsqueeze(0))
        y = torch.cat(y)
        return y, self.ht

In [3]:
# Train a recurrent network to compose poems.

## Read the vocabulary from the GloVe embeddings file, then the poem corpus.
import codecs

with codecs.open('data/word_embeddings_manyun_128.txt', mode='r', encoding='utf-8') as f:
    lines = f.readlines()

# Index 0 is reserved for the empty string (used as padding), hence the +1.
n_words = len(lines) + 1
word_emb_dim = input_size = 128
word_embeddings = torch.nn.Embedding(n_words, word_emb_dim)
i2w = {0:''}
w2i = {'':0}
for i in range(0, n_words - 1):
    # The first space-separated field of each line is the token itself.
    line = lines[i].split(' ')
    i2w[i + 1] = line[0]
    w2i[line[0]] = i + 1
# The vectors stored in the file are not copied into the embedding table
# (the line below is disabled), so the table keeps its random initialisation.
#    word_embeddings.weight[i] = torch.from_numpy(np.array(line[1:],dtype=np.float32))

# Freeze the embedding table. The attribute is `requires_grad`; the previous
# spelling `require_grad` only created an unused attribute.
word_embeddings.weight.requires_grad = False

poems = []
max_line_length = 32
with codecs.open('data/manyun.txt', mode='r', encoding='utf-8') as f:
    for poem in f:
        poem = poem.replace(' ','')
        # Keep only the text after the last ':' when one is present.
        if ':' in poem: poem = poem.split(':')[-1]
        poem = poem.replace('\n','')
        poem = poem.replace('\r','')
        # Drop lines that are too short/long or contain parentheses.
        if len(poem) < 24 or len(poem) > max_line_length or '(' in poem or u'（' in poem or u'）' in poem or ')' in poem:
            continue
        # 'S' and 'E' mark start and end of a poem.
        poem = 'S' + poem + 'E'
        # Store a real list of indices: a lazy `map` object (Python 3) cannot be
        # sliced or padded later and would be exhausted by the preview below.
        poems.append([w2i.get(ch) for ch in poem])

n_poems = len(poems)

print( 'Data summary:\n\n number of poems: {}\n number of words: {}\n'.format(n_poems, n_words))
print('Poem examples:\n\n'+'\n'.join([''.join(map(i2w.get, x)) for x in poems[:10]]))

Data summary:

 number of poems: 42
 number of words: 773

Poem examples:

S平生何所寄？天地一孤篷。郁纡且行游，迟复尘景中。E
S星汉奔岩屿，惊涛卷曈虹。翕趿隐烟色，长桥海岛空。E
S百年如云梦，逆旅何匆匆。吟坐忘知闻，拈花鉴溟濛。E
S道心不外求，日影养虚冲。观风遣剑意，抱朴任穷通。E
S千古一杯清，卧剑亦何如？云雁有芳信，谈笑未成书。E
S故国弛山色，春华因才逐。北庭惜玉折，积风待岁除。E
S俯仰苍茫间，太虚应有诸。值此吟月夜，借居怀纡余。E
S心斋即坛醮，守道安违俗。江湖得意气，狂歌岂踟躇。E
S所忧非尘辙，萧萧演六虚。冷眼任霜雪，平生性慵疏。E
S浮景或可悲，愁予感韫椟。明日放归去，长梦酬三馀。E


In [14]:
# Return a random mini batch for training. Poems differ in length, so shorter
# ones are right-padded with the empty-token index up to the longest in the batch.
def get_batch(batch_size=1):
    """Sample `batch_size` poems (with replacement) and build input/target tensors.

    Returns:
        (x, y): both of shape (max_length - 1, batch_size, 1). `x` holds the
        token indices without the last position, cast to float32; `y` holds the
        indices shifted by one position, as int64.
    """
    chosen = np.random.randint(0, n_poems, batch_size)
    # Copy each poem so padding never touches the stored corpus.
    sequences = [poems[k][:] for k in chosen]
    longest = max(len(seq) for seq in sequences)
    for seq in sequences:
        seq.extend(w2i[''] for _ in range(len(seq), longest))
    # (batch, length) -> (batch, length, 1) -> (length, batch, 1)
    tokens = torch.LongTensor(sequences).detach().unsqueeze(2).transpose(0,1)
    inputs = tokens[:-1].type(torch.float32)
    targets = tokens[1:]
    return inputs, targets

def idx2emb(x):
    """Map a tensor of token indices to embedding vectors.

    The input is cast to int64, looked up in `word_embeddings`, dimension 2 is
    squeezed out, and the result is detached from the autograd graph.
    """
    indices = x.type(torch.long)
    vectors = word_embeddings(indices)
    return vectors.squeeze(2).detach()
    

# Convert a batch of token indices back into sentences.
def batch2sent(batch, aslist=False):
    """Decode a (seq_length, batch_size, k) index tensor into text.

    Each batch column is flattened and every index is mapped through `i2w`.

    Returns:
        A list with one string per batch column when `aslist` is true,
        otherwise those strings joined with newlines.
    """
    ids = batch.type(torch.int32).detach()
    seq_length, batch_size, emb_size = ids.size()
    sentences = [
        ''.join(i2w.get(tok) for tok in ids[:,col,:].view(-1).tolist())
        for col in range(batch_size)
    ]
    if aslist:
        return sentences
    return u'\n'.join(sentences)

# Smoke test: draw one poem and print the input text and the target text
# (the target is the same sequence shifted by one position).
x, y = get_batch(1)
print(batch2sent(x))
print(batch2sent(y))

def batch_bleu(batch_data, batch_pred):
    """Corpus-level BLEU over characters.

    Args:
        batch_data: iterable of reference sentences (strings); each is used as
            the single reference for the hypothesis at the same position.
        batch_pred: iterable of hypothesis sentences (strings).

    Returns:
        The score returned by `bleu_score.corpus_bleu`.
    """
    # Build concrete lists: corpus_bleu needs sized sequences, and `map`
    # returns a lazy iterator on Python 3.
    references = [[list(sentence)] for sentence in batch_data]
    hypotheses = [list(sentence) for sentence in batch_pred]
    return bleu_score.corpus_bleu(references, hypotheses)
    

# The poem generator network
class Generator(nn.Module):
    """Recurrent layer followed by a linear projection and log-softmax.

    Maps embedded inputs of shape (seq_length, batch_size, input_size) to
    per-step log-probabilities over `output_size` classes.
    """

    def __init__(self, input_size, output_size, hidden_size):
        super(Generator, self).__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.hidden_size = hidden_size
        self.rnn = LSTM(self.input_size, self.hidden_size)
        self.output = nn.Linear(self.hidden_size, self.output_size)
        self.logsoftmax = torch.nn.LogSoftmax(dim=-1)

    def forward(self, x, h0=None):
        """Compute log-probabilities for every time step.

        Args:
            x: tensor of shape (seq_length, batch_size, input_size).
            h0: optional initial hidden state forwarded to the recurrent layer.

        Returns:
            (y, ht): `y` has shape (seq_length, batch_size, output_size);
            `ht` is the hidden state returned by the recurrent layer.
        """
        seq_length, batch_size, input_size = x.size()
        y, ht = self.rnn(x, h0)
        # Flatten time and batch so the linear layer sees a 2-D input.
        y = y.view(-1, self.hidden_size)
        y = self.output(y)
        # Use the instance's own output size; relying on a module-level
        # `output_size` broke any model built with a different value.
        y = y.view(seq_length, batch_size, self.output_size)
        y = self.logsoftmax(y)
        return y, ht

def poem_gen(model, w=None, cr=1e-1):
    """Generate a poem one character at a time, starting from character `w`.

    Args:
        model: callable taking (embedded_input, state) and returning
            (log-probabilities, state).
        w: first character; a random vocabulary entry is drawn when it is
            None or not present in `w2i`.
        cr: unused.

    Returns:
        The generated text. Generation stops after `max_line_length` steps or
        as soon as the end marker 'E' is produced (the marker is not included).
    """
    with torch.no_grad():
        if w is None or w not in w2i:
            w = i2w[np.random.randint(1,n_words)]
        state = None
        # Prime the model with the start marker before feeding the first character.
        start = torch.FloatTensor([w2i['S']]).view(1,1,-1).detach()
        y, state = model(idx2emb(start), state)
        x = idx2emb(torch.LongTensor([w2i[w]]).view(1,1,-1).detach())
        chars = [w]
        for t in range(max_line_length):
            y, state = model(x, state)
            # Consider the k most likely next characters, k drawn from 1..4.
            k = min([1+np.random.binomial(3,0.5), y.size(-1)-1])
            candidates = torch.topk(y, k, dim=-1)[1].detach()
            # NOTE(review): geometric() returns values >= 1, so rank 0 (the most
            # likely character) is chosen only when k == 1 — confirm this is intended.
            rank = min([np.random.geometric(0.3), k-1])
            x = candidates[:,:,rank].unsqueeze(2)
            w = batch2sent(x)
            if w == 'E':
                break
            chars.append(w)
            x = idx2emb(x)
        return u''.join(chars)
    
    
# Build the model used to generate poems.
# Inputs are word embeddings, outputs are scores over the whole vocabulary.

input_size = word_emb_dim
hidden_size = 128
output_size = n_words

model = Generator(input_size, output_size, hidden_size)


S已由志气添白发，未因多战负情怀。力微敢弃终军骨，栖迟更难忘劳徕。
已由志气添白发，未因多战负情怀。力微敢弃终军骨，栖迟更难忘劳徕。E


In [18]:
# Training hyper-parameters.
lr = 1e-3
n_epochs = 2000
last_epoch = -1
disp_interval = 50
batch_size = 2

# NOTE(review): padded target positions (index 0) are included in the loss;
# consider NLLLoss(ignore_index=...) if that is not intended.
loss_func = nn.NLLLoss()
optimizer = torch.optim.Adam(model.parameters(),lr=lr)

torch.manual_seed(1)
np.random.seed(1)

def lr_lambda(epoch):
    """Learning-rate multiplier: decays by a factor of 0.99 every 50 steps."""
    return 0.99**(epoch/50.0)

scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda, last_epoch=last_epoch)

#model.load_state_dict(torch.load('saves/model-ernn.pt'))

Loss = []
BLEU = []
for epoch in range(n_epochs):
    model.zero_grad()
    x_obs, y_obs = get_batch(batch_size=batch_size)
    x_obs = idx2emb(x_obs)
    y_pred, ht = model(x_obs)
    # Greedy prediction and ground truth, kept as index tensors for decoding.
    y1 = torch.argmax(y_pred.detach(),-1,keepdim=True).detach()#[:,:1,:]
    y2 = y_obs.detach()#[:,:1,:]
    y_pred = y_pred.view(-1,output_size)
    y_obs = y_obs.contiguous().view(-1)
    loss = loss_func(y_pred,y_obs)
    loss.backward()
    Loss.append(loss.tolist())

    # batch_pred holds the model's predictions, batch_data the observed text.
    # batch_bleu expects (references, hypotheses); these were previously swapped,
    # so predictions were scored as if they were the references.
    batch_pred, batch_data = batch2sent(y1), batch2sent(y2)
    bleu = batch_bleu(batch_data.split('\n'), batch_pred.split('\n'))
    BLEU.append(bleu)
    
    optimizer.step()
    scheduler.step()
    if epoch % disp_interval == 0:
        print(u'Epoch{}, Loss{}, BLEU{}, \nPred:\n{}\nObs:\n{}\nRnd:\n{}\n'.format(epoch,loss.tolist(),round(BLEU[-1],2), batch_pred, batch_data,poem_gen(model)))
        torch.save(model.state_dict(),'saves/model-ernn.pt')

# Plot loss and BLEU averaged over non-overlapping windows of `window_size` steps.
window_size = 50

fig = pl.figure(1)
n_used = len(Loss)//window_size * window_size  # drop the incomplete last window
avg_losses = np.array(Loss)[:n_used].reshape([-1,window_size]).mean(1)
pl.plot(np.arange(0,n_used,window_size), avg_losses,'r-')
pl.xlabel('Time')
pl.ylabel('Loss')
pl.yscale('log')

fig = pl.figure(2)
n_used = len(BLEU)//window_size * window_size
avg_bleu = np.array(BLEU)[:n_used].reshape([-1,window_size]).mean(1)
pl.plot(np.arange(0,n_used,window_size), avg_bleu,'r-')
pl.xlabel('Time')
pl.ylabel('BLEU')
pl.yscale('log')


Epoch0, Loss0.165372759104, BLEU0.96, 
Pred:
百江北，正柳暗、愁锁千里。况一夜昙开尽，悔无计。E
百古离情别怨，风物唯堪悲。世事同蕉鹿，谁赋式微。E
Obs:
对江北，正柳暗、愁锁千里。况一夜昙开尽，悔无计。E
自古离情别怨，风物唯堪悲。世事同蕉鹿，谁赋式微。E
Rnd:
知闻，谁陈雁临崖一人昙花无端，折谁堪地何所？雁窗色，却其寥石痴行云

Epoch50, Loss0.15601336956, BLEU0.98, 
Pred:
百怜香烬，终日成孤倚。忍泪付新杯，醉时看、飞云化碧。E
百年如云梦，逆旅何匆匆。吟坐忘知闻，拈花鉴溟濛。E
Obs:
常怜香烬，终日成孤倚。忍泪付新杯，醉时看、飞云化碧。E
百年如云梦，逆旅何匆匆。吟坐忘知闻，拈花鉴溟濛。E
Rnd:
岂踟躇，正眼、旧物。值山闲步气笑，急余心事雁窗情伤凭痴绝，风物梦

Epoch100, Loss0.163215786219, BLEU0.96, 
Pred:
百江北，正柳暗、愁锁千里。况一夜昙开尽，悔无计。E
百欢有时尽，天地无穷极。尽数养五藏，造化由心生。E
Obs:
对江北，正柳暗、愁锁千里。况一夜昙开尽，悔无计。E
悲欢有时尽，天地无穷极。尽数养五藏，造化由心生。E
Rnd:
劳徕羽难忘机晚微敢终日放弛我顽，急行有时敢雨难，几瘦？无凭欢云际花

Epoch150, Loss0.110176548362, BLEU0.98, 
Pred:
故风笑我，眉眼为谁颦。帘幕冷，素心微，咫尺殊难寄。E
可怜风流随身老，是非帝业终尘埋。忍将寂寞立长夜，梦里狂沙挟月来。E
Obs:
海风笑我，眉眼为谁颦。帘幕冷，素心微，咫尺殊难寄。E
可怜风流随身老，是非帝业终尘埋。忍将寂寞立长夜，梦里狂沙挟月来。E
Rnd:
横心即，玉

Epoch200, Loss0.133856222034, BLEU0.96, 
Pred:
远欢有时尽，天地无穷极。尽数养五藏，造化由心生。E
远野颠踣迷故路，过眼星云翻盛衰。赤壁有定空余恨，衔石痴绝误贾才。E
Obs:
悲欢有时尽，天地无穷极。尽数养五藏，造化由心生。E
朝野颠踣迷故路，过眼星云翻盛衰。赤壁有定空余恨，衔石痴绝误贾才。E
Rnd:
惭养虚应难将意，妨趣余排雨难忘机任病江湖得意，栖劳徕独越。长梦别，



KeyboardInterrupt: 

In [171]:
# Scratch: inspect the embedding vector stored at index 1.
word_embeddings.weight[1]

tensor([-0.1807,  0.6392,  0.1268,  1.1103, -1.6153, -0.1511, -0.0435,
        -0.0471, -0.8821, -0.4894, -0.1626,  0.1210,  1.6830, -0.1591,
        -0.0498, -0.0988,  0.3077,  1.0061,  0.2484, -0.7002])

In [13]:
# Scratch: preview the nested structure batch_bleu builds for its references
# (each sentence becomes a character list wrapped in a one-element list).
map(lambda x:[x], map(list, batch_data.split('\n')))

[[[u'\u767e',
   u'\u9645',
   u'\u4f55',
   u'\u66fe',
   u'\u60af',
   u'\u58ee',
   u'\u58eb',
   u'\uff0c',
   u'\u51e0',
   u'\u56de',
   u'\u5ce5',
   u'\u5d58',
   u'\u635f',
   u'\u5f62',
   u'\u9ab8',
   u'\u3002',
   u'\u6c5f',
   u'\u5c71',
   u'\u95f2',
   u'\u6101',
   u'\u7a7a',
   u'\u62b1',
   u'\u51b7',
   u'\uff0c',
   u'\u80e1',
   u'\u5f26',
   u'\u75db',
   u'\u996e',
   u'\u9a6c',
   u'\u5578',
   u'\u54c0',
   u'\u3002',
   u'E']],
 [[u'\u767e',
   u'\u6c5f',
   u'\u5317',
   u'\uff0c',
   u'\u6b63',
   u'\u67f3',
   u'\u6697',
   u'\u3001',
   u'\u6101',
   u'\u9501',
   u'\u5343',
   u'\u91cc',
   u'\u3002',
   u'\u51b5',
   u'\u4e00',
   u'\u591c',
   u'\u6619',
   u'\u5f00',
   u'\u5c3d',
   u'\uff0c',
   u'\u6094',
   u'\u65e0',
   u'\u8ba1',
   u'\u3002',
   u'E']]]

In [17]:
# Sanity check: score a batch against itself.
batch_bleu(batch_data.split('\n'), batch_data.split('\n'))

1.0

In [31]:
# Scratch arithmetic. The recorded output is the integer 66, i.e. `/` behaved
# as integer division when this cell was run.
16661700/7650/33

66

In [5]:
# Scratch: tuple-unpacking assignment.
a,b = 1,2

In [6]:
# Scratch: display both values as a tuple.
a,b

(1, 2)