In [223]:
import torch
import torch.nn as nn
import time
import random
import math 
import zipfile
device = torch.device('cuda' if torch.cuda.is_available()else 'cpu')


In [224]:
def load_data_lyrics():
    with zipfile.ZipFile('/home/data/jaychou_lyrics.txt.zip')as zin:
        with zin.open('jaychou_lyrics.txt') as f:
            corpus_chars = f.read().decode('utf-8')
    corpus_chars = corpus_chars.replace('\n',' ').replace('\r',' ')
    corpus_chars = corpus_chars[:10000]
    idx_to_char = list(set(corpus_chars))
    char_to_idx = dict([[char,i]for i,char in enumerate(idx_to_char)])
    vocab_size = len(idx_to_char)
    corpus_indices = [char_to_idx[char]for char in corpus_chars]
    return idx_to_char,char_to_idx,vocab_size,corpus_indices

In [225]:
def data_iter_random(corpus_indices,batch_size,num_steps,device=None):
    if device==None:
        device = torch.device('cuda' if torch.cuda.is_available()else 'cpu')
    num_examples = (len(corpus_indices)-1)//num_steps
    epoch_examples = num_examples//batch_size
    example_indices = list(range(num_examples))
    random.shuffle(example_indices)
    def _data(pos):
        return corpus_indices[pos:pos+num_steps]
    for i in range(epoch_examples):
        i = i*batch_size
        batch_intices = example_indices[i:i+batch_size]
        X = [_data(j*num_steps)for j in batch_intices]
        Y = [_data(j*num_steps+1)for j in batch_intices]
        yield torch.tensor(X,dtype=torch.float32,device=device),torch.tensor(Y,dtype=torch.float32,device=device)

In [226]:
def data_iter_consecutive(corpus_indices,batch_size,num_steps,device=None):
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available()else 'cpu')
    corpus_indices = torch.tensor(corpus_indices,dtype=torch.float32,device=device)
    data_len = len(corpus_indices)
    num_batch = data_len//batch_size
    indices = corpus_indices[0:num_batch*batch_size].view(batch_size,num_batch)
    epoch_size = (num_batch-1)//num_steps
    for i in range(epoch_size):
        i = i*num_steps
        X = indices[:,i:i+num_steps]
        Y = indices[:,i+1:i+num_steps+1]
        yield X,Y
    

In [227]:
def one_hot(x,n_class,dtype=torch.float32):
    x =x.long()
    res = torch.zeros(x.shape[0],n_class,dtype = dtype ,device=x.device)
    res.scatter_(1,x.view(-1,1),1)
    return res

In [228]:
def to_onehot(x,n_class):
    return [one_hot(x[:,i],n_class)for i in range(x.shape[1])]

In [229]:
idx_to_char,char_to_idx,vocab_size,corpus_indices =  load_data_lyrics()
num_hiddens =256
rnn_layer = nn.RNN(input_size = vocab_size,hidden_size = num_hiddens)
# batch_size = 2
# num_steps = 35
# state =None
# X = torch.rand(num_steps,batch_size,vocab_size)
# Y,state_new = rnn_layer(X,state)
vocab_size

1027

In [230]:
class RNNModel(nn.Module):
    def __init__(self,rnn_layer,vocab_size):
        super(RNNModel,self).__init__()
        self.rnn = rnn_layer
        self.hidden_size = rnn_layer.hidden_size * (2 if rnn_layer.bidirectional else 1)
        self.vocab_size = vocab_size
        self.dense = nn.Linear(self.hidden_size,vocab_size)
        self.state = None
    def forward(self,inputs,state):
        X = to_onehot(inputs,self.vocab_size)
        Y ,self.state = self.rnn(torch.stack(X),state)
        output = self.dense(Y.view(-1,Y.shape[-1]))
        return output,self.state



In [231]:
model = RNNModel(rnn_layer,vocab_size).to(device)

In [232]:
def predict_rnn_pytorch(prefix,num_chars,model,vocab_size,device,idx_to_char,char_to_idx):
    state = None
    output  = [char_to_idx[prefix[0]]]
    for t in range(num_chars+len(prefix)-1):
        X = torch.tensor([output[-1]],device=device).view(1,1)
        if state is not None:
            if isinstance(state,tuple):
                state = (state[0].to(device),state[1].to(device))
            else:
                state = state.to(device)
        (Y,state) = model(X,state)
        if t < len(prefix)-1:
            output.append(char_to_idx[prefix[t+1]])
        else:
            output.append(int(Y.argmax(dim=1).item()))
    return ''.join([idx_to_char[i]for i in output])

In [233]:
predict_rnn_pytorch('分开',10,model,vocab_size,device,idx_to_char,char_to_idx)

'分开墙画墙向墙猜向画墙向'

In [234]:
def clip_gradient(params,theta,device):
    norm = torch.tensor([0.0],device=device)
    for param in params:
        norm += (param.grad.data**2).sum()
    norm = norm.sqrt().item()
    if norm > theta:
        for param in params:
            param.grad.data *=(theta/norm)

In [235]:
def train_and_predict_rnn_pytorch(model,num_hiddens,vocab_size,device,corpus_indices,char_to_idx,idx_to_char,
                                    num_epochs,num_steps,lr,clipping_theta,batch_size,pred_period,pred_len,prefixes):
    loss = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    model.to(device)
    state = None
    for  epoch in range(num_epochs):
        l_sum = 0.0
        n = 0 
        start = time.time()
        data_iter = data_iter_consecutive(corpus_indices,batch_size,num_steps,device)
        for X,Y in data_iter:
            if state is not None:
                if isinstance(state,tuple):
                    state = (state[0].detach(),state[1].detach())
                else:
                    state = state.detach()
            (y_hat,state) = model(X,state)
            y = torch.transpose(Y,0,1).contiguous().view(-1)
            l= loss(y_hat,y.long())
            optimizer.zero_grad()
            l.backward()
            clip_gradient(model.parameters(),clipping_theta,device)
            optimizer.step()
            l_sum += l.item()*y.shape[0]
            n +=y.shape[0]
        try:
            perplexity = math.exp(l_sum/n)
        except OverflowError:
            perplexity = float('inf')
        if (epoch+1)%pred_period==0:
            print('epoch:%d,loss:%f,time:%.1f'%(epoch+1,perplexity,time.time()-start))
            for prefix in prefixes:
                print('-',predict_rnn_pytorch(prefix,pred_len,model,vocab_size,device,idx_to_char,char_to_idx))
                
            

            

In [236]:
num_epochs =250
batch_size = 32
lr = 1e-3
clipping_theta = 1e-2
pred_len = 50
pred_period = 50
num_steps=35
prefixes = ['分开','不分开']
train_and_predict_rnn_pytorch(model,num_hiddens,vocab_size,device,corpus_indices,char_to_idx,idx_to_char,
                                    num_epochs,num_steps,lr,clipping_theta,batch_size,pred_period,pred_len,prefixes)

epoch:50,loss:11.321599,time:0.0
- 分开始我不 你想 我不能再想 我不能再想 我不要再想 我不要再想 我不要再想 我不要再想 我不要再想 我
- 不分开 我想要你不你 我的可爱女人 坏坏的让我疯狂的可爱女人 坏坏的让我疯狂的可爱女人 坏坏的让我疯狂的可
epoch:100,loss:1.278382,time:0.0
- 分开始我像 爸和 当年的模样 说著一口吴侬软语的姑娘缓缓走过外滩 消失的 旧时光 一九四三 回头看 的片
- 不分开 我想要你爸微笑每天都能看到  我有种不要再 我知道 是人的梦 如果我遇见你是一场悲剧 我想我这辈子
epoch:150,loss:1.068248,time:0.0
- 分开 我像 你可  我知就这样打我妈妈 我说你爸在会痛不知不觉 你已经不开我 不知不觉 我跟了这节奏 后
- 不分开 我不要再想  一根我不抽痛 能为什么球 它一定实我 载著你 彷彿载著阳光 不管我哪里 是晴天 蝴蝶
epoch:200,loss:1.035857,time:0.0
- 分开 我手来你的落 一句 我有你看棒球 想这样没担忧 唱着歌 一直走 我想就这样牵着你的手不放开 爱可不
- 不分开 我不要再想  一根我不抽开  为什么球  可话的手 谁像一汉堡  想要你的微笑每天都能看到  我知
epoch:250,loss:1.048176,time:0.0
- 分开 我像大像可妈 我叫就这样爸 我去我爸  你 我不要再风圈来不 我办不这 我不能 平常话不多 除非是
- 不分开 我像人的梦 你静的从小 离开我 仙人一怕风 来不同去离你 没有你 我有红烦都一场悲剧 是过完一个人


In [237]:
def x():
    return ''.join([idx_to_char[i]for i in [1,1,1,1,1]])
print(x())

公公公公公
