In [23]:
import torch
import torch.nn as nn
import zipfile
import random
# Default compute device for the notebook: the GPU when CUDA is usable, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import numpy as np
import time
import math


In [24]:
def load_data_lyrics(zip_path='/home/data/jaychou_lyrics.txt.zip',
                     member='jaychou_lyrics.txt', max_chars=10000):
    """Load a lyrics corpus from a zip archive and build a character vocabulary.

    Line breaks (``\\n`` and ``\\r``) are replaced by spaces and only the first
    ``max_chars`` characters are kept.

    Args:
        zip_path: Path of the zip archive to read.
        member: Name of the UTF-8 text file inside the archive.
        max_chars: Number of leading characters to keep; ``None`` keeps all.

    Returns:
        A 4-tuple ``(char_to_idx, idx_to_char, vocab_size, chorpus_indices)``.
        NOTE: the first two names are historically swapped relative to what
        they hold, and are kept that way for backward compatibility:

        * ``char_to_idx`` is a *list* mapping index -> character,
        * ``idx_to_char`` is a *dict* mapping character -> index,
        * ``vocab_size`` is the number of distinct characters,
        * ``chorpus_indices`` is the corpus encoded as a list of indices.
    """
    with zipfile.ZipFile(zip_path) as zin:
        with zin.open(member) as f:
            chorpus_chars = f.read().decode('utf-8')
    chorpus_chars = chorpus_chars.replace('\n', ' ').replace('\r', ' ')
    if max_chars is not None:
        chorpus_chars = chorpus_chars[:max_chars]
    # sorted() instead of list(): set iteration order for strings changes
    # between interpreter runs, which made the index assignment (and therefore
    # any saved parameters or printed indices) irreproducible.
    char_to_idx = sorted(set(chorpus_chars))
    idx_to_char = {char: i for i, char in enumerate(char_to_idx)}
    vocab_size = len(char_to_idx)
    chorpus_indices = [idx_to_char[char] for char in chorpus_chars]
    return char_to_idx, idx_to_char, vocab_size, chorpus_indices
    

In [25]:
def data_iter_random(chorpus_indices, batch_sizes, num_steps, device=None):
    """Yield mini-batches of randomly ordered, non-overlapping subsequences.

    The corpus is cut into consecutive examples of ``num_steps`` items; the
    examples are shuffled with ``random.shuffle`` and grouped into batches.
    Examples that do not fill a complete batch are dropped.

    Args:
        chorpus_indices: Sequence of token indices (sliceable, e.g. a list).
        batch_sizes: Number of examples per mini-batch.
        num_steps: Length of each example.
        device: Target ``torch.device``; defaults to CUDA when available,
            otherwise CPU.

    Yields:
        ``(X, Y)`` float32 tensors of shape ``(batch_sizes, num_steps)`` where
        ``Y`` is ``X`` shifted one position to the right in the corpus.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # "-1" because every input position needs a following label position.
    num_examples = (len(chorpus_indices) - 1) // num_steps
    num_batches = num_examples // batch_sizes
    example_order = list(range(num_examples))
    random.shuffle(example_order)

    def _data(pos):
        # Slice of num_steps items starting at corpus position `pos`.
        return chorpus_indices[pos:pos + num_steps]

    for batch in range(num_batches):
        first = batch * batch_sizes
        batch_examples = example_order[first:first + batch_sizes]
        x = [_data(j * num_steps) for j in batch_examples]
        y = [_data(j * num_steps + 1) for j in batch_examples]
        # float32 is kept for backward compatibility; consumers in this
        # notebook cast to long before indexing.
        yield (torch.tensor(x, dtype=torch.float32, device=device),
               torch.tensor(y, dtype=torch.float32, device=device))

In [26]:
def data_iter_consecutive(corpus_indices, batch_size, num_steps, device=None):
    """Yield mini-batches whose rows continue where the previous batch ended.

    The corpus is truncated to a multiple of ``batch_size`` and reshaped to
    ``(batch_size, batch_len)``; successive windows of ``num_steps`` columns
    are then yielded, so row ``r`` of one batch is directly followed in the
    corpus by row ``r`` of the next batch.

    Args:
        corpus_indices: Sequence of token indices.
        batch_size: Number of rows (parallel streams) per mini-batch.
        num_steps: Number of time steps per mini-batch.
        device: Target ``torch.device``; defaults to CUDA when available,
            otherwise CPU.

    Yields:
        ``(X, Y)`` float32 tensors of shape ``(batch_size, num_steps)`` where
        ``Y`` is ``X`` shifted one position to the right.
    """
    if device is None:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    corpus_indices = torch.tensor(corpus_indices, dtype=torch.float32, device=device)
    data_len = len(corpus_indices)
    batch_len = data_len // batch_size
    indices = corpus_indices[0:batch_size * batch_len].view(batch_size, batch_len)
    # "-1" because the label window is shifted one column to the right.
    epoch_size = (batch_len - 1) // num_steps
    for step in range(epoch_size):
        start = step * num_steps
        X = indices[:, start:start + num_steps]
        Y = indices[:, start + 1:start + num_steps + 1]
        yield X, Y

In [27]:
# Sanity check of the random sampler on the toy sequence 0..29.
# Distinct names are used for the input list and the yielded batches so the
# loop does not overwrite the sequence it is iterating a sampler over.
demo_sequence = list(range(30))
print(demo_sequence)
for x_batch, y_batch in data_iter_random(demo_sequence, 2, 6):
    print(x_batch, y_batch)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]
tensor([[ 0.,  1.,  2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9., 10., 11.]], device='cuda:0') tensor([[ 1.,  2.,  3.,  4.,  5.,  6.],
        [ 7.,  8.,  9., 10., 11., 12.]], device='cuda:0')
tensor([[18., 19., 20., 21., 22., 23.],
        [12., 13., 14., 15., 16., 17.]], device='cuda:0') tensor([[19., 20., 21., 22., 23., 24.],
        [13., 14., 15., 16., 17., 18.]], device='cuda:0')


In [1]:
# Load the corpus and its vocabulary. This must run after the cell that defines
# load_data_lyrics (the NameError recorded below comes from running it first on
# a fresh kernel).
# NOTE(review): as built by load_data_lyrics, `char_to_idx` is a list
# (index -> character) and `idx_to_char` is a dict (character -> index); the
# names are swapped relative to their contents.
char_to_idx,idx_to_char,vocab_size,chorpus_indices = load_data_lyrics()
def one_hot(x, n_class, dtype=torch.float32):
    """One-hot encode a tensor of class indices.

    Args:
        x: Tensor of class indices; expected to be 1-D, since the result has
            ``x.shape[0]`` rows and one index is scattered per row. Values are
            cast to ``long`` before use.
        n_class: Number of classes (width of the result).
        dtype: dtype of the returned tensor.

    Returns:
        Tensor of shape ``(x.shape[0], n_class)`` on ``x``'s device, with a 1
        in column ``x[i]`` of row ``i`` and 0 elsewhere.
    """
    column_index = x.long().view(-1, 1)
    encoded = torch.zeros(x.shape[0], n_class, dtype=dtype, device=x.device)
    # scatter_ writes in place and returns the same tensor.
    return encoded.scatter_(1, column_index, 1)

NameError: name 'load_data_lyrics' is not defined

In [29]:
def to_hot(x, n_class):
    """One-hot encode every column of ``x`` separately.

    Args:
        x: Tensor indexed as ``x[:, i]``; each column is passed to ``one_hot``.
        n_class: Number of classes forwarded to ``one_hot``.

    Returns:
        A list with ``x.shape[1]`` entries, the i-th being
        ``one_hot(x[:, i], n_class)``.
    """
    # unbind(dim=1) yields the same column slices as x[:, i] for each i.
    return [one_hot(column, n_class) for column in x.unbind(dim=1)]

In [30]:
# Model dimensions: inputs and outputs are both one-hot over the vocabulary.
num_inputs = vocab_size
num_hiddens = 256
num_outputs = vocab_size
print('train on:', device)
def get_params():
    """Create the trainable parameters of the RNN.

    Reads the module-level ``num_inputs``, ``num_hiddens``, ``num_outputs``
    and ``device``. Weight matrices are drawn from a normal distribution with
    mean 0 and standard deviation 0.01 via ``np.random.normal``; biases start
    at zero.

    Returns:
        ``nn.ParameterList([w_xh, w_hh, b_h, w_hq, b_q])`` with shapes
        ``(num_inputs, num_hiddens)``, ``(num_hiddens, num_hiddens)``,
        ``(num_hiddens,)``, ``(num_hiddens, num_outputs)`` and
        ``(num_outputs,)``.
    """
    def _gaussian(shape):
        values = np.random.normal(0, 0.01, size=shape)
        weight = torch.tensor(values, dtype=torch.float32, device=device)
        return nn.Parameter(weight, requires_grad=True)

    def _zero_bias(size):
        zeros = torch.zeros(size, requires_grad=True, device=device, dtype=torch.float32)
        return nn.Parameter(zeros)

    # Hidden-layer parameters (weights are drawn in the order xh, hh, hq).
    w_xh = _gaussian((num_inputs, num_hiddens))
    w_hh = _gaussian((num_hiddens, num_hiddens))
    b_h = _zero_bias(num_hiddens)
    # Output-layer parameters.
    w_hq = _gaussian((num_hiddens, num_outputs))
    b_q = _zero_bias(num_outputs)
    return nn.ParameterList([w_xh, w_hh, b_h, w_hq, b_q])

train on: cuda


## 初始化隐藏状态 (Initialize the hidden state)

In [31]:
def init_rnn_states(batch_size, num_hiddens, device):
    """Return the initial hidden state: an all-zero tensor.

    Args:
        batch_size: Number of rows of the state.
        num_hiddens: Number of columns of the state.
        device: Device on which the tensor is allocated.

    Returns:
        A single tensor (not a tuple) of shape ``(batch_size, num_hiddens)``.
    """
    state_shape = (batch_size, num_hiddens)
    return torch.zeros(state_shape, device=device)

In [32]:
def rnn(inputs, state, params):
    """Run a tanh RNN over a sequence of per-step inputs.

    Args:
        inputs: Iterable of per-time-step input tensors, each multiplied by
            ``w_xh``.
        state: Hidden state tensor fed into the first step.
        params: Five parameters unpacked as ``w_xh, w_hh, b_h, w_hq, b_q``.

    Returns:
        ``(outputs, hidden)`` where ``outputs`` is a list holding one output
        tensor per time step and ``hidden`` is the hidden state after the
        last step.
    """
    w_xh, w_hh, b_h, w_hq, b_q = params
    hidden = state
    step_outputs = []
    for step_input in inputs:
        pre_activation = torch.matmul(step_input, w_xh) + torch.matmul(hidden, w_hh) + b_h
        hidden = torch.tanh(pre_activation)
        step_outputs.append(torch.matmul(hidden, w_hq) + b_q)
    return step_outputs, hidden

In [33]:
# Smoke test: push a tiny 2x5 batch of indices through the untrained RNN.
X = torch.arange(10).view(2,5)
state = init_rnn_states(X.shape[0],num_hiddens,device)
# One one-hot tensor per column of X, i.e. one per time step.
inputs = to_hot(X.to(device),vocab_size)
# `params` is also reused by the prediction cell further down.
params = get_params()
outputs,state_new = rnn(inputs,state,params)
# NOTE(review): rnn returns the hidden state as a plain tensor, so
# state_new[0] is its first row; the last printed shape is that of a single
# row, not of the whole state.
print(len(outputs),outputs[0].shape,state_new[0].shape)

5 torch.Size([2, 1027]) torch.Size([256])


In [34]:
def predict_rnn(prefix, num_chars, rnn, params, init_rnn_states, num_hiddens, vocab_size, device, idx_to_char, char_to_idx):
    """Greedily generate ``num_chars`` characters following ``prefix``.

    The prefix is first fed through the network one character at a time to
    warm up the hidden state; afterwards the arg-max of each output is fed
    back in as the next input.

    Args:
        prefix: Non-empty string used as the start of the generated text.
        num_chars: Number of characters to generate after the prefix.
        rnn: Callable ``rnn(inputs, state, params) -> (outputs, state)``.
        params: Model parameters passed through to ``rnn``.
        init_rnn_states: Callable ``(batch_size, num_hiddens, device)``
            returning the initial state.
        num_hiddens: Hidden size passed to ``init_rnn_states``.
        vocab_size: Number of classes used for one-hot encoding.
        device: Device on which input tensors are created.
        idx_to_char: Despite its name, indexed by *character* and yielding an
            integer index.
        char_to_idx: Despite its name, indexed by *integer index* and yielding
            a character.

    Returns:
        The prefix followed by the generated characters, as one string.
    """
    state = init_rnn_states(1, num_hiddens, device)
    output = [idx_to_char[prefix[0]]]
    # Generation needs no gradients; without no_grad the carried state would
    # keep the autograd graph of every previous step alive.
    with torch.no_grad():
        for t in range(num_chars + len(prefix) - 1):
            X = to_hot(torch.tensor([[output[-1]]], device=device), vocab_size)
            (Y, state) = rnn(X, state, params)
            if t < len(prefix) - 1:
                # Still inside the prefix: force the known next character.
                output.append(idx_to_char[prefix[t + 1]])
            else:
                output.append(int(Y[0].argmax(dim=1).item()))
    return ''.join([char_to_idx[i] for i in output])
     

In [35]:
# Generate 10 characters from the still-untrained parameters created above.
predict_rnn('爱情',10,rnn,params,init_rnn_states,num_hiddens,vocab_size,device,idx_to_char,char_to_idx)

'爱情队屉歉杂给桌变毛油翻'

In [36]:
def grad_clipping(params, theta, device):
    """Rescale gradients in place so their global L2 norm is at most ``theta``.

    The norm is taken over the gradients of all parameters together. When it
    exceeds ``theta`` every gradient is multiplied by ``theta / norm``;
    otherwise gradients are left untouched. Parameters whose ``.grad`` is
    ``None`` (e.g. not reached by the last backward pass) are skipped instead
    of raising ``AttributeError``.

    Args:
        params: Iterable of tensors/parameters with a ``.grad`` attribute.
        theta: Maximum allowed global gradient norm.
        device: Device on which the norm accumulator is created; should match
            the device of the gradients.
    """
    grads = [param.grad for param in params if param.grad is not None]
    norm = torch.tensor([0.0], device=device)
    for grad in grads:
        norm += (grad.data ** 2).sum()
    norm = norm.sqrt().item()
    if norm > theta:
        scale = theta / norm
        for grad in grads:
            grad.data *= scale

In [37]:
def sgd(params, lr, batch_size):
    """Apply one in-place step of mini-batch stochastic gradient descent.

    Args:
        params: Iterable of tensors/parameters whose ``.grad`` is populated.
        lr: Learning rate.
        batch_size: Divisor applied to the scaled gradient.
    """
    for weight in params:
        step = (lr * weight.grad) / batch_size
        # Update through .data so the step itself is not tracked by autograd.
        weight.data.sub_(step)

In [38]:
def train_and_predict_rnn(rnn,get_params,init_rnn_states,num_hiddens,vocab_size,device,chorpus_indices,
                            char_to_idx,idx_to_char,is_random_iter,num_epochs,num_steps,lr,clipping_theta,
                            batch_size,pred_period,pred_len,prefixes):
    """Train the character-level RNN and periodically print sample predictions.

    Args:
        rnn: Callable ``rnn(inputs, state, params) -> (outputs, state)``.
        get_params: Zero-argument callable returning the trainable parameters.
        init_rnn_states: Callable ``(batch_size, num_hiddens, device)``
            returning the initial hidden state.
        num_hiddens: Hidden size forwarded to ``init_rnn_states``.
        vocab_size: Number of classes used for one-hot encoding.
        device: Device used for data, state and gradient clipping.
        chorpus_indices: Training corpus as a sequence of token indices.
        char_to_idx: Forwarded unchanged to ``predict_rnn``.
        idx_to_char: Forwarded unchanged to ``predict_rnn``.
        is_random_iter: ``True`` to use ``data_iter_random`` (state is reset
            for every batch); ``False`` to use ``data_iter_consecutive``
            (state is carried across batches within an epoch).
        num_epochs: Number of passes over the data.
        num_steps: Time steps per mini-batch.
        lr: Learning rate.
        clipping_theta: Maximum global gradient norm.
        batch_size: Examples per mini-batch.
        pred_period: Print perplexity and samples every this many epochs.
        pred_len: Number of characters generated per sample.
        prefixes: Iterable of prefix strings to generate samples from.
    """
    data_iter_fn = data_iter_random if is_random_iter else data_iter_consecutive
    params = get_params()
    loss = nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        if not is_random_iter:
            # Consecutive sampling: initialise once per epoch, then carry over.
            state = init_rnn_states(batch_size,num_hiddens,device)
        l_sum = 0.0
        n = 0
        start = time.time()
        data_iter = data_iter_fn(chorpus_indices,batch_size,num_steps,device)
        for X,Y in data_iter:
            if is_random_iter:
                # Random sampling: batches are unrelated, so start from zero.
                state = init_rnn_states(batch_size,num_hiddens,device)
            else:
                # Cut the carried state out of the previous batch's graph.
                # Tensor.detach() is not in-place, so the result must be
                # re-bound; otherwise backward() would reach into the graph
                # of the previous batch, which has already been freed.
                if isinstance(state,(tuple,list)):
                    state = tuple(s.detach() for s in state)
                else:
                    state = state.detach()
            inputs = to_hot(X,vocab_size)
            (outputs,state) = rnn(inputs,state,params)
            # Stack the per-step outputs: (num_steps * batch_size, vocab_size).
            outputs = torch.cat(outputs,dim=0)
            # Transpose the labels so they are time-major like `outputs`.
            y = torch.transpose(Y,0,1).contiguous().view(-1)
            l = loss(outputs,y.long())
            # Clear stale gradients; each parameter is checked on its own.
            for param in params:
                if param.grad is not None:
                    param.grad.data.zero_()
            l.backward()
            grad_clipping(params,clipping_theta,device)
            # The loss is already a mean, so no extra division by batch size.
            sgd(params,lr,1)
            l_sum += l.item()*y.shape[0]
            n +=y.shape[0]
        if (epoch+1)%pred_period ==0:
            print('epoch:%d,perplexity:%f,time:%.1f sec'%(epoch+1,math.exp(l_sum/n),time.time()-start))
            for prefix in prefixes:
                print('-',predict_rnn(prefix,pred_len,rnn,params,init_rnn_states,num_hiddens,vocab_size,device,idx_to_char,char_to_idx))

In [None]:
# Training hyper-parameters.
num_epochs = 250
num_steps = 35
batch_size = 32
lr = 1e2
clipping_theta = 1e-2
# Reporting: print perplexity and samples every `pred_period` epochs.
pred_period = 50
pred_len = 50
prefixes = ['分开', '不分开']
# Train with random sampling (is_random_iter=True).
train_and_predict_rnn(
    rnn=rnn,
    get_params=get_params,
    init_rnn_states=init_rnn_states,
    num_hiddens=num_hiddens,
    vocab_size=vocab_size,
    device=device,
    chorpus_indices=chorpus_indices,
    char_to_idx=char_to_idx,
    idx_to_char=idx_to_char,
    is_random_iter=True,
    num_epochs=num_epochs,
    num_steps=num_steps,
    lr=lr,
    clipping_theta=clipping_theta,
    batch_size=batch_size,
    pred_period=pred_period,
    pred_len=pred_len,
    prefixes=prefixes,
)
