In [1]:
import numpy as np
import heapq
import json
import os

In [2]:
def load_data_from_file(file_name):
    '''
    Arguments:
    file_name -- the json file name saving results.

    Returns:
    data -- json serializable data.
    '''    
    with open(file_name, 'r') as f:
        data = json.load(f)
    return data

def load_embedding_from_file(file_name):
    '''
    Arguments:
    file_name -- the json file name saving results.

    Returns:
    embeddings -- numpy array
    '''
    with open(file_name, 'r') as f:
        embeddings = np.array(json.loads("".join(f.readlines())))
    return embeddings

In [3]:
word2idx_map = load_data_from_file('weights/word2idx_map')
idx2word_map = load_data_from_file('weights/idx2word_map')
embeddings = load_embedding_from_file('weights/test/epoch2.json')

## Basic Arithmetics

### - Sigmoid Function
$$\sigma(x) = \frac{1}{1 + e^{-x}} \tag{1}$$
$$\sigma'(x) = \sigma(x) * (1 - \sigma(x)) \tag{2}$$

### - Hyperbolic Tangent Function
$$\tanh(x) = \frac{e^{2x} - 1}{e^{2x} + 1} \tag{3}$$
$$\tanh'(x) = 1 - \tanh^{2}(x) \tag{4}$$

### - Softmax Function
$$ softmax(x_i) = \frac{e^{x_i}}{\sum_{i} e^{x_i}} \tag{5}$$


In [4]:
def init_parameters(n_x, n_a, n_y):
    '''
    Implements the xavier initialization of LSTM parameters.
    
    Arguments:
    n_x -- dimension of input, int
    n_a -- dimension of state, int
    n_y -- dimension of output, int

    Returns:
    parameters -- dictionary of parameters
    
    '''
    parameters = {}

    for gate in ['f', 'i', 'c', 'o']:
        Wx = np.random.randn(n_a, n_x) / np.sqrt(n_x)
        Wa = np.random.randn(n_a, n_a) / np.sqrt(n_a)
        parameters['W' + gate] = np.concatenate([Wx, Wa], axis=-1)
        parameters['b' + gate] = np.zeros((n_a, 1))
        
    parameters['Wy'] = np.random.randn(n_y, n_a) / np.sqrt(n_a)
    parameters['by'] = np.zeros((n_y, 1))

    return parameters

def sigmoid(logits):
    return 1 / (1 + np.exp(-logits))

def sigmoid_derivative(s):
    return s * (1 - s)

def tanh_derivative(t):
    return 1 - np.power(t, 2)

def softmax(x):
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)

## LSTM Forward Propagation

### - Forget Gate
$$\Gamma_f^{\langle t \rangle} = \sigma(W_f \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix} + b_f)\tag{6} $$

### - Ignore Gate
$$\Gamma_i^{\langle t \rangle} = \sigma(W_i \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix} + b_i)\tag{7} $$

### - Update Gate
$$\widetilde{c}^{\langle t \rangle} = tanh(W_c \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix} + b_c)\tag{8} $$

### - Output Gate
$$\Gamma_o^{\langle t \rangle} = \sigma(W_o \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix} + b_o)\tag{9} $$

### - Next Cell's State
$$c^{\langle t \rangle} = \Gamma_f^{\langle t \rangle}\circ c^{\langle t-1 \rangle} + \Gamma_i^{\langle t \rangle}\circ \widetilde{c}^{\langle t \rangle}\tag{10}$$
$$a^{\langle t \rangle} = \Gamma_o^{\langle t \rangle} \circ tanh(c^{\langle t \rangle})\tag{11}$$

### - Output Layer
$$\widehat{y}^{\langle t \rangle} = softmax(W_y a^{\langle t \rangle} + b_y)\tag{12}$$






In [5]:
def cell_forward(xt, a_prev, c_prev, parameters):
    '''
    Calculate the single forward step of LSTM-cell.
    
    Arguments:
    xt -- input data at time step t, numpy array of shape (n_x, m)
    a_prev -- numpy array of shape (n_a, m)
    c_prev -- numpy array of shape (n_a, m)
    
    Returns:
    a_next -- numpy array of shape (n_a, m)
    c_next -- numpy array of shape (n_a, m)
    y_pred_t -- numpy array of shape (n_y, m)
    cache -- tuple contains variable needed for backprop
    
    '''
    Wf = parameters['Wf']
    bf = parameters['bf']
    Wi = parameters['Wi']
    bi = parameters['bi']
    Wc = parameters['Wc']
    bc = parameters['bc']
    Wo = parameters['Wo']
    bo = parameters['bo']
    Wy = parameters['Wy']
    by = parameters['by']
    
    concat = np.vstack([xt, a_prev])
    ft = sigmoid(np.dot(Wf, concat) + bf)
    it = sigmoid(np.dot(Wi, concat) + bi)
    ct = np.tanh(np.dot(Wc, concat) + bc)
    ot = sigmoid(np.dot(Wo, concat) + bo)
    c_next = ft * c_prev + it * ct
    a_next = ot * np.tanh(c_next)
    
    y_pred_t = softmax(np.dot(Wy, a_next) + by)

    cache = (xt, a_prev, c_prev, c_next, ft, it, ct, ot)

    return a_next, c_next, y_pred_t, cache

def forward_propagation(x, a0, c0, parameters):
    '''
    Calculate the forward propagation for whole time steps.
    
    Arguments:
    x -- input word vectors, numpy array of shape (n_x, m, T)
    a0 -- initial hidden state, numpy array of shape (n_a, m)
    c0 -- initial memory state, numpy array of shape (n_a, m)
    
    Returns:
    y_pred -- numpy array of shape (n_y, m, T)
    a -- numpy array of shape (n_a, m, T)
    caches -- list of cache
    
    '''
    n_x, m, T = x.shape
    n_y, n_a = parameters['Wy'].shape
    
    y_pred = np.zeros((n_y, m, T))
    a = np.zeros((n_a, m, T))
    at = a0
    ct = c0
    caches = []

    for t in range(T):
        at, ct, y_pred_t, cache = cell_forward(x[:, :, t], at, ct, parameters)
        caches.append(cache)
        a[:, :, t] = at
        y_pred[:, :, t] = y_pred_t

    return y_pred, a, caches

## Loss Function of Classification

$$ \mathcal{L}^{(i)\langle t \rangle} = - \sum_{k = 0}^{n_y - 1} y^{(i)}_k \cdot log(\widehat{y}^{(i)}_k) \tag{13}$$

In [6]:
def compute_loss(y_pred, y):
    '''
    Arguments:
    y_pred -- numpy array of shape (n_y, m, T)
    y -- numpy array of shape (m, T)
    
    Returns:
    loss: float
    '''
    loss = -np.mean(np.log(y_pred)[y, :, :])
    return loss

## LSTM Backward Propagation


### - Output Layer's Derivative
$$da^{\langle t \rangle} = W_y^{T}(\widehat{y}^{\langle t \rangle} - y^{\langle t \rangle}) \tag{14}$$
$$dW_y = \sum_{t = 0}^{T - 1} (\widehat{y}^{\langle t \rangle} - y^{\langle t \rangle}) a^{\langle t \rangle T} \tag{15}$$
$$db_y = \sum_{t = 0}^{T - 1} \sum_{i = 0}^{m - 1} (\widehat{y}^{(i)\langle t \rangle} - y^{(i)\langle t \rangle})\tag{16}$$

### - Gates' Derivative
$$d\Gamma_f^{\langle t \rangle} = da^{\langle t \rangle} \circ tanh(c^{\langle t \rangle})\tag{17}$$
$$d\widetilde c^{\langle t \rangle} = (dc^{\langle t \rangle} + da^{\langle t \rangle} \circ \Gamma_o^{\langle t \rangle}\circ tanh'(c^{\langle t \rangle})) \circ \Gamma_i^{\langle t \rangle}\tag{18}$$
$$d\Gamma_i^{\langle t \rangle} = (dc^{\langle t \rangle} + da^{\langle t \rangle} \circ \Gamma_o^{\langle t \rangle}\circ tanh'(c^{\langle t \rangle})) \circ \widetilde c^{\langle t \rangle}\tag{19}$$
$$d\Gamma_f^{\langle t \rangle} = (dc^{\langle t \rangle} + da^{\langle t \rangle} \circ \Gamma_o^{\langle t \rangle}\circ tanh'(c^{\langle t \rangle})) \circ c^{\langle t-1 \rangle}\tag{20}$$

### - Parameters' Derivative
$$dZ_f^{\langle t\rangle} = d\Gamma_f^{\langle t \rangle} \circ \sigma'(Z_f^{\langle t \rangle}) \tag{21}$$
$$dZ_i^{\langle t\rangle} = d\Gamma_i^{\langle t \rangle} \circ \sigma'(Z_i^{\langle t \rangle}) \tag{22}$$
$$dZ_c^{\langle t\rangle} = d\widetilde c^{\langle t \rangle} \circ tanh'(Z_c^{\langle t \rangle}) \tag{23}$$
$$dZ_o^{\langle t\rangle} = d\Gamma_o^{\langle t \rangle} \circ \sigma'(Z_o^{\langle t \rangle}) \tag{24}$$

$$dW_f = \sum_{t = 0}^{T - 1} dZ_f^{\langle t\rangle} \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix}^T \tag{25}$$
$$dW_i = \sum_{t = 0}^{T - 1} dZ_i^{\langle t\rangle} \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix}^T \tag{26}$$
$$dW_c = \sum_{t = 0}^{T - 1} dZ_c^{\langle t\rangle} \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix}^T \tag{27}$$
$$dW_o = \sum_{t = 0}^{T - 1} dZ_o^{\langle t\rangle} \begin{bmatrix}a^{\langle t-1 \rangle}\\ x^{\langle t \rangle}\end{bmatrix}^T \tag{28}$$
$$db_f = \sum_{t = 0}^{T - 1} \sum_{i = 0}^{m - 1} dZ_f^{(i)\langle t \rangle} \tag{29}$$
$$db_i = \sum_{t = 0}^{T - 1} \sum_{i = 0}^{m - 1} dZ_i^{(i)\langle t \rangle} \tag{30}$$
$$db_c = \sum_{t = 0}^{T - 1} \sum_{i = 0}^{m - 1} dZ_c^{(i)\langle t \rangle} \tag{31}$$
$$db_o = \sum_{t = 0}^{T - 1} \sum_{i = 0}^{m - 1} dZ_o^{(i)\langle t \rangle} \tag{32}$$

### - Previous Cell States' Derivative
$$\begin{bmatrix}da^{\langle t-1 \rangle}\\ dx^{\langle t \rangle}\end{bmatrix} = W_f^T dZ_f^{\langle t \rangle} + W_i^T dZ_i^{\langle t \rangle} + W_c^T dZ_c^{\langle t \rangle} + W_o^T dZ_o^{\langle t \rangle} \tag{33}$$
$$dc^{\langle t-1 \rangle} = (dc^{\langle t \rangle} + da^{\langle t \rangle} \circ \Gamma_o^{\langle t \rangle}\circ tanh'(c^{\langle t \rangle})) \circ \Gamma_f^{\langle t \rangle} \tag{34}$$

In [7]:
def backward_cell(da_next, dc_next, cache, parameters):
    '''
    Calculate the gradients of parameters in a single LSTM-cell.
    
    Arguments:
    da_next -- derivative of loss to next hidden state, numpy array of shape (n_a, m)
    dc_next -- derivative of loss to next memory state, numpy array of shape (n_a, m)
    cache -- tuple
    parameters -- dictionary
    
    Returns:
    grads: dictionary
    '''
    xt, a_prev, c_prev, c_next, ft, it, ct, ot = cache
    n_a = a_prev.shape[0]
    
    # Equations (17) ~ (20)
    dc_next = dc_next + da_next * ot * tanh_derivative(np.tanh(c_next))
    dot = da_next * np.tanh(c_next)
    dct = dc_next * it
    dit = dc_next * ct
    dft = dc_next * c_prev
    
    # Equations (21) ~ (24)
    dZf = dft * sigmoid_derivative(ft)
    dZi = dit * sigmoid_derivative(it)
    dZo = dot * sigmoid_derivative(ot)
    dZc = dct * tanh_derivative(ct)
    
    # Equations (25) ~ (32)
    concat = np.vstack([xt, a_prev])
    dWf = np.dot(dZf, concat.T)
    dWi = np.dot(dZi, concat.T)
    dWo = np.dot(dZo, concat.T)
    dWc = np.dot(dZc, concat.T)
    dbf = np.sum(dZf, axis=-1, keepdims=True)
    dbi = np.sum(dZi, axis=-1, keepdims=True)
    dbc = np.sum(dZc, axis=-1, keepdims=True)
    dbo = np.sum(dZo, axis=-1, keepdims=True)
    
    # Equations (33), (34)
    Wf, Wi, Wc, Wo = parameters['Wf'], parameters['Wi'], parameters['Wc'], parameters['Wo']
    da_prev = (np.dot(Wf.T, dZf) + np.dot(Wi.T, dZi) + np.dot(Wc.T, dZc) + np.dot(Wo.T, dZo))[: n_a, :]
    dc_prev = dc_next * ft
    
    grads = {'a_prev': da_prev, 'c_prev': dc_prev,
             'Wf': dWf, 'Wi': dWi, 'Wo': dWo, 'Wc': dWc,
             'bf': dbf, 'bi': dbi, 'bo': dbo, 'bc': dbc}
    
    return grads

def convert_to_one_hot(y, n_y):
    '''
    Arguments:
    y -- numpy array of shape (m, T)
    n_y -- int
    
    Returns:
    y -- numpy array of shape (n_y, m, T)
    '''
    y = np.eye(n_y)[:, y]
    return y

def backward_propagation(y, y_pred, a, caches, parameters):
    '''
    Calculate the gradients through the backward propagation for whole time steps.
    
    Arguments:
    y -- numpy array of shape (m, T)
    y_pred -- numpy array of shape (n_y, m, T)
    a -- numpy array of shape (n_a, m, T)
    caches -- list
    parameters -- dictionary
    
    Returns:
    grads -- dictionary
    
    '''
    x0 = caches[0][0]
    n_a, m, T = a.shape
    n_x = x0.shape[0]
    n_y = y_pred.shape[0]
    
    dWf = np.zeros((n_a, n_x + n_a))
    dWi = np.zeros((n_a, n_x + n_a))
    dWc = np.zeros((n_a, n_x + n_a))
    dWo = np.zeros((n_a, n_x + n_a))
    dbf = np.zeros((n_a, 1))
    dbi = np.zeros((n_a, 1))
    dbc = np.zeros((n_a, 1))
    dbo = np.zeros((n_a, 1))
    da_prev = np.zeros((n_a, m))
    dc_prev = np.zeros((n_a, m))
    
    y_oh = convert_to_one_hot(y, n_y)
    
    # Equations (14) ~ (16)
    da = np.tensordot(parameters['Wy'].T, y_pred - y_oh, axes=1)
    dWy = np.tensordot(y_pred - y_oh, a, axes=([1, 2], [1, 2]))
    dby = np.sum(y_pred - y_oh, axis=(1, 2)).reshape(-1, 1)
    
    for t in reversed(range(T)):
        grad = backward_cell(da[:, :, t] + da_prev, dc_prev, caches[t], parameters)
        dWf += grad['Wf']
        dWi += grad['Wi']
        dWc += grad['Wc']
        dWo += grad['Wo']
        dbf += grad['bf']
        dbi += grad['bi']
        dbc += grad['bc']
        dbo += grad['bo']
        da_prev = grad['a_prev']
        dc_prev = grad['c_prev']
    
    grads = {'Wf': dWf, 'Wi': dWi, 'Wo': dWo, 'Wc': dWc, 'Wy': dWy,
             'bf': dbf, 'bi': dbi, 'bo': dbo, 'bc': dbc, 'by': dby}
    
    return grads

## Optimizer

$$g_t = \nabla_\theta \mathcal{L}(\theta_{t-1}) \tag{35}$$

- ### Gradient Descent
$$\theta_t = \theta_{t - 1} - \alpha g_t  \tag{36}$$

- ### Adam
reference: https://arxiv.org/pdf/1412.6980v8.pdf
$$m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t \tag{37}$$
$$v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2 \tag{38}$$
$$\widehat{m}_t = \frac{m_t}{1 - \beta_1^{t}} \tag{39}$$
$$\widehat{v}_t = \frac{v_t}{1 - \beta_2^{t}} \tag{40}$$
$$\theta_t = \theta_{t-1} - \alpha \frac{\widehat{m}_t}{\sqrt{\widehat{v}_t} + \epsilon} \tag{41}$$

In [8]:
class Optimizer:
    def update(self, parameters, grads):
        raise NotImplementedError

class GradientDescentOptimzer(Optimizer):
    def __init__(self, alpha):
        self.alpha = alpha
        
    def update(self, parameters, grads):
        for key in parameters.keys():
            parameters[key] -= self.alpha * grads[key]
            
class Adam(Optimizer):
    def __init__(self, alpha, parameters, beta1=0.9, beta2=0.999, epsilon=1e-8, decay=0.):
        self.m = {}
        self.v = {}
        self.alpha = alpha
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.decay = decay
        self.t = 1
        
        for key, param in parameters.items():
            self.m[key] = np.zeros_like(param)
            self.v[key] = np.zeros_like(param)
        
    def update(self, parameters, grads):
        '''
        Arguments:
        parameters -- dictionary
        grads -- dictionary
        '''
        
        lr = self.alpha * (np.sqrt(1. - np.power(self.beta2, self.t)) /
                           (1. - np.power(self.beta1, self.t)))
        if self.decay > 0:
            lr /= 1 + self.decay * (self.t - 1)
        for key in parameters.keys():
            self.m[key] = self.beta1 * self.m[key] + (1. - self.beta1) * grads[key]
            self.v[key] = self.beta2 * self.v[key] + (1. - self.beta2) * np.square(grads[key])
            parameters[key] -= lr * self.m[key] / (np.sqrt(self.v[key]) + self.epsilon)

        self.t += 1
        

## Generate Batch

In [9]:
def read_n_lines(file, n):
    '''
    A generator that read multiple lines from file stream.
    
    Arguments:
    file -- a reading file stream
    n -- int
    
    Yields:
    lines -- list of line
    '''
    while True:
        lines = [file.readline().decode("utf-8").strip() for _ in range(n)]
        if lines[-1]:
            yield lines
        else:
            return

def generate_batch(lines, word2idx_map, embeddings, T):
    '''
    Arguments:
    sentence -- 
    word2idx_map -- dictionary
    word2vec_map -- dictionary
    T -- int
    
    Returns:
    x -- numpy array of shape (n_x, m, T)
    y -- numpy array of shape (m, T)
    '''
    n_x, n_y = embeddings.shape
    m = len(lines)
    
    x_batch = np.zeros((n_x, m, T + 1))
    y_batch = np.zeros((m, T), dtype=int)
    
    for i, line in enumerate(lines):
        words = line.split()
        t = 0
        for w in words:
            idx = word2idx_map.get(w, -1)
            if idx == -1:
                continue
            x_batch[:, i, t + 1] = embeddings[:, idx]
            y_batch[i, t] = idx
            t += 1
    
    return x_batch[:, :, :T], y_batch

## Train Model

In [10]:
def model(file_name, word2idx_map, embeddings, length,
          batch_size=64, lstm_unit=64, n_epochs=2, learning_rate=0.001, optimizer='adam'):
    '''
    Arguments:
    file_name -- string
    word2idx_map -- corresponding row indices of words, dictionary
    embeddings -- embedding vectors of input words, numpy array of shape (embed_size, n_vocab)
    max_length -- the length of sentences, int
    batch_size -- size of mini-batch, int
    lstm_unit -- dimension of states, int
    n_epochs -- int
    learning_rate -- float
    optimizer -- string
    
    Returns:
    parameters -- dictionary
    
    '''
    n_x, n_y = embeddings.shape
    n_a = lstm_unit
    parameters = init_parameters(n_x, n_a, n_y)
    
    if optimizer == 'gd':
        optimizer = GradientDescentOptimzer(learning_rate)
    elif optimizer == 'adam':
        optimizer = Adam(learning_rate, parameters)
    
    a0 = np.zeros((n_a, batch_size))
    c0 = np.zeros((n_a, batch_size))
    
    avg_loss = 0.
    count = 0
    trained_words = 0
    
    for epoch in range(n_epochs):
        with open(file_name, 'rb') as f:
            for lines in read_n_lines(f, batch_size):
                x_batch, y_batch = generate_batch(lines, word2idx_map, embeddings, length)
                y_pred, a_batch, caches = forward_propagation(x_batch, a0, c0, parameters)
                
                loss = compute_loss(y_pred, y_batch)
                trained_words += batch_size * length
                
                avg_loss += loss
                count += 1
                
                if count == 10:
                    print('Trained words: %s, Loss: %.4f'% (trained_words, avg_loss / count))
                    sen, _, _ = generate_poem(parameters, idx2word_map, embeddings, length=length)
                    print(sen)
                    avg_loss = 0.
                    count = 0
                    
                grads = backward_propagation(y_batch, y_pred, a_batch, caches, parameters)
                optimizer.update(parameters, grads)
                
    return parameters      

In [127]:
parameters = model('./data/poem5.txt', word2idx_map, embeddings, 5)

Trained words: 3200, Loss: 8.6054
來俟簀八嚀
Trained words: 6400, Loss: 8.5760
雲陽韻服下
Trained words: 9600, Loss: 8.5399
自萬薨悲至
Trained words: 12800, Loss: 8.4830
功外身春昌
Trained words: 16000, Loss: 8.3629
千四對明歌
Trained words: 19200, Loss: 8.1889
金氣明聲雲
Trained words: 22400, Loss: 7.9879
中水聲望歸
Trained words: 25600, Loss: 7.8251
金時驚生花
Trained words: 28800, Loss: 7.7507
春去春長開
Trained words: 32000, Loss: 7.6968
歌天月來春
Trained words: 35200, Loss: 7.6937
道落去中去
Trained words: 38400, Loss: 7.6265
玉花一月陽
Trained words: 41600, Loss: 7.6603
人為一春人
Trained words: 44800, Loss: 7.4598
為路不雲不
Trained words: 48000, Loss: 7.4273
不下月色中
Trained words: 51200, Loss: 7.3392
一山心相不
Trained words: 54400, Loss: 7.5642
得如君雲日
Trained words: 57600, Loss: 7.6877
行水不中君
Trained words: 60800, Loss: 7.6797
為天明歸門
Trained words: 64000, Loss: 7.6532
清花明月落
Trained words: 67200, Loss: 7.4860
遠飛人天長
Trained words: 70400, Loss: 7.4477
何不秋未何
Trained words: 73600, Loss: 7.4080
流山城雲雲
Trained words: 76800, Loss: 7.5979
人相臨夜月
Trained words: 8000

Trained words: 630400, Loss: 7.1824
日歸江日愁
Trained words: 633600, Loss: 7.2902
一行三自時
Trained words: 636800, Loss: 7.3621
自以是道家
Trained words: 640000, Loss: 7.2643
相在在國貧
Trained words: 643200, Loss: 7.3254
此心從高侯
Trained words: 646400, Loss: 7.4350
自與無在家
Trained words: 649600, Loss: 7.3834
無病亦已道
Trained words: 652800, Loss: 7.4657
風香雪不不
Trained words: 656000, Loss: 7.4343
山望故同國
Trained words: 659200, Loss: 7.6230
獨花照山水
Trained words: 662400, Loss: 7.6826
人將自不病
Trained words: 665600, Loss: 7.5080
應是如風塵
Trained words: 668800, Loss: 7.5150
人來從道名
Trained words: 672000, Loss: 7.5574
無家古山門
Trained words: 675200, Loss: 7.4634
明雲落雲水
Trained words: 678400, Loss: 7.4408
君日思人心
Trained words: 681600, Loss: 7.4773
風秋上遠去
Trained words: 684800, Loss: 7.7512
東地與玉樓
Trained words: 688000, Loss: 7.6424
空明空雲樹
Trained words: 691200, Loss: 7.9095
何言若玉力
Trained words: 694400, Loss: 7.6065
自非朝為金
Trained words: 697600, Loss: 7.7388
青樹向雲樓
Trained words: 700800, Loss: 7.5178
自聞南山去
Trained words: 704000, Loss: 7.592

Trained words: 1251200, Loss: 7.4508
何有此別處
Trained words: 1254400, Loss: 7.6013
猶得更何親
Trained words: 1257600, Loss: 7.3093
時生事已心
Trained words: 1260800, Loss: 7.5733
人時是長道
Trained words: 1264000, Loss: 7.5802
無心應何處
Trained words: 1267200, Loss: 7.8279
一去又空清
Trained words: 1270400, Loss: 7.3442
不聞此遊清
Trained words: 1273600, Loss: 7.1114
無身不見何
Trained words: 1276800, Loss: 7.5547
人為一塵重
Trained words: 1280000, Loss: 7.7723
朝子一重城
Trained words: 1283200, Loss: 7.3441
風雨過白雲
Trained words: 1286400, Loss: 7.4081
歸春在長人
Trained words: 1289600, Loss: 7.6867
未能更相知
Trained words: 1292800, Loss: 7.3344
南陵向漢州
Trained words: 1296000, Loss: 7.4335
何能當此不
Trained words: 1299200, Loss: 7.5997
未得同山來
Trained words: 1302400, Loss: 7.4228
一時不得見
Trained words: 1305600, Loss: 7.5728
不得為雲者
Trained words: 1308800, Loss: 7.3044
朝當天臺風
Trained words: 1312000, Loss: 7.5932
孤山天邊裏
Trained words: 1315200, Loss: 7.3664
何為事相同
Trained words: 1318400, Loss: 7.8209
今家家所外
Trained words: 1321600, Loss: 7.6609
秋雨正一杯
Trained wor

Trained words: 1862400, Loss: 7.6268
還去在天雲
Trained words: 1865600, Loss: 7.7231
千年不見時
Trained words: 1868800, Loss: 7.6695
一言無所以
Trained words: 1872000, Loss: 7.6871
千里忽自難
Trained words: 1875200, Loss: 7.6569
風煙綠綠水
Trained words: 1878400, Loss: 7.6380
獨念君子遠
Trained words: 1881600, Loss: 7.6904
青樓掩玉門
Trained words: 1884800, Loss: 7.7405
相聞西陵客
Trained words: 1888000, Loss: 7.7267
歸馬到天門
Trained words: 1891200, Loss: 7.6721
遠水山西山
Trained words: 1894400, Loss: 7.5535
東川在東閣
Trained words: 1897600, Loss: 7.6284
日上吳家處
Trained words: 1900800, Loss: 7.5756
白玉行丹屋
Trained words: 1904000, Loss: 7.6952
長馬皆相識
Trained words: 1907200, Loss: 7.5872
幽人人遠山
Trained words: 1910400, Loss: 7.6414
白雲過西客
Trained words: 1913600, Loss: 7.8717
誰堪複時時
Trained words: 1916800, Loss: 7.7772
山雲何處有
Trained words: 1920000, Loss: 7.6208
心心有時同
Trained words: 1923200, Loss: 7.7898
明月已日日
Trained words: 1926400, Loss: 7.8553
何人一不見
Trained words: 1929600, Loss: 8.0248
山雲不可見
Trained words: 1932800, Loss: 7.9573
春日暮悠悠
Trained wor

Trained words: 2473600, Loss: 7.8687
清氣多餘病
Trained words: 2476800, Loss: 8.0442
人知是雲去
Trained words: 2480000, Loss: 7.6341
雲霞有青閣
Trained words: 2483200, Loss: 7.5799
東水何如何
Trained words: 2486400, Loss: 7.9270
寒月秋風吟
Trained words: 2489600, Loss: 7.7145
相逢歌遠聲
Trained words: 2492800, Loss: 7.4779
一曲水水中
Trained words: 2496000, Loss: 7.5086
此此清光光
Trained words: 2499200, Loss: 7.4795
不覺白頭生
Trained words: 2502400, Loss: 7.5809
應似身生老
Trained words: 2505600, Loss: 7.4951
心心已已身
Trained words: 2508800, Loss: 7.6266
高樓高望望
Trained words: 2512000, Loss: 7.5134
人有遠尋年
Trained words: 2515200, Loss: 7.7128
應不自相逢
Trained words: 2518400, Loss: 7.6905
君是一杯興
Trained words: 2521600, Loss: 7.7966
不喜春長春
Trained words: 2524800, Loss: 7.6692
何事不見長
Trained words: 2528000, Loss: 7.7409
為君喜子子
Trained words: 2531200, Loss: 7.4706
千丈何當來
Trained words: 2534400, Loss: 7.6074
清景複無心
Trained words: 2537600, Loss: 7.8288
雲外無無路
Trained words: 2540800, Loss: 8.0624
風人入一弦
Trained words: 2544000, Loss: 8.0857
獨問難家事
Trained wor

## Generate Poem

In [46]:
def generate_poem_sentense(parameters, idx2word_map, embeddings, inputs=None, length=7, criteria='beam_random', bwidth=100):
    '''
    Arguments:
    parameters -- dictionary
    inputs -- tuple
    idx2word_map -- list
    embeddings -- numpy array
    length -- int
    
    Returns:
    sen -- string
    '''
    n_x, n_y = embeddings.shape
    n_a = parameters['bf'].shape[0]
    if inputs is not None:
        x, at, ct = inputs
    else:
        x = np.zeros((n_x, 1))
        at = np.zeros((n_a, 1))
        ct = np.zeros((n_a, 1))
    
    sen = ''
    for t in range(length):
        at, ct, y_pred_t, _ = cell_forward(x, at, ct, parameters)
        if criteria == 'random':
            idx = np.random.choice(n_y, p=y_pred_t.ravel())
        elif criteria == 'max':
            idx = np.argmax(y_pred_t)
        elif criteria == 'beam_random':
            sample = heapq.nlargest(bwidth, enumerate(y_pred_t.ravel()), key=lambda x: x[1])
            prob = np.array([x[1] for x in sample])
            sample_idx = [x[0] for x in sample]
            idx = np.random.choice(sample_idx, p=prob / sum(prob))
        elif criteria == 'beam_search':
            pass
            
        x = embeddings[:, [idx]]
        sen += idx2word_map[idx]
        
    return sen, at, ct

In [105]:
n_x, n_y = embeddings.shape
n_a = 64

x = np.zeros((n_x, 1))
a0 = np.zeros((n_a, 1))
c0 = np.zeros((n_a, 1))

for _ in range(4):
    sen, a0, c0 = generate_poem_sentense(parameters, idx2word_map, embeddings, (x, a0, c0),
                                         length=5)
    print(sen)

香林在此山
花菊醒聞笑
熟酒食啼金
亂食盈蟬草


In [12]:
def save_model_to_file(file_name, data):
    '''
    Arguments:
    file_name -- string
    data -- numpy array
    '''
    path = 'output/'
    if not os.path.isdir(path):
        os.makedirs(path)

    data_of_list = {}
    for key, val in data.items():
        data_of_list[key] = val.tolist()
        
    with open(path + file_name, "w") as f:
        f.write(json.dumps(data_of_list, indent=4))
        
def load_model_from_file(file_name):
    '''
    Arguments:
    file_name -- the json file name saving results.

    Returns:
    data -- numpy array
    '''    
    with open(file_name, 'r') as f:
        data = json.load(f)
    for key in data.keys():
        data[key] = np.array(data[key])
    return data

In [13]:
parameters = load_model_from_file('weights/lstm_weights_5.json')