<a href="https://colab.research.google.com/github/ngoda/Conversations/blob/master/ch7BOOK2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


*CHAPTER 7*



 **Implementation of Sentence Generation**

In [27]:
import sys
sys.path.append('..')
import numpy as np
from common.functions import softmax
from rnnlm import Rnnlm
from better_rnnlm import BetterRnnlm

# Define a class for generating text using RNNLM.
class RnnlmGen(Rnnlm):
    """Rnnlm subclass that adds sampling-based text generation.

    Also exposes the hidden/cell state of ``self.lstm_layer`` so a caller can
    save and restore it.
    """

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word IDs starting with a given word ID.

        Arguments:
        - start_id: The ID of the starting word; it is always the first
          element of the returned list.
        - skip_ids: Optional container of word IDs that must not be emitted.
          A draw that lands on one of them is discarded and a new draw is made.
        - sample_size: Generation stops once the list holds this many IDs
          (the starting word counts). The list never has fewer than one
          element.

        Returns:
            A list containing ``start_id`` followed by the sampled IDs as
            plain Python ints.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)  # predict() is fed a (1, 1) array.
            score = self.predict(x)
            p = softmax(score.flatten())

            # choice(..., size=1) yields a 1-element array. Take the scalar out
            # explicitly: int() on an ndim>0 array is deprecated in NumPy, and
            # a plain int also works with any skip_ids container (list, set...).
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if skip_ids is not None and sampled in skip_ids:
                # Rejected draw: x is left as is, so the same word is fed to
                # predict() again on the next iteration.
                continue

            x = sampled
            word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return the LSTM layer's current ``(h, c)`` pair."""
        return self.lstm_layer.h, self.lstm_layer.c

    def set_state(self, state):
        """Pass the elements of ``state`` to the LSTM layer's ``set_state``.

        Arguments:
        - state: An iterable in the form returned by ``get_state``.
        """
        self.lstm_layer.set_state(*state)

# Define a class for generating text using BetterRNNLM.
class BetterRnnlmGen(BetterRnnlm):
    """BetterRnnlm subclass that adds sampling-based text generation.

    Also exposes the hidden/cell state of every layer in ``self.lstm_layers``
    so a caller can save and restore it.
    """

    def generate(self, start_id, skip_ids=None, sample_size=100):
        """Generate a sequence of word IDs starting with a given word ID.

        Arguments:
        - start_id: The ID of the starting word; it is always the first
          element of the returned list.
        - skip_ids: Optional container of word IDs that must not be emitted.
          A draw that lands on one of them is discarded and a new draw is made.
        - sample_size: Generation stops once the list holds this many IDs
          (the starting word counts). The list never has fewer than one
          element.

        Returns:
            A list containing ``start_id`` followed by the sampled IDs as
            plain Python ints.
        """
        word_ids = [start_id]

        x = start_id
        while len(word_ids) < sample_size:
            x = np.array(x).reshape(1, 1)  # predict() is fed a (1, 1) array.
            score = self.predict(x).flatten()
            p = softmax(score).flatten()

            # choice(..., size=1) yields a 1-element array. Take the scalar out
            # explicitly: int() on an ndim>0 array is deprecated in NumPy, and
            # a plain int also works with any skip_ids container (list, set...).
            sampled = int(np.random.choice(len(p), size=1, p=p)[0])
            if skip_ids is not None and sampled in skip_ids:
                # Rejected draw: x is left as is, so the same word is fed to
                # predict() again on the next iteration.
                continue

            x = sampled
            word_ids.append(sampled)

        return word_ids

    def get_state(self):
        """Return a list with one ``(h, c)`` tuple per LSTM layer, in layer order."""
        return [(layer.h, layer.c) for layer in self.lstm_layers]

    def set_state(self, states):
        """Restore per-layer state.

        Arguments:
        - states: An iterable of per-layer states in the form returned by
          ``get_state``. Layers and states are paired with ``zip``, so any
          surplus on either side is ignored.
        """
        for layer, state in zip(self.lstm_layers, states):
            layer.set_state(*state)


In [30]:
import sys

# Append parent directory to the system path.
sys.path.append('..')

from rnnlm_gen import RnnlmGen  # Import RnnlmGen class from rnnlm_gen module.
from dataset import ptb  # Import ptb module from dataset package.

# Load the Penn Treebank (PTB) 'train' split along with both vocabulary maps.
corpus, word_to_id, id_to_word = ptb.load_data('train')
vocab_size, corpus_size = len(word_to_id), len(corpus)

# Build the generator model and load its parameters from './Rnnlm.pkl'.
model = RnnlmGen()
model.load_params('./Rnnlm.pkl')

# Seed word for generation, and the tokens that must never be emitted.
start_word = 'you'
skip_words = ['N', '<unk>', '$']
start_id = word_to_id[start_word]
skip_ids = [word_to_id[w] for w in skip_words]

# Sample word IDs, map them back to words, and turn every ' <eos>' tag into
# a full stop followed by a line break before printing.
word_ids = model.generate(start_id, skip_ids)
txt = ' '.join(id_to_word[i] for i in word_ids)
txt = txt.replace(' <eos>', '.\n')
print(txt)


you do their yen many business concerns will immediately be used in interest.
 a duck scientific and complains george mitchell will soon operate his positions to meet almost cut due to the league 's economy.
 he noted that the majority of the elderly was always literally too high next season.
 last march mr. competitiveness insisted on toronto a spokesman on industrial products.
 in his letter mr. jones is active as usual in august martin who owns.
 once including evident credentials statistics see and he is like a lawsuits.
 mr. roman attributed his stake and


In [None]:
# coding: utf-8

import sys
sys.path.append('..')
from dataset import sequence
import numpy


# Load the addition dataset (fixed seed) and the character <-> ID maps.
(x_train, t_train), (x_test, t_test) = sequence.load_data(
    'addition.txt', seed=1984)
char_to_id, id_to_char = sequence.get_vocab()

# Array shapes of the train and test splits.
print(x_train.shape, t_train.shape)
print(x_test.shape, t_test.shape)
# (45000, 7) (45000, 5)
# (5000, 7) (5000, 5)

# First training pair as raw character IDs.
print(x_train[0])
print(t_train[0])
# [ 3  0  2  0  0 11  5]
# [ 6  0 11  7  5]

# The same pair decoded back into characters.
print(''.join(id_to_char[c] for c in x_train[0]))
print(''.join(id_to_char[c] for c in t_train[0]))
# 71+118
# _189

 **Implementation of seq2seq**

In [38]:

# Encoder Class
'''This code defines an Encoder class used in sequence-to-sequence models.
The Encoder class contains an embedding layer and an LSTM layer.
It handles the forward and backward passes through the network.'''

import sys

# Append parent directory to the system path.
sys.path.append('..')

from common.time_layers import *  # Import classes from common.time_layers module.
from common.base_model import BaseModel  # Import BaseModel class from common.base_model module.


class Encoder:
    """Encoder of a sequence-to-sequence model: TimeEmbedding followed by TimeLSTM."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create both layers with freshly initialised float32 parameters.

        Arguments:
        - vocab_size: Number of rows of the embedding matrix.
        - wordvec_size: Number of columns of the embedding matrix.
        - hidden_size: H; the LSTM weight matrices have 4 * H columns.
        """
        randn = np.random.randn
        lstm_cols = 4 * hidden_size

        # The draw order (embedding, Wx, Wh) decides which random numbers each
        # matrix receives, so it must stay exactly like this.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(wordvec_size, lstm_cols) / np.sqrt(wordvec_size)).astype('f')
        lstm_Wh = (randn(hidden_size, lstm_cols) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(lstm_cols, dtype='f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=False)

        # Flat parameter/gradient lists, embedding layer first.
        self.params = self.embed.params + self.lstm.params
        self.grads = self.embed.grads + self.lstm.grads
        # Output of the last LSTM forward pass; backward() needs its shape.
        self.hs = None

    def forward(self, xs):
        """Run xs through both layers and return the final time step.

        The full LSTM output is kept in ``self.hs``; only ``hs[:, -1, :]``
        is returned.
        """
        embedded = self.embed.forward(xs)
        self.hs = self.lstm.forward(embedded)
        return self.hs[:, -1, :]

    def backward(self, dh):
        """Back-propagate dh, the gradient of the value returned by forward().

        Only the last time step was exposed by forward(), so every other
        time step receives a zero gradient.
        """
        dhs = np.zeros_like(self.hs)
        dhs[:, -1, :] = dh
        return self.embed.backward(self.lstm.backward(dhs))


In [39]:
class Decoder:
    """Decoder of a sequence-to-sequence model: TimeEmbedding -> TimeLSTM -> TimeAffine."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the three layers with freshly initialised float32 parameters.

        Arguments:
        - vocab_size: Rows of the embedding matrix and columns of the affine
          weight matrix.
        - wordvec_size: Number of columns of the embedding matrix.
        - hidden_size: H; the LSTM weight matrices have 4 * H columns.
        """
        randn = np.random.randn
        lstm_cols = 4 * hidden_size

        # The draw order (embedding, Wx, Wh, affine W) decides which random
        # numbers each matrix receives, so it must stay exactly like this.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(wordvec_size, lstm_cols) / np.sqrt(wordvec_size)).astype('f')
        lstm_Wh = (randn(hidden_size, lstm_cols) / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(lstm_cols, dtype='f')
        affine_W = (randn(hidden_size, vocab_size) / np.sqrt(hidden_size)).astype('f')
        affine_b = np.zeros(vocab_size, dtype='f')

        self.embed = TimeEmbedding(embed_W)
        self.lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        self.affine = TimeAffine(affine_W, affine_b)

        # Flat parameter/gradient lists in layer order.
        self.params = []
        self.grads = []
        for layer in (self.embed, self.lstm, self.affine):
            self.params.extend(layer.params)
            self.grads.extend(layer.grads)

    def forward(self, xs, h):
        """Return the affine-layer scores for xs, starting the LSTM from state h."""
        self.lstm.set_state(h)

        embedded = self.embed.forward(xs)
        hidden = self.lstm.forward(embedded)
        return self.affine.forward(hidden)

    def backward(self, dscore):
        """Back-propagate dscore through affine, LSTM and embedding.

        Returns ``self.lstm.dh`` as read after the backward passes; the
        embedding layer's own return value is not used.
        """
        d_hidden = self.affine.backward(dscore)
        d_embedded = self.lstm.backward(d_hidden)
        self.embed.backward(d_embedded)
        return self.lstm.dh

    def generate(self, h, start_id, sample_size):
        """Greedily decode sample_size IDs, starting the LSTM from state h.

        Arguments:
        - h: State handed to the LSTM layer's set_state before decoding.
        - start_id: ID fed to the network at the first step (not included
          in the result).
        - sample_size: Number of IDs to produce.

        Returns a list of Python ints.
        """
        self.lstm.set_state(h)

        sampled = []
        current_id = start_id
        for _ in range(sample_size):
            x = np.array(current_id).reshape((1, 1))
            hidden = self.lstm.forward(self.embed.forward(x))
            score = self.affine.forward(hidden)

            # Deterministic choice: take the highest-scoring ID and feed it
            # back in as the next input.
            current_id = np.argmax(score.flatten())
            sampled.append(int(current_id))

        return sampled


In [40]:
class Seq2seq(BaseModel):
    """Encoder-decoder model trained with a TimeSoftmaxWithLoss layer."""

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Build the encoder, the decoder and the loss layer.

        The same three sizes are passed to both Encoder and Decoder.
        """
        self.encoder = Encoder(vocab_size, wordvec_size, hidden_size)
        self.decoder = Decoder(vocab_size, wordvec_size, hidden_size)
        self.softmax = TimeSoftmaxWithLoss()

        # Flat parameter/gradient lists, encoder entries first.
        self.params = self.encoder.params + self.decoder.params
        self.grads = self.encoder.grads + self.decoder.grads

    def forward(self, xs, ts):
        """Return the loss for input batch xs and target batch ts.

        The decoder is fed ts without its last column and is scored against
        ts without its first column, i.e. the targets shifted by one step.
        """
        decoder_xs = ts[:, :-1]
        decoder_ts = ts[:, 1:]

        h = self.encoder.forward(xs)
        score = self.decoder.forward(decoder_xs, h)
        return self.softmax.forward(score, decoder_ts)

    def backward(self, dout=1):
        """Back-propagate through loss layer, decoder and encoder, in that order.

        Returns whatever the encoder's backward pass returns.
        """
        dscore = self.softmax.backward(dout)
        dh = self.decoder.backward(dscore)
        return self.encoder.backward(dh)

    def generate(self, xs, start_id, sample_size):
        """Encode xs, then let the decoder generate sample_size IDs from start_id."""
        h = self.encoder.forward(xs)
        return self.decoder.generate(h, start_id, sample_size)


In [None]:
# Import necessary modules.
import sys

# Append parent directory to the system path.
sys.path.append('..')

# Import libraries for data processing, optimization, training, and evaluation.
import numpy as np
import matplotlib.pyplot as plt
from dataset import sequence
from common.optimizer import Adam
from common.trainer import Trainer
from common.util import eval_seq2seq
from seq2seq import Seq2seq
from peeky_seq2seq import PeekySeq2seq

# Load the addition dataset and the character <-> ID vocabulary maps.
(x_train, t_train), (x_test, t_test) = sequence.load_data('addition.txt')
char_to_id, id_to_char = sequence.get_vocab()

# When enabled, flip every input sequence along its second axis.
is_reverse = True
if is_reverse:
    x_train = x_train[:, ::-1]
    x_test = x_test[:, ::-1]

# Hyperparameters.
vocab_size = len(char_to_id)
wordvec_size, hidden_size = 16, 128
batch_size, max_epoch = 128, 25
max_grad = 5.0

# This run uses the PeekySeq2seq variant.
model = PeekySeq2seq(vocab_size, wordvec_size, hidden_size)
optimizer = Adam()
trainer = Trainer(model, optimizer)

peeky_acc_list = []
for epoch in range(max_epoch):
    # One training epoch per outer iteration, so accuracy can be measured
    # after every epoch.
    trainer.fit(x_train, t_train, max_epoch=1, batch_size=batch_size,
                max_grad=max_grad, eval_interval=150)

    # Score every test example; only the first ten are evaluated verbosely.
    correct_num = 0
    for i in range(len(x_test)):
        # Indexing with [[i]] keeps the leading axis, giving a batch of one.
        question = x_test[[i]]
        correct = t_test[[i]]
        verbose = i < 10
        correct_num += eval_seq2seq(model, question, correct,
                                    id_to_char, verbose, is_reverse)

    acc = correct_num / len(x_test)
    peeky_acc_list.append(acc)
    # The message is Korean for "validation accuracy".
    print('검증 정확도 %.3f%%' % (acc * 100))

# Accuracy per epoch; the axis labels are Korean for "epoch" and "accuracy".
x_peeky = np.arange(len(peeky_acc_list))
plt.plot(x_peeky, peeky_acc_list, marker='o')
plt.xlabel('에폭')
plt.ylabel('정확도')
plt.ylim(0, 1.0)
plt.show()
