# Exercise 4: Text Generation using LSTM

In [1]:
# Set to True to skip training; the training cell then loads the weights saved in 'best_model.pth' instead.
skip_training = False

In [2]:
import random
import re

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from tqdm import tqdm

In [3]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print(device)

cuda


### 1. Load and Preprocess the Text Dataset

We will be using *Alice's Adventures in Wonderland* by Lewis Carroll as our dataset. You can download it from [Project Gutenberg](https://www.gutenberg.org/):

[Alice's Adventures in Wonderland by Lewis Carroll (Project Gutenberg Page)](https://www.gutenberg.org/ebooks/11) \
[Direct Text File Download](https://www.gutenberg.org/files/11/11-0.txt)



In [4]:
# Location of the downloaded plain-text book; replace the whole path with wherever your copy lives.
txt_path = '/content/alice.txt'

In [5]:
# Read the whole book into memory. The encoding is passed explicitly because the
# text contains non-ASCII characters (e.g. curly quotes) and the platform default
# encoding is not guaranteed to be UTF-8.
with open(txt_path, 'r', encoding='utf-8') as file:
    raw_text = file.read()

print('===First 1500 characters before any processing:\n\n')
print(raw_text[:1500])

# Fixed window counted from the end of the file; presumably chosen to show where the
# story ends and the trailing license text begins -- adjust if your copy differs.
print('\n\n\n===Ending characters before any processing:\n')
print(raw_text[-19000:-17000])

===First 1500 characters before any processing:


*** START OF THE PROJECT GUTENBERG EBOOK 11 ***
[Illustration]




Alice’s Adventures in Wonderland

by Lewis Carroll

THE MILLENNIUM FULCRUM EDITION 3.0

Contents

 CHAPTER I.     Down the Rabbit-Hole
 CHAPTER II.    The Pool of Tears
 CHAPTER III.   A Caucus-Race and a Long Tale
 CHAPTER IV.    The Rabbit Sends in a Little Bill
 CHAPTER V.     Advice from a Caterpillar
 CHAPTER VI.    Pig and Pepper
 CHAPTER VII.   A Mad Tea-Party
 CHAPTER VIII.  The Queen’s Croquet-Ground
 CHAPTER IX.    The Mock Turtle’s Story
 CHAPTER X.     The Lobster Quadrille
 CHAPTER XI.    Who Stole the Tarts?
 CHAPTER XII.   Alice’s Evidence




CHAPTER I.
Down the Rabbit-Hole


Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into
the book her sister was reading, but it had no pictures or
conversations in it, “and what is the use of a book,” thought Alice
“without pictures 

In [6]:
# Removing metadata from the text
start_marker = 'CHAPTER I.\nDown the Rabbit-Hole'
end_marker = '*** END OF THE PROJECT GUTENBERG'  # closing markers of Project Gutenberg

start_index = raw_text.find(start_marker)
end_index = raw_text.find(end_marker)

# str.find returns -1 when a marker is missing; slicing with -1 would silently
# produce the wrong text, so fail loudly instead.
if start_index == -1:
    raise ValueError(f'Start marker {start_marker!r} was not found in the text')
if end_index == -1:
    raise ValueError(f'End marker {end_marker!r} was not found in the text')

trimmed_text = raw_text[start_index:end_index]

print('===Text after removing metadata:\n')
print(trimmed_text[:1500])

===Text after removing metadata:

CHAPTER I.
Down the Rabbit-Hole


Alice was beginning to get very tired of sitting by her sister on the
bank, and of having nothing to do: once or twice she had peeped into
the book her sister was reading, but it had no pictures or
conversations in it, “and what is the use of a book,” thought Alice
“without pictures or conversations?”

So she was considering in her own mind (as well as she could, for the
hot day made her feel very sleepy and stupid), whether the pleasure of
making a daisy-chain would be worth the trouble of getting up and
picking the daisies, when suddenly a White Rabbit with pink eyes ran
close by her.

There was nothing so _very_ remarkable in that; nor did Alice think it
so _very_ much out of the way to hear the Rabbit say to itself, “Oh
dear! Oh dear! I shall be late!” (when she thought it over afterwards,
it occurred to her that she ought to have wondered at this, but at the
time it all seemed quite natural); but when the Rabbit a

In [7]:
def preprocess_text(text):
    """
    Normalizes raw text for character-level modelling.

    The text is lowercased, every character that is not a lowercase letter,
    a digit or whitespace is replaced by a space, and each run of whitespace
    (including newlines) is collapsed into a single space.

    Args:
    text -- The raw input text as a string

    Returns:
    cleaned_text -- The normalized text
    """
    lowered = text.lower()

    # Replace (rather than drop) special characters so neighbouring words stay separated
    without_symbols = re.sub(r'[^a-z0-9\s]', ' ', lowered)

    # Collapse whitespace runs, including those created by the replacement above
    return re.sub(r'\s+', ' ', without_symbols)

# Clean the trimmed book text and preview the first 1000 characters of the result.
cleaned_text = preprocess_text(trimmed_text)
print('Text after cleaning and converting to lowercase:\n')
print(cleaned_text[:1000])


Text after cleaning and converting to lowercase:

chapter i down the rabbit hole alice was beginning to get very tired of sitting by her sister on the bank and of having nothing to do once or twice she had peeped into the book her sister was reading but it had no pictures or conversations in it and what is the use of a book thought alice without pictures or conversations so she was considering in her own mind as well as she could for the hot day made her feel very sleepy and stupid whether the pleasure of making a daisy chain would be worth the trouble of getting up and picking the daisies when suddenly a white rabbit with pink eyes ran close by her there was nothing so very remarkable in that nor did alice think it so very much out of the way to hear the rabbit say to itself oh dear oh dear i shall be late when she thought it over afterwards it occurred to her that she ought to have wondered at this but at the time it all seemed quite natural but when the rabbit actually took a watch 

### 2. Character-Level Encoding



In [8]:
def create_char_mappings(cleaned_text):
    """
    Builds the vocabulary lookup tables for a piece of text.

    Each distinct character is assigned an integer id following the sorted
    order of the characters.

    Args:
    cleaned_text -- The cleaned input text as a string

    Returns:
    char_to_int -- A dictionary mapping each unique character to its integer id
    int_to_char -- The inverse dictionary, mapping each integer id to its character
    """
    vocabulary = sorted(set(cleaned_text))

    # Enumerating the sorted vocabulary yields the id -> character table directly
    int_to_char = dict(enumerate(vocabulary))
    char_to_int = {symbol: code for code, symbol in int_to_char.items()}

    return char_to_int, int_to_char

# Build the vocabulary tables and list every character with its integer id
char_to_int, int_to_char = create_char_mappings(cleaned_text)
print('Character to Integer Mapping:')
for char, idx in char_to_int.items():
    print(f"'{char}' : {idx}")

Character to Integer Mapping:
' ' : 0
'a' : 1
'b' : 2
'c' : 3
'd' : 4
'e' : 5
'f' : 6
'g' : 7
'h' : 8
'i' : 9
'j' : 10
'k' : 11
'l' : 12
'm' : 13
'n' : 14
'o' : 15
'p' : 16
'q' : 17
'r' : 18
's' : 19
't' : 20
'u' : 21
'v' : 22
'w' : 23
'x' : 24
'y' : 25
'z' : 26


#### 2.2 Encode the Text into Integers

In [9]:
def encode_text(cleaned_text, char_to_int):
    """
    Converts text into its integer representation, one id per character.

    Args:
    cleaned_text -- The cleaned input text as a string
    char_to_int -- Characters to integer mapping

    Returns:
    encoded_chars -- Numpy array holding the integer id of every character, in text order

    Raises:
    KeyError -- If the text contains a character that is missing from char_to_int
    """
    # Look up each character in order; an unknown character raises KeyError
    codes = list(map(char_to_int.__getitem__, cleaned_text))
    return np.array(codes)

# Encode the whole cleaned text and preview the first 100 integer ids.
encoded_chars = encode_text(cleaned_text, char_to_int)
print('First 100 encoded characters:')
print(encoded_chars[:100])

First 100 encoded characters:
[ 3  8  1 16 20  5 18  0  9  0  4 15 23 14  0 20  8  5  0 18  1  2  2  9
 20  0  8 15 12  5  0  1 12  9  3  5  0 23  1 19  0  2  5  7  9 14 14  9
 14  7  0 20 15  0  7  5 20  0 22  5 18 25  0 20  9 18  5  4  0 15  6  0
 19  9 20 20  9 14  7  0  2 25  0  8  5 18  0 19  9 19 20  5 18  0 15 14
  0 20  8  5]


### 3. Batch Generation for Training


In [10]:
def get_batches(arr, batch_size, seq_length, step_size=None):
    """
    Slices an encoded sequence into batched (input, target) windows.

    Windows of length seq_length are taken every step_size positions; each
    target window is its input window shifted one position to the right.
    Consecutive windows are grouped batch_size at a time, and trailing data
    that does not fill a whole batch is dropped.

    Args:
        arr (array-like): Encoded text as an array of integers.
        batch_size (int): Number of sequences per batch.
        seq_length (int): Number of characters in each sequence.
        step_size (int, optional): Distance between the starts of two consecutive
            windows. Defaults to seq_length (non-overlapping windows).

    Returns:
        tuple (x_batches, y_batches): A tuple of numpy arrays of input and target sequences,
                                      each has shape (num_batches, batch_size, seq_length)
    """
    stride = seq_length if step_size is None else step_size

    # Number of complete batches that fit, leaving room for the final window
    positions_per_batch = batch_size * stride
    n_batches = (len(arr) - seq_length) // positions_per_batch

    # Keep only the data that the complete batches cover
    usable = arr[:n_batches * positions_per_batch + seq_length]

    # Start offset of every window; a range can be iterated more than once
    window_starts = range(0, len(usable) - seq_length, stride)
    inputs = [usable[start:start + seq_length] for start in window_starts]
    targets = [usable[start + 1:start + seq_length + 1] for start in window_starts]

    batched_shape = (-1, batch_size, seq_length)
    x_batches = np.array(inputs).reshape(batched_shape)
    y_batches = np.array(targets).reshape(batched_shape)

    return x_batches, y_batches

In [11]:
# Display for y shift and step_size
def display_batch_generation(arr, char_to_int, int_to_char, batch_index=10):
    """
    Prints one batch of input/target sequence pairs as readable text.

    Batches are built with a small fixed configuration (batch_size=8,
    seq_length=10, step_size=5) so that both the one-character shift between
    input and target and the overlap between consecutive sequences are visible.

    Args:
        arr (array-like): Encoded text as an array of integers.
        char_to_int (dict): Character to integer mapping (unused; kept so existing calls keep working).
        int_to_char (dict): Integer to character mapping used to decode the sequences.
        batch_index (int, optional): Which batch to display. Values past the last
            batch are clamped to the last batch. Defaults to 10.

    Raises:
        ValueError: If arr is too short to produce a single batch.
    """
    batch_size, seq_length, step_size = 8, 10, 5  # Setting step_size for overlap between sequences

    x_batches, y_batches = get_batches(arr, batch_size, seq_length, step_size)
    if len(x_batches) == 0:
        raise ValueError('The input is too short to build a single batch to display')

    # Short inputs may produce fewer batches than the requested index
    batch_index = min(batch_index, len(x_batches) - 1)

    def decode(indices):
        # Turn a sequence of integer ids back into text
        return ''.join(int_to_char[idx] for idx in indices)

    print('='*50)
    print('Displaying a Single Batch')
    print('='*50)
    for x_row, y_row in zip(x_batches[batch_index], y_batches[batch_index]):
        print(f"[{decode(x_row)}]  -->  [{decode(y_row)}]")
    print('='*50)
# Show one example batch built from the encoded book text
display_batch_generation(encoded_chars, char_to_int, int_to_char )

Displaying a Single Batch
[made her f]  -->  [ade her fe]
[her feel v]  -->  [er feel ve]
[eel very s]  -->  [el very sl]
[ery sleepy]  -->  [ry sleepy ]
[leepy and ]  -->  [eepy and s]
[ and stupi]  -->  [and stupid]
[stupid whe]  -->  [tupid whet]
[d whether ]  -->  [ whether t]


### 4. Define the Character-Level LSTM Model

In [12]:
class CharLSTM(nn.Module):
    """
    Character-Level Multi-Layer LSTM Model

    This model processes sequences of characters and predicts the next character in the sequence.
    The input passes through a (possibly stacked) LSTM, a dropout layer and a
    linear layer that produces one score per output class at every time step.
    """

    def __init__(self, num_layers, input_dim, hidden_dim, output_dim, dropout_prob):
        """
        Initializes the CharLSTM model with the specified parameters.

        Args:
            num_layers (int): Number of LSTM layers
            input_dim (int): Dimensionality of the input (e.g. one-hot encoded input size)
            hidden_dim (int): Dimensionality of the LSTM hidden layer.
            output_dim (int): Dimensionality of the output.
            dropout_prob (float): Dropout probability used between stacked LSTM layers
                and on the LSTM output before the dense layer.
        """
        super().__init__()

        # Save hidden dimension and number of layers for hidden state initialization
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # nn.LSTM applies its dropout only between stacked layers and warns when a
        # non-zero value is given for a single layer, so pass 0 in that case.
        inter_layer_dropout = dropout_prob if num_layers > 1 else 0.0

        # The attribute names (LSTM, dropout, fc) become state_dict keys; keep them
        # stable so previously saved checkpoints still load.
        self.LSTM = nn.LSTM(input_dim, hidden_dim, num_layers, batch_first=True, dropout=inter_layer_dropout)
        self.dropout = nn.Dropout(dropout_prob)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden):
        """
        Performs the forward pass of the CharLSTM model.

        Args:
            x (torch.Tensor): Input tensor of shape (batch_size, seq_length, input_dim).
            hidden (tuple): Tuple of (h0, c0), where each is a tensor of shape (num_layers, batch_size, hidden_dim).

        Returns:
            out (torch.Tensor): Output tensor of shape (batch_size, seq_length, output_dim).
            (h, c) (tuple): Updated hidden states (h, c) for each LSTM layer.
                - h (torch.Tensor): Final hidden state
                - c (torch.Tensor): Final cell state
        """
        # The LSTM output has shape (batch_size, seq_length, hidden_dim);
        # 'h' and 'c' are the final hidden and cell states of every layer
        out, (h, c) = self.LSTM(x, hidden)

        # Regularize the LSTM output, then project every time step to output scores
        out = self.dropout(out)
        out = self.fc(out)

        return out, (h, c)

    def init_hidden(self, batch_size):
        """
        Initializes the hidden and cell states to zeros for each LSTM layer.

        Args:
            batch_size (int): The batch size for the current data.

        Returns:
            (h0, c0) (tuple): Tuple of initial hidden states (h0, c0) for each LSTM layer,
            each of shape (num_layers, batch_size, hidden_dim).
            - h0 (torch.Tensor): Initial hidden state
            - c0 (torch.Tensor): Initial cell state
        """
        # Allocate directly with the device and dtype of the model's parameters to
        # avoid mismatches and an extra host-to-device copy
        reference = next(self.parameters())
        state_shape = (self.num_layers, batch_size, self.hidden_dim)

        h0 = reference.new_zeros(state_shape)
        c0 = reference.new_zeros(state_shape)

        return (h0, c0)

### 5. Train the Model

In [13]:
def train(model, encoded_chars, vocab_size, num_epochs, batch_size,
          seq_length, step_size, learning_rate, save_path=None, verbose=True):
    """
    Train the CharLSTM model on encoded text data.

    Arguments:
    model -- The LSTM model
    encoded_chars -- Encoded data (characters)
    vocab_size -- Size of the vocabulary
    num_epochs -- Number of training epochs
    batch_size -- Batch size for training
    seq_length -- Sequence length for each batch
    step_size -- Distance between the starts of consecutive training sequences
    learning_rate -- Learning rate for the optimizer
    save_path -- Path to save the trained model (optional); overwritten after every epoch
    verbose -- Whether to print the average loss after every epoch

    Returns:
    The average batch loss of the last epoch, or None when num_epochs is 0.

    Raises:
    ValueError -- If the data is too short to form a single batch
    """

    model.train()  # Set model to training mode

    # Follow the model's own device instead of relying on a global variable
    device = next(model.parameters()).device

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)  # Initialize Adam optimizer
    criterion = nn.CrossEntropyLoss()  # Cross entropy loss function

    # Prepare batches
    x_batches, y_batches = get_batches(encoded_chars, batch_size, seq_length, step_size)
    num_batches = len(x_batches)
    if num_batches == 0:
        raise ValueError('Not enough data to build a single batch; '
                         'reduce batch_size, seq_length or step_size')

    average_loss = None

    for epoch in range(num_epochs):
        total_loss = 0
        # Progress bar for the current epoch
        batch_loader = tqdm(zip(x_batches, y_batches), total=num_batches,
                            leave=True, desc=f'Epoch {epoch+1}/{num_epochs}')

        # Start every epoch from zeroed hidden and cell states
        hidden = model.init_hidden(batch_size)

        for x, y in batch_loader:
            x = torch.as_tensor(x, dtype=torch.long, device=device)
            y = torch.as_tensor(y, dtype=torch.long, device=device)  # target

            # Detach so gradients do not flow back into earlier batches.
            # NOTE(review): get_batches groups consecutive windows into the same batch,
            # so row i of this batch does not continue row i of the previous one; the
            # carried state is therefore not a true continuation of the text.
            hidden = tuple(each.detach() for each in hidden)

            optimizer.zero_grad()
            x_one_hot = F.one_hot(x, num_classes=vocab_size).float()
            output, hidden = model(x_one_hot, hidden)

            # Flatten batch and time so every character position is one classification example
            loss = criterion(output.view(-1, vocab_size), y.view(-1))
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        average_loss = total_loss / num_batches

        # Print the average loss for the current epoch
        if verbose:
            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {average_loss:.4f}')

        # Optional model saving
        # Save every epoch since training takes a while and may be interrupted midway
        if save_path:
            torch.save(model.state_dict(), save_path)
            # Report the same 1-based epoch number as the loss line above
            print(f'Your trained model at epoch {epoch + 1} is saved successfully!')

    return average_loss

#### Model Initialization

In [20]:
# Seed every random number generator in use so that weight initialisation, dropout
# and text sampling are repeatable across runs (some GPU kernels may still be
# non-deterministic).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Model hyperparameters
hidden_dim = 400
dropout_prob = 0.05
num_layers = 4
vocab_size = len(char_to_int)  # one-hot input size and number of output classes

model = CharLSTM(num_layers, vocab_size, hidden_dim, vocab_size, dropout_prob)
model = model.to(device)
print(model)

CharLSTM(
  (LSTM): LSTM(27, 400, num_layers=4, batch_first=True, dropout=0.05)
  (dropout): Dropout(p=0.05, inplace=False)
  (fc): Linear(in_features=400, out_features=27, bias=True)
)


In [21]:
# Training hyperparameters
num_epochs = 50
batch_size = 50
seq_length = 100
step_size = 50
learning_rate = 0.001

# Single source of truth for the checkpoint file used for both saving and loading
checkpoint_path = 'best_model.pth'

if not skip_training:
    loss = train(
        model=model,
        encoded_chars=encoded_chars,
        vocab_size=vocab_size,
        num_epochs=num_epochs,
        batch_size=batch_size,
        seq_length=seq_length,
        step_size=step_size,
        learning_rate=learning_rate,
        save_path=checkpoint_path
    )
else:
    # The checkpoint is a plain state_dict, so restrict unpickling to tensors
    # instead of allowing arbitrary objects from the file to be executed.
    model.load_state_dict(torch.load(checkpoint_path, weights_only=True, map_location=device))
    print('Loaded weights from your saved model successfully!')

Epoch 1/50: 100%|██████████| 53/53 [00:03<00:00, 14.01it/s]


Epoch 1/50, Loss: 2.8512
Your trained model at epoch 0 is saved successfully!


Epoch 2/50: 100%|██████████| 53/53 [00:03<00:00, 14.20it/s]


Epoch 2/50, Loss: 2.8082
Your trained model at epoch 1 is saved successfully!


Epoch 3/50: 100%|██████████| 53/53 [00:03<00:00, 14.10it/s]


Epoch 3/50, Loss: 2.8074
Your trained model at epoch 2 is saved successfully!


Epoch 4/50: 100%|██████████| 53/53 [00:03<00:00, 14.08it/s]


Epoch 4/50, Loss: 2.7783
Your trained model at epoch 3 is saved successfully!


Epoch 5/50: 100%|██████████| 53/53 [00:03<00:00, 14.10it/s]


Epoch 5/50, Loss: 2.4978
Your trained model at epoch 4 is saved successfully!


Epoch 6/50: 100%|██████████| 53/53 [00:03<00:00, 13.94it/s]


Epoch 6/50, Loss: 2.2357
Your trained model at epoch 5 is saved successfully!


Epoch 7/50: 100%|██████████| 53/53 [00:03<00:00, 14.03it/s]


Epoch 7/50, Loss: 2.0672
Your trained model at epoch 6 is saved successfully!


Epoch 8/50: 100%|██████████| 53/53 [00:03<00:00, 14.07it/s]


Epoch 8/50, Loss: 1.9465
Your trained model at epoch 7 is saved successfully!


Epoch 9/50: 100%|██████████| 53/53 [00:03<00:00, 13.92it/s]


Epoch 9/50, Loss: 1.8551
Your trained model at epoch 8 is saved successfully!


Epoch 10/50: 100%|██████████| 53/53 [00:03<00:00, 13.90it/s]


Epoch 10/50, Loss: 1.7737
Your trained model at epoch 9 is saved successfully!


Epoch 11/50: 100%|██████████| 53/53 [00:03<00:00, 13.91it/s]


Epoch 11/50, Loss: 1.6933
Your trained model at epoch 10 is saved successfully!


Epoch 12/50: 100%|██████████| 53/53 [00:03<00:00, 13.93it/s]


Epoch 12/50, Loss: 1.6224
Your trained model at epoch 11 is saved successfully!


Epoch 13/50: 100%|██████████| 53/53 [00:03<00:00, 13.89it/s]


Epoch 13/50, Loss: 1.5574
Your trained model at epoch 12 is saved successfully!


Epoch 14/50: 100%|██████████| 53/53 [00:03<00:00, 13.87it/s]


Epoch 14/50, Loss: 1.5016
Your trained model at epoch 13 is saved successfully!


Epoch 15/50: 100%|██████████| 53/53 [00:03<00:00, 13.90it/s]


Epoch 15/50, Loss: 1.4535
Your trained model at epoch 14 is saved successfully!


Epoch 16/50: 100%|██████████| 53/53 [00:03<00:00, 13.91it/s]


Epoch 16/50, Loss: 1.4164
Your trained model at epoch 15 is saved successfully!


Epoch 17/50: 100%|██████████| 53/53 [00:03<00:00, 13.85it/s]


Epoch 17/50, Loss: 1.3816
Your trained model at epoch 16 is saved successfully!


Epoch 18/50: 100%|██████████| 53/53 [00:03<00:00, 13.85it/s]


Epoch 18/50, Loss: 1.3379
Your trained model at epoch 17 is saved successfully!


Epoch 19/50: 100%|██████████| 53/53 [00:03<00:00, 13.85it/s]


Epoch 19/50, Loss: 1.3009
Your trained model at epoch 18 is saved successfully!


Epoch 20/50: 100%|██████████| 53/53 [00:03<00:00, 13.80it/s]


Epoch 20/50, Loss: 1.2704
Your trained model at epoch 19 is saved successfully!


Epoch 21/50: 100%|██████████| 53/53 [00:03<00:00, 13.81it/s]


Epoch 21/50, Loss: 1.2446
Your trained model at epoch 20 is saved successfully!


Epoch 22/50: 100%|██████████| 53/53 [00:03<00:00, 13.84it/s]


Epoch 22/50, Loss: 1.2220
Your trained model at epoch 21 is saved successfully!


Epoch 23/50: 100%|██████████| 53/53 [00:03<00:00, 13.86it/s]


Epoch 23/50, Loss: 1.1945
Your trained model at epoch 22 is saved successfully!


Epoch 24/50: 100%|██████████| 53/53 [00:03<00:00, 13.86it/s]


Epoch 24/50, Loss: 1.1637
Your trained model at epoch 23 is saved successfully!


Epoch 25/50: 100%|██████████| 53/53 [00:03<00:00, 13.87it/s]


Epoch 25/50, Loss: 1.1373
Your trained model at epoch 24 is saved successfully!


Epoch 26/50: 100%|██████████| 53/53 [00:03<00:00, 13.79it/s]


Epoch 26/50, Loss: 1.1170
Your trained model at epoch 25 is saved successfully!


Epoch 27/50: 100%|██████████| 53/53 [00:03<00:00, 13.86it/s]


Epoch 27/50, Loss: 1.0977
Your trained model at epoch 26 is saved successfully!


Epoch 28/50: 100%|██████████| 53/53 [00:03<00:00, 13.86it/s]


Epoch 28/50, Loss: 1.0847
Your trained model at epoch 27 is saved successfully!


Epoch 29/50: 100%|██████████| 53/53 [00:03<00:00, 13.88it/s]


Epoch 29/50, Loss: 1.0625
Your trained model at epoch 28 is saved successfully!


Epoch 30/50: 100%|██████████| 53/53 [00:03<00:00, 13.81it/s]


Epoch 30/50, Loss: 1.0468
Your trained model at epoch 29 is saved successfully!


Epoch 31/50: 100%|██████████| 53/53 [00:03<00:00, 13.84it/s]


Epoch 31/50, Loss: 1.0190
Your trained model at epoch 30 is saved successfully!


Epoch 32/50: 100%|██████████| 53/53 [00:03<00:00, 13.94it/s]


Epoch 32/50, Loss: 0.9957
Your trained model at epoch 31 is saved successfully!


Epoch 33/50: 100%|██████████| 53/53 [00:03<00:00, 13.74it/s]


Epoch 33/50, Loss: 0.9769
Your trained model at epoch 32 is saved successfully!


Epoch 34/50: 100%|██████████| 53/53 [00:03<00:00, 13.82it/s]


Epoch 34/50, Loss: 0.9598
Your trained model at epoch 33 is saved successfully!


Epoch 35/50: 100%|██████████| 53/53 [00:03<00:00, 13.93it/s]


Epoch 35/50, Loss: 0.9400
Your trained model at epoch 34 is saved successfully!


Epoch 36/50: 100%|██████████| 53/53 [00:03<00:00, 13.84it/s]


Epoch 36/50, Loss: 0.9183
Your trained model at epoch 35 is saved successfully!


Epoch 37/50: 100%|██████████| 53/53 [00:03<00:00, 13.92it/s]


Epoch 37/50, Loss: 0.8884
Your trained model at epoch 36 is saved successfully!


Epoch 38/50: 100%|██████████| 53/53 [00:03<00:00, 13.85it/s]


Epoch 38/50, Loss: 0.8665
Your trained model at epoch 37 is saved successfully!


Epoch 39/50: 100%|██████████| 53/53 [00:03<00:00, 13.87it/s]


Epoch 39/50, Loss: 0.8450
Your trained model at epoch 38 is saved successfully!


Epoch 40/50: 100%|██████████| 53/53 [00:03<00:00, 13.94it/s]


Epoch 40/50, Loss: 0.8225
Your trained model at epoch 39 is saved successfully!


Epoch 41/50: 100%|██████████| 53/53 [00:03<00:00, 13.81it/s]


Epoch 41/50, Loss: 0.7936
Your trained model at epoch 40 is saved successfully!


Epoch 42/50: 100%|██████████| 53/53 [00:03<00:00, 14.00it/s]


Epoch 42/50, Loss: 0.7663
Your trained model at epoch 41 is saved successfully!


Epoch 43/50: 100%|██████████| 53/53 [00:03<00:00, 13.89it/s]


Epoch 43/50, Loss: 0.7414
Your trained model at epoch 42 is saved successfully!


Epoch 44/50: 100%|██████████| 53/53 [00:03<00:00, 13.97it/s]


Epoch 44/50, Loss: 0.7160
Your trained model at epoch 43 is saved successfully!


Epoch 45/50: 100%|██████████| 53/53 [00:03<00:00, 13.96it/s]


Epoch 45/50, Loss: 0.6894
Your trained model at epoch 44 is saved successfully!


Epoch 46/50: 100%|██████████| 53/53 [00:03<00:00, 13.94it/s]


Epoch 46/50, Loss: 0.6655
Your trained model at epoch 45 is saved successfully!


Epoch 47/50: 100%|██████████| 53/53 [00:03<00:00, 13.83it/s]


Epoch 47/50, Loss: 0.6427
Your trained model at epoch 46 is saved successfully!


Epoch 48/50: 100%|██████████| 53/53 [00:03<00:00, 13.87it/s]


Epoch 48/50, Loss: 0.6277
Your trained model at epoch 47 is saved successfully!


Epoch 49/50: 100%|██████████| 53/53 [00:03<00:00, 13.91it/s]


Epoch 49/50, Loss: 0.6199
Your trained model at epoch 48 is saved successfully!


Epoch 50/50: 100%|██████████| 53/53 [00:03<00:00, 13.87it/s]

Epoch 50/50, Loss: 0.6072
Your trained model at epoch 49 is saved successfully!





### 6. Text Generation

In [22]:
def generate_text(model, start_str, char_to_int, int_to_char, vocab_size, predict_len=100, temperature=1.0):
    """
    Generate text using the trained model.

    The prompt is fed through the model once to build up the hidden state.
    After that, only the most recently sampled character is fed at each step,
    because the carried hidden state already summarizes everything before it.

    Arguments:
    model -- Trained RNN model
    start_str -- Non-empty string to start generating from
    char_to_int -- Dictionary mapping characters to integers
    int_to_char -- Dictionary mapping integers back to characters
    vocab_size -- Size of the vocabulary
    predict_len -- Number of characters to generate
    temperature -- Positive float controlling randomness in predictions (higher is more random)

    Returns:
    generated_text -- start_str followed by predict_len generated characters

    Raises:
    ValueError -- If start_str is empty or temperature is not positive
    KeyError -- If start_str contains a character that is missing from char_to_int
    """
    if not start_str:
        raise ValueError('start_str must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be a positive number')

    model.eval()  # Set model to evaluation mode

    # Follow the model's own device instead of relying on a global variable
    device = next(model.parameters()).device

    # Encode the starting string and add the batch dimension
    prompt = [char_to_int[char] for char in start_str]
    input_seq = torch.tensor(prompt, dtype=torch.long, device=device).unsqueeze(0)

    hidden = model.init_hidden(1)  # Batch size of 1 for generating text

    # Collect the pieces in a list and join once at the end
    pieces = [start_str]

    with torch.no_grad():  # inference
        for _ in range(predict_len):
            x_one_hot = F.one_hot(input_seq, num_classes=vocab_size).float()
            output, hidden = model(x_one_hot, hidden)

            # Only the prediction at the last time step matters; scale it by the temperature
            output = output[:, -1, :] / temperature

            # Convert output to probabilities using softmax
            probabilities = F.softmax(output, dim=-1).cpu().numpy().ravel()

            # Randomly sample based on the output probabilities
            next_char_index = int(np.random.choice(vocab_size, p=probabilities))
            pieces.append(int_to_char[next_char_index])

            # Feed only the new character next: re-feeding earlier characters together
            # with the carried hidden state would make the model see them twice
            input_seq = torch.tensor([[next_char_index]], dtype=torch.long, device=device)

    return ''.join(pieces)

In [26]:
# Prompt and sampling settings for the demo generation below.
# The prompt is looked up character by character in char_to_int, so it may only
# contain characters that are keys of that mapping.
start_str = 'alice was '
predict_len = 1000
temperature = 0.5
generated_text = generate_text(model,
                               start_str,
                               char_to_int,
                               int_to_char,
                               vocab_size,
                               predict_len=predict_len,
                               temperature=temperature)
print(generated_text)

alice was going to alice s very sore said the king heard it must be a lobster as the dormouse say the queen and the dormouse found herself would not could not so mad that s the mock turtle sound on the lizard said the hatter he was only tasted away some tarts and said to the table says and the queen and the first came to mine the gryphon repeat the queen said the king said the king heard it means to make out what i see said the king the queen said the king heard it be the time the queen said the mock turtle said alice was of very wide but she was not a bit said the king the book cauce of course the king heard it he turn the other suddenly that s the mock turtle said alice went to see what was the white rabbit mad be said the king went on the game of we said as she had got so she helt serpent she did not dare said alice was going to alice the mock turtle nive with the queen said the king shan t got in the window that s give it ever said the king said alice was just begins with his note 