In [9]:
import torch
import torch.nn as nn
import numpy as np

In [10]:
# Toy training corpus: five newline-separated "hello ..." phrases
text = (
    "hello world\n"
    "hello PyTorch\n"
    "hello AI\n"
    "hello chatbot\n"
    "hello language model"
)

In [11]:
text

'hello world\nhello PyTorch\nhello AI\nhello chatbot\nhello language model'

In [12]:
# Build the character vocabulary and the two lookup tables that translate
# between characters and their integer indices
chars = sorted(set(text))  # unique characters of the corpus, in sorted order
int_to_char = dict(enumerate(chars))  # index -> character
char_to_int = {c: i for i, c in enumerate(chars)}  # character -> index

In [16]:
# Only the last bare expression of a cell is rendered, so the output shown below is char_to_int;
# the first two lines are evaluated and discarded.
chars  # sorted list of the unique characters (not displayed)
int_to_char  # index -> character lookup (not displayed)
char_to_int  # character -> index lookup (this is the displayed cell output)

{'\n': 0,
 ' ': 1,
 'A': 2,
 'I': 3,
 'P': 4,
 'T': 5,
 'a': 6,
 'b': 7,
 'c': 8,
 'd': 9,
 'e': 10,
 'g': 11,
 'h': 12,
 'l': 13,
 'm': 14,
 'n': 15,
 'o': 16,
 'r': 17,
 't': 18,
 'u': 19,
 'w': 20,
 'y': 21}

In [17]:
# Sliding-window settings used to cut the corpus into training examples:
# seq_length is the number of characters per input window,
# step is the distance between the starts of consecutive windows.
seq_length, step = 5, 1

In [18]:
seq_length 

5

In [19]:
step

1

In [20]:
# Slide a window of seq_length characters over the text; each window is an input
# and the character immediately after it is the corresponding target.
window_starts = range(0, len(text) - seq_length, step)
sequences = [text[start:start + seq_length] for start in window_starts]  # input windows
targets = [text[start + seq_length] for start in window_starts]  # next character per window

In [21]:
sequences

['hello',
 'ello ',
 'llo w',
 'lo wo',
 'o wor',
 ' worl',
 'world',
 'orld\n',
 'rld\nh',
 'ld\nhe',
 'd\nhel',
 '\nhell',
 'hello',
 'ello ',
 'llo P',
 'lo Py',
 'o PyT',
 ' PyTo',
 'PyTor',
 'yTorc',
 'Torch',
 'orch\n',
 'rch\nh',
 'ch\nhe',
 'h\nhel',
 '\nhell',
 'hello',
 'ello ',
 'llo A',
 'lo AI',
 'o AI\n',
 ' AI\nh',
 'AI\nhe',
 'I\nhel',
 '\nhell',
 'hello',
 'ello ',
 'llo c',
 'lo ch',
 'o cha',
 ' chat',
 'chatb',
 'hatbo',
 'atbot',
 'tbot\n',
 'bot\nh',
 'ot\nhe',
 't\nhel',
 '\nhell',
 'hello',
 'ello ',
 'llo l',
 'lo la',
 'o lan',
 ' lang',
 'langu',
 'angua',
 'nguag',
 'guage',
 'uage ',
 'age m',
 'ge mo',
 'e mod',
 ' mode']

In [22]:
targets

[' ',
 'w',
 'o',
 'r',
 'l',
 'd',
 '\n',
 'h',
 'e',
 'l',
 'l',
 'o',
 ' ',
 'P',
 'y',
 'T',
 'o',
 'r',
 'c',
 'h',
 '\n',
 'h',
 'e',
 'l',
 'l',
 'o',
 ' ',
 'A',
 'I',
 '\n',
 'h',
 'e',
 'l',
 'l',
 'o',
 ' ',
 'c',
 'h',
 'a',
 't',
 'b',
 'o',
 't',
 '\n',
 'h',
 'e',
 'l',
 'l',
 'o',
 ' ',
 'l',
 'a',
 'n',
 'g',
 'u',
 'a',
 'g',
 'e',
 ' ',
 'm',
 'o',
 'd',
 'e',
 'l']

In [24]:
# Replace every character by its vocabulary index
X = np.array([list(map(char_to_int.__getitem__, window)) for window in sequences])  # one row of indices per window
y = np.array(list(map(char_to_int.__getitem__, targets)))  # one index per target character

In [25]:
# Turn the integer arrays into int64 (long) PyTorch tensors: X holds the inputs, y the targets
X, y = (torch.tensor(indices, dtype=torch.long) for indices in (X, y))

In [26]:
X

tensor([[12, 10, 13, 13, 16],
        [10, 13, 13, 16,  1],
        [13, 13, 16,  1, 20],
        [13, 16,  1, 20, 16],
        [16,  1, 20, 16, 17],
        [ 1, 20, 16, 17, 13],
        [20, 16, 17, 13,  9],
        [16, 17, 13,  9,  0],
        [17, 13,  9,  0, 12],
        [13,  9,  0, 12, 10],
        [ 9,  0, 12, 10, 13],
        [ 0, 12, 10, 13, 13],
        [12, 10, 13, 13, 16],
        [10, 13, 13, 16,  1],
        [13, 13, 16,  1,  4],
        [13, 16,  1,  4, 21],
        [16,  1,  4, 21,  5],
        [ 1,  4, 21,  5, 16],
        [ 4, 21,  5, 16, 17],
        [21,  5, 16, 17,  8],
        [ 5, 16, 17,  8, 12],
        [16, 17,  8, 12,  0],
        [17,  8, 12,  0, 12],
        [ 8, 12,  0, 12, 10],
        [12,  0, 12, 10, 13],
        [ 0, 12, 10, 13, 13],
        [12, 10, 13, 13, 16],
        [10, 13, 13, 16,  1],
        [13, 13, 16,  1,  2],
        [13, 16,  1,  2,  3],
        [16,  1,  2,  3,  0],
        [ 1,  2,  3,  0, 12],
        [ 2,  3,  0, 12, 10],
        [ 

In [27]:
y

tensor([ 1, 20, 16, 17, 13,  9,  0, 12, 10, 13, 13, 16,  1,  4, 21,  5, 16, 17,
         8, 12,  0, 12, 10, 13, 13, 16,  1,  2,  3,  0, 12, 10, 13, 13, 16,  1,
         8, 12,  6, 18,  7, 16, 18,  0, 12, 10, 13, 13, 16,  1, 13,  6, 15, 11,
        19,  6, 11, 10,  1, 14, 16,  9, 10, 13])

In [28]:
# Define the RNN model
class RNN(nn.Module):
    """Single-layer ``nn.RNN`` followed by a linear layer applied to the last time step.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Width of the recurrent hidden state.
        output_size: Number of output features (one score per class).
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size  # kept so forward() can size the initial hidden state
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)  # expects (batch, seq, feature)
        self.fc = nn.Linear(hidden_size, output_size)  # maps the final hidden output to class scores

    def forward(self, x):
        """Return class scores for each sequence in the batch.

        Args:
            x: Batched input of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, output_size)`` computed from the last time step.
        """
        # Create the zero initial state on the input's device and dtype; a plain
        # torch.zeros(...) lands on the default device/dtype and fails as soon as the
        # model runs on a GPU or in a non-default precision.
        h0 = torch.zeros(1, x.size(0), self.hidden_size, device=x.device, dtype=x.dtype)
        out, _ = self.rnn(x, h0)  # out: per-step outputs; the final hidden state is not needed
        return self.fc(out[:, -1, :])  # only the last time step feeds the classifier

In [29]:
# Model dimensions: inputs are one-hot characters and outputs are scores over the
# same vocabulary, so both sizes equal the number of unique characters.
input_size = output_size = len(chars)
hidden_size = 10  # width of the recurrent hidden state

In [30]:
# Instantiate the model, define loss and optimizer
# Seed PyTorch's global RNG before the model is built: weight initialisation is random,
# so without a fixed seed the loss curve and generated text differ on every
# Restart & Run All. (Results may still vary across platforms / PyTorch versions.)
SEED = 42
torch.manual_seed(SEED)

model = RNN(input_size, hidden_size, output_size)  # Create the model instance
criterion = nn.CrossEntropyLoss()  # Loss function for multi-class classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer for training


In [31]:
model

RNN(
  (rnn): RNN(22, 10, batch_first=True)
  (fc): Linear(in_features=10, out_features=22, bias=True)
)

In [32]:
criterion

CrossEntropyLoss()

In [33]:
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)

In [34]:
# One-hot encode the index tensor: every character index becomes a float32 vector
# of length input_size, which is the per-step feature vector the RNN consumes.
X_one_hot = nn.functional.one_hot(X, num_classes=input_size).to(torch.float32)


In [35]:
# Train the model (full batch: every epoch is one optimisation step over all windows)
num_epochs = 1000  # Total number of training epochs
model.train()  # Training mode is set once up front; nothing inside the loop changes it
for epoch in range(num_epochs):
    optimizer.zero_grad()  # Drop gradients accumulated by the previous step
    outputs = model(X_one_hot)  # Scores for every training window
    loss = criterion(outputs, y)  # Cross-entropy against the next-character targets
    loss.backward()  # Back-propagate
    optimizer.step()  # Apply the parameter update

    if (epoch + 1) % 100:
        continue  # Only report on every 100th epoch
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [100/1000], Loss: 0.3633
Epoch [200/1000], Loss: 0.1595
Epoch [300/1000], Loss: 0.1403
Epoch [400/1000], Loss: 0.1342
Epoch [500/1000], Loss: 0.1314
Epoch [600/1000], Loss: 0.1299
Epoch [700/1000], Loss: 0.1289
Epoch [800/1000], Loss: 0.1283
Epoch [900/1000], Loss: 0.1278
Epoch [1000/1000], Loss: 0.1275


In [38]:
# Text generation function
def generate_text(model, start_string, gen_length=20, char_to_idx=None, idx_to_char=None):
    """Greedily extend ``start_string`` one character at a time using ``model``.

    At every step the current window of character indices is one-hot encoded,
    fed to ``model``, and the highest-scoring character is appended. The window
    then slides by one character, so its length always equals
    ``len(start_string)``.

    Args:
        model: Module mapping a ``(1, window, vocab)`` one-hot float tensor to
            ``(1, vocab)`` scores. It is switched to eval mode and left there.
        start_string: Seed text; every character must be in the vocabulary.
        gen_length: Number of characters to generate (default 20). Values
            ``<= 0`` return ``start_string`` unchanged.
        char_to_idx: Optional character -> index mapping. Defaults to the
            notebook-level ``char_to_int``.
        idx_to_char: Optional index -> character mapping. Defaults to the
            notebook-level ``int_to_char``.

    Returns:
        ``start_string`` followed by the generated characters.

    Raises:
        KeyError: If ``start_string`` contains a character outside the vocabulary.
        ValueError: If ``start_string`` is empty while ``gen_length > 0``
            (the model cannot be run on a zero-length sequence).
    """
    # Fall back to the notebook-level vocabulary when none is passed in.
    if char_to_idx is None:
        char_to_idx = char_to_int
    if idx_to_char is None:
        idx_to_char = int_to_char
    vocab_size = len(char_to_idx)  # width of the one-hot encoding

    model.eval()  # Set model to evaluation mode
    seed_indices = [char_to_idx[ch] for ch in start_string]
    if not seed_indices and gen_length > 0:
        raise ValueError("start_string must contain at least one character")

    # Keep every tensor on the model's device so generation also works when the
    # model has been moved off the CPU; parameter-free models default to CPU.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    input_eval = torch.tensor(seed_indices, dtype=torch.long, device=device).unsqueeze(0)  # (1, window)

    pieces = [start_string]  # joined once at the end instead of repeated string concatenation
    with torch.no_grad():  # inference only, no gradients needed
        for _ in range(gen_length):
            input_eval_one_hot = torch.nn.functional.one_hot(input_eval, num_classes=vocab_size).float()
            predictions = model(input_eval_one_hot)  # (1, vocab) scores
            predicted_char_idx = torch.argmax(predictions[-1]).item()  # greedy pick
            pieces.append(idx_to_char[predicted_char_idx])

            # Slide the window: drop the oldest index, append the new prediction.
            next_index = torch.tensor([[predicted_char_idx]], dtype=torch.long, device=device)
            input_eval = torch.cat((input_eval[:, 1:], next_index), dim=1)

    return "".join(pieces)
    

In [39]:
# Sample from the trained model, seeding it with a prefix that occurs in the corpus
start_string = "hello"  # seed text handed to the generator
generated = generate_text(model=model, start_string=start_string)  # default generation length
print(f'Generated text: {generated}')  # show the seed plus the generated continuation

Generated text: hello language modelll lo
