In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random

In [None]:
# Pick the compute device: use CUDA when PyTorch reports it as available,
# otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using {device} device")

In [None]:
# Build the character vocabulary and the two lookup tables for a text
def preprocess_text(text):
    """Derive the character vocabulary of ``text``.

    Args:
        text: The string whose distinct characters form the vocabulary.

    Returns:
        A 3-tuple ``(chars, char_to_idx, idx_to_char)``:
        ``chars`` is the sorted list of distinct characters in ``text``,
        ``char_to_idx`` maps each character to its position in ``chars``,
        and ``idx_to_char`` is the inverse mapping.
    """
    chars = sorted(set(text))

    # Number the characters once, then invert that table for the other direction.
    idx_to_char = dict(enumerate(chars))
    char_to_idx = {ch: idx for idx, ch in idx_to_char.items()}

    return chars, char_to_idx, idx_to_char

In [None]:
# Load the training corpus.
# The encoding is passed explicitly: without it, open() decodes with the
# platform's locale encoding, so the same file could decode differently (or
# fail to decode) from one machine to another.
# NOTE(review): assumes obama.txt is UTF-8 encoded - confirm, and adjust the
# encoding argument if the file was saved with a different one.
with open('obama.txt', 'r', encoding='utf-8') as f:
    text = f.read()

In [None]:
# Training hyperparameters
hidden_size = 128        # Width of the LSTM hidden state
seq_length = 100         # Characters per training sequence (can be adjusted)
batch_size = 64          # Sequences per batch
learning_rate = 0.002    # Step size handed to the optimizer

# Vocabulary and lookup tables derived from the corpus
chars, char_to_idx, idx_to_char = preprocess_text(text)
input_size = len(chars)  # Vocabulary size: number of distinct characters in the text

In [None]:
# Encode the corpus as a list of vocabulary indices, one entry per character
data = list(map(char_to_idx.__getitem__, text))

In [None]:
# Define LSTM Model
class LSTMTextGenerator(nn.Module):
    """Single-layer LSTM followed by a linear projection applied at every time step.

    Args:
        input_size: Number of features per time step of the input.
        hidden_size: Size of the LSTM hidden and cell states.
        output_size: Number of values produced per time step by the linear layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        # batch_first=True: inputs and outputs are laid out (batch, seq, feature).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden=None):
        """Run the LSTM over ``x`` and project every time step.

        Args:
            x: Input of shape ``(batch, seq, input_size)``.
            hidden: ``(h_0, c_0)`` tuple as returned by :meth:`init_hidden` or
                by a previous call. ``None`` (the default) lets ``nn.LSTM``
                start from an all-zero state.

        Returns:
            ``(out, hidden)`` where ``out`` has shape
            ``(batch, seq, output_size)`` and ``hidden`` is the updated
            ``(h_n, c_n)`` tuple.
        """
        out, hidden = self.lstm(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        """Return zero-filled ``(h_0, c_0)`` states for ``batch_size`` sequences.

        The states are allocated with the same device and dtype as the model's
        parameters, so they stay usable after ``model.to(device)`` or
        ``model.double()`` instead of always being float32 CPU tensors.
        """
        reference = self.fc.weight
        # Leading 1 = num_layers * num_directions for this single-layer, unidirectional LSTM.
        shape = (1, batch_size, self.hidden_size)
        return reference.new_zeros(shape), reference.new_zeros(shape)

In [None]:
# One-hot encode the character indices
def one_hot_encode(sequence, n_labels):
    """One-hot encode a 1-D sequence of integer labels.

    Args:
        sequence: 1-D sequence (list, tuple, array) of integer labels, each in
            ``[0, n_labels)``.
        n_labels: Width of the encoding (number of distinct labels).

    Returns:
        ``np.float32`` array of shape ``(len(sequence), n_labels)`` with a
        single ``1.0`` per row, in the column given by that row's label.

    Raises:
        ValueError: If ``sequence`` is not one-dimensional.
        IndexError: If any label lies outside ``[0, n_labels)``. Negative
            labels are rejected explicitly; plain NumPy indexing would
            silently wrap them to the last columns.
    """
    labels = np.asarray(sequence)
    if labels.ndim != 1:
        raise ValueError(
            f"one_hot_encode expects a 1-D sequence, got {labels.ndim} dimension(s)"
        )

    one_hot = np.zeros((labels.shape[0], n_labels), dtype=np.float32)
    if labels.size == 0:
        # Nothing to mark; also avoids min()/max() on an empty array.
        return one_hot

    if labels.min() < 0 or labels.max() >= n_labels:
        raise IndexError(f"labels must lie in [0, {n_labels})")

    # Set one cell per row in a single vectorized assignment.
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot

In [None]:
# Function to generate text
def generate_text(model, start_str, char_to_idx, idx_to_char, length=100):
    """Greedily extend ``start_str`` by ``length`` characters using ``model``.

    The prompt is one-hot encoded and fed to the model in one call; after
    that, the highest-scoring character of the last time step is appended and
    fed back as the next single-character input.

    Args:
        model: Module called as ``model(input, hidden)`` returning
            ``(output, hidden)``, with ``output`` of shape
            ``(1, seq, vocab)``, and providing ``init_hidden(batch_size)``.
        start_str: Non-empty prompt; every character must be a key of
            ``char_to_idx``.
        char_to_idx: Mapping from character to vocabulary index. Its length
            is used as the one-hot width, so the function does not depend on
            any module-level vocabulary size.
        idx_to_char: Mapping from vocabulary index to character.
        length: Number of characters to generate after the prompt.

    Returns:
        ``start_str`` followed by the ``length`` generated characters.

    Raises:
        ValueError: If ``start_str`` is empty (there is nothing to condition on).
        KeyError: If ``start_str`` contains a character missing from
            ``char_to_idx``.
    """
    if not start_str:
        raise ValueError("start_str must contain at least one character")

    vocab_size = len(char_to_idx)

    # Build inputs on the model's own device/dtype so generation also works
    # after the model has been moved to a GPU or cast to another precision.
    reference = next(model.parameters(), None)
    device = reference.device if reference is not None else torch.device("cpu")
    dtype = reference.dtype if reference is not None else torch.float32

    def encode(indices):
        # (seq,) integer indices -> (1, seq, vocab_size) one-hot batch
        index_tensor = torch.tensor(indices, dtype=torch.long, device=device)
        one_hot = torch.nn.functional.one_hot(index_tensor, num_classes=vocab_size)
        return one_hot.to(dtype).unsqueeze(0)

    was_training = model.training
    model.eval()  # Set model to evaluation mode
    generated = []
    try:
        # Inference only: without no_grad the autograd graph would keep
        # growing through the hidden state on every step.
        with torch.no_grad():
            hidden = model.init_hidden(1)
            if isinstance(hidden, torch.Tensor):
                hidden = hidden.to(device)
            else:
                hidden = tuple(state.to(device) for state in hidden)

            input_tensor = encode([char_to_idx[ch] for ch in start_str])

            for _ in range(length):
                output, hidden = model(input_tensor, hidden)
                output = output[:, -1, :]  # Scores for the last time step only
                _, top_idx = torch.topk(output, k=1)
                next_idx = top_idx.item()
                generated.append(idx_to_char[next_idx])

                # The predicted character becomes the next input
                input_tensor = encode([next_idx])
    finally:
        # Restore the caller's train/eval mode rather than leaving the model in eval.
        model.train(was_training)

    return start_str + "".join(generated)

In [None]:
# Training setup: epoch count, model, optimizer and loss function
n_epochs = 100
model = LSTMTextGenerator(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=input_size,  # output width equals the input (vocabulary) width
)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [None]:
# Prepare data for training (contiguous, non-overlapping windows)
def get_batches(data, seq_length, batch_size):
    """Yield ``(x, y)`` training batches of next-token prediction pairs.

    The corpus is cut into ``batch_size`` equally long contiguous rows, and
    each row is then walked in non-overlapping windows of ``seq_length``
    tokens. ``y`` is ``x`` shifted one token ahead *in the corpus*, so the
    last target of every window is the token that really follows it (not the
    window's own first token, which is what rolling the window would give).

    Args:
        data: 1-D sequence of integer token indices.
        seq_length: Number of tokens per window.
        batch_size: Number of rows (sequences) per batch.

    Yields:
        ``(x, y)`` NumPy arrays, both of shape ``(batch_size, seq_length)``.
        Nothing is yielded when ``data`` is too short for one full batch plus
        the one extra token needed as the final target.
    """
    tokens = np.asarray(data)
    tokens_per_batch = seq_length * batch_size

    # Reserve one token beyond the inputs so the final position has a real
    # target; a corpus that is an exact multiple of the batch size therefore
    # yields one batch fewer.
    n_batches = (len(tokens) - 1) // tokens_per_batch
    if n_batches <= 0:
        return

    usable = n_batches * tokens_per_batch
    inputs = tokens[:usable].reshape((batch_size, -1))
    targets = tokens[1:usable + 1].reshape((batch_size, -1))

    for start in range(0, inputs.shape[1], seq_length):
        stop = start + seq_length
        yield inputs[:, start:stop], targets[:, start:stop]