# Import Libaries and Data

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from sklearn.model_selection import train_test_split
import time

# Problem 1

In this homework, we focus on the language model we did in the lectures. However, we expand it to the much longer sequence.

Inspired by the course example, train and validate rnn.RNN, rnn.LSTM and rnn.GRU for learning the above sequence. Use sequences of 10, 20, and 30 for your training. Feel free to adjust other network parameters. Report and compare training loss, validation accuracy, execution time for training, and computational and mode size complexities across the three models over various lengths of sequence.

RNN Sequences of 10

In [2]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 10  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")


Epoch 10, Loss: 2.2503044605255127, Validation Loss: 2.281165361404419, Validation Accuracy: 0.39915966987609863
Epoch 20, Loss: 1.7869393825531006, Validation Loss: 2.0383858680725098, Validation Accuracy: 0.45798319578170776
Epoch 30, Loss: 1.4378474950790405, Validation Loss: 1.9126826524734497, Validation Accuracy: 0.4957983195781708
Epoch 40, Loss: 1.1299469470977783, Validation Loss: 1.8903419971466064, Validation Accuracy: 0.5
Epoch 50, Loss: 0.8496050834655762, Validation Loss: 1.9146820306777954, Validation Accuracy: 0.48739495873451233
Epoch 60, Loss: 0.5978606343269348, Validation Loss: 1.9932161569595337, Validation Accuracy: 0.5189075469970703
Epoch 70, Loss: 0.39333173632621765, Validation Loss: 2.1299502849578857, Validation Accuracy: 0.4978991448879242
Epoch 80, Loss: 0.2445870339870453, Validation Loss: 2.251969814300537, Validation Accuracy: 0.5126050710678101
Epoch 90, Loss: 0.14962594211101532, Validation Loss: 2.428340435028076, Validation Accuracy: 0.4852941036224

RNN Sequences of 20

In [3]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 20  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.2529842853546143, Validation Loss: 2.309706211090088, Validation Accuracy: 0.3734177350997925
Epoch 20, Loss: 1.801230788230896, Validation Loss: 2.0518007278442383, Validation Accuracy: 0.44936707615852356
Epoch 30, Loss: 1.450972557067871, Validation Loss: 1.9145021438598633, Validation Accuracy: 0.49367088079452515
Epoch 40, Loss: 1.1276347637176514, Validation Loss: 1.8907785415649414, Validation Accuracy: 0.5063291192054749
Epoch 50, Loss: 0.8296688199043274, Validation Loss: 1.9390400648117065, Validation Accuracy: 0.5316455960273743
Epoch 60, Loss: 0.5839484333992004, Validation Loss: 2.009227991104126, Validation Accuracy: 0.5316455960273743
Epoch 70, Loss: 0.3814847469329834, Validation Loss: 2.121345043182373, Validation Accuracy: 0.5274261832237244
Epoch 80, Loss: 0.2670614719390869, Validation Loss: 2.2854442596435547, Validation Accuracy: 0.5210970640182495
Epoch 90, Loss: 0.15211667120456696, Validation Loss: 2.446859836578369, Validation Accuracy: 0.518

RNN Sequences of 30

In [4]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 30  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the RNN model
class CharRNN(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.RNN(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharRNN(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.237025022506714, Validation Loss: 2.388532876968384, Validation Accuracy: 0.3432203531265259
Epoch 20, Loss: 1.7770357131958008, Validation Loss: 2.169269561767578, Validation Accuracy: 0.42161017656326294
Epoch 30, Loss: 1.4236152172088623, Validation Loss: 2.045292615890503, Validation Accuracy: 0.4661017060279846
Epoch 40, Loss: 1.1040335893630981, Validation Loss: 2.0186235904693604, Validation Accuracy: 0.4788135588169098
Epoch 50, Loss: 0.8126319050788879, Validation Loss: 2.0503642559051514, Validation Accuracy: 0.48728814721107483
Epoch 60, Loss: 0.5721544027328491, Validation Loss: 2.1533901691436768, Validation Accuracy: 0.4788135588169098
Epoch 70, Loss: 0.37932682037353516, Validation Loss: 2.2970805168151855, Validation Accuracy: 0.4936440587043762
Epoch 80, Loss: 0.24177302420139313, Validation Loss: 2.4528722763061523, Validation Accuracy: 0.49152541160583496
Epoch 90, Loss: 0.15517647564411163, Validation Loss: 2.60849928855896, Validation Accuracy: 0.

LSTM Sequences of 10

In [5]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 10  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize both hidden state and cell state with zeros
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.5262839794158936, Validation Loss: 2.477555751800537, Validation Accuracy: 0.33193278312683105
Epoch 20, Loss: 1.9959288835525513, Validation Loss: 2.1232945919036865, Validation Accuracy: 0.4264705777168274
Epoch 30, Loss: 1.5944844484329224, Validation Loss: 1.9876195192337036, Validation Accuracy: 0.46848738193511963
Epoch 40, Loss: 1.2297937870025635, Validation Loss: 1.9251786470413208, Validation Accuracy: 0.5021008253097534
Epoch 50, Loss: 0.898880124092102, Validation Loss: 1.9364831447601318, Validation Accuracy: 0.4978991448879242
Epoch 60, Loss: 0.6058414578437805, Validation Loss: 2.0113894939422607, Validation Accuracy: 0.4957983195781708
Epoch 70, Loss: 0.37681594491004944, Validation Loss: 2.111269950866699, Validation Accuracy: 0.506302535533905
Epoch 80, Loss: 0.21718822419643402, Validation Loss: 2.2393157482147217, Validation Accuracy: 0.49369746446609497
Epoch 90, Loss: 0.13063739240169525, Validation Loss: 2.3719980716705322, Validation Accuracy: 

LSTM Sequences of 20

In [6]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 20  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize both hidden state and cell state with zeros
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.5191562175750732, Validation Loss: 2.484652519226074, Validation Accuracy: 0.27426159381866455
Epoch 20, Loss: 2.0386483669281006, Validation Loss: 2.158881425857544, Validation Accuracy: 0.41983121633529663
Epoch 30, Loss: 1.655963659286499, Validation Loss: 1.9918053150177002, Validation Accuracy: 0.4810126721858978
Epoch 40, Loss: 1.3109047412872314, Validation Loss: 1.8871471881866455, Validation Accuracy: 0.4915611743927002
Epoch 50, Loss: 0.9968366026878357, Validation Loss: 1.8469796180725098, Validation Accuracy: 0.5210970640182495
Epoch 60, Loss: 0.7323595881462097, Validation Loss: 1.8813544511795044, Validation Accuracy: 0.5210970640182495
Epoch 70, Loss: 0.5028286576271057, Validation Loss: 1.9865748882293701, Validation Accuracy: 0.502109706401825
Epoch 80, Loss: 0.32843732833862305, Validation Loss: 2.0816776752471924, Validation Accuracy: 0.48945146799087524
Epoch 90, Loss: 0.2636650502681732, Validation Loss: 2.1985042095184326, Validation Accuracy: 0.

LSTM Sequences of 30

In [7]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 30  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the LSTM model
class CharLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.LSTM(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        # Initialize both hidden state and cell state with zeros
        return (torch.zeros(1, batch_size, self.hidden_size), torch.zeros(1, batch_size, self.hidden_size))

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharLSTM(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.526428699493408, Validation Loss: 2.564789056777954, Validation Accuracy: 0.3156779706478119
Epoch 20, Loss: 2.015021562576294, Validation Loss: 2.212977170944214, Validation Accuracy: 0.3919491469860077
Epoch 30, Loss: 1.6171483993530273, Validation Loss: 2.0310516357421875, Validation Accuracy: 0.4512711763381958
Epoch 40, Loss: 1.2591001987457275, Validation Loss: 1.946043848991394, Validation Accuracy: 0.48728814721107483
Epoch 50, Loss: 0.9404497742652893, Validation Loss: 1.9347310066223145, Validation Accuracy: 0.48516950011253357
Epoch 60, Loss: 0.6672124862670898, Validation Loss: 1.9678969383239746, Validation Accuracy: 0.48516950011253357
Epoch 70, Loss: 0.44470223784446716, Validation Loss: 2.083030939102173, Validation Accuracy: 0.48728814721107483
Epoch 80, Loss: 0.2927740812301636, Validation Loss: 2.22503924369812, Validation Accuracy: 0.4830508530139923
Epoch 90, Loss: 0.18428395688533783, Validation Loss: 2.3115718364715576, Validation Accuracy: 0.48

GRU for Sequences of 10

In [8]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 10  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.4048380851745605, Validation Loss: 2.364834785461426, Validation Accuracy: 0.3718487322330475
Epoch 20, Loss: 1.887921929359436, Validation Loss: 2.08847975730896, Validation Accuracy: 0.4432772994041443
Epoch 30, Loss: 1.4724527597427368, Validation Loss: 1.9348090887069702, Validation Accuracy: 0.47058823704719543
Epoch 40, Loss: 1.1023738384246826, Validation Loss: 1.8408879041671753, Validation Accuracy: 0.5105041861534119
Epoch 50, Loss: 0.765838623046875, Validation Loss: 1.8789145946502686, Validation Accuracy: 0.5252100825309753
Epoch 60, Loss: 0.488158643245697, Validation Loss: 1.985790729522705, Validation Accuracy: 0.5147058963775635
Epoch 70, Loss: 0.2860977053642273, Validation Loss: 2.117770195007324, Validation Accuracy: 0.5252100825309753
Epoch 80, Loss: 0.16263727843761444, Validation Loss: 2.2770540714263916, Validation Accuracy: 0.5042017102241516
Epoch 90, Loss: 0.09941940754652023, Validation Loss: 2.407747268676758, Validation Accuracy: 0.514705

GRU for Sequences of 20

In [9]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 20  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.384937047958374, Validation Loss: 2.3950486183166504, Validation Accuracy: 0.3459915518760681
Epoch 20, Loss: 1.876861333847046, Validation Loss: 2.0800211429595947, Validation Accuracy: 0.4472573697566986
Epoch 30, Loss: 1.466163158416748, Validation Loss: 1.92085599899292, Validation Accuracy: 0.5063291192054749
Epoch 40, Loss: 1.0945879220962524, Validation Loss: 1.8198952674865723, Validation Accuracy: 0.5379746556282043
Epoch 50, Loss: 0.7596117854118347, Validation Loss: 1.8273712396621704, Validation Accuracy: 0.5464134812355042
Epoch 60, Loss: 0.4822719991207123, Validation Loss: 1.886910080909729, Validation Accuracy: 0.554852306842804
Epoch 70, Loss: 0.27956363558769226, Validation Loss: 2.0204954147338867, Validation Accuracy: 0.5443037748336792
Epoch 80, Loss: 0.1528216302394867, Validation Loss: 2.170624017715454, Validation Accuracy: 0.5274261832237244
Epoch 90, Loss: 0.08455592393875122, Validation Loss: 2.2901408672332764, Validation Accuracy: 0.527426

GRU for Sequences of 30

In [10]:
# Sample text
text = "Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text. At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model. One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks. Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time. Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants. In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology."

# Creating character vocabulary
chars = sorted(list(set(text)))
ix_to_char = {i: ch for i, ch in enumerate(chars)}
char_to_ix = {ch: i for i, ch in enumerate(chars)}

# Preparing the dataset
max_length = 30  # Maximum length of input sequences

X = []
y = []
for i in range(len(text) - max_length):
    sequence = text[i:i + max_length]
    label = text[i + max_length]
    X.append([char_to_ix[char] for char in sequence])
    y.append(char_to_ix[label])

X = np.array(X)
y = np.array(y)

# Splitting the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Converting data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Defining the GRU model
class CharGRU(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(CharGRU, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(input_size, hidden_size)
        self.rnn = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, hidden):
        embedded = self.embedding(x)
        output, hidden = self.rnn(embedded, hidden)
        output = self.fc(output[:, -1, :])
        return output, hidden

    def init_hidden(self, batch_size):
        return torch.zeros(1, batch_size, self.hidden_size)

# Hyperparameters
input_size = len(chars)
hidden_size = 128
output_size = len(chars)
learning_rate = 0.005
epochs = 100

# Model, loss, and optimizer
model = CharGRU(input_size, hidden_size, output_size)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

start_time = time.time()

# Training the model
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    hidden = model.init_hidden(X_train.size(0))
    output, hidden = model(X_train, hidden)
    loss = criterion(output, y_train)
    loss.backward()
    optimizer.step()

    # Validation
    model.eval()
    with torch.no_grad():
        hidden_val = model.init_hidden(X_val.size(0))
        val_output, _ = model(X_val, hidden_val)
        val_loss = criterion(val_output, y_val)
        _, predicted = torch.max(val_output, 1)
        val_accuracy = (predicted == y_val).float().mean()

    if (epoch+1) % 10 == 0:
        print(f'Epoch {epoch+1}, Loss: {loss.item()}, Validation Loss: {val_loss.item()}, Validation Accuracy: {val_accuracy.item()}')

end_time = time.time()
execution_time = end_time - start_time
print(f"Total execution time for training: {execution_time} seconds")

Epoch 10, Loss: 2.3824710845947266, Validation Loss: 2.460952043533325, Validation Accuracy: 0.29025423526763916
Epoch 20, Loss: 1.877109169960022, Validation Loss: 2.1570608615875244, Validation Accuracy: 0.40042373538017273
Epoch 30, Loss: 1.4780375957489014, Validation Loss: 2.0080204010009766, Validation Accuracy: 0.4576271176338196
Epoch 40, Loss: 1.1105031967163086, Validation Loss: 1.9459691047668457, Validation Accuracy: 0.508474588394165
Epoch 50, Loss: 0.7805212736129761, Validation Loss: 1.972680687904358, Validation Accuracy: 0.5233050584793091
Epoch 60, Loss: 0.5116984248161316, Validation Loss: 2.0537493228912354, Validation Accuracy: 0.5254237055778503
Epoch 70, Loss: 0.3155870735645294, Validation Loss: 2.1859121322631836, Validation Accuracy: 0.5233050584793091
Epoch 80, Loss: 0.18457546830177307, Validation Loss: 2.320556402206421, Validation Accuracy: 0.5254237055778503
Epoch 90, Loss: 0.10672536492347717, Validation Loss: 2.463252544403076, Validation Accuracy: 0.52